diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 36e10e4df4c1..44743fa0206f 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -1,11869 +1,11869 @@
//===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// These classes wrap the information about a call or function
// definition used to handle ABI compliance.
//
//===----------------------------------------------------------------------===//
#include "TargetInfo.h"
#include "ABIInfo.h"
#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGValue.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/RecordLayout.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace clang;
using namespace CodeGen;
// Helper for coercing an aggregate argument or return value into an integer
// array of the same size (including padding) and alignment. This alternate
// coercion happens only for the RenderScript ABI and can be removed after
// runtimes that rely on it are no longer supported.
//
// RenderScript assumes that the size of the argument / return value in the IR
// is the same as the size of the corresponding qualified type. This helper
// coerces the aggregate type into an array of the same size (including
// padding). This coercion is used in lieu of expansion of struct members or
// other canonical coercions that return a coerced-type of larger size.
//
// Ty - The argument / return value type
// Context - The associated ASTContext
// LLVMContext - The associated LLVMContext
static ABIArgInfo coerceToIntArray(QualType Ty,
ASTContext &Context,
llvm::LLVMContext &LLVMContext) {
// Alignment and Size are measured in bits.
const uint64_t Size = Context.getTypeSize(Ty);
const uint64_t Alignment = Context.getTypeAlign(Ty);
llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Alignment);
const uint64_t NumElements = (Size + Alignment - 1) / Alignment;
return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}
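// Illustrative example (not from the original source): a type with
// Size = 64 bits and Alignment = 32 bits yields
// NumElements = (64 + 31) / 32 = 2, so it is coerced to [2 x i32].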
static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder,
llvm::Value *Array,
llvm::Value *Value,
unsigned FirstIndex,
unsigned LastIndex) {
// Alternatively, we could emit this as a loop in the source.
for (unsigned I = FirstIndex; I <= LastIndex; ++I) {
llvm::Value *Cell =
Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I);
Builder.CreateAlignedStore(Value, Cell, CharUnits::One());
}
}
static bool isAggregateTypeForABI(QualType T) {
return !CodeGenFunction::hasScalarEvaluationKind(T) ||
T->isMemberFunctionPointerType();
}
ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByVal,
bool Realign,
llvm::Type *Padding) const {
return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByVal,
Realign, Padding);
}
ABIArgInfo
ABIInfo::getNaturalAlignIndirectInReg(QualType Ty, bool Realign) const {
return ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty),
/*ByVal*/ false, Realign);
}
Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
return Address::invalid();
}
static llvm::Type *getVAListElementType(CodeGenFunction &CGF) {
return CGF.ConvertTypeForMem(
CGF.getContext().getBuiltinVaListType()->getPointeeType());
}
bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
if (Ty->isPromotableIntegerType())
return true;
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() < getContext().getTypeSize(getContext().IntTy))
return true;
return false;
}
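// Illustrative note (not from the source): with a 32-bit 'int', plain
// 'char' and 'short' are promotable, and so is '_BitInt(17)' (17 < 32),
// while '_BitInt(32)' and wider bit-precise integers are not.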
ABIInfo::~ABIInfo() {}
/// Does the given lowering require more than the given number of
/// registers when expanded?
///
/// This is intended to be the basis of a reasonable basic implementation
/// of should{Pass,Return}IndirectlyForSwift.
///
/// For most targets, a limit of four total registers is reasonable; this
/// limits the amount of code required in order to move around the value
/// in case it wasn't produced immediately prior to the call by the caller
/// (or wasn't produced in exactly the right registers) or isn't used
/// immediately within the callee. But some targets may need to further
/// limit the register count due to an inability to support that many
/// return registers.
static bool occupiesMoreThan(CodeGenTypes &cgt,
ArrayRef<llvm::Type*> scalarTypes,
unsigned maxAllRegisters) {
unsigned intCount = 0, fpCount = 0;
for (llvm::Type *type : scalarTypes) {
if (type->isPointerTy()) {
intCount++;
} else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
auto ptrWidth = cgt.getTarget().getPointerWidth(0);
intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
} else {
assert(type->isVectorTy() || type->isFloatingPointTy());
fpCount++;
}
}
return (intCount + fpCount > maxAllRegisters);
}
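// Illustrative example (assumption): with 64-bit pointers, the scalar list
// { i8*, i128, double } counts intCount = 1 + 2 = 3 and fpCount = 1; the
// total of 4 does not exceed a maxAllRegisters of 4, so the value can still
// be passed in registers.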
bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
llvm::Type *eltTy,
unsigned numElts) const {
// The default implementation of this assumes that the target guarantees
// 128-bit SIMD support but nothing more.
return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16);
}
static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
CGCXXABI &CXXABI) {
const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
if (!RD) {
if (!RT->getDecl()->canPassInRegisters())
return CGCXXABI::RAA_Indirect;
return CGCXXABI::RAA_Default;
}
return CXXABI.getRecordArgABI(RD);
}
static CGCXXABI::RecordArgABI getRecordArgABI(QualType T,
CGCXXABI &CXXABI) {
const RecordType *RT = T->getAs<RecordType>();
if (!RT)
return CGCXXABI::RAA_Default;
return getRecordArgABI(RT, CXXABI);
}
static bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
const ABIInfo &Info) {
QualType Ty = FI.getReturnType();
if (const auto *RT = Ty->getAs<RecordType>())
if (!isa<CXXRecordDecl>(RT->getDecl()) &&
!RT->getDecl()->canPassInRegisters()) {
FI.getReturnInfo() = Info.getNaturalAlignIndirect(Ty);
return true;
}
return CXXABI.classifyReturnType(FI);
}
/// Pass transparent unions as if they were the type of the first element. Sema
/// should ensure that all elements of the union have the same "machine type".
static QualType useFirstFieldIfTransparentUnion(QualType Ty) {
if (const RecordType *UT = Ty->getAsUnionType()) {
const RecordDecl *UD = UT->getDecl();
if (UD->hasAttr<TransparentUnionAttr>()) {
assert(!UD->field_empty() && "sema created an empty transparent union");
return UD->field_begin()->getType();
}
}
return Ty;
}
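// For illustration (not from the source): given
//   typedef union { int *p; long l; } __attribute__((transparent_union)) U;
// an argument of type U is classified as if it were 'int *', its first
// field.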
CGCXXABI &ABIInfo::getCXXABI() const {
return CGT.getCXXABI();
}
ASTContext &ABIInfo::getContext() const {
return CGT.getContext();
}
llvm::LLVMContext &ABIInfo::getVMContext() const {
return CGT.getLLVMContext();
}
const llvm::DataLayout &ABIInfo::getDataLayout() const {
return CGT.getDataLayout();
}
const TargetInfo &ABIInfo::getTarget() const {
return CGT.getTarget();
}
const CodeGenOptions &ABIInfo::getCodeGenOpts() const {
return CGT.getCodeGenOpts();
}
bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); }
bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
return false;
}
bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const {
return false;
}
bool ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const {
// For compatibility with GCC, ignore empty bitfields in C++ mode.
return getContext().getLangOpts().CPlusPlus;
}
LLVM_DUMP_METHOD void ABIArgInfo::dump() const {
raw_ostream &OS = llvm::errs();
OS << "(ABIArgInfo Kind=";
switch (TheKind) {
case Direct:
OS << "Direct Type=";
if (llvm::Type *Ty = getCoerceToType())
Ty->print(OS);
else
OS << "null";
break;
case Extend:
OS << "Extend";
break;
case Ignore:
OS << "Ignore";
break;
case InAlloca:
OS << "InAlloca Offset=" << getInAllocaFieldIndex();
break;
case Indirect:
OS << "Indirect Align=" << getIndirectAlign().getQuantity()
<< " ByVal=" << getIndirectByVal()
<< " Realign=" << getIndirectRealign();
break;
case IndirectAliased:
OS << "Indirect Align=" << getIndirectAlign().getQuantity()
<< " AadrSpace=" << getIndirectAddrSpace()
<< " Realign=" << getIndirectRealign();
break;
case Expand:
OS << "Expand";
break;
case CoerceAndExpand:
OS << "CoerceAndExpand Type=";
getCoerceAndExpandType()->print(OS);
break;
}
OS << ")\n";
}
// Dynamically round a pointer up to a multiple of the given alignment.
static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
llvm::Value *Ptr,
CharUnits Align) {
llvm::Value *PtrAsInt = Ptr;
// OverflowArgArea = (OverflowArgArea + Align - 1) & -Align;
PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy);
PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt,
llvm::ConstantInt::get(CGF.IntPtrTy, Align.getQuantity() - 1));
PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt,
llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity()));
PtrAsInt = CGF.Builder.CreateIntToPtr(PtrAsInt,
Ptr->getType(),
Ptr->getName() + ".aligned");
return PtrAsInt;
}
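// Worked example (illustrative): aligning the pointer value 0x1004 to
// 16 bytes computes (0x1004 + 15) & -16 == 0x1010.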
/// Emit va_arg for a platform using the common void* representation,
/// where arguments are simply emitted in an array of slots on the stack.
///
/// This version implements the core direct-value passing rules.
///
/// \param SlotSize - The size and alignment of a stack slot.
/// Each argument will be allocated to a multiple of this number of
/// slots, and all the slots will be aligned to this value.
/// \param AllowHigherAlign - The slot alignment is not a cap;
/// an argument type with an alignment greater than the slot size
/// will be emitted on a higher-alignment address, potentially
/// leaving one or more empty slots behind as padding. If this
/// is false, the returned address might be less-aligned than
/// DirectAlign.
static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
Address VAListAddr,
llvm::Type *DirectTy,
CharUnits DirectSize,
CharUnits DirectAlign,
CharUnits SlotSize,
bool AllowHigherAlign) {
// Cast the element type to i8* if necessary. Some platforms define
// va_list as a struct containing an i8* instead of just an i8*.
if (VAListAddr.getElementType() != CGF.Int8PtrTy)
VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy);
llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur");
// If the CC aligns values higher than the slot size, do so if needed.
Address Addr = Address::invalid();
if (AllowHigherAlign && DirectAlign > SlotSize) {
Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign),
CGF.Int8Ty, DirectAlign);
} else {
Addr = Address(Ptr, CGF.Int8Ty, SlotSize);
}
// Advance the pointer past the argument, then store that back.
CharUnits FullDirectSize = DirectSize.alignTo(SlotSize);
Address NextPtr =
CGF.Builder.CreateConstInBoundsByteGEP(Addr, FullDirectSize, "argp.next");
CGF.Builder.CreateStore(NextPtr.getPointer(), VAListAddr);
// If the argument is smaller than a slot, and this is a big-endian
// target, the argument will be right-adjusted in its slot.
if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() &&
!DirectTy->isStructTy()) {
Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize);
}
Addr = CGF.Builder.CreateElementBitCast(Addr, DirectTy);
return Addr;
}
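// Illustrative note (assumption): with 8-byte slots on a big-endian target,
// a 4-byte integer is right-adjusted within its slot, so the returned
// address is the slot start plus (8 - 4) bytes; struct types skip this
// adjustment.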
/// Emit va_arg for a platform using the common void* representation,
/// where arguments are simply emitted in an array of slots on the stack.
///
/// \param IsIndirect - Values of this type are passed indirectly.
/// \param ValueInfo - The size and alignment of this type, generally
/// computed with getContext().getTypeInfoInChars(ValueTy).
/// \param SlotSizeAndAlign - The size and alignment of a stack slot.
/// Each argument will be allocated to a multiple of this number of
/// slots, and all the slots will be aligned to this value.
/// \param AllowHigherAlign - The slot alignment is not a cap;
/// an argument type with an alignment greater than the slot size
/// will be emitted on a higher-alignment address, potentially
/// leaving one or more empty slots behind as padding.
static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType ValueTy, bool IsIndirect,
TypeInfoChars ValueInfo,
CharUnits SlotSizeAndAlign,
bool AllowHigherAlign) {
// The size and alignment of the value that was passed directly.
CharUnits DirectSize, DirectAlign;
if (IsIndirect) {
DirectSize = CGF.getPointerSize();
DirectAlign = CGF.getPointerAlign();
} else {
DirectSize = ValueInfo.Width;
DirectAlign = ValueInfo.Align;
}
// Cast the address we've calculated to the right type.
llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy), *ElementTy = DirectTy;
if (IsIndirect)
DirectTy = DirectTy->getPointerTo(0);
Address Addr =
emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize, DirectAlign,
SlotSizeAndAlign, AllowHigherAlign);
if (IsIndirect) {
Addr = Address(CGF.Builder.CreateLoad(Addr), ElementTy, ValueInfo.Align);
}
return Addr;
}
static Address complexTempStructure(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty, CharUnits SlotSize,
CharUnits EltSize, const ComplexType *CTy) {
Address Addr =
emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, SlotSize * 2,
SlotSize, SlotSize, /*AllowHigher*/ true);
Address RealAddr = Addr;
Address ImagAddr = RealAddr;
if (CGF.CGM.getDataLayout().isBigEndian()) {
RealAddr =
CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize - EltSize);
ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
2 * SlotSize - EltSize);
} else {
ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize);
}
llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");
Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
/*init*/ true);
return Temp;
}
static Address emitMergePHI(CodeGenFunction &CGF,
Address Addr1, llvm::BasicBlock *Block1,
Address Addr2, llvm::BasicBlock *Block2,
const llvm::Twine &Name = "") {
assert(Addr1.getType() == Addr2.getType());
llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name);
PHI->addIncoming(Addr1.getPointer(), Block1);
PHI->addIncoming(Addr2.getPointer(), Block2);
CharUnits Align = std::min(Addr1.getAlignment(), Addr2.getAlignment());
return Address(PHI, Addr1.getElementType(), Align);
}
TargetCodeGenInfo::TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info)
: Info(std::move(Info)) {}
TargetCodeGenInfo::~TargetCodeGenInfo() = default;
// If someone can figure out a general rule for this, that would be great.
// It's probably just doomed to be platform-dependent, though.
unsigned TargetCodeGenInfo::getSizeOfUnwindException() const {
// Verified for:
// x86-64 FreeBSD, Linux, Darwin
// x86-32 FreeBSD, Linux, Darwin
// PowerPC Linux, Darwin
// ARM Darwin (*not* EABI)
// AArch64 Linux
return 32;
}
bool TargetCodeGenInfo::isNoProtoCallVariadic(const CallArgList &args,
const FunctionNoProtoType *fnType) const {
// The following conventions are known to require this to be false:
// x86_stdcall
// MIPS
// For everything else, we just prefer false unless we opt out.
return false;
}
void
TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const {
// This assumes the user is passing a library name like "rt" instead of a
// filename like "librt.a/so", and that they don't care whether it's static or
// dynamic.
Opt = "-l";
Opt += Lib;
}
unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
// OpenCL kernels are called via an explicit runtime API with arguments
// set with clSetKernelArg(), not as normal sub-functions.
// Return SPIR_KERNEL by default as the kernel calling convention to
// ensure the fingerprint is fixed in such a way that each OpenCL
// argument gets one matching argument in the produced kernel function
// argument list, which makes a feasible implementation of
// clSetKernelArg() possible even for aggregates. If we used the default
// C calling convention here, clSetKernelArg() might break depending on
// target-specific conventions; different targets might, for example,
// split structs passed by value into multiple function arguments.
return llvm::CallingConv::SPIR_KERNEL;
}
llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T, QualType QT) const {
return llvm::ConstantPointerNull::get(T);
}
LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
assert(!CGM.getLangOpts().OpenCL &&
!(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
"Address space agnostic languages only");
return D ? D->getType().getAddressSpace() : LangAS::Default;
}
llvm::Value *TargetCodeGenInfo::performAddrSpaceCast(
CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr,
LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const {
// Since target may map different address spaces in AST to the same address
// space, an address space conversion may end up as a bitcast.
if (auto *C = dyn_cast<llvm::Constant>(Src))
return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy);
// Try to preserve the source's name to make IR more readable.
return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Src, DestTy, Src->hasName() ? Src->getName() + ".ascast" : "");
}
llvm::Constant *
TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src,
LangAS SrcAddr, LangAS DestAddr,
llvm::Type *DestTy) const {
// Since target may map different address spaces in AST to the same address
// space, an address space conversion may end up as a bitcast.
return llvm::ConstantExpr::getPointerCast(Src, DestTy);
}
llvm::SyncScope::ID
TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
SyncScope Scope,
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const {
return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */
}
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
/// isEmptyField - Return true iff the field is "empty", that is, it
/// is an unnamed bit-field or an (array of) empty record(s).
static bool isEmptyField(ASTContext &Context, const FieldDecl *FD,
bool AllowArrays) {
if (FD->isUnnamedBitfield())
return true;
QualType FT = FD->getType();
// Constant arrays of empty records count as empty; strip them off.
// Constant arrays of zero length always count as empty.
bool WasArray = false;
if (AllowArrays)
while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
if (AT->getSize() == 0)
return true;
FT = AT->getElementType();
// The [[no_unique_address]] special case below does not apply to
// arrays of C++ empty records, so we need to remember this fact.
WasArray = true;
}
const RecordType *RT = FT->getAs<RecordType>();
if (!RT)
return false;
// C++ record fields are never empty, at least in the Itanium ABI.
//
// FIXME: We should use a predicate for whether this behavior is true in the
// current ABI.
//
// The exception to the above rule are fields marked with the
// [[no_unique_address]] attribute (since C++20). Those do count as empty
// according to the Itanium ABI. The exception applies only to records,
// not arrays of records, so we must also check whether we stripped off an
// array type above.
if (isa<CXXRecordDecl>(RT->getDecl()) &&
(WasArray || !FD->hasAttr<NoUniqueAddressAttr>()))
return false;
return isEmptyRecord(Context, FT, AllowArrays);
}
/// isEmptyRecord - Return true iff a structure contains only empty
/// fields. Note that a structure with a flexible array member is not
/// considered empty.
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) {
const RecordType *RT = T->getAs<RecordType>();
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return false;
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const auto &I : CXXRD->bases())
if (!isEmptyRecord(Context, I.getType(), true))
return false;
for (const auto *I : RD->fields())
if (!isEmptyField(Context, I, AllowArrays))
return false;
return true;
}
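// Illustrative example (assumption): in C, 'struct E {};' and a struct whose
// only field is 'struct E e[4];' both count as empty here. In C++, a field
// of empty class type counts as empty only when it is marked
// [[no_unique_address]]; arrays of empty classes never do.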
/// isSingleElementStruct - Determine if a structure is a "single
/// element struct", i.e. it has exactly one non-empty field or
/// exactly one field which is itself a single element
/// struct. Structures with flexible array members are never
/// considered single element structs.
///
/// \return The field declaration for the single non-empty field, if
/// it exists.
static const Type *isSingleElementStruct(QualType T, ASTContext &Context) {
const RecordType *RT = T->getAs<RecordType>();
if (!RT)
return nullptr;
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return nullptr;
const Type *Found = nullptr;
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
for (const auto &I : CXXRD->bases()) {
// Ignore empty records.
if (isEmptyRecord(Context, I.getType(), true))
continue;
// If we already found an element then this isn't a single-element struct.
if (Found)
return nullptr;
// If this is non-empty and not a single element struct, the composite
// cannot be a single element struct.
Found = isSingleElementStruct(I.getType(), Context);
if (!Found)
return nullptr;
}
}
// Check for single element.
for (const auto *FD : RD->fields()) {
QualType FT = FD->getType();
// Ignore empty fields.
if (isEmptyField(Context, FD, true))
continue;
// If we already found an element then this isn't a single-element
// struct.
if (Found)
return nullptr;
// Treat single element arrays as the element.
while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
if (AT->getSize().getZExtValue() != 1)
break;
FT = AT->getElementType();
}
if (!isAggregateTypeForABI(FT)) {
Found = FT.getTypePtr();
} else {
Found = isSingleElementStruct(FT, Context);
if (!Found)
return nullptr;
}
}
// We don't consider a struct a single-element struct if it has
// padding beyond the element type.
if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T))
return nullptr;
return Found;
}
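// Illustrative example (assumption): 'struct S { struct { float f; } inner; }'
// is a single-element struct with element type 'float', while
// 'struct P { float f; float g; }' is not (two non-empty fields), and a
// struct with tail padding beyond its only field fails the final size check.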
namespace {
Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
const ABIArgInfo &AI) {
// This default implementation defers to the llvm backend's va_arg
// instruction. It can handle only passing arguments directly
// (typically only handled in the backend for primitive types), or
// aggregates passed indirectly by pointer (NOTE: if the "byval"
// flag has ABI impact in the callee, this implementation cannot
// work.)
// Only a few cases are covered here at the moment -- those needed
// by the default ABI.
llvm::Value *Val;
if (AI.isIndirect()) {
assert(!AI.getPaddingType() &&
"Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
assert(
!AI.getIndirectRealign() &&
"Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!");
auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty);
CharUnits TyAlignForABI = TyInfo.Align;
llvm::Type *ElementTy = CGF.ConvertTypeForMem(Ty);
llvm::Type *BaseTy = llvm::PointerType::getUnqual(ElementTy);
llvm::Value *Addr =
CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy);
return Address(Addr, ElementTy, TyAlignForABI);
} else {
assert((AI.isDirect() || AI.isExtend()) &&
"Unexpected ArgInfo Kind in generic VAArg emitter!");
assert(!AI.getInReg() &&
"Unexpected InReg seen in arginfo in generic VAArg emitter!");
assert(!AI.getPaddingType() &&
"Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
assert(!AI.getDirectOffset() &&
"Unexpected DirectOffset seen in arginfo in generic VAArg emitter!");
assert(!AI.getCoerceToType() &&
"Unexpected CoerceToType seen in arginfo in generic VAArg emitter!");
Address Temp = CGF.CreateMemTemp(Ty, "varet");
Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(),
CGF.ConvertTypeForMem(Ty));
CGF.Builder.CreateStore(Val, Temp);
return Temp;
}
}
/// DefaultABIInfo - The default implementation for ABI specific
/// details. This implementation provides information which results in
/// self-consistent and sensible LLVM IR generation, but does not
/// conform to any particular ABI.
class DefaultABIInfo : public ABIInfo {
public:
DefaultABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override {
return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
}
};
class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
public:
DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
};
ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
if (isAggregateTypeForABI(Ty)) {
// Records with non-trivial destructors/copy-constructors should not be
// passed by value.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
return getNaturalAlignIndirect(Ty);
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
ASTContext &Context = getContext();
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() >
Context.getTypeSize(Context.getTargetInfo().hasInt128Type()
? Context.Int128Ty
: Context.LongLongTy))
return getNaturalAlignIndirect(Ty);
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (isAggregateTypeForABI(RetTy))
return getNaturalAlignIndirect(RetTy);
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() >
getContext().getTypeSize(getContext().getTargetInfo().hasInt128Type()
? getContext().Int128Ty
: getContext().LongLongTy))
return getNaturalAlignIndirect(RetTy);
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
//===----------------------------------------------------------------------===//
// WebAssembly ABI Implementation
//
// This is a very simple ABI that relies a lot on DefaultABIInfo.
//===----------------------------------------------------------------------===//
class WebAssemblyABIInfo final : public SwiftABIInfo {
public:
enum ABIKind {
MVP = 0,
ExperimentalMV = 1,
};
private:
DefaultABIInfo defaultInfo;
ABIKind Kind;
public:
explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind)
: SwiftABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
// DefaultABIInfo's classifyReturnType and classifyArgumentType are
// non-virtual, but computeInfo and EmitVAArg are virtual, so we
// override them.
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &Arg : FI.arguments())
Arg.info = classifyArgumentType(Arg.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return false;
}
};
class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
public:
explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
WebAssemblyABIInfo::ABIKind K)
: TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (const auto *Attr = FD->getAttr<WebAssemblyImportModuleAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
llvm::AttrBuilder B(GV->getContext());
B.addAttribute("wasm-import-module", Attr->getImportModule());
Fn->addFnAttrs(B);
}
if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
llvm::AttrBuilder B(GV->getContext());
B.addAttribute("wasm-import-name", Attr->getImportName());
Fn->addFnAttrs(B);
}
if (const auto *Attr = FD->getAttr<WebAssemblyExportNameAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
llvm::AttrBuilder B(GV->getContext());
B.addAttribute("wasm-export-name", Attr->getExportName());
Fn->addFnAttrs(B);
}
}
if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
llvm::Function *Fn = cast<llvm::Function>(GV);
if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype())
Fn->addFnAttr("no-prototype");
}
}
};
/// Classify argument of given type \p Ty.
ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
if (isAggregateTypeForABI(Ty)) {
// Records with non-trivial destructors/copy-constructors should not be
// passed by value.
if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
// Lower single-element structs to just pass a regular value. TODO: We
// could do reasonable-size multiple-element structs too, using getExpand(),
// though watch out for things like bitfields.
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
// For the experimental multivalue ABI, fully expand all other aggregates
if (Kind == ABIKind::ExperimentalMV) {
const RecordType *RT = Ty->getAs<RecordType>();
assert(RT);
bool HasBitField = false;
for (auto *Field : RT->getDecl()->fields()) {
if (Field->isBitField()) {
HasBitField = true;
break;
}
}
if (!HasBitField)
return ABIArgInfo::getExpand();
}
}
// Otherwise just do the default thing.
return defaultInfo.classifyArgumentType(Ty);
}
ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
if (isAggregateTypeForABI(RetTy)) {
// Records with non-trivial destructors/copy-constructors should not be
// returned by value.
if (!getRecordArgABI(RetTy, getCXXABI())) {
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
// Lower single-element structs to just return a regular value. TODO: We
// could do reasonable-size multiple-element structs too, using
// ABIArgInfo::getDirect().
if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
// For the experimental multivalue ABI, return all other aggregates
if (Kind == ABIKind::ExperimentalMV)
return ABIArgInfo::getDirect();
}
}
// Otherwise just do the default thing.
return defaultInfo.classifyReturnType(RetTy);
}
Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
bool IsIndirect = isAggregateTypeForABI(Ty) &&
!isEmptyRecord(getContext(), Ty, true) &&
!isSingleElementStruct(Ty, getContext());
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(4),
/*AllowHigherAlign=*/true);
}
//===----------------------------------------------------------------------===//
// le32/PNaCl bitcode ABI Implementation
//
// This is a simplified version of the x86_32 ABI. Arguments and return values
// are always passed on the stack.
//===----------------------------------------------------------------------===//
class PNaClABIInfo : public ABIInfo {
public:
PNaClABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF,
Address VAListAddr, QualType Ty) const override;
};
class PNaClTargetCodeGenInfo : public TargetCodeGenInfo {
public:
PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<PNaClABIInfo>(CGT)) {}
};
void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
// The PNaCL ABI is a bit odd, in that varargs don't use normal
// function classification. Structs get passed directly for varargs
// functions, through a rewriting transform in
// pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows
// this target to actually support va_arg instructions with an
// aggregate type, unlike other targets.
return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
}
/// Classify argument of given type \p Ty.
ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const {
if (isAggregateTypeForABI(Ty)) {
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
return getNaturalAlignIndirect(Ty);
} else if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
// Treat an enum type as its underlying type.
Ty = EnumTy->getDecl()->getIntegerType();
} else if (Ty->isFloatingType()) {
// Floating-point types don't go inreg.
return ABIArgInfo::getDirect();
} else if (const auto *EIT = Ty->getAs<BitIntType>()) {
// Treat bit-precise integers as integers if <= 64, otherwise pass
// indirectly.
if (EIT->getNumBits() > 64)
return getNaturalAlignIndirect(Ty);
return ABIArgInfo::getDirect();
}
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
// In the PNaCl ABI we always return records/structures on the stack.
if (isAggregateTypeForABI(RetTy))
return getNaturalAlignIndirect(RetTy);
// Treat bit-precise integers as integers if <= 64, otherwise pass indirectly.
if (const auto *EIT = RetTy->getAs<BitIntType>()) {
if (EIT->getNumBits() > 64)
return getNaturalAlignIndirect(RetTy);
return ABIArgInfo::getDirect();
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
/// IsX86_MMXType - Return true if this is an MMX type.
bool IsX86_MMXType(llvm::Type *IRType) {
// Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
IRType->getScalarSizeInBits() != 64;
}
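// For illustration: <2 x i32>, <4 x i16>, and <8 x i8> all satisfy this
// predicate, while <1 x i64> does not, because its scalar size is 64 bits,
// and <2 x float> does not, because its elements are not integers.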
static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
StringRef Constraint,
llvm::Type* Ty) {
bool IsMMXCons = llvm::StringSwitch<bool>(Constraint)
.Cases("y", "&y", "^Ym", true)
.Default(false);
if (IsMMXCons && Ty->isVectorTy()) {
if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedSize() !=
64) {
// Invalid MMX constraint
return nullptr;
}
return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
}
// No operation needed
return Ty;
}
/// Returns true if this type can be passed in SSE registers with the
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
if (BT->getKind() == BuiltinType::LongDouble) {
if (&Context.getTargetInfo().getLongDoubleFormat() ==
&llvm::APFloat::x87DoubleExtended())
return false;
}
return true;
}
} else if (const VectorType *VT = Ty->getAs<VectorType>()) {
// vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
// registers specially.
unsigned VecSize = Context.getTypeSize(VT);
if (VecSize == 128 || VecSize == 256 || VecSize == 512)
return true;
}
return false;
}
/// Returns true if this aggregate is small enough to be passed in SSE registers
/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
return NumMembers <= 4;
}
/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {
auto AI = ABIArgInfo::getDirect(T);
AI.setInReg(true);
AI.setCanBeFlattened(false);
return AI;
}
//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//
/// Similar to llvm::CCState, but for Clang.
struct CCState {
CCState(CGFunctionInfo &FI)
: IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}
llvm::SmallBitVector IsPreassigned;
unsigned CC = CallingConv::CC_C;
unsigned FreeRegs = 0;
unsigned FreeSSERegs = 0;
};
/// X86_32ABIInfo - The X86-32 ABI information.
class X86_32ABIInfo : public SwiftABIInfo {
enum Class {
Integer,
Float
};
static const unsigned MinABIStackAlignInBytes = 4;
bool IsDarwinVectorABI;
bool IsRetSmallStructInRegABI;
bool IsWin32StructABI;
bool IsSoftFloatABI;
bool IsMCUABI;
bool IsLinuxABI;
unsigned DefaultNumRegisterParameters;
static bool isRegisterSize(unsigned Size) {
return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
}
bool isHomogeneousAggregateBaseType(QualType Ty) const override {
// FIXME: Assumes vectorcall is in use.
return isX86VectorTypeForVectorCall(getContext(), Ty);
}
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t NumMembers) const override {
// FIXME: Assumes vectorcall is in use.
return isX86VectorCallAggregateSmallEnough(NumMembers);
}
bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;
/// getIndirectResult - Given a source type \arg Ty, return a suitable result
/// such that the argument will be passed in memory.
ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
/// Return the alignment to use for the given type on the stack.
unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
/// Updates the number of available free registers; returns
/// true if any registers were allocated.
bool updateFreeRegs(QualType Ty, CCState &State) const;
bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
bool &NeedsPadding) const;
bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;
bool canExpandIndirectArgument(QualType Ty) const;
/// Rewrite the function info so that all memory arguments use
/// inalloca.
void rewriteWithInAlloca(CGFunctionInfo &FI) const;
void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
CharUnits &StackOffset, ABIArgInfo &Info,
QualType Type) const;
void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;
public:
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
bool RetSmallStructInRegABI, bool Win32StructABI,
unsigned NumRegisterParameters, bool SoftFloatABI)
: SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
IsRetSmallStructInRegABI(RetSmallStructInRegABI),
IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
CGT.getTarget().getTriple().isOSCygMing()),
DefaultNumRegisterParameters(NumRegisterParameters) {}
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
// LLVM's x86-32 lowering currently only assigns up to three
// integer registers and three fp registers. Oddly, it'll use up to
// four vector registers for vectors, but those can overlap with the
// scalar registers.
return occupiesMoreThan(CGT, scalars, /*total*/ 3);
}
bool isSwiftErrorInRegister() const override {
// x86-32 lowering does not support passing swifterror in a register.
return false;
}
};
class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
bool RetSmallStructInRegABI, bool Win32StructABI,
unsigned NumRegisterParameters, bool SoftFloatABI)
: TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>(
CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
NumRegisterParameters, SoftFloatABI)) {}
static bool isStructReturnInRegABI(
const llvm::Triple &Triple, const CodeGenOptions &Opts);
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
// Darwin uses different dwarf register numbers for EH.
if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
return 4;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
StringRef Constraint,
llvm::Type* Ty) const override {
return X86AdjustInlineAsmType(CGF, Constraint, Ty);
}
void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
std::string &Constraints,
std::vector<llvm::Type *> &ResultRegTypes,
std::vector<llvm::Type *> &ResultTruncRegTypes,
std::vector<LValue> &ResultRegDests,
std::string &AsmString,
unsigned NumOutputs) const override;
llvm::Constant *
getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
unsigned Sig = (0xeb << 0) | // jmp rel8
(0x06 << 8) | // .+0x08
('v' << 16) |
('2' << 24);
return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "movl\t%ebp, %ebp"
"\t\t// marker for objc_retainAutoreleaseReturnValue";
}
};
}
/// Rewrite input constraint references after adding some output constraints.
/// In the case where there is one output and one input and we add one output,
/// we need to replace all operand references greater than or equal to 1:
/// mov $0, $1
/// mov eax, $1
/// The result will be:
/// mov $0, $2
/// mov eax, $2
static void rewriteInputConstraintReferences(unsigned FirstIn,
unsigned NumNewOuts,
std::string &AsmString) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
size_t Pos = 0;
while (Pos < AsmString.size()) {
size_t DollarStart = AsmString.find('$', Pos);
if (DollarStart == std::string::npos)
DollarStart = AsmString.size();
size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
if (DollarEnd == std::string::npos)
DollarEnd = AsmString.size();
OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
Pos = DollarEnd;
size_t NumDollars = DollarEnd - DollarStart;
if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
// We have an operand reference.
size_t DigitStart = Pos;
if (AsmString[DigitStart] == '{') {
OS << '{';
++DigitStart;
}
size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
if (DigitEnd == std::string::npos)
DigitEnd = AsmString.size();
StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
unsigned OperandIndex;
if (!OperandStr.getAsInteger(10, OperandIndex)) {
if (OperandIndex >= FirstIn)
OperandIndex += NumNewOuts;
OS << OperandIndex;
} else {
OS << OperandStr;
}
Pos = DigitEnd;
}
}
AsmString = std::move(OS.str());
}
/// Add output constraints for EAX:EDX because they are return registers.
void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
std::vector<llvm::Type *> &ResultRegTypes,
std::vector<llvm::Type *> &ResultTruncRegTypes,
std::vector<LValue> &ResultRegDests, std::string &AsmString,
unsigned NumOutputs) const {
uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());
// Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
// larger.
if (!Constraints.empty())
Constraints += ',';
if (RetWidth <= 32) {
Constraints += "={eax}";
ResultRegTypes.push_back(CGF.Int32Ty);
} else {
// Use the 'A' constraint for EAX:EDX.
Constraints += "=A";
ResultRegTypes.push_back(CGF.Int64Ty);
}
// Truncate EAX or EAX:EDX to an integer of the appropriate size.
llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
ResultTruncRegTypes.push_back(CoerceTy);
// Coerce the integer by bitcasting the return slot pointer.
ReturnSlot.setAddress(
CGF.Builder.CreateElementBitCast(ReturnSlot.getAddress(CGF), CoerceTy));
ResultRegDests.push_back(ReturnSlot);
rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
}
/// shouldReturnTypeInRegister - Determine if the given type should be
/// returned in a register (for the Darwin and MCU ABI).
bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
ASTContext &Context) const {
uint64_t Size = Context.getTypeSize(Ty);
// For i386, the type must be register sized.
// For the MCU ABI, it only needs to be <= 8 bytes.
if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
return false;
if (Ty->isVectorType()) {
// 64- and 128- bit vectors inside structures are not returned in
// registers.
if (Size == 64 || Size == 128)
return false;
return true;
}
// If this is a builtin, pointer, enum, complex type, member pointer, or
// member function pointer it is ok.
if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
Ty->isAnyComplexType() || Ty->isEnumeralType() ||
Ty->isBlockPointerType() || Ty->isMemberPointerType())
return true;
// Arrays are treated like records.
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
return shouldReturnTypeInRegister(AT->getElementType(), Context);
// Otherwise, it must be a record type.
const RecordType *RT = Ty->getAs<RecordType>();
if (!RT) return false;
// FIXME: Traverse bases here too.
// Structure types are passed in register if all fields would be
// passed in a register.
for (const auto *FD : RT->getDecl()->fields()) {
// Empty fields are ignored.
if (isEmptyField(Context, FD, true))
continue;
// Check fields recursively.
if (!shouldReturnTypeInRegister(FD->getType(), Context))
return false;
}
return true;
}
static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
// Treat complex types as the element type.
if (const ComplexType *CTy = Ty->getAs<ComplexType>())
Ty = CTy->getElementType();
// Check for a type which we know has a simple scalar argument-passing
// convention without any padding. (We're specifically looking for 32
// and 64-bit integer and integer-equivalents, float, and double.)
if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
!Ty->isEnumeralType() && !Ty->isBlockPointerType())
return false;
uint64_t Size = Context.getTypeSize(Ty);
return Size == 32 || Size == 64;
}
static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
uint64_t &Size) {
for (const auto *FD : RD->fields()) {
// Scalar arguments on the stack get 4 byte alignment on x86. If the
// argument is smaller than 32-bits, expanding the struct will create
// alignment padding.
if (!is32Or64BitBasicType(FD->getType(), Context))
return false;
// FIXME: Reject bit-fields wholesale; there are two problems: we don't know
// how to expand them yet, and the predicate for telling if a bitfield still
// counts as "basic" is more complicated than what we were doing previously.
if (FD->isBitField())
return false;
Size += Context.getTypeSize(FD->getType());
}
return true;
}
static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
uint64_t &Size) {
// Don't do this if there are any non-empty bases.
for (const CXXBaseSpecifier &Base : RD->bases()) {
if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(),
Size))
return false;
}
if (!addFieldSizes(Context, RD, Size))
return false;
return true;
}
/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
/// optimizations.
bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
// We can only expand structure types.
const RecordType *RT = Ty->getAs<RecordType>();
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
uint64_t Size = 0;
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
if (!IsWin32StructABI) {
// On non-Windows, we have to conservatively match our old bitcode
// prototypes in order to be ABI-compatible at the bitcode level.
if (!CXXRD->isCLike())
return false;
} else {
// Don't do this for dynamic classes.
if (CXXRD->isDynamicClass())
return false;
}
if (!addBaseAndFieldSizes(getContext(), CXXRD, Size))
return false;
} else {
if (!addFieldSizes(getContext(), RD, Size))
return false;
}
// We can do this if there was no alignment padding.
return Size == getContext().getTypeSize(Ty);
}
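// Illustrative example (assumption, Win32 layout): 'struct { int a; int b; }'
// can be expanded (field sizes sum to its 64-bit type size), while
// 'struct { int a; double b; }' cannot, since alignment padding makes the
// type 128 bits even though the fields sum to only 96.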
ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const {
// If the return value is indirect, then the hidden argument is consuming one
// integer register.
if (State.FreeRegs) {
--State.FreeRegs;
if (!IsMCUABI)
return getNaturalAlignIndirectInReg(RetTy);
}
return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
}
ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
CCState &State) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
const Type *Base = nullptr;
uint64_t NumElts = 0;
if ((State.CC == llvm::CallingConv::X86_VectorCall ||
State.CC == llvm::CallingConv::X86_RegCall) &&
isHomogeneousAggregate(RetTy, Base, NumElts)) {
// The LLVM struct type for such an aggregate should lower properly.
return ABIArgInfo::getDirect();
}
if (const VectorType *VT = RetTy->getAs<VectorType>()) {
// On Darwin, some vectors are returned in registers.
if (IsDarwinVectorABI) {
uint64_t Size = getContext().getTypeSize(RetTy);
// 128-bit vectors are a special case; they are returned in
// registers and we need to make sure to pick a type the LLVM
// backend will like.
if (Size == 128)
return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
llvm::Type::getInt64Ty(getVMContext()), 2));
// Always return in register if it fits in a general purpose
// register, or if it is 64 bits and has a single element.
if ((Size == 8 || Size == 16 || Size == 32) ||
(Size == 64 && VT->getNumElements() == 1))
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
Size));
return getIndirectReturnResult(RetTy, State);
}
return ABIArgInfo::getDirect();
}
if (isAggregateTypeForABI(RetTy)) {
if (const RecordType *RT = RetTy->getAs<RecordType>()) {
// Structures with flexible arrays are always indirect.
if (RT->getDecl()->hasFlexibleArrayMember())
return getIndirectReturnResult(RetTy, State);
}
// If specified, structs and unions are always indirect.
if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
return getIndirectReturnResult(RetTy, State);
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
// Return complex of _Float16 as <2 x half> so the backend will use xmm0.
if (const ComplexType *CT = RetTy->getAs<ComplexType>()) {
QualType ET = getContext().getCanonicalType(CT->getElementType());
if (ET->isFloat16Type())
return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
llvm::Type::getHalfTy(getVMContext()), 2));
}
// Small structures which are register sized are generally returned
// in a register.
if (shouldReturnTypeInRegister(RetTy, getContext())) {
uint64_t Size = getContext().getTypeSize(RetTy);
// As a special-case, if the struct is a "single-element" struct, and
// the field is of type "float" or "double", return it in a
// floating-point register. (MSVC does not apply this special case.)
// We apply a similar transformation for pointer types to improve the
// quality of the generated IR.
if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
|| SeltTy->hasPointerRepresentation())
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
// FIXME: We should be able to narrow this integer in cases with dead
// padding.
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size));
}
return getIndirectReturnResult(RetTy, State);
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() > 64)
return getIndirectReturnResult(RetTy, State);
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
static bool isSIMDVectorType(ASTContext &Context, QualType Ty) {
return Ty->getAs<VectorType>() && Context.getTypeSize(Ty) == 128;
}
static bool isRecordWithSIMDVectorType(ASTContext &Context, QualType Ty) {
const RecordType *RT = Ty->getAs<RecordType>();
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const auto &I : CXXRD->bases())
if (!isRecordWithSIMDVectorType(Context, I.getType()))
return false;
for (const auto *i : RD->fields()) {
QualType FT = i->getType();
if (isSIMDVectorType(Context, FT))
return true;
if (isRecordWithSIMDVectorType(Context, FT))
return true;
}
return false;
}
unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
unsigned Align) const {
// Otherwise, if the alignment is less than or equal to the minimum ABI
// alignment, just use the default; the backend will handle this.
if (Align <= MinABIStackAlignInBytes)
return 0; // Use default alignment.
if (IsLinuxABI) {
// Exclude other System V OSes (e.g. Darwin, PS4 and FreeBSD) since we don't
// want to spend any effort dealing with the ramifications of ABI breaks.
//
// If the vector type is __m128/__m256/__m512, return the default alignment.
if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
return Align;
}
// On non-Darwin, the stack type alignment is always 4.
if (!IsDarwinVectorABI) {
// Set explicit alignment, since we may need to realign the top.
return MinABIStackAlignInBytes;
}
// Otherwise, if the type contains an SSE vector type, the alignment is 16.
if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) ||
isRecordWithSIMDVectorType(getContext(), Ty)))
return 16;
return MinABIStackAlignInBytes;
}
ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
CCState &State) const {
if (!ByVal) {
if (State.FreeRegs) {
--State.FreeRegs; // Non-byval indirects just use one pointer.
if (!IsMCUABI)
return getNaturalAlignIndirectInReg(Ty);
}
return getNaturalAlignIndirect(Ty, false);
}
// Compute the byval alignment.
unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
if (StackAlign == 0)
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true);
// If the stack alignment is less than the type alignment, realign the
// argument.
bool Realign = TypeAlign > StackAlign;
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
/*ByVal=*/true, Realign);
}
X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
const Type *T = isSingleElementStruct(Ty, getContext());
if (!T)
T = Ty.getTypePtr();
if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
BuiltinType::Kind K = BT->getKind();
if (K == BuiltinType::Float || K == BuiltinType::Double)
return Float;
}
return Integer;
}
bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
if (!IsSoftFloatABI) {
Class C = classify(Ty);
if (C == Float)
return false;
}
unsigned Size = getContext().getTypeSize(Ty);
unsigned SizeInRegs = (Size + 31) / 32;
if (SizeInRegs == 0)
return false;
if (!IsMCUABI) {
if (SizeInRegs > State.FreeRegs) {
State.FreeRegs = 0;
return false;
}
} else {
// The MCU psABI allows passing parameters in-reg even if there are
// earlier parameters that are passed on the stack. Also,
// it does not allow passing >8-byte structs in-register,
// even if there are 3 free registers available.
if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
return false;
}
State.FreeRegs -= SizeInRegs;
return true;
}
bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
bool &InReg,
bool &NeedsPadding) const {
// On Windows, aggregates other than HFAs are never passed in registers, and
// they do not consume register slots. Homogeneous floating-point aggregates
// (HFAs) have already been dealt with at this point.
if (IsWin32StructABI && isAggregateTypeForABI(Ty))
return false;
NeedsPadding = false;
InReg = !IsMCUABI;
if (!updateFreeRegs(Ty, State))
return false;
if (IsMCUABI)
return true;
if (State.CC == llvm::CallingConv::X86_FastCall ||
State.CC == llvm::CallingConv::X86_VectorCall ||
State.CC == llvm::CallingConv::X86_RegCall) {
if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
NeedsPadding = true;
return false;
}
return true;
}
bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
if (!updateFreeRegs(Ty, State))
return false;
if (IsMCUABI)
return false;
if (State.CC == llvm::CallingConv::X86_FastCall ||
State.CC == llvm::CallingConv::X86_VectorCall ||
State.CC == llvm::CallingConv::X86_RegCall) {
if (getContext().getTypeSize(Ty) > 32)
return false;
return (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
Ty->isReferenceType());
}
return true;
}
void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
// Vectorcall on x86 works subtly differently than on x64, so the format is
// a bit different than the x64 version. First, all vector types (not HVAs)
// are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
// This differs from the x64 implementation, where the first 6 arguments by
// index get registers.
// In the second pass over the arguments, HVAs are passed in the remaining
// vector registers if possible, or indirectly by address. The address will be
// passed in ECX/EDX if available. Any other arguments are passed according to
// the usual fastcall rules.
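// Illustrative example (HVA2 stands for a hypothetical two-element HVA
// type): given
//   void __vectorcall f(int i, __m128 a, HVA2 h, __m128 b);
// this first pass assigns 'a' and 'b' to XMM registers; 'i' and the HVA 'h'
// are left for the second pass.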
MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
for (int I = 0, E = Args.size(); I < E; ++I) {
const Type *Base = nullptr;
uint64_t NumElts = 0;
const QualType &Ty = Args[I].type;
if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
if (State.FreeSSERegs >= NumElts) {
State.FreeSSERegs -= NumElts;
Args[I].info = ABIArgInfo::getDirectInReg();
State.IsPreassigned.set(I);
}
}
}
}
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
Ty = useFirstFieldIfTransparentUnion(Ty);
TypeInfo TI = getContext().getTypeInfo(Ty);
// Check with the C++ ABI first.
const RecordType *RT = Ty->getAs<RecordType>();
if (RT) {
CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
if (RAA == CGCXXABI::RAA_Indirect) {
return getIndirectResult(Ty, false, State);
} else if (RAA == CGCXXABI::RAA_DirectInMemory) {
// The field index doesn't matter, we'll fix it up later.
return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
}
}
// Regcall uses the concept of a homogeneous vector aggregate, similar
// to other targets.
const Type *Base = nullptr;
uint64_t NumElts = 0;
if ((IsRegCall || IsVectorCall) &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
if (State.FreeSSERegs >= NumElts) {
State.FreeSSERegs -= NumElts;
// Vectorcall passes HVAs directly and does not flatten them, but regcall
// does.
if (IsVectorCall)
return getDirectX86Hva();
if (Ty->isBuiltinType() || Ty->isVectorType())
return ABIArgInfo::getDirect();
return ABIArgInfo::getExpand();
}
return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (isAggregateTypeForABI(Ty)) {
// Structures with flexible arrays are always indirect.
// FIXME: This should not be byval!
if (RT && RT->getDecl()->hasFlexibleArrayMember())
return getIndirectResult(Ty, true, State);
// Ignore empty structs/unions on non-Windows.
if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
llvm::LLVMContext &LLVMContext = getVMContext();
llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
bool NeedsPadding = false;
bool InReg;
if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
unsigned SizeInRegs = (TI.Width + 31) / 32;
SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
if (InReg)
return ABIArgInfo::getDirectInReg(Result);
else
return ABIArgInfo::getDirect(Result);
}
llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
// Pass over-aligned aggregates on Windows indirectly. This behavior was
// added in MSVC 2015.
if (IsWin32StructABI && TI.isAlignRequired() && TI.Align > 32)
return getIndirectResult(Ty, /*ByVal=*/false, State);
// Expand small (<= 128-bit) record types when we know that the stack layout
// of those arguments will match the struct. This is important because the
// LLVM backend isn't smart enough to remove byval, which inhibits many
// optimizations.
// Don't do this for the MCU if there are still free integer registers
// (see X86_64 ABI for full explanation).
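// e.g. (illustrative): struct { int a, b; } is expanded into two separate
// i32 arguments here instead of being passed byval.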
if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
canExpandIndirectArgument(Ty))
return ABIArgInfo::getExpandWithPadding(
IsFastCall || IsVectorCall || IsRegCall, PaddingType);
return getIndirectResult(Ty, true, State);
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
// On Windows, vectors are passed directly if registers are available, or
// indirectly if not. This avoids the need to align argument memory. Pass
// user-defined vector types larger than 512 bits indirectly for simplicity.
if (IsWin32StructABI) {
if (TI.Width <= 512 && State.FreeSSERegs > 0) {
--State.FreeSSERegs;
return ABIArgInfo::getDirectInReg();
}
return getIndirectResult(Ty, /*ByVal=*/false, State);
}
// On Darwin, some vectors are passed in memory, we handle this by passing
// it as an i8/i16/i32/i64.
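// e.g. (illustrative): <2 x char> is passed as i16 and <1 x long long> as
// i64 under this rule.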
if (IsDarwinVectorABI) {
if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
(TI.Width == 64 && VT->getNumElements() == 1))
return ABIArgInfo::getDirect(
llvm::IntegerType::get(getVMContext(), TI.Width));
}
if (IsX86_MMXType(CGT.ConvertType(Ty)))
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));
return ABIArgInfo::getDirect();
}
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
bool InReg = shouldPrimitiveUseInReg(Ty, State);
if (isPromotableIntegerTypeForABI(Ty)) {
if (InReg)
return ABIArgInfo::getExtendInReg(Ty);
return ABIArgInfo::getExtend(Ty);
}
if (const auto *EIT = Ty->getAs<BitIntType>()) {
if (EIT->getNumBits() <= 64) {
if (InReg)
return ABIArgInfo::getDirectInReg();
return ABIArgInfo::getDirect();
}
return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (InReg)
return ABIArgInfo::getDirectInReg();
return ABIArgInfo::getDirect();
}
void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
CCState State(FI);
if (IsMCUABI)
State.FreeRegs = 3;
else if (State.CC == llvm::CallingConv::X86_FastCall) {
State.FreeRegs = 2;
State.FreeSSERegs = 3;
} else if (State.CC == llvm::CallingConv::X86_VectorCall) {
State.FreeRegs = 2;
State.FreeSSERegs = 6;
} else if (FI.getHasRegParm())
State.FreeRegs = FI.getRegParm();
else if (State.CC == llvm::CallingConv::X86_RegCall) {
State.FreeRegs = 5;
State.FreeSSERegs = 8;
} else if (IsWin32StructABI) {
// Since MSVC 2015, the first three SSE vectors have been passed in
// registers. The rest are passed indirectly.
State.FreeRegs = DefaultNumRegisterParameters;
State.FreeSSERegs = 3;
} else
State.FreeRegs = DefaultNumRegisterParameters;
if (!::classifyReturnType(getCXXABI(), FI, *this)) {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
} else if (FI.getReturnInfo().isIndirect()) {
// The C++ ABI is not aware of register usage, so we have to check if the
// return value was sret and put it in a register ourselves if appropriate.
if (State.FreeRegs) {
--State.FreeRegs; // The sret parameter consumes a register.
if (!IsMCUABI)
FI.getReturnInfo().setInReg(true);
}
}
// The chain argument effectively gives us another free register.
if (FI.isChainCall())
++State.FreeRegs;
// For vectorcall, do a first pass over the arguments, assigning FP and vector
// arguments to XMM registers as available.
if (State.CC == llvm::CallingConv::X86_VectorCall)
runVectorCallFirstPass(FI, State);
bool UsedInAlloca = false;
MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
for (int I = 0, E = Args.size(); I < E; ++I) {
// Skip arguments that have already been assigned.
if (State.IsPreassigned.test(I))
continue;
Args[I].info = classifyArgumentType(Args[I].type, State);
UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
}
// If we needed to use inalloca for any argument, do a second pass and rewrite
// all the memory arguments to use inalloca.
if (UsedInAlloca)
rewriteWithInAlloca(FI);
}
void
X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
CharUnits &StackOffset, ABIArgInfo &Info,
QualType Type) const {
// Arguments are always 4-byte-aligned.
CharUnits WordSize = CharUnits::fromQuantity(4);
assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
// sret pointers and indirect things will require an extra pointer
// indirection, unless they are byval. Most things are byval, and will not
// require this indirection.
bool IsIndirect = false;
if (Info.isIndirect() && !Info.getIndirectByVal())
IsIndirect = true;
Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
if (IsIndirect)
LLTy = LLTy->getPointerTo(0);
FrameFields.push_back(LLTy);
StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);
// Insert padding bytes to respect alignment.
CharUnits FieldEnd = StackOffset;
StackOffset = FieldEnd.alignTo(WordSize);
if (StackOffset != FieldEnd) {
CharUnits NumBytes = StackOffset - FieldEnd;
llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
FrameFields.push_back(Ty);
}
}
static bool isArgInAlloca(const ABIArgInfo &Info) {
// Leave ignored and inreg arguments alone.
switch (Info.getKind()) {
case ABIArgInfo::InAlloca:
return true;
case ABIArgInfo::Ignore:
case ABIArgInfo::IndirectAliased:
return false;
case ABIArgInfo::Indirect:
case ABIArgInfo::Direct:
case ABIArgInfo::Extend:
return !Info.getInReg();
case ABIArgInfo::Expand:
case ABIArgInfo::CoerceAndExpand:
// These are aggregate types which are never passed in registers when
// inalloca is involved.
return true;
}
llvm_unreachable("invalid enum");
}
void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
assert(IsWin32StructABI && "inalloca only supported on win32");
// Build a packed struct type for all of the arguments in memory.
SmallVector<llvm::Type *, 6> FrameFields;
// The stack alignment is always 4.
CharUnits StackAlign = CharUnits::fromQuantity(4);
CharUnits StackOffset;
CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();
// Put 'this' into the struct before 'sret', if necessary.
bool IsThisCall =
FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
ABIArgInfo &Ret = FI.getReturnInfo();
if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
isArgInAlloca(I->info)) {
addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
++I;
}
// Put the sret parameter into the inalloca struct if it's in memory.
if (Ret.isIndirect() && !Ret.getInReg()) {
addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
// On Windows, the hidden sret parameter is always returned in eax.
Ret.setInAllocaSRet(IsWin32StructABI);
}
// Skip the 'this' parameter in ecx.
if (IsThisCall)
++I;
// Put arguments passed in memory into the struct.
for (; I != E; ++I) {
if (isArgInAlloca(I->info))
addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
}
FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
/*isPacked=*/true),
StackAlign);
}
Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF,
Address VAListAddr, QualType Ty) const {
auto TypeInfo = getContext().getTypeInfoInChars(Ty);
// x86-32 changes the alignment of certain arguments on the stack.
//
// Just messing with TypeInfo like this works because we never pass
// anything indirectly.
TypeInfo.Align = CharUnits::fromQuantity(
getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity()));
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
TypeInfo, CharUnits::fromQuantity(4),
/*AllowHigherAlign*/ true);
}
bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
const llvm::Triple &Triple, const CodeGenOptions &Opts) {
assert(Triple.getArch() == llvm::Triple::x86);
switch (Opts.getStructReturnConvention()) {
case CodeGenOptions::SRCK_Default:
break;
case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return
return false;
case CodeGenOptions::SRCK_InRegs: // -freg-struct-return
return true;
}
if (Triple.isOSDarwin() || Triple.isOSIAMCU())
return true;
switch (Triple.getOS()) {
case llvm::Triple::DragonFly:
case llvm::Triple::FreeBSD:
case llvm::Triple::OpenBSD:
case llvm::Triple::Win32:
return true;
default:
return false;
}
}
static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) {
if (!FD->hasAttr<AnyX86InterruptAttr>())
return;
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->setCallingConv(llvm::CallingConv::X86_INTR);
if (FD->getNumParams() == 0)
return;
auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType());
llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType());
llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
Fn->getContext(), ByValTy);
Fn->addParamAttr(0, NewAttr);
}
void X86_32TargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->addFnAttr("stackrealign");
}
addX86InterruptAttrs(FD, GV, CGM);
}
}
bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
CodeGen::CGBuilderTy &Builder = CGF.Builder;
llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
// 0-7 are the eight integer registers; the order is different
// on Darwin (for EH), but the range is the same.
// 8 is %eip.
AssignToArrayRange(Builder, Address, Four8, 0, 8);
if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
// 12-16 are st(0..4). Not sure why we stop at 4.
// These have size 16, which is sizeof(long double) on
// platforms with 8-byte alignment for that type.
llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);
} else {
// 9 is %eflags, which doesn't get a size on Darwin for some
// reason.
Builder.CreateAlignedStore(
Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
CharUnits::One());
// 11-16 are st(0..5). Not sure why we stop at 5.
// These have size 12, which is sizeof(long double) on
// platforms with 4-byte alignment for that type.
llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
}
return false;
}
//===----------------------------------------------------------------------===//
// X86-64 ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
/// The AVX ABI level for X86 targets.
enum class X86AVXABILevel {
None,
AVX,
AVX512
};
/// \returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
switch (AVXLevel) {
case X86AVXABILevel::AVX512:
return 512;
case X86AVXABILevel::AVX:
return 256;
case X86AVXABILevel::None:
return 128;
}
llvm_unreachable("Unknown AVXLevel");
}
/// X86_64ABIInfo - The X86_64 ABI information.
class X86_64ABIInfo : public SwiftABIInfo {
enum Class {
Integer = 0,
SSE,
SSEUp,
X87,
X87Up,
ComplexX87,
NoClass,
Memory
};
/// merge - Implement the X86_64 ABI merging algorithm.
///
/// Merge an accumulating classification \arg Accum with a field
/// classification \arg Field.
///
/// \param Accum - The accumulating classification. This should
/// always be either NoClass or the result of a previous merge
/// call. In addition, this should never be Memory (the caller
/// should just return Memory for the aggregate).
static Class merge(Class Accum, Class Field);
/// postMerge - Implement the X86_64 ABI post merging algorithm.
///
/// Post merger cleanup, reduces a malformed Hi and Lo pair to
/// final MEMORY or SSE classes when necessary.
///
/// \param AggregateSize - The size of the current aggregate in
/// the classification process.
///
/// \param Lo - The classification for the parts of the type
/// residing in the low word of the containing object.
///
/// \param Hi - The classification for the parts of the type
/// residing in the higher words of the containing object.
///
void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;
/// classify - Determine the x86_64 register classes in which the
/// given type T should be passed.
///
/// \param Lo - The classification for the parts of the type
/// residing in the low word of the containing object.
///
/// \param Hi - The classification for the parts of the type
/// residing in the high word of the containing object.
///
/// \param OffsetBase - The bit offset of this type in the
/// containing object. Some parameters are classified differently
/// depending on whether they straddle an eightbyte boundary.
///
/// \param isNamedArg - Whether the argument in question is a "named"
/// argument, as used in AMD64-ABI 3.5.7.
///
/// \param IsRegCall - Whether the calling convention is regcall.
///
/// If a word is unused its result will be NoClass; if a type should
/// be passed in Memory then at least the classification of \arg Lo
/// will be Memory.
///
/// The \arg Lo class will be NoClass iff the argument is ignored.
///
/// If the \arg Lo class is ComplexX87, then the \arg Hi class will
/// also be ComplexX87.
void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
bool isNamedArg, bool IsRegCall = false) const;
llvm::Type *GetByteVectorType(QualType Ty) const;
llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
unsigned IROffset, QualType SourceTy,
unsigned SourceOffset) const;
llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
unsigned IROffset, QualType SourceTy,
unsigned SourceOffset) const;
/// getIndirectReturnResult - Given a source type \arg Ty, return a suitable
/// result such that the value will be returned in memory.
ABIArgInfo getIndirectReturnResult(QualType Ty) const;
/// getIndirectResult - Given a source type \arg Ty, return a suitable result
/// such that the argument will be passed in memory.
///
/// \param freeIntRegs - The number of free integer registers remaining
/// available.
ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
unsigned &neededInt, unsigned &neededSSE,
bool isNamedArg,
bool IsRegCall = false) const;
ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
unsigned &NeededSSE,
unsigned &MaxVectorWidth) const;
ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
unsigned &NeededSSE,
unsigned &MaxVectorWidth) const;
bool IsIllegalVectorType(QualType Ty) const;
/// The 0.98 ABI revision clarified a lot of ambiguities,
/// unfortunately in ways that were not always consistent with
/// certain previous compilers. In particular, platforms which
/// required strict binary compatibility with older versions of GCC
/// may need to exempt themselves.
bool honorsRevision0_98() const {
return !getTarget().getTriple().isOSDarwin();
}
/// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
/// classify it as INTEGER (for compatibility with older clang compilers).
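/// For example (illustrative), a first named argument of type
///   typedef long long mmx_t __attribute__((vector_size(8)));
/// is passed in %xmm0 when this returns true and in %rdi when it returns
/// false.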
bool classifyIntegerMMXAsSSE() const {
// Clang <= 3.8 did not do this.
if (getContext().getLangOpts().getClangABICompat() <=
LangOptions::ClangABI::Ver3_8)
return false;
const llvm::Triple &Triple = getTarget().getTriple();
if (Triple.isOSDarwin() || Triple.isPS())
return false;
if (Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 10)
return false;
return true;
}
// GCC classifies vectors of __int128 as memory.
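// e.g. (illustrative): a typedef __int128 v2q __attribute__((vector_size(32)))
// argument is passed in memory on the targets below.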
bool passInt128VectorsInMem() const {
// Clang <= 9.0 did not do this.
if (getContext().getLangOpts().getClangABICompat() <=
LangOptions::ClangABI::Ver9)
return false;
const llvm::Triple &T = getTarget().getTriple();
return T.isOSLinux() || T.isOSNetBSD();
}
X86AVXABILevel AVXLevel;
// Some ABIs (e.g. X32 ABI and Native Client OS) use 32-bit pointers on
// 64-bit hardware.
bool Has64BitPointers;
public:
X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) :
SwiftABIInfo(CGT), AVXLevel(AVXLevel),
Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {
}
bool isPassedUsingAVXType(QualType type) const {
unsigned neededInt, neededSSE;
// The freeIntRegs argument doesn't matter here.
ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
/*isNamedArg*/true);
if (info.isDirect()) {
llvm::Type *ty = info.getCoerceToType();
if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
return vectorTy->getPrimitiveSizeInBits().getFixedSize() > 128;
}
return false;
}
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool has64BitPointers() const {
return Has64BitPointers;
}
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return true;
}
};
/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public SwiftABIInfo {
public:
WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
: SwiftABIInfo(CGT), AVXLevel(AVXLevel),
IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool isHomogeneousAggregateBaseType(QualType Ty) const override {
// FIXME: Assumes vectorcall is in use.
return isX86VectorTypeForVectorCall(getContext(), Ty);
}
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t NumMembers) const override {
// FIXME: Assumes vectorcall is in use.
return isX86VectorCallAggregateSmallEnough(NumMembers);
}
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return true;
}
private:
ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
bool IsVectorCall, bool IsRegCall) const;
ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
const ABIArgInfo &current) const;
X86AVXABILevel AVXLevel;
bool IsMingw64;
};
class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
: TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {}
const X86_64ABIInfo &getABIInfo() const {
return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
}
/// Disable tail call on x86-64. The epilogue code before the tail jump blocks
/// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 7;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
// 0-15 are the 16 integer registers.
// 16 is %rip.
AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
return false;
}
llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
StringRef Constraint,
llvm::Type* Ty) const override {
return X86AdjustInlineAsmType(CGF, Constraint, Ty);
}
bool isNoProtoCallVariadic(const CallArgList &args,
const FunctionNoProtoType *fnType) const override {
// The default CC on x86-64 sets %al to the number of SSE
// registers used, and GCC sets this when calling an unprototyped
// function, so we override the default behavior. However, don't do
// that when AVX types are involved: the ABI explicitly states it is
// undefined, and it doesn't work in practice because of how the ABI
// defines varargs anyway.
if (fnType->getCallConv() == CC_C) {
bool HasAVXType = false;
for (CallArgList::const_iterator
it = args.begin(), ie = args.end(); it != ie; ++it) {
if (getABIInfo().isPassedUsingAVXType(it->Ty)) {
HasAVXType = true;
break;
}
}
if (!HasAVXType)
return true;
}
return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
}
llvm::Constant *
getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
unsigned Sig = (0xeb << 0) | // jmp rel8
(0x06 << 8) | // .+0x08
('v' << 16) |
('2' << 24);
return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->addFnAttr("stackrealign");
}
addX86InterruptAttrs(FD, GV, CGM);
}
}
void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
const FunctionDecl *Caller,
const FunctionDecl *Callee,
const CallArgList &Args) const override;
};
static void initFeatureMaps(const ASTContext &Ctx,
llvm::StringMap<bool> &CallerMap,
const FunctionDecl *Caller,
llvm::StringMap<bool> &CalleeMap,
const FunctionDecl *Callee) {
if (CalleeMap.empty() && CallerMap.empty()) {
// The caller is potentially nullptr in the case where the call isn't in a
// function. In this case, getFunctionFeatureMap ensures we just get the
// TU-level setting (since it cannot be modified by 'target').
Ctx.getFunctionFeatureMap(CallerMap, Caller);
Ctx.getFunctionFeatureMap(CalleeMap, Callee);
}
}
static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
SourceLocation CallLoc,
const llvm::StringMap<bool> &CallerMap,
const llvm::StringMap<bool> &CalleeMap,
QualType Ty, StringRef Feature,
bool IsArgument) {
bool CallerHasFeat = CallerMap.lookup(Feature);
bool CalleeHasFeat = CalleeMap.lookup(Feature);
if (!CallerHasFeat && !CalleeHasFeat)
return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
<< IsArgument << Ty << Feature;
// Mixing calling conventions here is very clearly an error.
if (!CallerHasFeat || !CalleeHasFeat)
return Diag.Report(CallLoc, diag::err_avx_calling_convention)
<< IsArgument << Ty << Feature;
// Else, both caller and callee have the required feature, so there is no need
// to diagnose.
return false;
}
static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
SourceLocation CallLoc,
const llvm::StringMap<bool> &CallerMap,
const llvm::StringMap<bool> &CalleeMap, QualType Ty,
bool IsArgument) {
uint64_t Size = Ctx.getTypeSize(Ty);
if (Size > 256)
return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
"avx512f", IsArgument);
if (Size > 128)
return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
IsArgument);
return false;
}
void X86_64TargetCodeGenInfo::checkFunctionCallABI(
CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
const FunctionDecl *Callee, const CallArgList &Args) const {
llvm::StringMap<bool> CallerMap;
llvm::StringMap<bool> CalleeMap;
unsigned ArgIndex = 0;
// We need to loop through the actual call arguments rather than the
// function's parameters, in case this is variadic.
for (const CallArg &Arg : Args) {
// The "avx" feature changes how vectors >128 in size are passed. "avx512f"
// additionally changes how vectors >256 in size are passed. Like GCC, we
// warn when a function is called with an argument where this will change.
// Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
// the caller and callee features are mismatched.
// Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
// change its ABI with attribute-target after this call.
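// e.g. (illustrative): passing a __m256 argument when neither caller nor
// callee has the "avx" feature produces the warning; a caller/callee
// feature mismatch produces the hard error.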
if (Arg.getType()->isVectorType() &&
CGM.getContext().getTypeSize(Arg.getType()) > 128) {
initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
QualType Ty = Arg.getType();
// The CallArg seems to have desugared the type already, so for clearer
// diagnostics, replace it with the type in the FunctionDecl if possible.
if (ArgIndex < Callee->getNumParams())
Ty = Callee->getParamDecl(ArgIndex)->getType();
if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
CalleeMap, Ty, /*IsArgument*/ true))
return;
}
++ArgIndex;
}
// Check return always, as we don't have a good way of knowing in codegen
// whether this value is used, tail-called, etc.
if (Callee->getReturnType()->isVectorType() &&
CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) {
initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
CalleeMap, Callee->getReturnType(),
/*IsArgument*/ false);
}
}
static std::string qualifyWindowsLibrary(llvm::StringRef Lib) {
// If the argument does not end in .lib, automatically add the suffix.
// If the argument contains a space, enclose it in quotes.
// This matches the behavior of MSVC.
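// e.g. (illustrative): "msvcrt" becomes "msvcrt.lib" and "my lib" becomes
// "\"my lib.lib\"".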
bool Quote = Lib.contains(' ');
std::string ArgStr = Quote ? "\"" : "";
ArgStr += Lib;
if (!Lib.endswith_insensitive(".lib") && !Lib.endswith_insensitive(".a"))
ArgStr += ".lib";
ArgStr += Quote ? "\"" : "";
return ArgStr;
}
class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo {
public:
WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI,
unsigned NumRegisterParameters)
: X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI,
Win32StructABI, NumRegisterParameters, false) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:";
Opt += qualifyWindowsLibrary(Lib);
}
void getDetectMismatchOption(llvm::StringRef Name,
llvm::StringRef Value,
llvm::SmallString<32> &Opt) const override {
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
static void addStackProbeTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) {
if (llvm::Function *Fn = dyn_cast_or_null<llvm::Function>(GV)) {
if (CGM.getCodeGenOpts().StackProbeSize != 4096)
Fn->addFnAttr("stack-probe-size",
llvm::utostr(CGM.getCodeGenOpts().StackProbeSize));
if (CGM.getCodeGenOpts().NoStackArgProbe)
Fn->addFnAttr("no-stack-arg-probe");
}
}
void WinX86_32TargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
if (GV->isDeclaration())
return;
addStackProbeTargetAttributes(D, GV, CGM);
}
class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
X86AVXABILevel AVXLevel)
: TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 7;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
// 0-15 are the 16 integer registers.
// 16 is %rip.
AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
return false;
}
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:";
Opt += qualifyWindowsLibrary(Lib);
}
void getDetectMismatchOption(llvm::StringRef Name,
llvm::StringRef Value,
llvm::SmallString<32> &Opt) const override {
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
void WinX86_64TargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->addFnAttr("stackrealign");
}
addX86InterruptAttrs(FD, GV, CGM);
}
addStackProbeTargetAttributes(D, GV, CGM);
}
}
void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
Class &Hi) const {
// AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
//
// (a) If one of the classes is Memory, the whole argument is passed in
// memory.
//
// (b) If X87UP is not preceded by X87, the whole argument is passed in
// memory.
//
// (c) If the size of the aggregate exceeds two eightbytes and the first
// eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
// argument is passed in memory. NOTE: This is necessary to keep the
// ABI working for processors that don't support the __m256 type.
//
// (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
//
// Some of these are enforced by the merging logic. Others can arise
// only with unions; for example:
// union { _Complex double; unsigned; }
//
// Note that clauses (b) and (c) were added in 0.98.
//
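// For example (illustrative): union { __m128 v; long l; } merges to
// (Lo=Integer, Hi=SSEUp); rule (d) then converts Hi to SSE because the
// SSEUP eightbyte is no longer preceded by SSE.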
if (Hi == Memory)
Lo = Memory;
if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
Lo = Memory;
if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp))
Lo = Memory;
if (Hi == SSEUp && Lo != SSE)
Hi = SSE;
}
X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
// AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
// classified recursively so that always two fields are
// considered. The resulting class is calculated according to
// the classes of the fields in the eightbyte:
//
// (a) If both classes are equal, this is the resulting class.
//
// (b) If one of the classes is NO_CLASS, the resulting class is
// the other class.
//
// (c) If one of the classes is MEMORY, the result is the MEMORY
// class.
//
// (d) If one of the classes is INTEGER, the result is the
// INTEGER.
//
// (e) If one of the classes is X87, X87UP, COMPLEX_X87 class,
// MEMORY is used as class.
//
// (f) Otherwise class SSE is used.
// Accum should never be memory (we should have returned) or
// ComplexX87 (because this cannot be passed in a structure).
assert((Accum != Memory && Accum != ComplexX87) &&
"Invalid accumulated classification during merge.");
if (Accum == Field || Field == NoClass)
return Accum;
if (Field == Memory)
return Memory;
if (Accum == NoClass)
return Field;
if (Accum == Integer || Field == Integer)
return Integer;
if (Field == X87 || Field == X87Up || Field == ComplexX87 ||
Accum == X87 || Accum == X87Up)
return Memory;
return SSE;
}
void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
Class &Hi, bool isNamedArg, bool IsRegCall) const {
// FIXME: This code can be simplified by introducing a simple value class for
// Class pairs with appropriate constructor methods for the various
// situations.
// FIXME: Some of the split computations are wrong; unaligned vectors
// shouldn't be passed in registers for example, so there is no chance they
// can straddle an eightbyte. Verify & simplify.
Lo = Hi = NoClass;
Class &Current = OffsetBase < 64 ? Lo : Hi;
Current = Memory;
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
BuiltinType::Kind k = BT->getKind();
if (k == BuiltinType::Void) {
Current = NoClass;
} else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) {
Lo = Integer;
Hi = Integer;
} else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) {
Current = Integer;
} else if (k == BuiltinType::Float || k == BuiltinType::Double ||
k == BuiltinType::Float16 || k == BuiltinType::BFloat16) {
Current = SSE;
} else if (k == BuiltinType::LongDouble) {
const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
if (LDF == &llvm::APFloat::IEEEquad()) {
Lo = SSE;
Hi = SSEUp;
} else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
Lo = X87;
Hi = X87Up;
} else if (LDF == &llvm::APFloat::IEEEdouble()) {
Current = SSE;
} else
llvm_unreachable("unexpected long double representation!");
}
// FIXME: _Decimal32 and _Decimal64 are SSE.
// FIXME: _float128 and _Decimal128 are (SSE, SSEUp).
return;
}
if (const EnumType *ET = Ty->getAs<EnumType>()) {
// Classify the underlying integer type.
classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg);
return;
}
if (Ty->hasPointerRepresentation()) {
Current = Integer;
return;
}
if (Ty->isMemberPointerType()) {
if (Ty->isMemberFunctionPointerType()) {
if (Has64BitPointers) {
// If Has64BitPointers, this is an {i64, i64}, so classify both
// Lo and Hi now.
Lo = Hi = Integer;
} else {
// Otherwise, with 32-bit pointers, this is an {i32, i32}. If that
// straddles an eightbyte boundary, Hi should be classified as well.
uint64_t EB_FuncPtr = (OffsetBase) / 64;
uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64;
if (EB_FuncPtr != EB_ThisAdj) {
Lo = Hi = Integer;
} else {
Current = Integer;
}
}
} else {
Current = Integer;
}
return;
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
uint64_t Size = getContext().getTypeSize(VT);
if (Size == 1 || Size == 8 || Size == 16 || Size == 32) {
// gcc passes the following as integer:
// 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float>
// 2 bytes - <2 x char>, <1 x short>
// 1 byte - <1 x char>
Current = Integer;
// If this type crosses an eightbyte boundary, it should be
// split.
uint64_t EB_Lo = (OffsetBase) / 64;
uint64_t EB_Hi = (OffsetBase + Size - 1) / 64;
if (EB_Lo != EB_Hi)
Hi = Lo;
} else if (Size == 64) {
QualType ElementType = VT->getElementType();
// gcc passes <1 x double> in memory. :(
if (ElementType->isSpecificBuiltinType(BuiltinType::Double))
return;
// gcc passes <1 x long long> as SSE but clang used to unconditionally
// pass them as integer. For platforms where clang is the de facto
// platform compiler, we must continue to use integer.
if (!classifyIntegerMMXAsSSE() &&
(ElementType->isSpecificBuiltinType(BuiltinType::LongLong) ||
ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) ||
ElementType->isSpecificBuiltinType(BuiltinType::Long) ||
ElementType->isSpecificBuiltinType(BuiltinType::ULong)))
Current = Integer;
else
Current = SSE;
// If this type crosses an eightbyte boundary, it should be
// split.
if (OffsetBase && OffsetBase != 64)
Hi = Lo;
} else if (Size == 128 ||
(isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
QualType ElementType = VT->getElementType();
// gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
if (passInt128VectorsInMem() && Size != 128 &&
(ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
return;
// Arguments of 256-bits are split into four eightbyte chunks. The
// least significant one belongs to class SSE and all the others to class
// SSEUP. The original Lo and Hi design considers that types can't be
// greater than 128-bits, so a 64-bit split in Hi and Lo makes sense.
// This design isn't correct for 256-bits, but since there are no cases
// where the upper parts would need to be inspected, avoid adding
// complexity and just consider Hi to match the 64-256 part.
//
// Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
// registers if they are "named", i.e. not part of the "..." of a
// variadic function.
//
// Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are
// split into eight eightbyte chunks, one SSE and seven SSEUP.
Lo = SSE;
Hi = SSEUp;
}
return;
}
if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
QualType ET = getContext().getCanonicalType(CT->getElementType());
uint64_t Size = getContext().getTypeSize(Ty);
if (ET->isIntegralOrEnumerationType()) {
if (Size <= 64)
Current = Integer;
else if (Size <= 128)
Lo = Hi = Integer;
} else if (ET->isFloat16Type() || ET == getContext().FloatTy ||
ET->isBFloat16Type()) {
Current = SSE;
} else if (ET == getContext().DoubleTy) {
Lo = Hi = SSE;
} else if (ET == getContext().LongDoubleTy) {
const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
if (LDF == &llvm::APFloat::IEEEquad())
Current = Memory;
else if (LDF == &llvm::APFloat::x87DoubleExtended())
Current = ComplexX87;
else if (LDF == &llvm::APFloat::IEEEdouble())
Lo = Hi = SSE;
else
llvm_unreachable("unexpected long double representation!");
}
// If this complex type crosses an eightbyte boundary then it
// should be split.
uint64_t EB_Real = (OffsetBase) / 64;
uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64;
if (Hi == NoClass && EB_Real != EB_Imag)
Hi = Lo;
return;
}
if (const auto *EITy = Ty->getAs<BitIntType>()) {
if (EITy->getNumBits() <= 64)
Current = Integer;
else if (EITy->getNumBits() <= 128)
Lo = Hi = Integer;
// Larger values need to get passed in memory.
return;
}
if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
// Arrays are treated like structures.
uint64_t Size = getContext().getTypeSize(Ty);
// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
// than eight eightbytes, ..., it has class MEMORY.
// regcall ABI doesn't have limitation to an object. The only limitation
// is the free registers, which will be checked in computeInfo.
if (!IsRegCall && Size > 512)
return;
// AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
// fields, it has class MEMORY.
//
// Only need to check alignment of array base.
if (OffsetBase % getContext().getTypeAlign(AT->getElementType()))
return;
// Otherwise implement simplified merge. We could be smarter about
// this, but it isn't worth it and would be harder to verify.
Current = NoClass;
uint64_t EltSize = getContext().getTypeSize(AT->getElementType());
uint64_t ArraySize = AT->getSize().getZExtValue();
// The only case a 256-bit wide vector could be used is when the array
// contains a single 256-bit element. Since Lo and Hi logic isn't extended
// to work for sizes wider than 128, early check and fallback to memory.
//
if (Size > 128 &&
(Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel)))
return;
for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
Class FieldLo, FieldHi;
classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg);
Lo = merge(Lo, FieldLo);
Hi = merge(Hi, FieldHi);
if (Lo == Memory || Hi == Memory)
break;
}
postMerge(Size, Lo, Hi);
assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification.");
return;
}
if (const RecordType *RT = Ty->getAs<RecordType>()) {
uint64_t Size = getContext().getTypeSize(Ty);
// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
// than eight eightbytes, ..., it has class MEMORY.
if (Size > 512)
return;
// AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
// copy constructor or a non-trivial destructor, it is passed by invisible
// reference.
if (getRecordArgABI(RT, getCXXABI()))
return;
const RecordDecl *RD = RT->getDecl();
// Assume variable sized types are passed in memory.
if (RD->hasFlexibleArrayMember())
return;
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
// Reset Lo class, this will be recomputed.
Current = NoClass;
// If this is a C++ record, classify the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
for (const auto &I : CXXRD->bases()) {
assert(!I.isVirtual() && !I.getType()->isDependentType() &&
"Unexpected base class!");
const auto *Base =
cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
// Classify this field.
//
// AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a
// single eightbyte, each is classified separately. Each eightbyte gets
// initialized to class NO_CLASS.
Class FieldLo, FieldHi;
uint64_t Offset =
OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg);
Lo = merge(Lo, FieldLo);
Hi = merge(Hi, FieldHi);
if (Lo == Memory || Hi == Memory) {
postMerge(Size, Lo, Hi);
return;
}
}
}
// Classify the fields one at a time, merging the results.
unsigned idx = 0;
bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <=
LangOptions::ClangABI::Ver11 ||
getContext().getTargetInfo().getTriple().isPS();
bool IsUnion = RT->isUnionType() && !UseClang11Compat;
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
i != e; ++i, ++idx) {
uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
bool BitField = i->isBitField();
// Ignore padding bit-fields.
if (BitField && i->isUnnamedBitfield())
continue;
// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
// eight eightbytes, or it contains unaligned fields, it has class MEMORY.
//
// The only case a 256-bit or a 512-bit wide vector could be used is when
// the struct contains a single 256-bit or 512-bit element. Early check
// and fallback to memory.
//
// FIXME: Extend the Lo and Hi logic properly to work for sizes wider
// than 128.
if (Size > 128 &&
((!IsUnion && Size != getContext().getTypeSize(i->getType())) ||
Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
Lo = Memory;
postMerge(Size, Lo, Hi);
return;
}
// Note, skip this test for bit-fields, see below.
if (!BitField && Offset % getContext().getTypeAlign(i->getType())) {
Lo = Memory;
postMerge(Size, Lo, Hi);
return;
}
// Classify this field.
//
// AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate
// exceeds a single eightbyte, each is classified
// separately. Each eightbyte gets initialized to class
// NO_CLASS.
Class FieldLo, FieldHi;
// Bit-fields require special handling, they do not force the
// structure to be passed in memory even if unaligned, and
// therefore they can straddle an eightbyte.
if (BitField) {
assert(!i->isUnnamedBitfield());
uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
uint64_t Size = i->getBitWidthValue(getContext());
uint64_t EB_Lo = Offset / 64;
uint64_t EB_Hi = (Offset + Size - 1) / 64;
if (EB_Lo) {
assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes.");
FieldLo = NoClass;
FieldHi = Integer;
} else {
FieldLo = Integer;
FieldHi = EB_Hi ? Integer : NoClass;
}
} else
classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
Lo = merge(Lo, FieldLo);
Hi = merge(Hi, FieldHi);
if (Lo == Memory || Hi == Memory)
break;
}
postMerge(Size, Lo, Hi);
}
}
ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
// If this is a scalar LLVM value then assume LLVM will pass it in the right
// place naturally.
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
if (Ty->isBitIntType())
return getNaturalAlignIndirect(Ty);
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
return getNaturalAlignIndirect(Ty);
}
bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
uint64_t Size = getContext().getTypeSize(VecTy);
unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
if (Size <= 64 || Size > LargestVector)
return true;
QualType EltTy = VecTy->getElementType();
if (passInt128VectorsInMem() &&
(EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
return true;
}
return false;
}
ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
unsigned freeIntRegs) const {
// If this is a scalar LLVM value then assume LLVM will pass it in the right
// place naturally.
//
// This assumption is optimistic, as there could be free registers available
// when we need to pass this argument in memory, and LLVM could try to pass
// the argument in the free register. This does not seem to happen currently,
// but this code would be much safer if we could mark the argument with
// 'onstack'. See PR12193.
if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) &&
!Ty->isBitIntType()) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Compute the byval alignment. We specify the alignment of the byval in all
// cases so that the mid-level optimizer knows the alignment of the byval.
unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U);
// Attempt to avoid passing indirect results using byval when possible. This
// is important for good codegen.
//
// We do this by coercing the value into a scalar type which the backend can
// handle naturally (i.e., without using byval).
//
// For simplicity, we currently only do this when we have exhausted all of the
// free integer registers. Doing this when there are free integer registers
// would require more care, as we would have to ensure that the coerced value
// did not claim the unused register. That would require either reordering the
// arguments to the function (so that any subsequent inreg values came first),
// or only doing this optimization when there were no following arguments that
// might be inreg.
//
// We currently expect it to be rare (particularly in well written code) for
// arguments to be passed on the stack when there are still free integer
// registers available (this would typically imply large structs being passed
// by value), so this seems like a fair tradeoff for now.
//
// We can revisit this if the backend grows support for 'onstack' parameter
// attributes. See PR12193.
if (freeIntRegs == 0) {
uint64_t Size = getContext().getTypeSize(Ty);
// If this type fits in an eightbyte, coerce it into the matching integral
// type, which will end up on the stack (with alignment 8).
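// e.g. (illustrative): struct { int a, b; } becomes a plain i64 argument
// once all free integer registers have been used.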
if (Align == 8 && Size <= 64)
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
Size));
}
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align));
}
/// The ABI specifies that a value should be passed in a full vector XMM/YMM
/// register. Pick an LLVM IR type that will be passed as a vector register.
llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
// Wrapper structs/arrays that only contain vectors are passed just like
// vectors; strip them off if present.
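// e.g. (illustrative): struct { __m128 v; } is passed exactly like a bare
// __m128.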
if (const Type *InnerTy = isSingleElementStruct(Ty, getContext()))
Ty = QualType(InnerTy, 0);
llvm::Type *IRType = CGT.ConvertType(Ty);
if (isa<llvm::VectorType>(IRType)) {
// Don't pass vXi128 vectors in their native type, the backend can't
// legalize them.
if (passInt128VectorsInMem() &&
cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) {
// Use a vXi64 vector.
uint64_t Size = getContext().getTypeSize(Ty);
return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()),
Size / 64);
}
return IRType;
}
if (IRType->getTypeID() == llvm::Type::FP128TyID)
return IRType;
// We couldn't find the preferred IR vector type for 'Ty'.
uint64_t Size = getContext().getTypeSize(Ty);
assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
// Return a LLVM IR vector type based on the size of 'Ty'.
return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()),
Size / 64);
}
/// BitsContainNoUserData - Return true if the specified [start,end) bit range
/// is known to either be off the end of the specified type or be in
/// alignment padding. The user type specified is known to be at most 128 bits
/// in size, and have passed through X86_64ABIInfo::classify with a successful
/// classification that put one of the two halves in the INTEGER class.
///
/// It is conservatively correct to return false.
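/// For example (illustrative), for struct { double d; int i; } the bit range
/// [96, 128) lies entirely in tail padding, so this returns true for it.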
static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
unsigned EndBit, ASTContext &Context) {
// If the bytes being queried are off the end of the type, there is no user
// data hiding here. This handles analysis of builtins, vectors and other
// types that don't contain interesting padding.
unsigned TySize = (unsigned)Context.getTypeSize(Ty);
if (TySize <= StartBit)
return true;
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType());
unsigned NumElts = (unsigned)AT->getSize().getZExtValue();
// Check each element to see if the element overlaps with the queried range.
for (unsigned i = 0; i != NumElts; ++i) {
// If the element is after the span we care about, then we're done.
unsigned EltOffset = i*EltSize;
if (EltOffset >= EndBit) break;
unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0;
if (!BitsContainNoUserData(AT->getElementType(), EltStart,
EndBit-EltOffset, Context))
return false;
}
// If it overlaps no elements, then it is safe to process as padding.
return true;
}
if (const RecordType *RT = Ty->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
for (const auto &I : CXXRD->bases()) {
assert(!I.isVirtual() && !I.getType()->isDependentType() &&
"Unexpected base class!");
const auto *Base =
cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
// If the base is after the span we care about, ignore it.
unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base));
if (BaseOffset >= EndBit) continue;
unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0;
if (!BitsContainNoUserData(I.getType(), BaseStart,
EndBit-BaseOffset, Context))
return false;
}
}
// Verify that no field has data that overlaps the region of interest. Yes
// this could be sped up a lot by being smarter about queried fields,
// however we're only looking at structs up to 16 bytes, so we don't care
// much.
unsigned idx = 0;
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
i != e; ++i, ++idx) {
unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx);
// If we found a field after the region we care about, then we're done.
if (FieldOffset >= EndBit) break;
unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0;
if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset,
Context))
return false;
}
// If nothing in this record overlapped the area of interest, then we're
// clean.
return true;
}
return false;
}
/// getFPTypeAtOffset - Return a floating point type at the specified offset.
static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
const llvm::DataLayout &TD) {
if (IROffset == 0 && IRType->isFloatingPointTy())
return IRType;
// If this is a struct, recurse into the field at the specified offset.
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
if (!STy->getNumContainedTypes())
return nullptr;
const llvm::StructLayout *SL = TD.getStructLayout(STy);
unsigned Elt = SL->getElementContainingOffset(IROffset);
IROffset -= SL->getElementOffset(Elt);
return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD);
}
// If this is an array, recurse into the field at the specified offset.
if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
llvm::Type *EltTy = ATy->getElementType();
unsigned EltSize = TD.getTypeAllocSize(EltTy);
IROffset -= IROffset / EltSize * EltSize;
return getFPTypeAtOffset(EltTy, IROffset, TD);
}
return nullptr;
}
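// Sketch of how the recursion above resolves offsets (illustrative IR types):
//   getFPTypeAtOffset({ double, float }, 8, TD) --> float   (element 1, offset 0)
//   getFPTypeAtOffset([2 x float],       4, TD) --> float   (offset folded into the element)
//   getFPTypeAtOffset({ i32, float },    0, TD) --> nullptr (i32 is not FP)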
/// GetSSETypeAtOffset - Return a type that will be passed by the backend in the
/// low 8 bytes of an XMM register, corresponding to the SSE class.
llvm::Type *X86_64ABIInfo::
GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
QualType SourceTy, unsigned SourceOffset) const {
const llvm::DataLayout &TD = getDataLayout();
unsigned SourceSize =
(unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
if (!T0 || T0->isDoubleTy())
return llvm::Type::getDoubleTy(getVMContext());
// Get the adjacent FP type.
llvm::Type *T1 = nullptr;
unsigned T0Size = TD.getTypeAllocSize(T0);
if (SourceSize > T0Size)
T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
if (T1 == nullptr) {
// Check if IRType is a half/bfloat followed by a float. The float will be at
// IROffset+4 due to its alignment.
if (T0->is16bitFPTy() && SourceSize > 4)
T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
// If we can't get a second FP type, return a simple half or float.
// avx512fp16-abi.c:pr51813_2 shows it works to return float for
// {float, i8} too.
if (T1 == nullptr)
return T0;
}
if (T0->isFloatTy() && T1->isFloatTy())
return llvm::FixedVectorType::get(T0, 2);
if (T0->is16bitFPTy() && T1->is16bitFPTy()) {
llvm::Type *T2 = nullptr;
if (SourceSize > 4)
T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
if (T2 == nullptr)
return llvm::FixedVectorType::get(T0, 2);
return llvm::FixedVectorType::get(T0, 4);
}
if (T0->is16bitFPTy() || T1->is16bitFPTy())
return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4);
return llvm::Type::getDoubleTy(getVMContext());
}
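// For illustration (assumed source types, not from the ABI text): a struct
//   struct { float x; float y; };
// is lowered to <2 x float> for its eightbyte, while
//   struct { _Float16 a, b, c, d; };
// becomes <4 x half>; if no FP type is found at the offset at all, the
// conservative answer is double.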
/// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in
/// an 8-byte GPR. This means that we either have a scalar or we are talking
/// about the high or low part of an up-to-16-byte struct. This routine picks
/// the best LLVM IR type to represent this, which may be i64 or may be anything
/// else that the backend will pass in a GPR that works better (e.g. i8, %foo*,
/// etc).
///
/// PrefType is an LLVM IR type that corresponds to (part of) the IR type for
/// the source type. IROffset is an offset in bytes into the LLVM IR type that
/// the 8-byte value references. PrefType may be null.
///
/// SourceTy is the source-level type for the entire argument. SourceOffset is
/// an offset into this that we're processing (which is always either 0 or 8).
///
llvm::Type *X86_64ABIInfo::
GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
QualType SourceTy, unsigned SourceOffset) const {
// If we're dealing with an un-offset LLVM IR type, then it means that we're
// returning an 8-byte unit starting with it. See if we can safely use it.
if (IROffset == 0) {
// Pointers and int64s always fill the 8-byte unit.
if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) ||
IRType->isIntegerTy(64))
return IRType;
// If we have a 1/2/4-byte integer, we can use it only if the rest of the
// goodness in the source type is just tail padding. This is allowed to
// kick in for struct {double,int} on the int, but not on
// struct {double,int,int} because we wouldn't return the second int. We
// have to do this analysis on the source type because we can't depend on
// unions being lowered a specific way, etc.
if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) ||
IRType->isIntegerTy(32) ||
(isa<llvm::PointerType>(IRType) && !Has64BitPointers)) {
unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 :
cast<llvm::IntegerType>(IRType)->getBitWidth();
if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth,
SourceOffset*8+64, getContext()))
return IRType;
}
}
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
// If this is a struct, recurse into the field at the specified offset.
const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy);
if (IROffset < SL->getSizeInBytes()) {
unsigned FieldIdx = SL->getElementContainingOffset(IROffset);
IROffset -= SL->getElementOffset(FieldIdx);
return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset,
SourceTy, SourceOffset);
}
}
if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
llvm::Type *EltTy = ATy->getElementType();
unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy);
unsigned EltOffset = IROffset/EltSize*EltSize;
return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy,
SourceOffset);
}
// Okay, we don't have any better idea of what to pass, so we pass this in an
// integer register that isn't too big to fit the rest of the struct.
unsigned TySizeInBytes =
(unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity();
assert(TySizeInBytes != SourceOffset && "Empty field?");
// It is always safe to classify this as an integer type up to i64 that
// isn't larger than the structure.
return llvm::IntegerType::get(getVMContext(),
std::min(TySizeInBytes-SourceOffset, 8U)*8);
}
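// Worked example for the routine above (hypothetical source types): for
//   struct { double d; int i; };
// the high eightbyte is returned as i32, since everything after the int is
// tail padding, whereas for
//   struct { double d; int i; int j; };
// the routine falls through to the final case and returns i64 so that both
// ints are covered.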
/// GetX86_64ByValArgumentPair - Given a high and low type that can ideally
/// be used as elements of a two register pair to pass or return, return a
/// first class aggregate to represent them. For example, if the low part of
/// a by-value argument should be passed as i32* and the high part as float,
/// return {i32*, float}.
static llvm::Type *
GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
const llvm::DataLayout &TD) {
// In order to correctly satisfy the ABI, we need the high part to start
// at offset 8. If the high and low parts we inferred are both 4-byte types
// (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have
// the second element at offset 8. Check for this:
unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo);
unsigned HiAlign = TD.getABITypeAlignment(Hi);
unsigned HiStart = llvm::alignTo(LoSize, HiAlign);
assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!");
// To handle this, we have to increase the size of the low part so that the
// second element will start at an 8 byte offset. We can't increase the size
// of the second element because it might make us access off the end of the
// struct.
if (HiStart != 8) {
// There are usually two sorts of types the ABI generation code can produce
// for the low part of a pair that aren't 8 bytes in size: floating point
// (half/float) or integer (i8/i16/i32). This can also include pointers when
// they are 32-bit (X32 and NaCl).
// Promote these to a larger type.
if (Lo->isHalfTy() || Lo->isFloatTy())
Lo = llvm::Type::getDoubleTy(Lo->getContext());
else {
assert((Lo->isIntegerTy() || Lo->isPointerTy())
&& "Invalid/unknown lo type");
Lo = llvm::Type::getInt64Ty(Lo->getContext());
}
}
llvm::StructType *Result = llvm::StructType::get(Lo, Hi);
// Verify that the second element is at an 8-byte offset.
assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 &&
"Invalid x86-64 argument pair!");
return Result;
}
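// Illustrative sketch: if the inferred parts are Lo = float and Hi = i32,
// {float, i32} would place the i32 at offset 4, so Lo is promoted to double
// and the returned pair is {double, i32}, which puts the i32 at offset 8 as
// the ABI requires.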
ABIArgInfo X86_64ABIInfo::
classifyReturnType(QualType RetTy) const {
// AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
// classification algorithm.
X86_64ABIInfo::Class Lo, Hi;
classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true);
// Check some invariants.
assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
llvm::Type *ResType = nullptr;
switch (Lo) {
case NoClass:
if (Hi == NoClass)
return ABIArgInfo::getIgnore();
// If the low part is just padding, it takes no register, leave ResType
// null.
assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
"Unknown missing lo part");
break;
case SSEUp:
case X87Up:
llvm_unreachable("Invalid classification for lo word.");
// AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via
// hidden argument.
case Memory:
return getIndirectReturnResult(RetTy);
// AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next
// available register of the sequence %rax, %rdx is used.
case Integer:
ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
// If we have a sign or zero extended integer, make sure to return Extend
// so that the parameter gets the right LLVM IR attributes.
if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (RetTy->isIntegralOrEnumerationType() &&
isPromotableIntegerTypeForABI(RetTy))
return ABIArgInfo::getExtend(RetTy);
}
break;
// AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
// available SSE register of the sequence %xmm0, %xmm1 is used.
case SSE:
ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
break;
// AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is
// returned on the X87 stack in %st0 as 80-bit x87 number.
case X87:
ResType = llvm::Type::getX86_FP80Ty(getVMContext());
break;
// AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real
// part of the value is returned in %st0 and the imaginary part in
// %st1.
case ComplexX87:
assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
llvm::Type::getX86_FP80Ty(getVMContext()));
break;
}
llvm::Type *HighPart = nullptr;
switch (Hi) {
// Memory was handled previously and X87 should
// never occur as a hi class.
case Memory:
case X87:
llvm_unreachable("Invalid classification for hi word.");
case ComplexX87: // Previously handled.
case NoClass:
break;
case Integer:
HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
if (Lo == NoClass) // Return HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
break;
case SSE:
HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
if (Lo == NoClass) // Return HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
break;
// AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
// is passed in the next available eightbyte chunk of the last used
// vector register.
//
// SSEUP should always be preceded by SSE, just widen.
case SSEUp:
assert(Lo == SSE && "Unexpected SSEUp classification.");
ResType = GetByteVectorType(RetTy);
break;
// AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is
// returned together with the previous X87 value in %st0.
case X87Up:
// If X87Up is preceded by X87, we don't need to do
// anything. However, in some cases with unions it may not be
// preceded by X87. In such situations we follow gcc and pass the
// extra bits in an SSE reg.
if (Lo != X87) {
HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
if (Lo == NoClass) // Return HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
}
break;
}
// If a high part was specified, merge it together with the low part. It is
// known to be passed in the high eightbyte of the result. We do this by
// forming a first-class struct aggregate with the high and low parts:
// {low, high}.
if (HighPart)
ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
return ABIArgInfo::getDirect(ResType);
}
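// End-to-end example of the return classification above (illustrative): a
// return type of
//   struct { double d; int i; };
// classifies as Lo = SSE, Hi = Integer; the low half becomes double, the high
// half becomes i32, and the merged coercion type is {double, i32} with the
// i32 at offset 8.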
ABIArgInfo
X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs,
unsigned &neededInt, unsigned &neededSSE,
bool isNamedArg, bool IsRegCall) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
X86_64ABIInfo::Class Lo, Hi;
classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall);
// Check some invariants.
// FIXME: Enforce these by construction.
assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
neededInt = 0;
neededSSE = 0;
llvm::Type *ResType = nullptr;
switch (Lo) {
case NoClass:
if (Hi == NoClass)
return ABIArgInfo::getIgnore();
// If the low part is just padding, it takes no register, leave ResType
// null.
assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
"Unknown missing lo part");
break;
// AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
// on the stack.
case Memory:
// AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
// COMPLEX_X87, it is passed in memory.
case X87:
case ComplexX87:
if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect)
++neededInt;
return getIndirectResult(Ty, freeIntRegs);
case SSEUp:
case X87Up:
llvm_unreachable("Invalid classification for lo word.");
// AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next
// available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8
// and %r9 is used.
case Integer:
++neededInt;
// Pick an 8-byte type based on the preferred type.
ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0);
// If we have a sign or zero extended integer, make sure to return Extend
// so that the parameter gets the right LLVM IR attributes.
if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
if (Ty->isIntegralOrEnumerationType() &&
isPromotableIntegerTypeForABI(Ty))
return ABIArgInfo::getExtend(Ty);
}
break;
// AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next
// available SSE register is used, the registers are taken in the
// order from %xmm0 to %xmm7.
case SSE: {
llvm::Type *IRType = CGT.ConvertType(Ty);
ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0);
++neededSSE;
break;
}
}
llvm::Type *HighPart = nullptr;
switch (Hi) {
// Memory was handled previously, ComplexX87 and X87 should
// never occur as hi classes, and X87Up must be preceded by X87,
// which is passed in memory.
case Memory:
case X87:
case ComplexX87:
llvm_unreachable("Invalid classification for hi word.");
case NoClass: break;
case Integer:
++neededInt;
// Pick an 8-byte type based on the preferred type.
HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
if (Lo == NoClass) // Pass HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
break;
// X87Up generally doesn't occur here (long double is passed in
// memory), except in situations involving unions.
case X87Up:
case SSE:
HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
if (Lo == NoClass) // Pass HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
++neededSSE;
break;
// AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
// eightbyte is passed in the upper half of the last used SSE
// register. This only happens when 128-bit vectors are passed.
case SSEUp:
assert(Lo == SSE && "Unexpected SSEUp classification");
ResType = GetByteVectorType(Ty);
break;
}
// If a high part was specified, merge it together with the low part. It is
// known to be passed in the high eightbyte of the result. We do this by
// forming a first-class struct aggregate with the high and low parts:
// {low, high}.
if (HighPart)
ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
return ABIArgInfo::getDirect(ResType);
}
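// Sketch of the register accounting above (illustrative): an argument of type
//   struct { int a; double b; };
// needs one GPR for the low eightbyte and one XMM register for the high one
// (neededInt = 1, neededSSE = 1) and is passed directly as {i32, double}.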
ABIArgInfo
X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
unsigned &NeededSSE,
unsigned &MaxVectorWidth) const {
auto RT = Ty->getAs<RecordType>();
assert(RT && "classifyRegCallStructType only valid with struct types");
if (RT->getDecl()->hasFlexibleArrayMember())
return getIndirectReturnResult(Ty);
// Sum up bases
if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
if (CXXRD->isDynamicClass()) {
NeededInt = NeededSSE = 0;
return getIndirectReturnResult(Ty);
}
for (const auto &I : CXXRD->bases())
if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE,
MaxVectorWidth)
.isIndirect()) {
NeededInt = NeededSSE = 0;
return getIndirectReturnResult(Ty);
}
}
// Sum up members
for (const auto *FD : RT->getDecl()->fields()) {
QualType MTy = FD->getType();
if (MTy->isRecordType() && !MTy->isUnionType()) {
if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE,
MaxVectorWidth)
.isIndirect()) {
NeededInt = NeededSSE = 0;
return getIndirectReturnResult(Ty);
}
} else {
unsigned LocalNeededInt, LocalNeededSSE;
if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE,
true, true)
.isIndirect()) {
NeededInt = NeededSSE = 0;
return getIndirectReturnResult(Ty);
}
if (const auto *AT = getContext().getAsConstantArrayType(MTy))
MTy = AT->getElementType();
if (const auto *VT = MTy->getAs<VectorType>())
if (getContext().getTypeSize(VT) > MaxVectorWidth)
MaxVectorWidth = getContext().getTypeSize(VT);
NeededInt += LocalNeededInt;
NeededSSE += LocalNeededSSE;
}
}
return ABIArgInfo::getDirect();
}
ABIArgInfo
X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
unsigned &NeededSSE,
unsigned &MaxVectorWidth) const {
NeededInt = 0;
NeededSSE = 0;
MaxVectorWidth = 0;
return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE,
MaxVectorWidth);
}
void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
const unsigned CallingConv = FI.getCallingConvention();
// It is possible to force the Win64 calling convention on any x86_64 target
// by using __attribute__((ms_abi)). In such a case, to correctly emit Win64
// compatible code, delegate this call to WinX86_64ABIInfo::computeInfo.
if (CallingConv == llvm::CallingConv::Win64) {
WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel);
Win64ABIInfo.computeInfo(FI);
return;
}
bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall;
// Keep track of the number of assigned registers.
unsigned FreeIntRegs = IsRegCall ? 11 : 6;
unsigned FreeSSERegs = IsRegCall ? 16 : 8;
unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0;
if (!::classifyReturnType(getCXXABI(), FI, *this)) {
if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
!FI.getReturnType()->getTypePtr()->isUnionType()) {
FI.getReturnInfo() = classifyRegCallStructType(
FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth);
if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
FreeIntRegs -= NeededInt;
FreeSSERegs -= NeededSSE;
} else {
FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
}
} else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() &&
getContext().getCanonicalType(FI.getReturnType()
->getAs<ComplexType>()
->getElementType()) ==
getContext().LongDoubleTy)
// Complex long double is passed in memory when the regcall
// calling convention is used.
FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
else
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
}
// If the return value is indirect, then the hidden argument is consuming one
// integer register.
if (FI.getReturnInfo().isIndirect())
--FreeIntRegs;
else if (NeededSSE && MaxVectorWidth > 0)
FI.setMaxVectorWidth(MaxVectorWidth);
// The chain argument effectively gives us another free register.
if (FI.isChainCall())
++FreeIntRegs;
unsigned NumRequiredArgs = FI.getNumRequiredArgs();
// AMD64-ABI 3.2.3p3: Once arguments are classified, the registers
// get assigned (in left-to-right order) for passing as follows...
unsigned ArgNo = 0;
for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
it != ie; ++it, ++ArgNo) {
bool IsNamedArg = ArgNo < NumRequiredArgs;
if (IsRegCall && it->type->isStructureOrClassType())
it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE,
MaxVectorWidth);
else
it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt,
NeededSSE, IsNamedArg);
// AMD64-ABI 3.2.3p3: If there are no registers available for any
// eightbyte of an argument, the whole argument is passed on the
// stack. If registers have already been assigned for some
// eightbytes of such an argument, the assignments get reverted.
if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
FreeIntRegs -= NeededInt;
FreeSSERegs -= NeededSSE;
if (MaxVectorWidth > FI.getMaxVectorWidth())
FI.setMaxVectorWidth(MaxVectorWidth);
} else {
it->info = getIndirectResult(it->type, FreeIntRegs);
}
}
}
static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
Address VAListAddr, QualType Ty) {
Address overflow_arg_area_p =
CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p");
llvm::Value *overflow_arg_area =
CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area");
// AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
// byte boundary if alignment needed by type exceeds 8 byte boundary.
// It isn't stated explicitly in the standard, but in practice we use
// alignment greater than 16 where necessary.
CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
if (Align > CharUnits::fromQuantity(8)) {
overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area,
Align);
}
// AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
llvm::Value *Res =
CGF.Builder.CreateBitCast(overflow_arg_area,
llvm::PointerType::getUnqual(LTy));
// AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
// l->overflow_arg_area + sizeof(type).
// AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
// an 8 byte boundary.
uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8;
llvm::Value *Offset =
llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7);
overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area,
Offset, "overflow_arg_area.next");
CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p);
// AMD64-ABI 3.5.7p5: Step 11. Return the fetched type.
return Address(Res, LTy, Align);
}
Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
// Assume that va_list type is correct; should be pointer to LLVM type:
// struct {
// i32 gp_offset;
// i32 fp_offset;
// i8* overflow_arg_area;
// i8* reg_save_area;
// };
unsigned neededInt, neededSSE;
Ty = getContext().getCanonicalType(Ty);
ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
/*isNamedArg*/false);
// AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
// in the registers. If not go to step 7.
if (!neededInt && !neededSSE)
return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
// AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
// general purpose registers needed to pass type and num_fp to hold
// the number of floating point registers needed.
// AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
// registers. In the case: l->gp_offset > 48 - num_gp * 8 or
// l->fp_offset > 304 - num_fp * 16 go to step 7.
//
// NOTE: 304 is a typo; there are (6 * 8 + 8 * 16) = 176 bytes of
// register save space.
llvm::Value *InRegs = nullptr;
Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid();
llvm::Value *gp_offset = nullptr, *fp_offset = nullptr;
if (neededInt) {
gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p");
gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
}
if (neededSSE) {
fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p");
fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
llvm::Value *FitsInFP =
llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp");
InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP;
}
llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock);
// Emit code to load the value if it was passed in registers.
CGF.EmitBlock(InRegBlock);
// AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
// an offset of l->gp_offset and/or l->fp_offset. This may require
// copying to a temporary location in case the parameter is passed
// in different register classes or requires an alignment greater
// than 8 for general purpose registers and 16 for XMM registers.
//
// FIXME: This really results in shameful code when we end up needing to
// collect arguments from different places; often what should result in a
// simple assembling of a structure from scattered addresses has many more
// loads than necessary. Can we clean this up?
llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area");
Address RegAddr = Address::invalid();
if (neededInt && neededSSE) {
// FIXME: Cleanup.
assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType());
Address Tmp = CGF.CreateMemTemp(Ty);
Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs");
llvm::Type *TyLo = ST->getElementType(0);
llvm::Type *TyHi = ST->getElementType(1);
assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) &&
"Unexpected ABI info for mixed regs");
llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo);
llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi);
llvm::Value *GPAddr =
CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset);
llvm::Value *FPAddr =
CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset);
llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;
// Copy the first element.
// FIXME: Our choice of alignment here and below is probably pessimistic.
llvm::Value *V = CGF.Builder.CreateAlignedLoad(
TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo),
CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyLo)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
// Copy the second element.
V = CGF.Builder.CreateAlignedLoad(
TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi),
CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyHi)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
} else if (neededInt) {
RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset),
CGF.Int8Ty, CharUnits::fromQuantity(8));
RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy);
// Copy to a temporary if necessary to ensure the appropriate alignment.
auto TInfo = getContext().getTypeInfoInChars(Ty);
uint64_t TySize = TInfo.Width.getQuantity();
CharUnits TyAlign = TInfo.Align;
// Copy into a temporary if the type is more aligned than the
// register save area.
if (TyAlign.getQuantity() > 8) {
Address Tmp = CGF.CreateMemTemp(Ty);
CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
RegAddr = Tmp;
}
} else if (neededSSE == 1) {
RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset),
CGF.Int8Ty, CharUnits::fromQuantity(16));
RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy);
} else {
assert(neededSSE == 2 && "Invalid number of needed registers!");
// SSE registers are spaced 16 bytes apart in the register save
// area, so we need to collect the two eightbytes together.
// The ABI isn't explicit about this, but it seems reasonable
// to assume that the slots are 16-byte aligned, since the stack is
// naturally 16-byte aligned and the prologue is expected to store
// all the SSE registers to the RSA.
Address RegAddrLo = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea,
fp_offset),
CGF.Int8Ty, CharUnits::fromQuantity(16));
Address RegAddrHi =
CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
CharUnits::fromQuantity(16));
llvm::Type *ST = AI.canHaveCoerceToType()
? AI.getCoerceToType()
: llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
llvm::Value *V;
Address Tmp = CGF.CreateMemTemp(Ty);
Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
RegAddrLo, ST->getStructElementType(0)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
RegAddrHi, ST->getStructElementType(1)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
}
// AMD64-ABI 3.5.7p5: Step 5. Set:
// l->gp_offset = l->gp_offset + num_gp * 8
// l->fp_offset = l->fp_offset + num_fp * 16.
if (neededInt) {
llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8);
CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset),
gp_offset_p);
}
if (neededSSE) {
llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16);
CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset),
fp_offset_p);
}
CGF.EmitBranch(ContBlock);
// Emit code to load the value if it was passed in memory.
CGF.EmitBlock(InMemBlock);
Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
// Return the appropriate result.
CGF.EmitBlock(ContBlock);
Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
"vaarg.addr");
return ResAddr;
}
Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
uint64_t Width = getContext().getTypeSize(Ty);
bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
CGF.getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(8),
/*allowHigherAlign*/ false);
}
ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
const Type *Base = nullptr;
uint64_t NumElts = 0;
if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
FreeSSERegs -= NumElts;
return getDirectX86Hva();
}
return current;
}
ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
bool IsReturnType, bool IsVectorCall,
bool IsRegCall) const {
if (Ty->isVoidType())
return ABIArgInfo::getIgnore();
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
TypeInfo Info = getContext().getTypeInfo(Ty);
uint64_t Width = Info.Width;
CharUnits Align = getContext().toCharUnitsFromBits(Info.Align);
const RecordType *RT = Ty->getAs<RecordType>();
if (RT) {
if (!IsReturnType) {
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
}
if (RT->getDecl()->hasFlexibleArrayMember())
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
const Type *Base = nullptr;
uint64_t NumElts = 0;
// vectorcall adds the concept of a homogeneous vector aggregate, similar to
// other targets.
if ((IsVectorCall || IsRegCall) &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
if (IsRegCall) {
if (FreeSSERegs >= NumElts) {
FreeSSERegs -= NumElts;
if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
return ABIArgInfo::getDirect();
return ABIArgInfo::getExpand();
}
return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
} else if (IsVectorCall) {
if (FreeSSERegs >= NumElts &&
(IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
FreeSSERegs -= NumElts;
return ABIArgInfo::getDirect();
} else if (IsReturnType) {
return ABIArgInfo::getExpand();
} else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
// HVAs are delayed and reclassified in the 2nd step.
return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
}
}
}
if (Ty->isMemberPointerType()) {
// If the member pointer is represented by an LLVM int or ptr, pass it
// directly.
llvm::Type *LLTy = CGT.ConvertType(Ty);
if (LLTy->isPointerTy() || LLTy->isIntegerTy())
return ABIArgInfo::getDirect();
}
if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
if (Width > 64 || !llvm::isPowerOf2_64(Width))
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
// Otherwise, coerce it to a small integer.
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
}
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
switch (BT->getKind()) {
case BuiltinType::Bool:
// The Bool type is always extended by the ABI; other builtin types are not
// extended.
return ABIArgInfo::getExtend(Ty);
case BuiltinType::LongDouble:
// Mingw64 GCC uses the old 80-bit extended precision floating point
// unit. It passes long doubles indirectly through memory.
if (IsMingw64) {
const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
if (LDF == &llvm::APFloat::x87DoubleExtended())
return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
}
break;
case BuiltinType::Int128:
case BuiltinType::UInt128:
// If it's a parameter type, the normal ABI rule is that arguments larger
// than 8 bytes are passed indirectly. GCC follows it. We follow it too,
// even though it isn't particularly efficient.
if (!IsReturnType)
return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
// Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
// Clang matches them for compatibility.
return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
llvm::Type::getInt64Ty(getVMContext()), 2));
default:
break;
}
}
if (Ty->isBitIntType()) {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
// However, non-power-of-two bit-precise integers will be passed as 1, 2, 4,
// or 8 bytes anyway as long as they fit in them, so we don't have to check
// for a power of 2.
if (Width <= 64)
return ABIArgInfo::getDirect();
return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
}
return ABIArgInfo::getDirect();
}
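// Summary of the Win64 rules above, with illustrative examples: a 16-byte
// struct or an __int128 parameter is passed by reference (it is not 1, 2, 4,
// or 8 bytes), an 8-byte struct is coerced to i64, _BitInt(128) is likewise
// passed indirectly, and _BitInt(24) is passed directly.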
void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
const unsigned CC = FI.getCallingConvention();
bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;
// If __attribute__((sysv_abi)) is in use, use the SysV argument
// classification rules.
if (CC == llvm::CallingConv::X86_64_SysV) {
X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
SysVABIInfo.computeInfo(FI);
return;
}
unsigned FreeSSERegs = 0;
if (IsVectorCall) {
// We can use up to 4 SSE return registers with vectorcall.
FreeSSERegs = 4;
} else if (IsRegCall) {
// RegCall gives us 16 SSE registers.
FreeSSERegs = 16;
}
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
IsVectorCall, IsRegCall);
if (IsVectorCall) {
// We can use up to 6 SSE register parameters with vectorcall.
FreeSSERegs = 6;
} else if (IsRegCall) {
// RegCall gives us 16 SSE registers, we can reuse the return registers.
FreeSSERegs = 16;
}
unsigned ArgNum = 0;
unsigned ZeroSSERegs = 0;
for (auto &I : FI.arguments()) {
// Vectorcall in x64 only permits the first 6 arguments to be passed in
// XMM/YMM registers. After the sixth argument, pretend no vector
// registers are left.
unsigned *MaybeFreeSSERegs =
(IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs;
I.info =
classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall);
++ArgNum;
}
if (IsVectorCall) {
// For vectorcall, assign aggregate HVAs to any free vector registers in a
// second pass.
for (auto &I : FI.arguments())
I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info);
}
}
Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
uint64_t Width = getContext().getTypeSize(Ty);
bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
CGF.getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(8),
/*allowHigherAlign*/ false);
}
static bool PPC_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address, bool Is64Bit,
bool IsAIX) {
// This is calculated from the LLVM and GCC tables and verified
// against gcc output. AFAIK all PPC ABIs use the same encoding.
CodeGen::CGBuilderTy &Builder = CGF.Builder;
llvm::IntegerType *i8 = CGF.Int8Ty;
llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16);
// 0-31: r0-31, the 4-byte or 8-byte general-purpose registers
AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 0, 31);
// 32-63: fp0-31, the 8-byte floating-point registers
AssignToArrayRange(Builder, Address, Eight8, 32, 63);
// 64-67 are various 4-byte or 8-byte special-purpose registers:
// 64: mq
// 65: lr
// 66: ctr
// 67: ap
AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 64, 67);
// 68-76 are various 4-byte special-purpose registers:
// 68-75 cr0-7
// 76: xer
AssignToArrayRange(Builder, Address, Four8, 68, 76);
// 77-108: v0-31, the 16-byte vector registers
AssignToArrayRange(Builder, Address, Sixteen8, 77, 108);
// 109: vrsave
// 110: vscr
AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 109, 110);
// AIX does not utilize the rest of the registers.
if (IsAIX)
return false;
// 111: spe_acc
// 112: spefscr
// 113: sfp
AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 111, 113);
if (!Is64Bit)
return false;
// TODO: Need to verify if these registers are used on 64 bit AIX with Power8
// or above CPU.
// 64-bit only registers:
// 114: tfhar
// 115: tfiar
// 116: texasr
AssignToArrayRange(Builder, Address, Eight8, 114, 116);
return false;
}
// AIX
namespace {
/// AIXABIInfo - The AIX XCOFF ABI information.
class AIXABIInfo : public ABIInfo {
const bool Is64Bit;
const unsigned PtrByteSize;
CharUnits getParamTypeAlignment(QualType Ty) const;
public:
AIXABIInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit)
: ABIInfo(CGT), Is64Bit(Is64Bit), PtrByteSize(Is64Bit ? 8 : 4) {}
bool isPromotableTypeForABI(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
};
class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
const bool Is64Bit;
public:
AIXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit)
: TargetCodeGenInfo(std::make_unique<AIXABIInfo>(CGT, Is64Bit)),
Is64Bit(Is64Bit) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
};
} // namespace
// Return true if the ABI requires Ty to be passed sign- or zero-
// extended to 32/64 bits.
bool AIXABIInfo::isPromotableTypeForABI(QualType Ty) const {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Promotable integer types are required to be promoted by the ABI.
if (Ty->isPromotableIntegerType())
return true;
if (!Is64Bit)
return false;
// In 64-bit mode, in addition to the usual promotable integer types, we also
// need to extend all 32-bit types, since the ABI requires promotion to 64
// bits.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
case BuiltinType::Int:
case BuiltinType::UInt:
return true;
default:
break;
}
return false;
}
ABIArgInfo AIXABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isAnyComplexType())
return ABIArgInfo::getDirect();
if (RetTy->isVectorType())
return ABIArgInfo::getDirect();
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (isAggregateTypeForABI(RetTy))
return getNaturalAlignIndirect(RetTy);
return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
if (Ty->isAnyComplexType())
return ABIArgInfo::getDirect();
if (Ty->isVectorType())
return ABIArgInfo::getDirect();
if (isAggregateTypeForABI(Ty)) {
// Records with non-trivial destructors/copy-constructors should not be
// passed by value.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
CharUnits CCAlign = getParamTypeAlignment(Ty);
CharUnits TyAlign = getContext().getTypeAlignInChars(Ty);
return ABIArgInfo::getIndirect(CCAlign, /*ByVal*/ true,
/*Realign*/ TyAlign > CCAlign);
}
return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const {
// Complex types are passed just like their elements.
if (const ComplexType *CTy = Ty->getAs<ComplexType>())
Ty = CTy->getElementType();
if (Ty->isVectorType())
return CharUnits::fromQuantity(16);
// If the structure contains a vector type, the alignment is 16.
if (isRecordWithSIMDVectorType(getContext(), Ty))
return CharUnits::fromQuantity(16);
return CharUnits::fromQuantity(PtrByteSize);
}
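// For example (illustrative): a vector parameter, or a struct containing a
// SIMD vector, is aligned to 16 bytes in the parameter area, while other
// types use the pointer size (4 bytes on 32-bit AIX, 8 bytes on 64-bit AIX).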
Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
auto TypeInfo = getContext().getTypeInfoInChars(Ty);
TypeInfo.Align = getParamTypeAlignment(Ty);
CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize);
// If we have a complex type and the base type is smaller than the register
// size, the ABI calls for the real and imaginary parts to be right-adjusted
// in separate words in 32-bit mode or doublewords in 64-bit mode. However,
// Clang expects us to produce a pointer to a structure with the two parts
// packed tightly. So generate loads of the real and imaginary parts relative
// to the va_list pointer, and store them to a temporary structure. We do the
// same as the PPC64ABI here.
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
CharUnits EltSize = TypeInfo.Width / 2;
if (EltSize < SlotSize)
return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy);
}
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo,
SlotSize, /*AllowHigher*/ true);
}
bool AIXTargetCodeGenInfo::initDwarfEHRegSizeTable(
CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const {
return PPC_initDwarfEHRegSizeTable(CGF, Address, Is64Bit, /*IsAIX*/ true);
}
// PowerPC-32
namespace {
/// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information.
class PPC32_SVR4_ABIInfo : public DefaultABIInfo {
bool IsSoftFloatABI;
bool IsRetSmallStructInRegABI;
CharUnits getParamTypeAlignment(QualType Ty) const;
public:
PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI,
bool RetSmallStructInRegABI)
: DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI),
IsRetSmallStructInRegABI(RetSmallStructInRegABI) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
};
class PPC32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI,
bool RetSmallStructInRegABI)
: TargetCodeGenInfo(std::make_unique<PPC32_SVR4_ABIInfo>(
CGT, SoftFloatABI, RetSmallStructInRegABI)) {}
static bool isStructReturnInRegABI(const llvm::Triple &Triple,
const CodeGenOptions &Opts);
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
};
}
CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
// Complex types are passed just like their elements.
if (const ComplexType *CTy = Ty->getAs<ComplexType>())
Ty = CTy->getElementType();
if (Ty->isVectorType())
return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16
: 4);
// For single-element float/vector structs, we consider the whole type
// to have the same alignment requirements as its single element.
const Type *AlignTy = nullptr;
if (const Type *EltType = isSingleElementStruct(Ty, getContext())) {
const BuiltinType *BT = EltType->getAs<BuiltinType>();
if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) ||
(BT && BT->isFloatingPoint()))
AlignTy = EltType;
}
if (AlignTy)
return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4);
return CharUnits::fromQuantity(4);
}
ABIArgInfo PPC32_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
uint64_t Size;
// -msvr4-struct-return puts small aggregates in GPR3 and GPR4.
if (isAggregateTypeForABI(RetTy) && IsRetSmallStructInRegABI &&
(Size = getContext().getTypeSize(RetTy)) <= 64) {
// System V ABI (1995), page 3-22, specified:
// > A structure or union whose size is less than or equal to 8 bytes
// > shall be returned in r3 and r4, as if it were first stored in the
// > 8-byte aligned memory area and then the low addressed word were
// > loaded into r3 and the high-addressed word into r4. Bits beyond
// > the last member of the structure or union are not defined.
//
// GCC for big-endian PPC32 inserts the pad before the first member,
// not "beyond the last member" of the struct. To stay compatible
// with GCC, we coerce the struct to an integer of the same size.
// LLVM will extend it and return i32 in r3, or i64 in r3:r4.
if (Size == 0)
return ABIArgInfo::getIgnore();
else {
llvm::Type *CoerceTy = llvm::Type::getIntNTy(getVMContext(), Size);
return ABIArgInfo::getDirect(CoerceTy);
}
}
return DefaultABIInfo::classifyReturnType(RetTy);
}
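// Illustrative example of the small-struct return rule above: with
// -msvr4-struct-return, a type such as
//   struct { char a; char b; };
// (16 bits) is coerced to i16 and comes back in r3, while an 8-byte struct is
// coerced to i64 and returned in the r3:r4 pair.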
// TODO: this implementation is now likely redundant with
// DefaultABIInfo::EmitVAArg.
Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
QualType Ty) const {
if (getTarget().getTriple().isOSDarwin()) {
auto TI = getContext().getTypeInfoInChars(Ty);
TI.Align = getParamTypeAlignment(Ty);
CharUnits SlotSize = CharUnits::fromQuantity(4);
return emitVoidPtrVAArg(CGF, VAList, Ty,
classifyArgumentType(Ty).isIndirect(), TI, SlotSize,
/*AllowHigherAlign=*/true);
}
const unsigned OverflowLimit = 8;
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
// TODO: Implement this. For now ignore.
(void)CTy;
return Address::invalid(); // FIXME?
}
// struct __va_list_tag {
// unsigned char gpr;
// unsigned char fpr;
// unsigned short reserved;
// void *overflow_arg_area;
// void *reg_save_area;
// };
bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64;
bool isInt = !Ty->isFloatingType();
bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64;
// All aggregates are passed indirectly? That doesn't seem consistent
// with the argument-lowering code.
bool isIndirect = isAggregateTypeForABI(Ty);
CGBuilderTy &Builder = CGF.Builder;
// The calling convention either uses 1-2 GPRs or 1 FPR.
Address NumRegsAddr = Address::invalid();
if (isInt || IsSoftFloatABI) {
NumRegsAddr = Builder.CreateStructGEP(VAList, 0, "gpr");
} else {
NumRegsAddr = Builder.CreateStructGEP(VAList, 1, "fpr");
}
llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs");
// "Align" the register count when TY is i64.
if (isI64 || (isF64 && IsSoftFloatABI)) {
NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1));
NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t) ~1U));
}
llvm::Value *CC =
Builder.CreateICmpULT(NumRegs, Builder.getInt8(OverflowLimit), "cond");
llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs");
llvm::BasicBlock *UsingOverflow = CGF.createBasicBlock("using_overflow");
llvm::BasicBlock *Cont = CGF.createBasicBlock("cont");
Builder.CreateCondBr(CC, UsingRegs, UsingOverflow);
llvm::Type *DirectTy = CGF.ConvertType(Ty), *ElementTy = DirectTy;
if (isIndirect) DirectTy = DirectTy->getPointerTo(0);
// Case 1: consume registers.
Address RegAddr = Address::invalid();
{
CGF.EmitBlock(UsingRegs);
Address RegSaveAreaPtr = Builder.CreateStructGEP(VAList, 4);
RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr), CGF.Int8Ty,
CharUnits::fromQuantity(8));
assert(RegAddr.getElementType() == CGF.Int8Ty);
// Floating-point registers start after the general-purpose registers.
if (!(isInt || IsSoftFloatABI)) {
RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr,
CharUnits::fromQuantity(32));
}
// Get the address of the saved value by scaling the number of
// registers we've used by the size of each register.
CharUnits RegSize = CharUnits::fromQuantity((isInt || IsSoftFloatABI) ? 4 : 8);
llvm::Value *RegOffset =
Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity()));
RegAddr = Address(
Builder.CreateInBoundsGEP(CGF.Int8Ty, RegAddr.getPointer(), RegOffset),
CGF.Int8Ty, RegAddr.getAlignment().alignmentOfArrayElement(RegSize));
RegAddr = Builder.CreateElementBitCast(RegAddr, DirectTy);
// Increase the used-register count.
NumRegs =
Builder.CreateAdd(NumRegs,
Builder.getInt8((isI64 || (isF64 && IsSoftFloatABI)) ? 2 : 1));
Builder.CreateStore(NumRegs, NumRegsAddr);
CGF.EmitBranch(Cont);
}
// Case 2: consume space in the overflow area.
Address MemAddr = Address::invalid();
{
CGF.EmitBlock(UsingOverflow);
Builder.CreateStore(Builder.getInt8(OverflowLimit), NumRegsAddr);
// Everything in the overflow area is rounded up to a size of at least 4.
CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4);
CharUnits Size;
if (!isIndirect) {
auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty);
Size = TypeInfo.Width.alignTo(OverflowAreaAlign);
} else {
Size = CGF.getPointerSize();
}
Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3);
Address OverflowArea =
Address(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), CGF.Int8Ty,
OverflowAreaAlign);
// Round up the address of the argument to its alignment.
CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
if (Align > OverflowAreaAlign) {
llvm::Value *Ptr = OverflowArea.getPointer();
OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align),
OverflowArea.getElementType(), Align);
}
MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy);
// Increase the overflow area.
OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea, Size);
Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr);
CGF.EmitBranch(Cont);
}
CGF.EmitBlock(Cont);
// Merge the cases with a phi.
Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr, UsingOverflow,
"vaarg.addr");
// Load the pointer if the argument was passed indirectly.
if (isIndirect) {
Result = Address(Builder.CreateLoad(Result, "aggr"), ElementTy,
getContext().getTypeAlignInChars(Ty));
}
return Result;
}
bool PPC32TargetCodeGenInfo::isStructReturnInRegABI(
const llvm::Triple &Triple, const CodeGenOptions &Opts) {
assert(Triple.isPPC32());
switch (Opts.getStructReturnConvention()) {
case CodeGenOptions::SRCK_Default:
break;
case CodeGenOptions::SRCK_OnStack: // -maix-struct-return
return false;
case CodeGenOptions::SRCK_InRegs: // -msvr4-struct-return
return true;
}
if (Triple.isOSBinFormatELF() && !Triple.isOSLinux())
return true;
return false;
}
bool
PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ false,
/*IsAIX*/ false);
}
// PowerPC-64
namespace {
/// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information.
class PPC64_SVR4_ABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
ELFv1 = 0,
ELFv2
};
private:
static const unsigned GPRBits = 64;
ABIKind Kind;
bool IsSoftFloatABI;
public:
PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind,
bool SoftFloatABI)
: SwiftABIInfo(CGT), Kind(Kind), IsSoftFloatABI(SoftFloatABI) {}
bool isPromotableTypeForABI(QualType Ty) const;
CharUnits getParamTypeAlignment(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t Members) const override;
// TODO: We can add more logic to computeInfo to improve performance.
// Example: For aggregate arguments that fit in a register, we could
// use getDirectInReg (as is done below for structs containing a single
// floating-point value) to avoid pushing them to memory on function
// entry. This would require changing the logic in PPCISelLowering
// when lowering the parameters in the caller and args in the callee.
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments()) {
// We rely on the default argument classification for the most part.
// One exception: An aggregate containing a single floating-point
// or vector item must be passed in a register if one is available.
const Type *T = isSingleElementStruct(I.type, getContext());
if (T) {
const BuiltinType *BT = T->getAs<BuiltinType>();
if ((T->isVectorType() && getContext().getTypeSize(T) == 128) ||
(BT && BT->isFloatingPoint())) {
QualType QT(T, 0);
I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT));
continue;
}
}
I.info = classifyArgumentType(I.type);
}
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return false;
}
};
class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
public:
PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT,
PPC64_SVR4_ABIInfo::ABIKind Kind,
bool SoftFloatABI)
: TargetCodeGenInfo(
std::make_unique<PPC64_SVR4_ABIInfo>(CGT, Kind, SoftFloatABI)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
};
class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo {
public:
PPC64TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
};
}
// Return true if the ABI requires Ty to be passed sign- or zero-
// extended to 64 bits.
bool
PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Promotable integer types are required to be promoted by the ABI.
if (isPromotableIntegerTypeForABI(Ty))
return true;
// In addition to the usual promotable integer types, we also need to
// extend all 32-bit types, since the ABI requires promotion to 64 bits.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
case BuiltinType::Int:
case BuiltinType::UInt:
return true;
default:
break;
}
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() < 64)
return true;
return false;
}
/// getParamTypeAlignment - Determine the alignment a type requires in the
/// parameter area. Always returns at least 8.
CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
// Complex types are passed just like their elements.
if (const ComplexType *CTy = Ty->getAs<ComplexType>())
Ty = CTy->getElementType();
auto FloatUsesVector = [this](QualType Ty){
return Ty->isRealFloatingType() && &getContext().getFloatTypeSemantics(
Ty) == &llvm::APFloat::IEEEquad();
};
// Only vector types of size 16 bytes need alignment (larger types are
// passed via reference, smaller types are not aligned).
if (Ty->isVectorType()) {
return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8);
} else if (FloatUsesVector(Ty)) {
// According to ABI document section 'Optional Save Areas': If extended
// precision floating-point values in IEEE BINARY 128 QUADRUPLE PRECISION
// format are supported, map them to a single quadword, quadword aligned.
return CharUnits::fromQuantity(16);
}
// For single-element float/vector structs, we consider the whole type
// to have the same alignment requirements as its single element.
const Type *AlignAsType = nullptr;
const Type *EltType = isSingleElementStruct(Ty, getContext());
if (EltType) {
const BuiltinType *BT = EltType->getAs<BuiltinType>();
if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) ||
(BT && BT->isFloatingPoint()))
AlignAsType = EltType;
}
// Likewise for ELFv2 homogeneous aggregates.
const Type *Base = nullptr;
uint64_t Members = 0;
if (!AlignAsType && Kind == ELFv2 &&
isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members))
AlignAsType = Base;
// For these special-case aggregates, only vector base types need alignment.
if (AlignAsType) {
bool UsesVector = AlignAsType->isVectorType() ||
FloatUsesVector(QualType(AlignAsType, 0));
return CharUnits::fromQuantity(UsesVector ? 16 : 8);
}
// Otherwise, we only need alignment for any aggregate type that
// has an alignment requirement of >= 16 bytes.
if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) {
return CharUnits::fromQuantity(16);
}
return CharUnits::fromQuantity(8);
}
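// Illustrative examples (assuming Altivec and IEEE-quad support): a 128-bit
// 'vector int' or an IEEE-quad '__float128' parameter gets 16-byte alignment
// in the parameter save area, as does an aggregate whose own alignment is
// 16 bytes or more; a plain 'double' falls through to the default 8-byte
// doubleword alignment.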
/// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
/// aggregate. Base is set to the base element type, and Members is set
/// to the number of base elements.
bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
uint64_t &Members) const {
if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
uint64_t NElements = AT->getSize().getZExtValue();
if (NElements == 0)
return false;
if (!isHomogeneousAggregate(AT->getElementType(), Base, Members))
return false;
Members *= NElements;
} else if (const RecordType *RT = Ty->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return false;
Members = 0;
// If this is a C++ record, check the properties of the record such as
// bases and ABI specific restrictions
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
if (!getCXXABI().isPermittedToBeHomogeneousAggregate(CXXRD))
return false;
for (const auto &I : CXXRD->bases()) {
// Ignore empty records.
if (isEmptyRecord(getContext(), I.getType(), true))
continue;
uint64_t FldMembers;
if (!isHomogeneousAggregate(I.getType(), Base, FldMembers))
return false;
Members += FldMembers;
}
}
for (const auto *FD : RD->fields()) {
// Ignore (non-zero arrays of) empty records.
QualType FT = FD->getType();
while (const ConstantArrayType *AT =
getContext().getAsConstantArrayType(FT)) {
if (AT->getSize().getZExtValue() == 0)
return false;
FT = AT->getElementType();
}
if (isEmptyRecord(getContext(), FT, true))
continue;
if (isZeroLengthBitfieldPermittedInHomogeneousAggregate() &&
FD->isZeroLengthBitField(getContext()))
continue;
uint64_t FldMembers;
if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers))
return false;
Members = (RD->isUnion() ?
std::max(Members, FldMembers) : Members + FldMembers);
}
if (!Base)
return false;
// Ensure there is no padding.
if (getContext().getTypeSize(Base) * Members !=
getContext().getTypeSize(Ty))
return false;
} else {
Members = 1;
if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
Members = 2;
Ty = CT->getElementType();
}
// Most ABIs only support float, double, and some vector type widths.
if (!isHomogeneousAggregateBaseType(Ty))
return false;
// The base type must be the same for all members. Types that
// agree in both total size and mode (float vs. vector) are
// treated as being equivalent here.
const Type *TyPtr = Ty.getTypePtr();
if (!Base) {
Base = TyPtr;
// If the vector has a non-power-of-2 element count, its total size has
// already been rounded up to a power of 2, so explicitly widen the base
// vector type to match that size.
if (const VectorType *VT = Base->getAs<VectorType>()) {
QualType EltTy = VT->getElementType();
unsigned NumElements =
getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy);
Base = getContext()
.getVectorType(EltTy, NumElements, VT->getVectorKind())
.getTypePtr();
}
}
if (Base->isVectorType() != TyPtr->isVectorType() ||
getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
return false;
}
return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);
}
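// Illustrative examples: 'struct { float a, b; float c[2]; }' is homogeneous
// with Base = float and Members = 4 (arrays multiply the member count into
// the total), while 'struct { float f; double d; }' is not, since the base
// types differ in size. A padding hole also disqualifies a candidate,
// because the size check above requires Members * sizeof(Base) == sizeof(Ty).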
bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
// Homogeneous aggregates for ELFv2 must have base types of float,
// double, long double, or 128-bit vectors.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::Float ||
BT->getKind() == BuiltinType::Double ||
BT->getKind() == BuiltinType::LongDouble ||
BT->getKind() == BuiltinType::Ibm128 ||
(getContext().getTargetInfo().hasFloat128Type() &&
(BT->getKind() == BuiltinType::Float128))) {
if (IsSoftFloatABI)
return false;
return true;
}
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
if (getContext().getTypeSize(VT) == 128)
return true;
}
return false;
}
bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough(
const Type *Base, uint64_t Members) const {
// Vector and fp128 types require one register, other floating point types
// require one or two registers depending on their size.
uint32_t NumRegs =
((getContext().getTargetInfo().hasFloat128Type() &&
Base->isFloat128Type()) ||
Base->isVectorType()) ? 1
: (getContext().getTypeSize(Base) + 63) / 64;
// Homogeneous Aggregates may occupy at most 8 registers.
return Members * NumRegs <= 8;
}
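// Illustrative example: up to eight 'float' or 'double' members qualify (one
// register each), but a 128-bit IBM 'long double' base type takes two
// registers, so at most four such members fit; an IEEE-quad '__float128', by
// contrast, counts as a single register, like a vector.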
ABIArgInfo
PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
if (Ty->isAnyComplexType())
return ABIArgInfo::getDirect();
// Non-Altivec vector types are passed in GPRs (smaller than 16 bytes)
// or via reference (larger than 16 bytes).
if (Ty->isVectorType()) {
uint64_t Size = getContext().getTypeSize(Ty);
if (Size > 128)
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
else if (Size < 128) {
llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
return ABIArgInfo::getDirect(CoerceTy);
}
}
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 128)
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
if (isAggregateTypeForABI(Ty)) {
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity();
uint64_t TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();
// ELFv2 homogeneous aggregates are passed as array types.
const Type *Base = nullptr;
uint64_t Members = 0;
if (Kind == ELFv2 &&
isHomogeneousAggregate(Ty, Base, Members)) {
llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
return ABIArgInfo::getDirect(CoerceTy);
}
// If an aggregate may end up fully in registers, we do not
// use the ByVal method, but pass the aggregate as an array.
// This is usually beneficial since we avoid forcing the
// back-end to store the argument to memory.
uint64_t Bits = getContext().getTypeSize(Ty);
if (Bits > 0 && Bits <= 8 * GPRBits) {
llvm::Type *CoerceTy;
// Types up to 8 bytes are passed as integer type (which will be
// properly aligned in the argument save area doubleword).
if (Bits <= GPRBits)
CoerceTy =
llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
// Larger types are passed as arrays, with the base type selected
// according to the required alignment in the save area.
else {
uint64_t RegBits = ABIAlign * 8;
uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits;
llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits);
CoerceTy = llvm::ArrayType::get(RegTy, NumRegs);
}
return ABIArgInfo::getDirect(CoerceTy);
}
// All other aggregates are passed ByVal.
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
/*ByVal=*/true,
/*Realign=*/TyAlign > ABIAlign);
}
return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
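// Illustrative examples of the coercions above: a 'struct { int a, b; }'
// (64 bits) is passed directly as an i64; a 24-byte struct of mixed integer
// fields with 8-byte ABI alignment becomes [3 x i64]; and under ELFv2 a
// homogeneous 'struct { double x, y; }' becomes [2 x double].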
ABIArgInfo
PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (RetTy->isAnyComplexType())
return ABIArgInfo::getDirect();
// Non-Altivec vector types are returned in GPRs (smaller than 16 bytes)
// or via reference (larger than 16 bytes).
if (RetTy->isVectorType()) {
uint64_t Size = getContext().getTypeSize(RetTy);
if (Size > 128)
return getNaturalAlignIndirect(RetTy);
else if (Size < 128) {
llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
return ABIArgInfo::getDirect(CoerceTy);
}
}
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() > 128)
return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
if (isAggregateTypeForABI(RetTy)) {
// ELFv2 homogeneous aggregates are returned as array types.
const Type *Base = nullptr;
uint64_t Members = 0;
if (Kind == ELFv2 &&
isHomogeneousAggregate(RetTy, Base, Members)) {
llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
return ABIArgInfo::getDirect(CoerceTy);
}
// ELFv2 small aggregates are returned in up to two registers.
uint64_t Bits = getContext().getTypeSize(RetTy);
if (Kind == ELFv2 && Bits <= 2 * GPRBits) {
if (Bits == 0)
return ABIArgInfo::getIgnore();
llvm::Type *CoerceTy;
if (Bits > GPRBits) {
CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy);
} else
CoerceTy =
llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
return ABIArgInfo::getDirect(CoerceTy);
}
// All other aggregates are returned indirectly.
return getNaturalAlignIndirect(RetTy);
}
return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
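// Illustrative examples (assuming ELFv2): 'struct { int a, b, c; }' (96 bits)
// comes back in two GPRs as '{ i64, i64 }', a 'struct { char c; }' comes back
// as an i8, and any aggregate wider than two doublewords that is not a
// homogeneous aggregate is returned through an sret pointer.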
// Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine.
Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
auto TypeInfo = getContext().getTypeInfoInChars(Ty);
TypeInfo.Align = getParamTypeAlignment(Ty);
CharUnits SlotSize = CharUnits::fromQuantity(8);
// If we have a complex type and the base type is smaller than 8 bytes,
// the ABI calls for the real and imaginary parts to be right-adjusted
// in separate doublewords. However, Clang expects us to produce a
// pointer to a structure with the two parts packed tightly. So generate
// loads of the real and imaginary parts relative to the va_list pointer,
// and store them to a temporary structure.
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
CharUnits EltSize = TypeInfo.Width / 2;
if (EltSize < SlotSize)
return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy);
}
// Otherwise, just use the general rule.
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
TypeInfo, SlotSize, /*AllowHigher*/ true);
}
bool
PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable(
CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true,
/*IsAIX*/ false);
}
bool
PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true,
/*IsAIX*/ false);
}
//===----------------------------------------------------------------------===//
// AArch64 ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class AArch64ABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
AAPCS = 0,
DarwinPCS,
Win64
};
private:
ABIKind Kind;
public:
AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind)
: SwiftABIInfo(CGT), Kind(Kind) {}
private:
ABIKind getABIKind() const { return Kind; }
bool isDarwinPCS() const { return Kind == DarwinPCS; }
ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const;
ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic,
unsigned CallingConvention) const;
ABIArgInfo coerceIllegalVector(QualType Ty) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t Members) const override;
bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
bool isIllegalVectorType(QualType Ty) const;
void computeInfo(CGFunctionInfo &FI) const override {
if (!::classifyReturnType(getCXXABI(), FI, *this))
FI.getReturnInfo() =
classifyReturnType(FI.getReturnType(), FI.isVariadic());
for (auto &it : FI.arguments())
it.info = classifyArgumentType(it.type, FI.isVariadic(),
FI.getCallingConvention());
}
Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const;
Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override {
llvm::Type *BaseTy = CGF.ConvertType(Ty);
if (isa<llvm::ScalableVectorType>(BaseTy))
llvm::report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
: isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
: EmitAAPCSVAArg(VAListAddr, Ty, CGF);
}
Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return true;
}
bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
unsigned elts) const override;
bool allowBFloatArgsAndRet() const override {
return getTarget().hasBFloat16Type();
}
};
class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind)
: TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 31;
}
bool doesReturnSlotInterfereWithArgs() const override { return false; }
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
const auto *TA = FD->getAttr<TargetAttr>();
if (TA == nullptr)
return;
ParsedTargetAttr Attr = TA->parse();
if (Attr.BranchProtection.empty())
return;
TargetInfo::BranchProtectionInfo BPI;
StringRef Error;
(void)CGM.getTarget().validateBranchProtection(
Attr.BranchProtection, Attr.Architecture, BPI, Error);
assert(Error.empty());
auto *Fn = cast<llvm::Function>(GV);
static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"};
Fn->addFnAttr("sign-return-address", SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]);
if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) {
Fn->addFnAttr("sign-return-address-key",
BPI.SignKey == LangOptions::SignReturnAddressKeyKind::AKey
? "a_key"
: "b_key");
}
Fn->addFnAttr("branch-target-enforcement",
BPI.BranchTargetEnforcement ? "true" : "false");
}
bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
llvm::Type *Ty) const override {
if (CGF.getTarget().hasFeature("ls64")) {
auto *ST = dyn_cast<llvm::StructType>(Ty);
if (ST && ST->getNumElements() == 1) {
auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
if (AT && AT->getNumElements() == 8 &&
AT->getElementType()->isIntegerTy(64))
return true;
}
}
return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
}
};
class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
public:
WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K)
: AArch64TargetCodeGenInfo(CGT, K) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
}
void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
llvm::SmallString<32> &Opt) const override {
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
if (GV->isDeclaration())
return;
addStackProbeTargetAttributes(D, GV, CGM);
}
}
ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const {
assert(Ty->isVectorType() && "expected vector type!");
const auto *VT = Ty->castAs<VectorType>();
if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) {
assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
BuiltinType::UChar &&
"unexpected builtin type for SVE predicate!");
return ABIArgInfo::getDirect(llvm::ScalableVectorType::get(
llvm::Type::getInt1Ty(getVMContext()), 16));
}
if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector) {
assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
const auto *BT = VT->getElementType()->castAs<BuiltinType>();
llvm::ScalableVectorType *ResType = nullptr;
switch (BT->getKind()) {
default:
llvm_unreachable("unexpected builtin type for SVE vector!");
case BuiltinType::SChar:
case BuiltinType::UChar:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getInt8Ty(getVMContext()), 16);
break;
case BuiltinType::Short:
case BuiltinType::UShort:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getInt16Ty(getVMContext()), 8);
break;
case BuiltinType::Int:
case BuiltinType::UInt:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getInt32Ty(getVMContext()), 4);
break;
case BuiltinType::Long:
case BuiltinType::ULong:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getInt64Ty(getVMContext()), 2);
break;
case BuiltinType::Half:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getHalfTy(getVMContext()), 8);
break;
case BuiltinType::Float:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getFloatTy(getVMContext()), 4);
break;
case BuiltinType::Double:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getDoubleTy(getVMContext()), 2);
break;
case BuiltinType::BFloat16:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getBFloatTy(getVMContext()), 8);
break;
}
return ABIArgInfo::getDirect(ResType);
}
uint64_t Size = getContext().getTypeSize(Ty);
// Android promotes <2 x i8> to i16, not i32
if (isAndroid() && (Size <= 16)) {
llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
return ABIArgInfo::getDirect(ResType);
}
if (Size <= 32) {
llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(ResType);
}
if (Size == 64) {
auto *ResType =
llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
return ABIArgInfo::getDirect(ResType);
}
if (Size == 128) {
auto *ResType =
llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
return ABIArgInfo::getDirect(ResType);
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
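// Illustrative examples: a '<2 x i8>' vector (16 bits) is coerced to i32 (or
// to i16 on Android, per the special case above); an illegal vector whose
// rounded size is 64 or 128 bits, such as a three-element vector, is rebuilt
// as '<2 x i32>' or '<4 x i32>'; anything larger is passed indirectly.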
ABIArgInfo
AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
unsigned CallingConvention) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
// Handle illegal vector types here.
if (isIllegalVectorType(Ty))
return coerceIllegalVector(Ty);
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 128)
return getNaturalAlignIndirect(Ty);
return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always indirect.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
CGCXXABI::RAA_DirectInMemory);
}
// Empty records are always ignored on Darwin, but actually passed in C++ mode
// elsewhere for GNU compatibility.
uint64_t Size = getContext().getTypeSize(Ty);
bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
if (IsEmpty || Size == 0) {
if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
return ABIArgInfo::getIgnore();
// GNU C++ mode (the plain-C case returned above): the only argument that
// gets ignored is an empty one with size 0.
if (IsEmpty && Size == 0)
return ABIArgInfo::getIgnore();
return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
}
// Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
const Type *Base = nullptr;
uint64_t Members = 0;
bool IsWin64 = Kind == Win64 || CallingConvention == llvm::CallingConv::Win64;
bool IsWinVariadic = IsWin64 && IsVariadic;
// In variadic functions on Windows, all composite types are treated alike,
// no special handling of HFAs/HVAs.
if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
if (Kind != AArch64ABIInfo::AAPCS)
return ABIArgInfo::getDirect(
llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
// For alignment-adjusted HFAs, cap the argument alignment to 16; leave it
// at the default otherwise.
unsigned Align =
getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
Align = (Align > BaseAlign && Align >= 16) ? 16 : 0;
return ABIArgInfo::getDirect(
llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
nullptr, true, Align);
}
// Aggregates <= 16 bytes are passed directly in registers or on the stack.
if (Size <= 128) {
// On RenderScript, coerce aggregates <= 16 bytes to an integer array of
// the same size and alignment.
if (getTarget().isRenderScriptTarget()) {
return coerceToIntArray(Ty, getContext(), getVMContext());
}
unsigned Alignment;
if (Kind == AArch64ABIInfo::AAPCS) {
Alignment = getContext().getTypeUnadjustedAlign(Ty);
Alignment = Alignment < 128 ? 64 : 128;
} else {
Alignment = std::max(getContext().getTypeAlign(Ty),
(unsigned)getTarget().getPointerWidth(0));
}
Size = llvm::alignTo(Size, Alignment);
// We use a pair of i64 for a 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
return ABIArgInfo::getDirect(
Size == Alignment ? BaseTy
: llvm::ArrayType::get(BaseTy, Size / Alignment));
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
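// Illustrative examples under AAPCS: 'struct { int a, b, c; }' (12 bytes,
// 4-byte aligned) is rounded up and passed as [2 x i64]; a 16-byte-aligned
// 'struct { __int128 x; }' is passed as a single i128; and a 20-byte struct
// that is not a homogeneous aggregate exceeds the 16-byte limit and is
// passed indirectly.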
ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
bool IsVariadic) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (const auto *VT = RetTy->getAs<VectorType>()) {
if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector ||
VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
return coerceIllegalVector(RetTy);
}
// Large vector types should be returned via memory.
if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
return getNaturalAlignIndirect(RetTy);
if (!isAggregateTypeForABI(RetTy)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() > 128)
return getNaturalAlignIndirect(RetTy);
return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS()
? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
uint64_t Size = getContext().getTypeSize(RetTy);
if (isEmptyRecord(getContext(), RetTy, true) || Size == 0)
return ABIArgInfo::getIgnore();
const Type *Base = nullptr;
uint64_t Members = 0;
if (isHomogeneousAggregate(RetTy, Base, Members) &&
!(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
IsVariadic))
// Homogeneous Floating-point Aggregates (HFAs) are returned directly.
return ABIArgInfo::getDirect();
// Aggregates <= 16 bytes are returned directly in registers or on the stack.
if (Size <= 128) {
// On RenderScript, coerce aggregates <= 16 bytes to an integer array of
// the same size and alignment.
if (getTarget().isRenderScriptTarget()) {
return coerceToIntArray(RetTy, getContext(), getVMContext());
}
if (Size <= 64 && getDataLayout().isLittleEndian()) {
// Composite types are returned in the lower bits of a 64-bit register for
// LE, and in the higher bits for BE. However, integer types are always
// returned in the lower bits for both LE and BE, and they are not rounded
// up to 64 bits. We can skip rounding up of composite types for LE, but not
// for BE, otherwise composite types would be indistinguishable from integer
// types.
return ABIArgInfo::getDirect(
llvm::IntegerType::get(getVMContext(), Size));
}
unsigned Alignment = getContext().getTypeAlign(RetTy);
Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
// We use a pair of i64 for a 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
if (Alignment < 128 && Size == 128) {
llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
}
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
}
return getNaturalAlignIndirect(RetTy);
}
/// isIllegalVectorType - check whether the vector type is legal for AArch64.
bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
if (const VectorType *VT = Ty->getAs<VectorType>()) {
// Check whether VT is a fixed-length SVE vector. These types are
// represented as scalable vectors in function args/return and must be
// coerced from fixed vectors.
if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector ||
VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
return true;
// Check whether VT is legal.
unsigned NumElements = VT->getNumElements();
uint64_t Size = getContext().getTypeSize(VT);
// NumElements should be a power of 2.
if (!llvm::isPowerOf2_32(NumElements))
return true;
// arm64_32 has to be compatible with the ARM logic here, which allows huge
// vectors for some reason.
llvm::Triple Triple = getTarget().getTriple();
if (Triple.getArch() == llvm::Triple::aarch64_32 &&
Triple.isOSBinFormatMachO())
return Size <= 32;
return Size != 64 && (Size != 128 || NumElements == 1);
}
return false;
}
bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize,
llvm::Type *eltTy,
unsigned elts) const {
if (!llvm::isPowerOf2_32(elts))
return false;
if (totalSize.getQuantity() != 8 &&
(totalSize.getQuantity() != 16 || elts == 1))
return false;
return true;
}
bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
// Homogeneous aggregates for AAPCS64 must have base types of a floating
// point type or a short-vector type. This is the same as the 32-bit ABI,
// but with the difference that any floating-point type is allowed,
// including __fp16.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->isFloatingPoint())
return true;
} else if (const VectorType *VT = Ty->getAs<VectorType>()) {
unsigned VecSize = getContext().getTypeSize(VT);
if (VecSize == 64 || VecSize == 128)
return true;
}
return false;
}
bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const {
return Members <= 4;
}
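// Illustrative example: 'struct { float x, y, z, w; }' is an HFA and travels
// in four consecutive FP/SIMD registers when they are available; adding a
// fifth float pushes Members past 4, so the struct falls back to the
// ordinary composite rules (and, at 20 bytes, is passed indirectly).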
bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
const {
// AAPCS64 says that the rule for whether something is a homogeneous
// aggregate is applied to the output of the data layout decision. So
// anything that doesn't affect the data layout also does not affect
// homogeneity. In particular, zero-length bitfields don't stop a struct
// being homogeneous.
return true;
}
Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const {
ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
CGF.CurFnInfo->getCallingConvention());
bool IsIndirect = AI.isIndirect();
llvm::Type *BaseTy = CGF.ConvertType(Ty);
if (IsIndirect)
BaseTy = llvm::PointerType::getUnqual(BaseTy);
else if (AI.getCoerceToType())
BaseTy = AI.getCoerceToType();
unsigned NumRegs = 1;
if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
BaseTy = ArrTy->getElementType();
NumRegs = ArrTy->getNumElements();
}
bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
// The AArch64 va_list type and handling is specified in the Procedure Call
// Standard, section B.4:
//
// struct {
// void *__stack;
// void *__gr_top;
// void *__vr_top;
// int __gr_offs;
// int __vr_offs;
// };
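// For illustration (per AAPCS64 B.4; the exact values depend on how many
// argument registers the prologue saves): va_start typically initializes
// __gr_offs to -64 (8 GPRs x 8 bytes) and __vr_offs to -128 (8 SIMD
// registers x 16 bytes). The offsets count up towards zero, and a
// non-negative value means that register class is exhausted, so the
// argument is read from __stack instead.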
llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
CharUnits TySize = getContext().getTypeSizeInChars(Ty);
CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty);
Address reg_offs_p = Address::invalid();
llvm::Value *reg_offs = nullptr;
int reg_top_index;
int RegSize = IsIndirect ? 8 : TySize.getQuantity();
if (!IsFPR) {
// 3 is the field number of __gr_offs
reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
reg_top_index = 1; // field number for __gr_top
RegSize = llvm::alignTo(RegSize, 8);
} else {
// 4 is the field number of __vr_offs.
reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
reg_top_index = 2; // field number for __vr_top
RegSize = 16 * NumRegs;
}
//=======================================
// Find out where argument was passed
//=======================================
// If reg_offs >= 0 we're already using the stack for this type of
// argument. We don't want to keep updating reg_offs (in case it overflows,
// though anyone passing 2GB of arguments, each at most 16 bytes, deserves
// whatever they get).
llvm::Value *UsingStack = nullptr;
UsingStack = CGF.Builder.CreateICmpSGE(
reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0));
CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
// Otherwise, at least some kind of argument could go in these registers, the
// question is whether this particular type is too big.
CGF.EmitBlock(MaybeRegBlock);
// Integer arguments may need their register alignment corrected (for example
// a "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case
// we align __gr_offs before calculating the potential address.
if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
int Align = TyAlign.getQuantity();
reg_offs = CGF.Builder.CreateAdd(
reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
"align_regoffs");
reg_offs = CGF.Builder.CreateAnd(
reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align),
"aligned_regoffs");
}
// Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
// The fact that this is done unconditionally reflects the fact that
// allocating an argument to the stack also uses up all the remaining
// registers of the appropriate kind.
llvm::Value *NewOffset = nullptr;
NewOffset = CGF.Builder.CreateAdd(
reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
CGF.Builder.CreateStore(NewOffset, reg_offs_p);
// Now we're in a position to decide whether this argument really was in
// registers or not.
llvm::Value *InRegs = nullptr;
InRegs = CGF.Builder.CreateICmpSLE(
NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg");
CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);
//=======================================
// Argument was in registers
//=======================================
// Now we emit the code for if the argument was originally passed in
// registers. First start the appropriate block:
CGF.EmitBlock(InRegBlock);
llvm::Value *reg_top = nullptr;
Address reg_top_p =
CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
Address BaseAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, reg_top, reg_offs),
CGF.Int8Ty, CharUnits::fromQuantity(IsFPR ? 16 : 8));
Address RegAddr = Address::invalid();
llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty), *ElementTy = MemTy;
if (IsIndirect) {
// If it's been passed indirectly (actually a struct), whatever we find from
// stored registers or on the stack will actually be a struct **.
MemTy = llvm::PointerType::getUnqual(MemTy);
}
const Type *Base = nullptr;
uint64_t NumMembers = 0;
bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
if (IsHFA && NumMembers > 1) {
// Homogeneous aggregates passed in registers will have their elements split
// and stored 16 bytes apart regardless of size (they're notionally in qN,
// qN+1, ...). We reload and store into a temporary local variable
// contiguously.
assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0));
llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
Address Tmp = CGF.CreateTempAlloca(HFATy,
std::max(TyAlign, BaseTyInfo.Align));
// On big-endian platforms, the value will be right-aligned in its slot.
int Offset = 0;
if (CGF.CGM.getDataLayout().isBigEndian() &&
BaseTyInfo.Width.getQuantity() < 16)
Offset = 16 - BaseTyInfo.Width.getQuantity();
for (unsigned i = 0; i < NumMembers; ++i) {
CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset);
Address LoadAddr =
CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset);
LoadAddr = CGF.Builder.CreateElementBitCast(LoadAddr, BaseTy);
Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i);
llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
CGF.Builder.CreateStore(Elem, StoreAddr);
}
RegAddr = CGF.Builder.CreateElementBitCast(Tmp, MemTy);
} else {
// Otherwise the object is contiguous in memory.
// It might be right-aligned in its slot.
CharUnits SlotSize = BaseAddr.getAlignment();
if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
(IsHFA || !isAggregateTypeForABI(Ty)) &&
TySize < SlotSize) {
CharUnits Offset = SlotSize - TySize;
BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset);
}
RegAddr = CGF.Builder.CreateElementBitCast(BaseAddr, MemTy);
}
CGF.EmitBranch(ContBlock);
//=======================================
// Argument was on the stack
//=======================================
CGF.EmitBlock(OnStackBlock);
Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack");
// Again, stack arguments may need realignment. In this case both integer and
// floating-point ones might be affected.
if (!IsIndirect && TyAlign.getQuantity() > 8) {
int Align = TyAlign.getQuantity();
OnStackPtr = CGF.Builder.CreatePtrToInt(OnStackPtr, CGF.Int64Ty);
OnStackPtr = CGF.Builder.CreateAdd(
OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1),
"align_stack");
OnStackPtr = CGF.Builder.CreateAnd(
OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, -Align),
"align_stack");
OnStackPtr = CGF.Builder.CreateIntToPtr(OnStackPtr, CGF.Int8PtrTy);
}
Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty,
std::max(CharUnits::fromQuantity(8), TyAlign));
// All stack slots are multiples of 8 bytes.
CharUnits StackSlotSize = CharUnits::fromQuantity(8);
CharUnits StackSize;
if (IsIndirect)
StackSize = StackSlotSize;
else
StackSize = TySize.alignTo(StackSlotSize);
llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(
CGF.Int8Ty, OnStackPtr, StackSizeC, "new_stack");
// Write the new value of __stack for the next call to va_arg
CGF.Builder.CreateStore(NewStack, stack_p);
if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
TySize < StackSlotSize) {
CharUnits Offset = StackSlotSize - TySize;
OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset);
}
OnStackAddr = CGF.Builder.CreateElementBitCast(OnStackAddr, MemTy);
CGF.EmitBranch(ContBlock);
//=======================================
// Tidy up
//=======================================
CGF.EmitBlock(ContBlock);
Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr,
OnStackBlock, "vaargs.addr");
if (IsIndirect)
return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), ElementTy,
TyAlign);
return ResAddr;
}
Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const {
// The backend's lowering doesn't support va_arg for aggregates or
// illegal vector types. Lower VAArg here for these cases and use
// the LLVM va_arg instruction for everything else.
if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
uint64_t PointerSize = getTarget().getPointerWidth(0) / 8;
CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);
// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr, "ap.cur"),
getVAListElementType(CGF), SlotSize);
Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
return Addr;
}
// The size of the actual thing passed, which might end up just
// being a pointer for indirect types.
auto TyInfo = getContext().getTypeInfoInChars(Ty);
// Arguments bigger than 16 bytes which aren't homogeneous
// aggregates should be passed indirectly.
bool IsIndirect = false;
if (TyInfo.Width.getQuantity() > 16) {
const Type *Base = nullptr;
uint64_t Members = 0;
IsIndirect = !isHomogeneousAggregate(Ty, Base, Members);
}
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
TyInfo, SlotSize, /*AllowHigherAlign*/ true);
}
Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
bool IsIndirect = false;
// Composites larger than 16 bytes are passed by reference.
if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
IsIndirect = true;
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
CGF.getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(8),
/*allowHigherAlign*/ false);
}
//===----------------------------------------------------------------------===//
// ARM ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class ARMABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
APCS = 0,
AAPCS = 1,
AAPCS_VFP = 2,
AAPCS16_VFP = 3,
};
private:
ABIKind Kind;
bool IsFloatABISoftFP;
public:
ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind)
: SwiftABIInfo(CGT), Kind(_Kind) {
setCCs();
IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" ||
CGT.getCodeGenOpts().FloatABI == ""; // default
}
bool isEABI() const {
switch (getTarget().getTriple().getEnvironment()) {
case llvm::Triple::Android:
case llvm::Triple::EABI:
case llvm::Triple::EABIHF:
case llvm::Triple::GNUEABI:
case llvm::Triple::GNUEABIHF:
case llvm::Triple::MuslEABI:
case llvm::Triple::MuslEABIHF:
return true;
default:
return false;
}
}
bool isEABIHF() const {
switch (getTarget().getTriple().getEnvironment()) {
case llvm::Triple::EABIHF:
case llvm::Triple::GNUEABIHF:
case llvm::Triple::MuslEABIHF:
return true;
default:
return false;
}
}
ABIKind getABIKind() const { return Kind; }
bool allowBFloatArgsAndRet() const override {
return !IsFloatABISoftFP && getTarget().hasBFloat16Type();
}
private:
ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic,
unsigned functionCallConv) const;
ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
unsigned functionCallConv) const;
ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base,
uint64_t Members) const;
ABIArgInfo coerceIllegalVector(QualType Ty) const;
bool isIllegalVectorType(QualType Ty) const;
bool containsAnyFP16Vectors(QualType Ty) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t Members) const override;
bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
llvm::CallingConv::ID getLLVMDefaultCC() const;
llvm::CallingConv::ID getABIDefaultCC() const;
void setCCs();
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return true;
}
bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
unsigned elts) const override;
};
class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
public:
ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K)
: TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) {}
const ARMABIInfo &getABIInfo() const {
return static_cast<const ARMABIInfo&>(TargetCodeGenInfo::getABIInfo());
}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 13;
}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue";
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
// 0-15 are the 16 integer registers.
AssignToArrayRange(CGF.Builder, Address, Four8, 0, 15);
return false;
}
unsigned getSizeOfUnwindException() const override {
if (getABIInfo().isEABI()) return 88;
return TargetCodeGenInfo::getSizeOfUnwindException();
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
if (GV->isDeclaration())
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
auto *Fn = cast<llvm::Function>(GV);
if (const auto *TA = FD->getAttr<TargetAttr>()) {
ParsedTargetAttr Attr = TA->parse();
if (!Attr.BranchProtection.empty()) {
TargetInfo::BranchProtectionInfo BPI;
StringRef DiagMsg;
StringRef Arch = Attr.Architecture.empty()
? CGM.getTarget().getTargetOpts().CPU
: Attr.Architecture;
if (!CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
Arch, BPI, DiagMsg)) {
CGM.getDiags().Report(
D->getLocation(),
diag::warn_target_unsupported_branch_protection_attribute)
<< Arch;
} else {
static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"};
assert(static_cast<unsigned>(BPI.SignReturnAddr) <= 2 &&
"Unexpected SignReturnAddressScopeKind");
Fn->addFnAttr(
"sign-return-address",
SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]);
Fn->addFnAttr("branch-target-enforcement",
BPI.BranchTargetEnforcement ? "true" : "false");
}
} else if (CGM.getLangOpts().BranchTargetEnforcement ||
CGM.getLangOpts().hasSignReturnAddress()) {
// If the Branch Protection attribute is missing, validate the target
// Architecture attribute against Branch Protection command line
// settings.
if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.Architecture))
CGM.getDiags().Report(
D->getLocation(),
diag::warn_target_unsupported_branch_protection_attribute)
<< Attr.Architecture;
}
}
const ARMInterruptAttr *Attr = FD->getAttr<ARMInterruptAttr>();
if (!Attr)
return;
const char *Kind;
switch (Attr->getInterrupt()) {
case ARMInterruptAttr::Generic: Kind = ""; break;
case ARMInterruptAttr::IRQ: Kind = "IRQ"; break;
case ARMInterruptAttr::FIQ: Kind = "FIQ"; break;
case ARMInterruptAttr::SWI: Kind = "SWI"; break;
case ARMInterruptAttr::ABORT: Kind = "ABORT"; break;
case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break;
}
Fn->addFnAttr("interrupt", Kind);
ARMABIInfo::ABIKind ABI = cast<ARMABIInfo>(getABIInfo()).getABIKind();
if (ABI == ARMABIInfo::APCS)
return;
// AAPCS guarantees that sp will be 8-byte aligned on any public interface,
// however this is not necessarily true on taking any interrupt. Instruct
// the backend to perform a realignment as part of the function prologue.
llvm::AttrBuilder B(Fn->getContext());
B.addStackAlignmentAttr(8);
Fn->addFnAttrs(B);
}
};
class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo {
public:
WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K)
: ARMTargetCodeGenInfo(CGT, K) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
}
void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
llvm::SmallString<32> &Opt) const override {
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
void WindowsARMTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
if (GV->isDeclaration())
return;
addStackProbeTargetAttributes(D, GV, CGM);
}
}
void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
if (!::classifyReturnType(getCXXABI(), FI, *this))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(),
FI.getCallingConvention());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, FI.isVariadic(),
FI.getCallingConvention());
// Always honor user-specified calling convention.
if (FI.getCallingConvention() != llvm::CallingConv::C)
return;
llvm::CallingConv::ID cc = getRuntimeCC();
if (cc != llvm::CallingConv::C)
FI.setEffectiveCallingConvention(cc);
}
/// Return the default calling convention that LLVM will use.
llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const {
// The default calling convention that LLVM will infer.
if (isEABIHF() || getTarget().getTriple().isWatchABI())
return llvm::CallingConv::ARM_AAPCS_VFP;
else if (isEABI())
return llvm::CallingConv::ARM_AAPCS;
else
return llvm::CallingConv::ARM_APCS;
}
/// Return the calling convention that our ABI would like us to use
/// as the C calling convention.
llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const {
switch (getABIKind()) {
case APCS: return llvm::CallingConv::ARM_APCS;
case AAPCS: return llvm::CallingConv::ARM_AAPCS;
case AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP;
case AAPCS16_VFP: return llvm::CallingConv::ARM_AAPCS_VFP;
}
llvm_unreachable("bad ABI kind");
}
void ARMABIInfo::setCCs() {
assert(getRuntimeCC() == llvm::CallingConv::C);
// Don't muddy up the IR with a ton of explicit annotations if
// they'd just match what LLVM will infer from the triple.
llvm::CallingConv::ID abiCC = getABIDefaultCC();
if (abiCC != getLLVMDefaultCC())
RuntimeCC = abiCC;
}
ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const {
uint64_t Size = getContext().getTypeSize(Ty);
if (Size <= 32) {
llvm::Type *ResType =
llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(ResType);
}
if (Size == 64 || Size == 128) {
auto *ResType = llvm::FixedVectorType::get(
llvm::Type::getInt32Ty(getVMContext()), Size / 32);
return ABIArgInfo::getDirect(ResType);
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty,
const Type *Base,
uint64_t Members) const {
assert(Base && "Base class should be set for homogeneous aggregate");
// Base can be a floating-point or a vector.
if (const VectorType *VT = Base->getAs<VectorType>()) {
// FP16 vectors should be converted to integer vectors
if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) {
uint64_t Size = getContext().getTypeSize(VT);
auto *NewVecTy = llvm::FixedVectorType::get(
llvm::Type::getInt32Ty(getVMContext()), Size / 32);
llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members);
return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
}
}
unsigned Align = 0;
if (getABIKind() == ARMABIInfo::AAPCS ||
getABIKind() == ARMABIInfo::AAPCS_VFP) {
// For alignment-adjusted HFAs, cap the argument alignment to 8; leave it
// at the default otherwise.
Align = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
Align = (Align > BaseAlign && Align >= 8) ? 8 : 0;
}
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align);
}
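// Illustrative example: on a target without native half support, a
// homogeneous aggregate of two 64-bit '<4 x __fp16>' members is rewritten as
// [2 x <2 x i32>] so the ABI does not depend on fp16 hardware; otherwise the
// aggregate is passed directly, to land in consecutive VFP registers.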
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
unsigned functionCallConv) const {
// 6.1.2.1 The following argument types are VFP CPRCs:
// A single-precision floating-point type (including promoted
// half-precision types); A double-precision floating-point type;
// A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate
// with a Base Type of a single- or double-precision floating-point type,
// 64-bit containerized vectors or 128-bit containerized vectors with one
// to four Elements.
// Variadic functions should always marshal to the base standard.
bool IsAAPCS_VFP =
!isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false);
Ty = useFirstFieldIfTransparentUnion(Ty);
// Handle illegal vector types here.
if (isIllegalVectorType(Ty))
return coerceIllegalVector(Ty);
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
Ty = EnumTy->getDecl()->getIntegerType();
}
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 64)
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
}
// Ignore empty records.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
if (IsAAPCS_VFP) {
// Homogeneous Aggregates need to be expanded when we can fit the aggregate
// into VFP registers.
const Type *Base = nullptr;
uint64_t Members = 0;
if (isHomogeneousAggregate(Ty, Base, Members))
return classifyHomogeneousAggregate(Ty, Base, Members);
} else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) {
// WatchOS does have homogeneous aggregates. Note that we intentionally use
// this convention even for a variadic function: the backend will use GPRs
// if needed.
const Type *Base = nullptr;
uint64_t Members = 0;
if (isHomogeneousAggregate(Ty, Base, Members)) {
assert(Base && Members <= 4 && "unexpected homogeneous aggregate");
llvm::Type *Ty =
llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members);
return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
}
}
if (getABIKind() == ARMABIInfo::AAPCS16_VFP &&
getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(16)) {
// WatchOS is adopting the 64-bit AAPCS rule on composite types: if they're
// bigger than 128-bits, they get placed in space allocated by the caller,
// and a pointer is passed.
return ABIArgInfo::getIndirect(
CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false);
}
// Support byval for ARM.
// The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at
// most 8-byte. We realign the indirect argument if type alignment is bigger
// than ABI alignment.
uint64_t ABIAlign = 4;
uint64_t TyAlign;
if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
getABIKind() == ARMABIInfo::AAPCS) {
TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
} else {
TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();
}
if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
assert(getABIKind() != ARMABIInfo::AAPCS16_VFP && "unexpected byval");
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
/*ByVal=*/true,
/*Realign=*/TyAlign > ABIAlign);
}
// On RenderScript, coerce aggregates <= 64 bytes to an integer array of
// the same size and alignment.
if (getTarget().isRenderScriptTarget()) {
return coerceToIntArray(Ty, getContext(), getVMContext());
}
// Otherwise, pass by coercing to a structure of the appropriate size.
llvm::Type* ElemTy;
unsigned SizeRegs;
// FIXME: Try to match the types of the arguments more accurately where
// we can.
if (TyAlign <= 4) {
ElemTy = llvm::Type::getInt32Ty(getVMContext());
SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
} else {
ElemTy = llvm::Type::getInt64Ty(getVMContext());
SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64;
}
return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs));
}
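// Illustrative examples of the coercions above: a 'struct { char c[6]; }'
// (alignment <= 4) is coerced to [2 x i32]; an 8-byte-aligned
// 'struct { double d; int i; }' (16 bytes) becomes [2 x i64]; and anything
// over 64 bytes is passed byval as described earlier.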
static bool isIntegerLikeType(QualType Ty, ASTContext &Context,
llvm::LLVMContext &VMContext) {
// APCS, C Language Calling Conventions, Non-Simple Return Values: A structure
// is called integer-like if its size is less than or equal to one word, and
// the offset of each of its addressable sub-fields is zero.
uint64_t Size = Context.getTypeSize(Ty);
// Check that the type fits in a word.
if (Size > 32)
return false;
// FIXME: Handle vector types!
if (Ty->isVectorType())
return false;
// Float types are never treated as "integer like".
if (Ty->isRealFloatingType())
return false;
// If this is a builtin or pointer type then it is ok.
if (Ty->getAs<BuiltinType>() || Ty->isPointerType())
return true;
// Small complex integer types are "integer like".
if (const ComplexType *CT = Ty->getAs<ComplexType>())
return isIntegerLikeType(CT->getElementType(), Context, VMContext);
// Single-element and zero-sized arrays should be allowed by the definition
// above, but they are not.
// Otherwise, it must be a record type.
const RecordType *RT = Ty->getAs<RecordType>();
if (!RT) return false;
// Ignore records with flexible arrays.
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return false;
// Check that all sub-fields are at offset 0, and are themselves "integer
// like".
const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
bool HadField = false;
unsigned idx = 0;
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
i != e; ++i, ++idx) {
const FieldDecl *FD = *i;
// Bit-fields are not addressable, we only need to verify they are "integer
// like". We still have to disallow a subsequent non-bitfield, for example:
// struct { int : 0; int x; }
// is not integer-like according to gcc.
if (FD->isBitField()) {
if (!RD->isUnion())
HadField = true;
if (!isIntegerLikeType(FD->getType(), Context, VMContext))
return false;
continue;
}
// Check if this field is at offset 0.
if (Layout.getFieldOffset(idx) != 0)
return false;
if (!isIntegerLikeType(FD->getType(), Context, VMContext))
return false;
// Only allow at most one field in a structure. This doesn't match the
// wording above, but follows gcc in situations with a field following an
// empty structure.
if (!RD->isUnion()) {
if (HadField)
return false;
HadField = true;
}
}
return true;
}
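// Illustrative examples: 'struct { short s; }' and 'union { int i; char c; }'
// are integer-like (every addressable field sits at offset 0 within a single
// word), but 'struct { char a; char b; }' is not, because 'b' lives at a
// non-zero offset.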
ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
unsigned functionCallConv) const {
// Variadic functions should always marshal to the base standard.
bool IsAAPCS_VFP =
!isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true);
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (const VectorType *VT = RetTy->getAs<VectorType>()) {
// Large vector types should be returned via memory.
if (getContext().getTypeSize(RetTy) > 128)
return getNaturalAlignIndirect(RetTy);
// TODO: FP16/BF16 vectors should be converted to integer vectors
// This check is similar to isIllegalVectorType - refactor?
if ((!getTarget().hasLegalHalfType() &&
(VT->getElementType()->isFloat16Type() ||
VT->getElementType()->isHalfType())) ||
(IsFloatABISoftFP &&
VT->getElementType()->isBFloat16Type()))
return coerceIllegalVector(RetTy);
}
if (!isAggregateTypeForABI(RetTy)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() > 64)
return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect();
}
// Are we following APCS?
if (getABIKind() == APCS) {
if (isEmptyRecord(getContext(), RetTy, false))
return ABIArgInfo::getIgnore();
// Complex types are all returned as packed integers.
//
// FIXME: Consider using 2 x vector types if the back end handles them
// correctly.
if (RetTy->isAnyComplexType())
return ABIArgInfo::getDirect(llvm::IntegerType::get(
getVMContext(), getContext().getTypeSize(RetTy)));
// Integer like structures are returned in r0.
if (isIntegerLikeType(RetTy, getContext(), getVMContext())) {
// Return in the smallest viable integer type.
uint64_t Size = getContext().getTypeSize(RetTy);
if (Size <= 8)
return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
if (Size <= 16)
return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
}
// Otherwise return in memory.
return getNaturalAlignIndirect(RetTy);
}
// Otherwise this is an AAPCS variant.
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
// Check for homogeneous aggregates with AAPCS-VFP.
if (IsAAPCS_VFP) {
const Type *Base = nullptr;
uint64_t Members = 0;
if (isHomogeneousAggregate(RetTy, Base, Members))
return classifyHomogeneousAggregate(RetTy, Base, Members);
}
// Aggregates <= 4 bytes are returned in r0; other aggregates
// are returned indirectly.
uint64_t Size = getContext().getTypeSize(RetTy);
if (Size <= 32) {
// On RenderScript, coerce aggregates <= 4 bytes to an integer array of the
// same size and alignment.
if (getTarget().isRenderScriptTarget()) {
return coerceToIntArray(RetTy, getContext(), getVMContext());
}
if (getDataLayout().isBigEndian())
// Return in a 32-bit integer type (as if loaded by LDR, AAPCS 5.4)
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
// Return in the smallest viable integer type.
if (Size <= 8)
return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
if (Size <= 16)
return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
} else if (Size <= 128 && getABIKind() == AAPCS16_VFP) {
llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext());
llvm::Type *CoerceTy =
llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32);
return ABIArgInfo::getDirect(CoerceTy);
}
return getNaturalAlignIndirect(RetTy);
}
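// Rough illustration (hypothetical types, little-endian AAPCS) of the return
// classification above:
//   struct { char c; }        -> returned directly as i8
//   struct { char a, b, c; }  -> returned directly as i32
//   struct { float x, y; }    -> homogeneous aggregate; returned in VFP
//                                registers under AAPCS-VFP, otherwise indirectly
//   struct { int a, b; }      -> 8 bytes, returned indirectly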
/// isIllegalVectorType - check whether Ty is an illegal vector type.
bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
if (const VectorType *VT = Ty->getAs<VectorType> ()) {
// On targets that don't support half, fp16 or bfloat, they are expanded
// into float, and we don't want the ABI to depend on whether or not they
// are supported in hardware. Thus return false to coerce vectors of these
// types into integer vectors.
// We do not depend on hasLegalHalfType for bfloat as it is a
// separate IR type.
if ((!getTarget().hasLegalHalfType() &&
(VT->getElementType()->isFloat16Type() ||
VT->getElementType()->isHalfType())) ||
(IsFloatABISoftFP &&
VT->getElementType()->isBFloat16Type()))
return true;
if (isAndroid()) {
// Android shipped using Clang 3.1, which supported a slightly different
// vector ABI. The primary differences were that 3-element vector types
// were legal, and so were sub 32-bit vectors (i.e. <2 x i8>). This path
// accepts that legacy behavior for Android only.
// Check whether VT is legal.
unsigned NumElements = VT->getNumElements();
// NumElements should be power of 2 or equal to 3.
if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3)
return true;
} else {
// Check whether VT is legal.
unsigned NumElements = VT->getNumElements();
uint64_t Size = getContext().getTypeSize(VT);
// NumElements should be power of 2.
if (!llvm::isPowerOf2_32(NumElements))
return true;
// The size should be greater than 32 bits; smaller vectors are illegal.
return Size <= 32;
}
}
return false;
}
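// For illustration, under the rules above (non-Android):
//   <4 x i32>  (16 bytes)  -> legal
//   <2 x i8>   (2 bytes)   -> illegal: 32 bits or smaller
//   <3 x float>            -> illegal: element count is not a power of 2
// On Android both of the latter are accepted for backwards compatibility.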
/// Return true if a type contains any 16-bit floating point vectors
bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const {
if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
uint64_t NElements = AT->getSize().getZExtValue();
if (NElements == 0)
return false;
return containsAnyFP16Vectors(AT->getElementType());
} else if (const RecordType *RT = Ty->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
if (llvm::any_of(CXXRD->bases(), [this](const CXXBaseSpecifier &B) {
return containsAnyFP16Vectors(B.getType());
}))
return true;
if (llvm::any_of(RD->fields(), [this](FieldDecl *FD) {
return FD && containsAnyFP16Vectors(FD->getType());
}))
return true;
return false;
} else {
if (const VectorType *VT = Ty->getAs<VectorType>())
return (VT->getElementType()->isFloat16Type() ||
VT->getElementType()->isBFloat16Type() ||
VT->getElementType()->isHalfType());
return false;
}
}
bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
llvm::Type *eltTy,
unsigned numElts) const {
if (!llvm::isPowerOf2_32(numElts))
return false;
unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy);
if (size > 64)
return false;
if (vectorSize.getQuantity() != 8 &&
(vectorSize.getQuantity() != 16 || numElts == 1))
return false;
return true;
}
bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
// Homogeneous aggregates for AAPCS-VFP must have base types of float,
// double, or 64-bit or 128-bit vectors.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::Float ||
BT->getKind() == BuiltinType::Double ||
BT->getKind() == BuiltinType::LongDouble)
return true;
} else if (const VectorType *VT = Ty->getAs<VectorType>()) {
unsigned VecSize = getContext().getTypeSize(VT);
if (VecSize == 64 || VecSize == 128)
return true;
}
return false;
}
bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const {
return Members <= 4;
}
bool ARMABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const {
// AAPCS32 says that the rule for whether something is a homogeneous
// aggregate is applied to the output of the data layout decision. So
// anything that doesn't affect the data layout also does not affect
// homogeneity. In particular, zero-length bitfields don't stop a struct
// being homogeneous.
return true;
}
bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention,
bool acceptHalf) const {
// Give precedence to user-specified calling conventions.
if (callConvention != llvm::CallingConv::C)
return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP);
else
return (getABIKind() == AAPCS_VFP) ||
(acceptHalf && (getABIKind() == AAPCS16_VFP));
}
Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
CharUnits SlotSize = CharUnits::fromQuantity(4);
// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
- Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
- getVAListElementType(CGF), SlotSize);
- Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
- return Addr;
+ VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy);
+ auto *Load = CGF.Builder.CreateLoad(VAListAddr);
+ Address Addr = Address(Load, CGF.Int8Ty, SlotSize);
+ return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
}
CharUnits TySize = getContext().getTypeSizeInChars(Ty);
CharUnits TyAlignForABI = getContext().getTypeUnadjustedAlignInChars(Ty);
// Use indirect if size of the illegal vector is bigger than 16 bytes.
bool IsIndirect = false;
const Type *Base = nullptr;
uint64_t Members = 0;
if (TySize > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) {
IsIndirect = true;
// ARMv7k passes structs bigger than 16 bytes indirectly, in space
// allocated by the caller.
} else if (TySize > CharUnits::fromQuantity(16) &&
getABIKind() == ARMABIInfo::AAPCS16_VFP &&
!isHomogeneousAggregate(Ty, Base, Members)) {
IsIndirect = true;
// Otherwise, bound the type's ABI alignment.
// The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for
// APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte.
// Our callers should be prepared to handle an under-aligned address.
} else if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
getABIKind() == ARMABIInfo::AAPCS) {
TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4));
TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(8));
} else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) {
// ARMv7k allows type alignment up to 16 bytes.
TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4));
TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(16));
} else {
TyAlignForABI = CharUnits::fromQuantity(4);
}
TypeInfoChars TyInfo(TySize, TyAlignForABI, AlignRequirementKind::None);
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo,
SlotSize, /*AllowHigherAlign*/ true);
}
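// Illustrative consequence of the clamping above (hypothetical type): under
// AAPCS, an over-aligned 16-byte-aligned struct is treated as 8-byte aligned
// for va_arg purposes, while under AAPCS16_VFP (ARMv7k) the full 16-byte
// alignment is honoured.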
//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class NVPTXTargetCodeGenInfo;
class NVPTXABIInfo : public ABIInfo {
NVPTXTargetCodeGenInfo &CGInfo;
public:
NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
: ABIInfo(CGT), CGInfo(Info) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool isUnsupportedType(QualType T) const;
ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
bool shouldEmitStaticExternCAliases() const override;
llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
// On the device side, a surface reference is represented as an object handle
// in a 64-bit integer.
return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
}
llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
// On the device side, a texture reference is represented as an object handle
// in a 64-bit integer.
return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
}
bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) const override {
emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
return true;
}
bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) const override {
emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
return true;
}
private:
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand);
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) {
llvm::Value *Handle = nullptr;
llvm::Constant *C =
llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer());
// Lookup `addrspacecast` through the constant pointer if any.
if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
// Load the handle from the specific global variable using
// `nvvm.texsurf.handle.internal` intrinsic.
Handle = CGF.EmitRuntimeCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
{GV->getType()}),
{GV}, "texsurf_handle");
} else
Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
CGF.EmitStoreOfScalar(Handle, Dst);
}
};
/// Checks if the type is unsupported directly by the current target.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
ASTContext &Context = getContext();
if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
return true;
if (!Context.getTargetInfo().hasFloat128Type() &&
(T->isFloat128Type() ||
(T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
return true;
if (const auto *EIT = T->getAs<BitIntType>())
return EIT->getNumBits() >
(Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
Context.getTypeSize(T) > 64U)
return true;
if (const auto *AT = T->getAsArrayTypeUnsafe())
return isUnsupportedType(AT->getElementType());
const auto *RT = T->getAs<RecordType>();
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const CXXBaseSpecifier &I : CXXRD->bases())
if (isUnsupportedType(I.getType()))
return true;
for (const FieldDecl *I : RD->fields())
if (isUnsupportedType(I->getType()))
return true;
return false;
}
/// Coerce the given type into an array whose element size does not exceed
/// MaxSize bits.
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
unsigned MaxSize) const {
// Alignment and Size are measured in bits.
const uint64_t Size = getContext().getTypeSize(Ty);
const uint64_t Alignment = getContext().getTypeAlign(Ty);
const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
const uint64_t NumElements = (Size + Div - 1) / Div;
return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}
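// Hypothetical example of the coercion above: a 128-bit struct with 32-bit
// alignment and MaxSize == 64 is coerced to [4 x i32]
// (Div == min(64, 32) == 32, NumElements == (128 + 31) / 32 == 4).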
ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (getContext().getLangOpts().OpenMP &&
getContext().getLangOpts().OpenMPIsDevice && isUnsupportedType(RetTy))
return coerceToIntArrayWithLimit(RetTy, 64);
// Note: this differs from the default ABI.
if (!RetTy->isScalarType())
return ABIArgInfo::getDirect();
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Return aggregate types as indirect by value
if (isAggregateTypeForABI(Ty)) {
// Under CUDA device compilation, tex/surf builtin types are replaced with
// object types and passed directly.
if (getContext().getLangOpts().CUDAIsDevice) {
if (Ty->isCUDADeviceBuiltinSurfaceType())
return ABIArgInfo::getDirect(
CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
if (Ty->isCUDADeviceBuiltinTextureType())
return ABIArgInfo::getDirect(
CGInfo.getCUDADeviceBuiltinTextureDeviceType());
}
return getNaturalAlignIndirect(Ty, /* byval */ true);
}
if (const auto *EIT = Ty->getAs<BitIntType>()) {
if ((EIT->getNumBits() > 128) ||
(!getContext().getTargetInfo().hasInt128Type() &&
EIT->getNumBits() > 64))
return getNaturalAlignIndirect(Ty, /* byval */ true);
}
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
// Always honor user-specified calling convention.
if (FI.getCallingConvention() != llvm::CallingConv::C)
return;
FI.setEffectiveCallingConvention(getRuntimeCC());
}
Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
llvm_unreachable("NVPTX does not support varargs");
}
void NVPTXTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (GV->isDeclaration())
return;
const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
if (VD) {
if (M.getLangOpts().CUDA) {
if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
addNVVMMetadata(GV, "surface", 1);
else if (VD->getType()->isCUDADeviceBuiltinTextureType())
addNVVMMetadata(GV, "texture", 1);
return;
}
}
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
llvm::Function *F = cast<llvm::Function>(GV);
// Perform special handling in OpenCL mode
if (M.getLangOpts().OpenCL) {
// Use OpenCL function attributes to check for kernel functions
// By default, all functions are device functions
if (FD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL __kernel functions get kernel metadata
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
}
}
// Perform special handling in CUDA mode.
if (M.getLangOpts().CUDA) {
// CUDA __global__ functions get a kernel metadata entry. Since
// __global__ functions cannot be called from the device, we do not
// need to set the noinline attribute.
if (FD->hasAttr<CUDAGlobalAttr>()) {
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
// Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
llvm::APSInt MaxThreads(32);
MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
if (MaxThreads > 0)
addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
// min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
// not specified in __launch_bounds__ or if the user specified a 0 value,
// we don't have to add a PTX directive.
if (Attr->getMinBlocks()) {
llvm::APSInt MinBlocks(32);
MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
if (MinBlocks > 0)
// Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
}
}
}
}
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
StringRef Name, int Operand) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();
// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
llvm::Metadata *MDVals[] = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
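// For example, annotating a kernel function appends an operand of the
// (approximate) form !{<func-ref>, !"kernel", i32 1} to the module's
// !nvvm.annotations named metadata node, matching the shape described in the
// comments in setTargetAttributes() above.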
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
}
//===----------------------------------------------------------------------===//
// SystemZ ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class SystemZABIInfo : public SwiftABIInfo {
bool HasVector;
bool IsSoftFloatABI;
public:
SystemZABIInfo(CodeGenTypes &CGT, bool HV, bool SF)
: SwiftABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {}
bool isPromotableIntegerTypeForABI(QualType Ty) const;
bool isCompoundType(QualType Ty) const;
bool isVectorArgumentType(QualType Ty) const;
bool isFPArgumentType(QualType Ty) const;
QualType GetSingleElementType(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType ArgTy) const;
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return false;
}
};
class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
public:
SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI)
: TargetCodeGenInfo(
std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)) {}
llvm::Value *testFPKind(llvm::Value *V, unsigned BuiltinID,
CGBuilderTy &Builder,
CodeGenModule &CGM) const override {
assert(V->getType()->isFloatingPointTy() && "V should have an FP type.");
// Only use TDC in constrained FP mode.
if (!Builder.getIsFPConstrained())
return nullptr;
llvm::Type *Ty = V->getType();
if (Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isFP128Ty()) {
llvm::Module &M = CGM.getModule();
auto &Ctx = M.getContext();
llvm::Function *TDCFunc =
llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::s390_tdc, Ty);
unsigned TDCBits = 0;
switch (BuiltinID) {
case Builtin::BI__builtin_isnan:
TDCBits = 0xf;
break;
case Builtin::BIfinite:
case Builtin::BI__finite:
case Builtin::BIfinitef:
case Builtin::BI__finitef:
case Builtin::BIfinitel:
case Builtin::BI__finitel:
case Builtin::BI__builtin_isfinite:
TDCBits = 0xfc0;
break;
case Builtin::BI__builtin_isinf:
TDCBits = 0x30;
break;
default:
break;
}
if (TDCBits)
return Builder.CreateCall(
TDCFunc,
{V, llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), TDCBits)});
}
return nullptr;
}
};
}
bool SystemZABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Promotable integer types are required to be promoted by the ABI.
if (ABIInfo::isPromotableIntegerTypeForABI(Ty))
return true;
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() < 64)
return true;
// 32-bit values must also be promoted.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
case BuiltinType::Int:
case BuiltinType::UInt:
return true;
default:
return false;
}
return false;
}
bool SystemZABIInfo::isCompoundType(QualType Ty) const {
return (Ty->isAnyComplexType() ||
Ty->isVectorType() ||
isAggregateTypeForABI(Ty));
}
bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const {
return (HasVector &&
Ty->isVectorType() &&
getContext().getTypeSize(Ty) <= 128);
}
bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {
if (IsSoftFloatABI)
return false;
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
case BuiltinType::Float:
case BuiltinType::Double:
return true;
default:
return false;
}
return false;
}
QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const {
const RecordType *RT = Ty->getAs<RecordType>();
if (RT && RT->isStructureOrClassType()) {
const RecordDecl *RD = RT->getDecl();
QualType Found;
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const auto &I : CXXRD->bases()) {
QualType Base = I.getType();
// Empty bases don't affect things either way.
if (isEmptyRecord(getContext(), Base, true))
continue;
if (!Found.isNull())
return Ty;
Found = GetSingleElementType(Base);
}
// Check the fields.
for (const auto *FD : RD->fields()) {
// Unlike isSingleElementStruct(), empty structure and array fields
// do count. So do anonymous bitfields that aren't zero-sized.
// Like isSingleElementStruct(), ignore C++20 empty data members.
if (FD->hasAttr<NoUniqueAddressAttr>() &&
isEmptyRecord(getContext(), FD->getType(), true))
continue;
// Unlike isSingleElementStruct(), arrays do not count.
// Nested structures still do though.
if (!Found.isNull())
return Ty;
Found = GetSingleElementType(FD->getType());
}
// Unlike isSingleElementStruct(), trailing padding is allowed.
// An 8-byte aligned struct s { float f; } is passed as a double.
if (!Found.isNull())
return Found;
}
return Ty;
}
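// Sketch of how the single-element search above behaves on hypothetical types:
//   struct F1 { float f; };            -> float   (one real field)
//   struct F2 { struct F1 inner; };    -> float   (nested structs are unwrapped)
//   struct F3 { float f; float g; };   -> F3 itself (more than one field)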
Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
// Assume that va_list type is correct; should be pointer to LLVM type:
// struct {
// i64 __gpr;
// i64 __fpr;
// i8 *__overflow_arg_area;
// i8 *__reg_save_area;
// };
// Every non-vector argument occupies 8 bytes and is passed by preference
// in either GPRs or FPRs. Vector arguments occupy 8 or 16 bytes and are
// always passed on the stack.
Ty = getContext().getCanonicalType(Ty);
auto TyInfo = getContext().getTypeInfoInChars(Ty);
llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty);
llvm::Type *DirectTy = ArgTy;
ABIArgInfo AI = classifyArgumentType(Ty);
bool IsIndirect = AI.isIndirect();
bool InFPRs = false;
bool IsVector = false;
CharUnits UnpaddedSize;
CharUnits DirectAlign;
if (IsIndirect) {
DirectTy = llvm::PointerType::getUnqual(DirectTy);
UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8);
} else {
if (AI.getCoerceToType())
ArgTy = AI.getCoerceToType();
InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy()));
IsVector = ArgTy->isVectorTy();
UnpaddedSize = TyInfo.Width;
DirectAlign = TyInfo.Align;
}
CharUnits PaddedSize = CharUnits::fromQuantity(8);
if (IsVector && UnpaddedSize > PaddedSize)
PaddedSize = CharUnits::fromQuantity(16);
assert((UnpaddedSize <= PaddedSize) && "Invalid argument size.");
CharUnits Padding = (PaddedSize - UnpaddedSize);
llvm::Type *IndexTy = CGF.Int64Ty;
llvm::Value *PaddedSizeV =
llvm::ConstantInt::get(IndexTy, PaddedSize.getQuantity());
if (IsVector) {
// Work out the address of a vector argument on the stack.
// Vector arguments are always passed in the high bits of a
// single (8 byte) or double (16 byte) stack slot.
Address OverflowArgAreaPtr =
CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr");
Address OverflowArgArea =
Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"),
CGF.Int8Ty, TyInfo.Align);
Address MemAddr =
CGF.Builder.CreateElementBitCast(OverflowArgArea, DirectTy, "mem_addr");
// Update overflow_arg_area_ptr pointer
llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(
OverflowArgArea.getElementType(), OverflowArgArea.getPointer(),
PaddedSizeV, "overflow_arg_area");
CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr);
return MemAddr;
}
assert(PaddedSize.getQuantity() == 8);
unsigned MaxRegs, RegCountField, RegSaveIndex;
CharUnits RegPadding;
if (InFPRs) {
MaxRegs = 4; // Maximum of 4 FPR arguments
RegCountField = 1; // __fpr
RegSaveIndex = 16; // save offset for f0
RegPadding = CharUnits(); // floats are passed in the high bits of an FPR
} else {
MaxRegs = 5; // Maximum of 5 GPR arguments
RegCountField = 0; // __gpr
RegSaveIndex = 2; // save offset for r2
RegPadding = Padding; // values are passed in the low bits of a GPR
}
Address RegCountPtr =
CGF.Builder.CreateStructGEP(VAListAddr, RegCountField, "reg_count_ptr");
llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count");
llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs);
llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV,
"fits_in_regs");
llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock);
// Emit code to load the value if it was passed in registers.
CGF.EmitBlock(InRegBlock);
// Work out the address of an argument register.
llvm::Value *ScaledRegCount =
CGF.Builder.CreateMul(RegCount, PaddedSizeV, "scaled_reg_count");
llvm::Value *RegBase =
llvm::ConstantInt::get(IndexTy, RegSaveIndex * PaddedSize.getQuantity()
+ RegPadding.getQuantity());
llvm::Value *RegOffset =
CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset");
Address RegSaveAreaPtr =
CGF.Builder.CreateStructGEP(VAListAddr, 3, "reg_save_area_ptr");
llvm::Value *RegSaveArea =
CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area");
Address RawRegAddr(
CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, RegOffset, "raw_reg_addr"),
CGF.Int8Ty, PaddedSize);
Address RegAddr =
CGF.Builder.CreateElementBitCast(RawRegAddr, DirectTy, "reg_addr");
// Update the register count
llvm::Value *One = llvm::ConstantInt::get(IndexTy, 1);
llvm::Value *NewRegCount =
CGF.Builder.CreateAdd(RegCount, One, "reg_count");
CGF.Builder.CreateStore(NewRegCount, RegCountPtr);
CGF.EmitBranch(ContBlock);
// Emit code to load the value if it was passed in memory.
CGF.EmitBlock(InMemBlock);
// Work out the address of a stack argument.
Address OverflowArgAreaPtr =
CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr");
Address OverflowArgArea =
Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"),
CGF.Int8Ty, PaddedSize);
Address RawMemAddr =
CGF.Builder.CreateConstByteGEP(OverflowArgArea, Padding, "raw_mem_addr");
Address MemAddr =
CGF.Builder.CreateElementBitCast(RawMemAddr, DirectTy, "mem_addr");
// Update overflow_arg_area_ptr pointer
llvm::Value *NewOverflowArgArea =
CGF.Builder.CreateGEP(OverflowArgArea.getElementType(),
OverflowArgArea.getPointer(), PaddedSizeV,
"overflow_arg_area");
CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr);
CGF.EmitBranch(ContBlock);
// Return the appropriate result.
CGF.EmitBlock(ContBlock);
Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
"va_arg.addr");
if (IsIndirect)
ResAddr = Address(CGF.Builder.CreateLoad(ResAddr, "indirect_arg"), ArgTy,
TyInfo.Align);
return ResAddr;
}
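// Hypothetical walk-through of the va_arg lowering above (hard-float ABI):
// for a double argument, __fpr is compared against 4; if fewer than 4 FP
// arguments have been consumed, the value is loaded from
//   reg_save_area + 16*8 + __fpr*8
// otherwise it is taken from an 8-byte slot in the overflow area. For a
// 4-byte int the GPR path is used instead, with a base offset of 2*8 plus
// 4 bytes of padding so the value is read from the low bits of the slot.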
ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (isVectorArgumentType(RetTy))
return ABIArgInfo::getDirect();
if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64)
return getNaturalAlignIndirect(RetTy);
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
// Handle the generic C++ ABI.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Integers and enums are extended to full register width.
if (isPromotableIntegerTypeForABI(Ty))
return ABIArgInfo::getExtend(Ty);
// Handle vector types and vector-like structure types. Note that
// as opposed to float-like structure types, we do not allow any
// padding for vector-like structures, so verify the sizes match.
uint64_t Size = getContext().getTypeSize(Ty);
QualType SingleElementTy = GetSingleElementType(Ty);
if (isVectorArgumentType(SingleElementTy) &&
getContext().getTypeSize(SingleElementTy) == Size)
return ABIArgInfo::getDirect(CGT.ConvertType(SingleElementTy));
// Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly.
if (Size != 8 && Size != 16 && Size != 32 && Size != 64)
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
// Handle small structures.
if (const RecordType *RT = Ty->getAs<RecordType>()) {
// Structures with flexible arrays have variable length, so they really
// fail the size test above.
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
// The structure is passed as an unextended integer, a float, or a double.
llvm::Type *PassTy;
if (isFPArgumentType(SingleElementTy)) {
assert(Size == 32 || Size == 64);
if (Size == 32)
PassTy = llvm::Type::getFloatTy(getVMContext());
else
PassTy = llvm::Type::getDoubleTy(getVMContext());
} else
PassTy = llvm::IntegerType::get(getVMContext(), Size);
return ABIArgInfo::getDirect(PassTy);
}
// Non-structure compounds are passed indirectly.
if (isCompoundType(Ty))
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
return ABIArgInfo::getDirect(nullptr);
}
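// Rough examples (hypothetical C types, hard-float ABI) of the argument rules
// above:
//   struct { float f; }       (32 bits) -> passed directly as float
//   struct { int a; int b; }  (64 bits) -> passed directly as i64
//   struct { char c[3]; }     (24 bits) -> passed indirectly (size is not
//                                          1, 2, 4 or 8 bytes)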
//===----------------------------------------------------------------------===//
// MSP430 ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class MSP430ABIInfo : public DefaultABIInfo {
static ABIArgInfo complexArgInfo() {
ABIArgInfo Info = ABIArgInfo::getDirect();
Info.setCanBeFlattened(false);
return Info;
}
public:
MSP430ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
ABIArgInfo classifyReturnType(QualType RetTy) const {
if (RetTy->isAnyComplexType())
return complexArgInfo();
return DefaultABIInfo::classifyReturnType(RetTy);
}
ABIArgInfo classifyArgumentType(QualType RetTy) const {
if (RetTy->isAnyComplexType())
return complexArgInfo();
return DefaultABIInfo::classifyArgumentType(RetTy);
}
// Just copy the original implementations because
// DefaultABIInfo::classify{Return,Argument}Type() are not virtual
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override {
return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
}
};
class MSP430TargetCodeGenInfo : public TargetCodeGenInfo {
public:
MSP430TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<MSP430ABIInfo>(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
};
}
void MSP430TargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
const auto *InterruptAttr = FD->getAttr<MSP430InterruptAttr>();
if (!InterruptAttr)
return;
// Handle 'interrupt' attribute:
llvm::Function *F = cast<llvm::Function>(GV);
// Step 1: Set ISR calling convention.
F->setCallingConv(llvm::CallingConv::MSP430_INTR);
// Step 2: Add attributes goodness.
F->addFnAttr(llvm::Attribute::NoInline);
F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber()));
}
}
//===----------------------------------------------------------------------===//
// MIPS ABI Implementation. This works for both little-endian and
// big-endian variants.
//===----------------------------------------------------------------------===//
namespace {
class MipsABIInfo : public ABIInfo {
bool IsO32;
unsigned MinABIStackAlignInBytes, StackAlignInBytes;
void CoerceToIntArgs(uint64_t TySize,
SmallVectorImpl<llvm::Type *> &ArgList) const;
llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const;
llvm::Type* returnAggregateInRegs(QualType RetTy, uint64_t Size) const;
llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const;
public:
MipsABIInfo(CodeGenTypes &CGT, bool _IsO32) :
ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8),
StackAlignInBytes(IsO32 ? 8 : 16) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
ABIArgInfo extendType(QualType Ty) const;
};
class MIPSTargetCodeGenInfo : public TargetCodeGenInfo {
unsigned SizeOfUnwindException;
public:
MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32)
: TargetCodeGenInfo(std::make_unique<MipsABIInfo>(CGT, IsO32)),
SizeOfUnwindException(IsO32 ? 24 : 32) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 29;
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
llvm::Function *Fn = cast<llvm::Function>(GV);
if (FD->hasAttr<MipsLongCallAttr>())
Fn->addFnAttr("long-call");
else if (FD->hasAttr<MipsShortCallAttr>())
Fn->addFnAttr("short-call");
// Other attributes do not have a meaning for declarations.
if (GV->isDeclaration())
return;
if (FD->hasAttr<Mips16Attr>()) {
Fn->addFnAttr("mips16");
}
else if (FD->hasAttr<NoMips16Attr>()) {
Fn->addFnAttr("nomips16");
}
if (FD->hasAttr<MicroMipsAttr>())
Fn->addFnAttr("micromips");
else if (FD->hasAttr<NoMicroMipsAttr>())
Fn->addFnAttr("nomicromips");
const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>();
if (!Attr)
return;
const char *Kind;
switch (Attr->getInterrupt()) {
case MipsInterruptAttr::eic: Kind = "eic"; break;
case MipsInterruptAttr::sw0: Kind = "sw0"; break;
case MipsInterruptAttr::sw1: Kind = "sw1"; break;
case MipsInterruptAttr::hw0: Kind = "hw0"; break;
case MipsInterruptAttr::hw1: Kind = "hw1"; break;
case MipsInterruptAttr::hw2: Kind = "hw2"; break;
case MipsInterruptAttr::hw3: Kind = "hw3"; break;
case MipsInterruptAttr::hw4: Kind = "hw4"; break;
case MipsInterruptAttr::hw5: Kind = "hw5"; break;
}
Fn->addFnAttr("interrupt", Kind);
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
unsigned getSizeOfUnwindException() const override {
return SizeOfUnwindException;
}
};
}
void MipsABIInfo::CoerceToIntArgs(
uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const {
llvm::IntegerType *IntTy =
llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8);
// Add (TySize / MinABIStackAlignInBytes) args of IntTy.
for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N)
ArgList.push_back(IntTy);
// If necessary, add one more integer type to ArgList.
unsigned R = TySize % (MinABIStackAlignInBytes * 8);
if (R)
ArgList.push_back(llvm::IntegerType::get(getVMContext(), R));
}
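// Hypothetical example: on O32 (MinABIStackAlignInBytes == 4) a 72-bit
// aggregate is coerced to the argument list { i32, i32, i8 } -- two full
// 32-bit slots plus an i8 for the remaining 8 bits.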
// In N32/64, an aligned double precision floating point field is passed in
// a register.
llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const {
SmallVector<llvm::Type*, 8> ArgList, IntArgList;
if (IsO32) {
CoerceToIntArgs(TySize, ArgList);
return llvm::StructType::get(getVMContext(), ArgList);
}
if (Ty->isComplexType())
return CGT.ConvertType(Ty);
const RecordType *RT = Ty->getAs<RecordType>();
// Unions/vectors are passed in integer registers.
if (!RT || !RT->isStructureOrClassType()) {
CoerceToIntArgs(TySize, ArgList);
return llvm::StructType::get(getVMContext(), ArgList);
}
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
assert(!(TySize % 8) && "Size of structure must be multiple of 8.");
uint64_t LastOffset = 0;
unsigned idx = 0;
llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64);
// Iterate over fields in the struct/class and check if there are any aligned
// double fields.
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
i != e; ++i, ++idx) {
const QualType Ty = i->getType();
const BuiltinType *BT = Ty->getAs<BuiltinType>();
if (!BT || BT->getKind() != BuiltinType::Double)
continue;
uint64_t Offset = Layout.getFieldOffset(idx);
if (Offset % 64) // Ignore doubles that are not aligned.
continue;
// Add ((Offset - LastOffset) / 64) args of type i64.
for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j)
ArgList.push_back(I64);
// Add double type.
ArgList.push_back(llvm::Type::getDoubleTy(getVMContext()));
LastOffset = Offset + 64;
}
CoerceToIntArgs(TySize - LastOffset, IntArgList);
ArgList.append(IntArgList.begin(), IntArgList.end());
return llvm::StructType::get(getVMContext(), ArgList);
}
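// Hypothetical N32/N64 example for the aggregate handling above:
//   struct { int i; double d; }   (d at offset 64)
// becomes the coerced type { i64, double }: one i64 covers the first 64 bits
// and the aligned double is passed in a floating-point register.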
llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset,
uint64_t Offset) const {
if (OrigOffset + MinABIStackAlignInBytes > Offset)
return nullptr;
return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8);
}
ABIArgInfo
MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
uint64_t OrigOffset = Offset;
uint64_t TySize = getContext().getTypeSize(Ty);
uint64_t Align = getContext().getTypeAlign(Ty) / 8;
Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes),
(uint64_t)StackAlignInBytes);
unsigned CurrOffset = llvm::alignTo(Offset, Align);
Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8;
if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) {
// Ignore empty aggregates.
if (TySize == 0)
return ABIArgInfo::getIgnore();
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
Offset = OrigOffset + MinABIStackAlignInBytes;
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
}
// If we have reached here, aggregates are passed directly by coercing to
// another structure type. Padding is inserted if the offset of the
// aggregate is unaligned.
ABIArgInfo ArgInfo =
ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0,
getPaddingType(OrigOffset, CurrOffset));
ArgInfo.setInReg(true);
return ArgInfo;
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Make sure we pass indirectly things that are too large.
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 128 ||
(EIT->getNumBits() > 64 &&
!getContext().getTargetInfo().hasInt128Type()))
return getNaturalAlignIndirect(Ty);
// All integral types are promoted to the GPR width.
if (Ty->isIntegralOrEnumerationType())
return extendType(Ty);
return ABIArgInfo::getDirect(
nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset));
}
llvm::Type*
MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const {
const RecordType *RT = RetTy->getAs<RecordType>();
SmallVector<llvm::Type*, 8> RTList;
if (RT && RT->isStructureOrClassType()) {
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
unsigned FieldCnt = Layout.getFieldCount();
// N32/64 returns struct/classes in floating point registers if the
// following conditions are met:
// 1. The size of the struct/class is no larger than 128-bit.
// 2. The struct/class has one or two fields all of which are floating
// point types.
// 3. The offset of the first field is zero (this follows what gcc does).
//
// Any other composite results are returned in integer registers.
//
if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) {
RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end();
for (; b != e; ++b) {
const BuiltinType *BT = b->getType()->getAs<BuiltinType>();
if (!BT || !BT->isFloatingPoint())
break;
RTList.push_back(CGT.ConvertType(b->getType()));
}
if (b == e)
return llvm::StructType::get(getVMContext(), RTList,
RD->hasAttr<PackedAttr>());
RTList.clear();
}
}
CoerceToIntArgs(Size, RTList);
return llvm::StructType::get(getVMContext(), RTList);
}
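// Hypothetical examples for the rules above: on N32/N64 a
//   struct { float x; double y; }
// (two floating-point fields, the first at offset 0) is returned as the
// struct type { float, double } in floating-point registers, while a
//   struct { int i; float f; }
// falls back to integer registers via CoerceToIntArgs().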
ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const {
uint64_t Size = getContext().getTypeSize(RetTy);
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
// O32 doesn't treat zero-sized structs differently from other structs.
// However, N32/N64 ignores zero sized return values.
if (!IsO32 && Size == 0)
return ABIArgInfo::getIgnore();
if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) {
if (Size <= 128) {
if (RetTy->isAnyComplexType())
return ABIArgInfo::getDirect();
// O32 returns integer vectors in registers and N32/N64 returns all small
// aggregates in registers.
if (!IsO32 ||
(RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) {
ABIArgInfo ArgInfo =
ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size));
ArgInfo.setInReg(true);
return ArgInfo;
}
}
return getNaturalAlignIndirect(RetTy);
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
// Make sure we pass indirectly things that are too large.
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() > 128 ||
(EIT->getNumBits() > 64 &&
!getContext().getTargetInfo().hasInt128Type()))
return getNaturalAlignIndirect(RetTy);
if (isPromotableIntegerTypeForABI(RetTy))
return ABIArgInfo::getExtend(RetTy);
if ((RetTy->isUnsignedIntegerOrEnumerationType() ||
RetTy->isSignedIntegerOrEnumerationType()) && Size == 32 && !IsO32)
return ABIArgInfo::getSignExtend(RetTy);
return ABIArgInfo::getDirect();
}
void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const {
ABIArgInfo &RetInfo = FI.getReturnInfo();
if (!getCXXABI().classifyReturnType(FI))
RetInfo = classifyReturnType(FI.getReturnType());
// Check if a pointer to an aggregate is passed as a hidden argument.
uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0;
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, Offset);
}
Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType OrigTy) const {
QualType Ty = OrigTy;
// Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64.
// Pointers are also promoted in the same way but this only matters for N32.
unsigned SlotSizeInBits = IsO32 ? 32 : 64;
unsigned PtrWidth = getTarget().getPointerWidth(0);
bool DidPromote = false;
if ((Ty->isIntegerType() &&
getContext().getIntWidth(Ty) < SlotSizeInBits) ||
(Ty->isPointerType() && PtrWidth < SlotSizeInBits)) {
DidPromote = true;
Ty = getContext().getIntTypeForBitwidth(SlotSizeInBits,
Ty->isSignedIntegerType());
}
auto TyInfo = getContext().getTypeInfoInChars(Ty);
// The alignment of things in the argument area is never larger than
// StackAlignInBytes.
TyInfo.Align =
std::min(TyInfo.Align, CharUnits::fromQuantity(StackAlignInBytes));
// MinABIStackAlignInBytes is the size of argument slots on the stack.
CharUnits ArgSlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes);
Address Addr = emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
TyInfo, ArgSlotSize, /*AllowHigherAlign*/ true);
// If there was a promotion, "unpromote" into a temporary.
// TODO: can we just use a pointer into a subset of the original slot?
if (DidPromote) {
Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp");
llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr);
// Truncate down to the right width.
llvm::Type *IntTy = (OrigTy->isIntegerType() ? Temp.getElementType()
: CGF.IntPtrTy);
llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy);
if (OrigTy->isPointerType())
V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType());
CGF.Builder.CreateStore(V, Temp);
Addr = Temp;
}
return Addr;
}
ABIArgInfo MipsABIInfo::extendType(QualType Ty) const {
int TySize = getContext().getTypeSize(Ty);
// MIPS64 ABI requires unsigned 32 bit integers to be sign extended.
if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
return ABIArgInfo::getSignExtend(Ty);
return ABIArgInfo::getExtend(Ty);
}
bool
MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
// This information comes from gcc's implementation, which seems to be
// as canonical as it gets.
// Everything on MIPS is 4 bytes. Double-precision FP registers
// are aliased to pairs of single-precision FP registers.
llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
// 0-31 are the general purpose registers, $0 - $31.
// 32-63 are the floating-point registers, $f0 - $f31.
// 64 and 65 are the multiply/divide registers, $hi and $lo.
// 66 is the (notional, I think) register for signal-handler return.
AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65);
// 67-74 are the floating-point status registers, $fcc0 - $fcc7.
// They are one bit wide and ignored here.
// 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31.
// (coprocessor 1 is the FP unit)
// 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31.
// 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31.
// 176-181 are the DSP accumulator registers.
AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181);
return false;
}
//===----------------------------------------------------------------------===//
// M68k ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class M68kTargetCodeGenInfo : public TargetCodeGenInfo {
public:
M68kTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
};
} // namespace
void M68kTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (const auto *attr = FD->getAttr<M68kInterruptAttr>()) {
// Handle 'interrupt' attribute:
llvm::Function *F = cast<llvm::Function>(GV);
// Step 1: Set ISR calling convention.
F->setCallingConv(llvm::CallingConv::M68k_INTR);
// Step 2: Add attributes goodness.
F->addFnAttr(llvm::Attribute::NoInline);
// Step 3: Emit ISR vector alias.
unsigned Num = attr->getNumber() / 2;
llvm::GlobalAlias::create(llvm::Function::ExternalLinkage,
"__isr_" + Twine(Num), F);
}
}
}
//===----------------------------------------------------------------------===//
// AVR ABI Implementation. Documented at
// https://gcc.gnu.org/wiki/avr-gcc#Calling_Convention
// https://gcc.gnu.org/wiki/avr-gcc#Reduced_Tiny
//===----------------------------------------------------------------------===//
namespace {
class AVRABIInfo : public DefaultABIInfo {
private:
// The total number of registers that can be used to pass parameters. It is
// 18 on AVR, or 6 on AVRTiny.
const unsigned ParamRegs;
// The total number of registers that can be used to pass the return value.
// It is 8 on AVR, or 4 on AVRTiny.
const unsigned RetRegs;
public:
AVRABIInfo(CodeGenTypes &CGT, unsigned NPR, unsigned NRR)
: DefaultABIInfo(CGT), ParamRegs(NPR), RetRegs(NRR) {}
ABIArgInfo classifyReturnType(QualType Ty, bool &LargeRet) const {
if (isAggregateTypeForABI(Ty)) {
// On AVR, a return struct with size less than or equal to 8 bytes is
// returned directly via registers R18-R25. On AVRTiny, a return struct
// with size less than or equal to 4 bytes is returned directly via
// registers R22-R25.
if (getContext().getTypeSize(Ty) <= RetRegs * 8)
return ABIArgInfo::getDirect();
// A return struct with larger size is returned via a stack
// slot, along with a pointer to it as the function's implicit argument.
LargeRet = true;
return getNaturalAlignIndirect(Ty);
}
// Otherwise we follow the default handling, which is compatible with avr-gcc.
return DefaultABIInfo::classifyReturnType(Ty);
}
ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegs) const {
unsigned TySize = getContext().getTypeSize(Ty);
// An int8 type argument always costs two registers like an int16.
if (TySize == 8 && NumRegs >= 2) {
NumRegs -= 2;
return ABIArgInfo::getExtend(Ty);
}
// If the argument size is an odd number of bytes, round up the size
// to the next even number.
TySize = llvm::alignTo(TySize, 16);
// Any type, including an array or struct type, can be passed in registers
// if there are enough registers left.
if (TySize <= NumRegs * 8) {
NumRegs -= TySize / 8;
return ABIArgInfo::getDirect();
}
// An argument is passed either completely in registers or completely in
// memory. Since there are not enough registers left, the current argument
// and all other unprocessed arguments should be passed in memory.
// However we still need to return `ABIArgInfo::getDirect()` rather than
// `ABIInfo::getNaturalAlignIndirect(Ty)`, otherwise an extra stack slot
// would be allocated and the stack frame layout would be incompatible with
// avr-gcc.
NumRegs = 0;
return ABIArgInfo::getDirect();
}
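// Hypothetical examples of the accounting above on plain AVR (18 parameter
// registers): an 8-bit char argument still costs 2 registers (like an int16);
// a 3-byte struct is rounded up to 4 bytes and costs 4 registers; a 20-byte
// struct exceeds the 18 available registers, so NumRegs is set to 0 and it
// (and all later arguments) go on the stack.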
void computeInfo(CGFunctionInfo &FI) const override {
// Decide the return type.
bool LargeRet = false;
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), LargeRet);
// Decide each argument type. The total number of registers that can be used
// for arguments depends on several factors:
// 1. Arguments of varargs functions are passed on the stack. This applies
// even to the named arguments. So no register can be used.
// 2. A total of 18 registers can be used on avr and 6 on avrtiny.
// 3. If the return type is a struct that is too large to fit in registers,
// two registers (out of 18/6) are consumed by the implicit pointer argument.
unsigned NumRegs = ParamRegs;
if (FI.isVariadic())
NumRegs = 0;
else if (LargeRet)
NumRegs -= 2;
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, NumRegs);
}
};
class AVRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
AVRTargetCodeGenInfo(CodeGenTypes &CGT, unsigned NPR, unsigned NRR)
: TargetCodeGenInfo(std::make_unique<AVRABIInfo>(CGT, NPR, NRR)) {}
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override {
// Check if global/static variable is defined in address space
// 1~6 (__flash, __flash1, __flash2, __flash3, __flash4, __flash5)
// but not constant.
if (D) {
LangAS AS = D->getType().getAddressSpace();
if (isTargetAddressSpace(AS) && 1 <= toTargetAddressSpace(AS) &&
toTargetAddressSpace(AS) <= 6 && !D->getType().isConstQualified())
CGM.getDiags().Report(D->getLocation(),
diag::err_verify_nonconst_addrspace)
<< "__flash*";
}
return TargetCodeGenInfo::getGlobalVarAddressSpace(CGM, D);
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
if (GV->isDeclaration())
return;
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
auto *Fn = cast<llvm::Function>(GV);
if (FD->getAttr<AVRInterruptAttr>())
Fn->addFnAttr("interrupt");
if (FD->getAttr<AVRSignalAttr>())
Fn->addFnAttr("signal");
}
};
}
//===----------------------------------------------------------------------===//
// TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults.
// Currently subclassed only to implement custom OpenCL C function attribute
// handling.
//===----------------------------------------------------------------------===//
namespace {
class TCETargetCodeGenInfo : public DefaultTargetCodeGenInfo {
public:
TCETargetCodeGenInfo(CodeGenTypes &CGT)
: DefaultTargetCodeGenInfo(CGT) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
};
void TCETargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (GV->isDeclaration())
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
llvm::Function *F = cast<llvm::Function>(GV);
if (M.getLangOpts().OpenCL) {
if (FD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL C Kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>();
if (Attr) {
// Convert the reqd_work_group_size() attributes to metadata.
llvm::LLVMContext &Context = F->getContext();
llvm::NamedMDNode *OpenCLMetadata =
M.getModule().getOrInsertNamedMetadata(
"opencl.kernel_wg_size_info");
SmallVector<llvm::Metadata *, 5> Operands;
Operands.push_back(llvm::ConstantAsMetadata::get(F));
Operands.push_back(
llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
M.Int32Ty, llvm::APInt(32, Attr->getXDim()))));
Operands.push_back(
llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
M.Int32Ty, llvm::APInt(32, Attr->getYDim()))));
Operands.push_back(
llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
M.Int32Ty, llvm::APInt(32, Attr->getZDim()))));
// Add a boolean constant operand for "required" (true) or "hint"
// (false) for implementing the work_group_size_hint attr later.
// Currently always true as the hint is not yet implemented.
Operands.push_back(
llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context)));
OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands));
}
}
}
}
}
//===----------------------------------------------------------------------===//
// Hexagon ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class HexagonABIInfo : public DefaultABIInfo {
public:
HexagonABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy, unsigned *RegsLeft) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
Address EmitVAArgFromMemory(CodeGenFunction &CFG, Address VAListAddr,
QualType Ty) const;
Address EmitVAArgForHexagon(CodeGenFunction &CFG, Address VAListAddr,
QualType Ty) const;
Address EmitVAArgForHexagonLinux(CodeGenFunction &CFG, Address VAListAddr,
QualType Ty) const;
};
class HexagonTargetCodeGenInfo : public TargetCodeGenInfo {
public:
HexagonTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<HexagonABIInfo>(CGT)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 29;
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &GCM) const override {
if (GV->isDeclaration())
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
}
};
} // namespace
void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const {
unsigned RegsLeft = 6;
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, &RegsLeft);
}
static bool HexagonAdjustRegsLeft(uint64_t Size, unsigned *RegsLeft) {
assert(Size <= 64 && "Not expecting to pass arguments larger than 64 bits"
" through registers");
if (*RegsLeft == 0)
return false;
if (Size <= 32) {
(*RegsLeft)--;
return true;
}
if (2 <= (*RegsLeft & (~1U))) {
*RegsLeft = (*RegsLeft & (~1U)) - 2;
return true;
}
// The next available register was r5 but the candidate was larger than 32
// bits, so it has to go on the stack. However, we still consume r5.
if (*RegsLeft == 1)
*RegsLeft = 0;
return false;
}
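// Hypothetical walk-through of the pairing logic above: with 3 registers left,
// a 64-bit candidate needs an aligned register pair, so one register is
// skipped and the pair is consumed, leaving 0; with only 1 register left the
// candidate goes on the stack but that last register is still consumed (see
// the comment above about r5).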
ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty,
unsigned *RegsLeft) const {
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
uint64_t Size = getContext().getTypeSize(Ty);
if (Size <= 64)
HexagonAdjustRegsLeft(Size, RegsLeft);
if (Size > 64 && Ty->isBitIntType())
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect();
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Ignore empty records.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
uint64_t Size = getContext().getTypeSize(Ty);
unsigned Align = getContext().getTypeAlign(Ty);
if (Size > 64)
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
if (HexagonAdjustRegsLeft(Size, RegsLeft))
Align = Size <= 32 ? 32 : 64;
if (Size <= Align) {
// Pass in the smallest viable integer type.
if (!llvm::isPowerOf2_64(Size))
Size = llvm::NextPowerOf2(Size);
return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size));
}
return DefaultABIInfo::classifyArgumentType(Ty);
}
ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
const TargetInfo &T = CGT.getTarget();
uint64_t Size = getContext().getTypeSize(RetTy);
if (RetTy->getAs<VectorType>()) {
// HVX vectors are returned in vector registers or register pairs.
if (T.hasFeature("hvx")) {
assert(T.hasFeature("hvx-length64b") || T.hasFeature("hvx-length128b"));
uint64_t VecSize = T.hasFeature("hvx-length64b") ? 64*8 : 128*8;
if (Size == VecSize || Size == 2*VecSize)
return ABIArgInfo::getDirectInReg();
}
// Large vector types should be returned via memory.
if (Size > 64)
return getNaturalAlignIndirect(RetTy);
}
if (!isAggregateTypeForABI(RetTy)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (Size > 64 && RetTy->isBitIntType())
return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect();
}
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
// Aggregates <= 8 bytes are returned in registers; other aggregates
// are returned indirectly.
if (Size <= 64) {
// Return in the smallest viable integer type.
if (!llvm::isPowerOf2_64(Size))
Size = llvm::NextPowerOf2(Size);
return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size));
}
return getNaturalAlignIndirect(RetTy, /*ByVal=*/true);
}
Address HexagonABIInfo::EmitVAArgFromMemory(CodeGenFunction &CGF,
Address VAListAddr,
QualType Ty) const {
// Load the overflow area pointer.
Address __overflow_area_pointer_p =
CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p");
llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad(
__overflow_area_pointer_p, "__overflow_area_pointer");
uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
if (Align > 4) {
// Alignment should be a power of 2.
assert((Align & (Align - 1)) == 0 && "Alignment is not power of 2!");
// overflow_arg_area = (overflow_arg_area + align - 1) & -align;
llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int64Ty, Align - 1);
// Add offset to the current pointer to access the argument.
__overflow_area_pointer =
CGF.Builder.CreateGEP(CGF.Int8Ty, __overflow_area_pointer, Offset);
llvm::Value *AsInt =
CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty);
// Create a mask which should be "AND"ed
// with (overflow_arg_area + align - 1)
llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -(int)Align);
__overflow_area_pointer = CGF.Builder.CreateIntToPtr(
CGF.Builder.CreateAnd(AsInt, Mask), __overflow_area_pointer->getType(),
"__overflow_area_pointer.align");
}
// Get the type of the argument from memory and bitcast
// overflow area pointer to the argument type.
llvm::Type *PTy = CGF.ConvertTypeForMem(Ty);
Address AddrTyped = CGF.Builder.CreateElementBitCast(
Address(__overflow_area_pointer, CGF.Int8Ty,
CharUnits::fromQuantity(Align)),
PTy);
// Round up to the minimum stack alignment for varargs which is 4 bytes.
uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
__overflow_area_pointer = CGF.Builder.CreateGEP(
CGF.Int8Ty, __overflow_area_pointer,
llvm::ConstantInt::get(CGF.Int32Ty, Offset),
"__overflow_area_pointer.next");
CGF.Builder.CreateStore(__overflow_area_pointer, __overflow_area_pointer_p);
return AddrTyped;
}
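// Quick arithmetic check of the rounding used above (illustrative only): with
// Align = 8 and an overflow-area pointer of 0x1004, (0x1004 + 7) & -8 = 0x1008,
// i.e. the pointer is bumped to the next 8-byte boundary, while an already
// aligned pointer such as 0x1008 is left unchanged.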
Address HexagonABIInfo::EmitVAArgForHexagon(CodeGenFunction &CGF,
Address VAListAddr,
QualType Ty) const {
// FIXME: Need to handle alignment
llvm::Type *BP = CGF.Int8PtrTy;
CGBuilderTy &Builder = CGF.Builder;
Address VAListAddrAsBPP = Builder.CreateElementBitCast(VAListAddr, BP, "ap");
llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
// Handle address alignment for type alignment > 32 bits
uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
if (TyAlign > 4) {
assert((TyAlign & (TyAlign - 1)) == 0 && "Alignment is not power of 2!");
llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty);
AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1));
AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1)));
Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
}
Address AddrTyped = Builder.CreateElementBitCast(
Address(Addr, CGF.Int8Ty, CharUnits::fromQuantity(TyAlign)),
CGF.ConvertType(Ty));
uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
llvm::Value *NextAddr = Builder.CreateGEP(
CGF.Int8Ty, Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "ap.next");
Builder.CreateStore(NextAddr, VAListAddrAsBPP);
return AddrTyped;
}
Address HexagonABIInfo::EmitVAArgForHexagonLinux(CodeGenFunction &CGF,
Address VAListAddr,
QualType Ty) const {
int ArgSize = CGF.getContext().getTypeSize(Ty) / 8;
if (ArgSize > 8)
return EmitVAArgFromMemory(CGF, VAListAddr, Ty);
// Here we have to check whether the argument is in the register area or
// in the overflow area.
// If the saved register area pointer + argsize rounded up to alignment >
// saved register area end pointer, argument is in overflow area.
unsigned RegsLeft = 6;
Ty = CGF.getContext().getCanonicalType(Ty);
(void)classifyArgumentType(Ty, &RegsLeft);
llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
// Get the rounded size of the argument. GCC does not allow varargs of
// size < 4 bytes; we follow the same logic here.
ArgSize = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
int ArgAlign = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
// Argument may be in saved register area
CGF.EmitBlock(MaybeRegBlock);
// Load the current saved register area pointer.
Address __current_saved_reg_area_pointer_p = CGF.Builder.CreateStructGEP(
VAListAddr, 0, "__current_saved_reg_area_pointer_p");
llvm::Value *__current_saved_reg_area_pointer = CGF.Builder.CreateLoad(
__current_saved_reg_area_pointer_p, "__current_saved_reg_area_pointer");
// Load the saved register area end pointer.
Address __saved_reg_area_end_pointer_p = CGF.Builder.CreateStructGEP(
VAListAddr, 1, "__saved_reg_area_end_pointer_p");
llvm::Value *__saved_reg_area_end_pointer = CGF.Builder.CreateLoad(
__saved_reg_area_end_pointer_p, "__saved_reg_area_end_pointer");
// If the size of argument is > 4 bytes, check if the stack
// location is aligned to 8 bytes
if (ArgAlign > 4) {
llvm::Value *__current_saved_reg_area_pointer_int =
CGF.Builder.CreatePtrToInt(__current_saved_reg_area_pointer,
CGF.Int32Ty);
__current_saved_reg_area_pointer_int = CGF.Builder.CreateAdd(
__current_saved_reg_area_pointer_int,
llvm::ConstantInt::get(CGF.Int32Ty, (ArgAlign - 1)),
"align_current_saved_reg_area_pointer");
__current_saved_reg_area_pointer_int =
CGF.Builder.CreateAnd(__current_saved_reg_area_pointer_int,
llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign),
"align_current_saved_reg_area_pointer");
__current_saved_reg_area_pointer =
CGF.Builder.CreateIntToPtr(__current_saved_reg_area_pointer_int,
__current_saved_reg_area_pointer->getType(),
"align_current_saved_reg_area_pointer");
}
llvm::Value *__new_saved_reg_area_pointer =
CGF.Builder.CreateGEP(CGF.Int8Ty, __current_saved_reg_area_pointer,
llvm::ConstantInt::get(CGF.Int32Ty, ArgSize),
"__new_saved_reg_area_pointer");
llvm::Value *UsingStack = nullptr;
UsingStack = CGF.Builder.CreateICmpSGT(__new_saved_reg_area_pointer,
__saved_reg_area_end_pointer);
CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, InRegBlock);
// Argument in saved register area
// Implement the block where argument is in register saved area
CGF.EmitBlock(InRegBlock);
llvm::Type *PTy = CGF.ConvertType(Ty);
llvm::Value *__saved_reg_area_p = CGF.Builder.CreateBitCast(
__current_saved_reg_area_pointer, llvm::PointerType::getUnqual(PTy));
CGF.Builder.CreateStore(__new_saved_reg_area_pointer,
__current_saved_reg_area_pointer_p);
CGF.EmitBranch(ContBlock);
// Argument in overflow area
// Implement the block where the argument is in overflow area.
CGF.EmitBlock(OnStackBlock);
// Load the overflow area pointer
Address __overflow_area_pointer_p =
CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p");
llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad(
__overflow_area_pointer_p, "__overflow_area_pointer");
// Align the overflow area pointer according to the alignment of the argument
if (ArgAlign > 4) {
llvm::Value *__overflow_area_pointer_int =
CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty);
__overflow_area_pointer_int =
CGF.Builder.CreateAdd(__overflow_area_pointer_int,
llvm::ConstantInt::get(CGF.Int32Ty, ArgAlign - 1),
"align_overflow_area_pointer");
__overflow_area_pointer_int =
CGF.Builder.CreateAnd(__overflow_area_pointer_int,
llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign),
"align_overflow_area_pointer");
__overflow_area_pointer = CGF.Builder.CreateIntToPtr(
__overflow_area_pointer_int, __overflow_area_pointer->getType(),
"align_overflow_area_pointer");
}
// Get the pointer for next argument in overflow area and store it
// to overflow area pointer.
llvm::Value *__new_overflow_area_pointer = CGF.Builder.CreateGEP(
CGF.Int8Ty, __overflow_area_pointer,
llvm::ConstantInt::get(CGF.Int32Ty, ArgSize),
"__overflow_area_pointer.next");
CGF.Builder.CreateStore(__new_overflow_area_pointer,
__overflow_area_pointer_p);
CGF.Builder.CreateStore(__new_overflow_area_pointer,
__current_saved_reg_area_pointer_p);
// Bitcast the overflow area pointer to the type of argument.
llvm::Type *OverflowPTy = CGF.ConvertTypeForMem(Ty);
llvm::Value *__overflow_area_p = CGF.Builder.CreateBitCast(
__overflow_area_pointer, llvm::PointerType::getUnqual(OverflowPTy));
CGF.EmitBranch(ContBlock);
// Get the correct pointer to load the variable argument
// Implement the ContBlock
CGF.EmitBlock(ContBlock);
llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty);
llvm::Type *MemPTy = llvm::PointerType::getUnqual(MemTy);
llvm::PHINode *ArgAddr = CGF.Builder.CreatePHI(MemPTy, 2, "vaarg.addr");
ArgAddr->addIncoming(__saved_reg_area_p, InRegBlock);
ArgAddr->addIncoming(__overflow_area_p, OnStackBlock);
return Address(ArgAddr, MemTy, CharUnits::fromQuantity(ArgAlign));
}
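// For reference, the code above assumes a Hexagon/musl va_list record with
// three pointer fields, a sketch inferred from the struct GEP indices (the tag
// name below is illustrative, not a definition used elsewhere):
//
//   struct __va_list_tag {
//     void *__current_saved_reg_area_pointer; // field 0
//     void *__saved_reg_area_end_pointer;     // field 1
//     void *__overflow_area_pointer;          // field 2
//   };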
Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
if (getTarget().getTriple().isMusl())
return EmitVAArgForHexagonLinux(CGF, VAListAddr, Ty);
return EmitVAArgForHexagon(CGF, VAListAddr, Ty);
}
//===----------------------------------------------------------------------===//
// Lanai ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class LanaiABIInfo : public DefaultABIInfo {
public:
LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
bool shouldUseInReg(QualType Ty, CCState &State) const;
void computeInfo(CGFunctionInfo &FI) const override {
CCState State(FI);
// Lanai uses 4 registers to pass arguments unless the function has the
// regparm attribute set.
if (FI.getHasRegParm()) {
State.FreeRegs = FI.getRegParm();
} else {
State.FreeRegs = 4;
}
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, State);
}
ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType Ty, CCState &State) const;
};
} // end anonymous namespace
bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const {
unsigned Size = getContext().getTypeSize(Ty);
unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U;
if (SizeInRegs == 0)
return false;
if (SizeInRegs > State.FreeRegs) {
State.FreeRegs = 0;
return false;
}
State.FreeRegs -= SizeInRegs;
return true;
}
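// Worked example (illustrative): with State.FreeRegs = 4, an i64 argument
// needs alignTo(64, 32) / 32 = 2 registers and leaves 2 free; a subsequent
// 96-bit argument would need 3 registers, which exceeds the 2 remaining, so
// FreeRegs is zeroed and the argument is not passed in registers.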
ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal,
CCState &State) const {
if (!ByVal) {
if (State.FreeRegs) {
--State.FreeRegs; // Non-byval indirects just use one pointer.
return getNaturalAlignIndirectInReg(Ty);
}
return getNaturalAlignIndirect(Ty, false);
}
// Compute the byval alignment.
const unsigned MinABIStackAlignInBytes = 4;
unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
/*Realign=*/TypeAlign >
MinABIStackAlignInBytes);
}
ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// Check with the C++ ABI first.
const RecordType *RT = Ty->getAs<RecordType>();
if (RT) {
CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
if (RAA == CGCXXABI::RAA_Indirect) {
return getIndirectResult(Ty, /*ByVal=*/false, State);
} else if (RAA == CGCXXABI::RAA_DirectInMemory) {
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
}
}
if (isAggregateTypeForABI(Ty)) {
// Structures with flexible arrays are always indirect.
if (RT && RT->getDecl()->hasFlexibleArrayMember())
return getIndirectResult(Ty, /*ByVal=*/true, State);
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
llvm::LLVMContext &LLVMContext = getVMContext();
unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
if (SizeInRegs <= State.FreeRegs) {
llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
State.FreeRegs -= SizeInRegs;
return ABIArgInfo::getDirectInReg(Result);
} else {
State.FreeRegs = 0;
}
return getIndirectResult(Ty, true, State);
}
// Treat an enum type as its underlying type.
if (const auto *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
bool InReg = shouldUseInReg(Ty, State);
// Don't pass >64 bit integers in registers.
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 64)
return getIndirectResult(Ty, /*ByVal=*/true, State);
if (isPromotableIntegerTypeForABI(Ty)) {
if (InReg)
return ABIArgInfo::getDirectInReg();
return ABIArgInfo::getExtend(Ty);
}
if (InReg)
return ABIArgInfo::getDirectInReg();
return ABIArgInfo::getDirect();
}
namespace {
class LanaiTargetCodeGenInfo : public TargetCodeGenInfo {
public:
LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<LanaiABIInfo>(CGT)) {}
};
}
//===----------------------------------------------------------------------===//
// AMDGPU ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class AMDGPUABIInfo final : public DefaultABIInfo {
private:
static const unsigned MaxNumRegsForArgsRet = 16;
unsigned numRegsForType(QualType Ty) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const override;
// Coerce HIP scalar pointer arguments from generic pointers to global ones.
llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
unsigned ToAS) const {
// Single value types.
auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
if (PtrTy && PtrTy->getAddressSpace() == FromAS)
return llvm::PointerType::getWithSamePointeeType(PtrTy, ToAS);
return Ty;
}
public:
explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
DefaultABIInfo(CGT) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
};
bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
return true;
}
bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
const Type *Base, uint64_t Members) const {
uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
// Homogeneous Aggregates may occupy at most 16 registers.
return Members * NumRegs <= MaxNumRegsForArgsRet;
}
/// Estimate number of registers the type will use when passed in registers.
unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
unsigned NumRegs = 0;
if (const VectorType *VT = Ty->getAs<VectorType>()) {
// Compute from the number of elements. The reported size is based on the
// in-memory size, which includes the padding 4th element for 3-vectors.
QualType EltTy = VT->getElementType();
unsigned EltSize = getContext().getTypeSize(EltTy);
// 16-bit element vectors should be passed as packed.
if (EltSize == 16)
return (VT->getNumElements() + 1) / 2;
unsigned EltNumRegs = (EltSize + 31) / 32;
return EltNumRegs * VT->getNumElements();
}
if (const RecordType *RT = Ty->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
assert(!RD->hasFlexibleArrayMember());
for (const FieldDecl *Field : RD->fields()) {
QualType FieldTy = Field->getType();
NumRegs += numRegsForType(FieldTy);
}
return NumRegs;
}
return (getContext().getTypeSize(Ty) + 31) / 32;
}
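// Rough register-count examples for the estimate above (illustrative): a
// <4 x float> vector costs 4 registers, an <8 x half> vector is packed two
// elements per register for (8 + 1) / 2 = 4 registers, and a struct of one
// float and one double costs 1 + 2 = 3 registers.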
void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
llvm::CallingConv::ID CC = FI.getCallingConvention();
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
unsigned NumRegsLeft = MaxNumRegsForArgsRet;
for (auto &Arg : FI.arguments()) {
if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
Arg.info = classifyKernelArgumentType(Arg.type);
} else {
Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
}
}
}
Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
llvm_unreachable("AMDGPU does not support varargs");
}
ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
if (isAggregateTypeForABI(RetTy)) {
// Records with non-trivial destructors/copy-constructors should not be
// returned by value.
if (!getRecordArgABI(RetTy, getCXXABI())) {
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
// Lower single-element structs to just return a regular value.
if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
if (const RecordType *RT = RetTy->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyReturnType(RetTy);
}
// Pack aggregates <= 8 bytes into a single VGPR or a register pair.
uint64_t Size = getContext().getTypeSize(RetTy);
if (Size <= 16)
return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
if (Size <= 32)
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
if (Size <= 64) {
llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
}
if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
return ABIArgInfo::getDirect();
}
}
// Otherwise just do the default thing.
return DefaultABIInfo::classifyReturnType(RetTy);
}
/// For kernels all parameters are really passed in a special buffer. It doesn't
/// make sense to pass anything byval, so everything must be direct.
ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
// TODO: Can we omit empty structs?
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
Ty = QualType(SeltTy, 0);
llvm::Type *OrigLTy = CGT.ConvertType(Ty);
llvm::Type *LTy = OrigLTy;
if (getContext().getLangOpts().HIP) {
LTy = coerceKernelArgumentType(
OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
/*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
}
// FIXME: Should also use this for OpenCL, but it requires addressing the
// problem of kernels being called.
//
// FIXME: This doesn't apply the optimization of coercing pointers in structs
// to global address space when using byref. This would require implementing a
// new kind of coercion of the in-memory type for indirect arguments.
if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
isAggregateTypeForABI(Ty)) {
return ABIArgInfo::getIndirectAliased(
getContext().getTypeAlignInChars(Ty),
getContext().getTargetAddressSpace(LangAS::opencl_constant),
false /*Realign*/, nullptr /*Padding*/);
}
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
// individual elements, which confuses the Clover OpenCL backend; therefore we
// have to set it to false here. Other args of getDirect() are just defaults.
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
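// Concrete sketch of the HIP coercion above (assuming the usual AMDGPU
// numbering where the generic/default address space is 0 and the global
// device address space is 1): a kernel parameter declared as 'float *' is
// lowered with the IR type 'float addrspace(1)*' instead of the generic
// 'float *', while an aggregate parameter whose IR type is unchanged is
// passed indirectly in the constant address space via getIndirectAliased.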
ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
unsigned &NumRegsLeft) const {
assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
Ty = useFirstFieldIfTransparentUnion(Ty);
if (isAggregateTypeForABI(Ty)) {
// Records with non-trivial destructors/copy-constructors should not be
// passed by value.
if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
// Lower single-element structs to just pass a regular value. TODO: We
// could do reasonable-size multiple-element structs too, using getExpand(),
// though watch out for things like bitfields.
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
if (const RecordType *RT = Ty->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyArgumentType(Ty);
}
// Pack aggregates <= 8 bytes into a single VGPR or a register pair.
uint64_t Size = getContext().getTypeSize(Ty);
if (Size <= 64) {
unsigned NumRegs = (Size + 31) / 32;
NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
if (Size <= 16)
return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
if (Size <= 32)
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
// XXX: Should this be i64 instead, and should the limit increase?
llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
}
if (NumRegsLeft > 0) {
unsigned NumRegs = numRegsForType(Ty);
if (NumRegsLeft >= NumRegs) {
NumRegsLeft -= NumRegs;
return ABIArgInfo::getDirect();
}
}
}
// Otherwise just do the default thing.
ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
if (!ArgInfo.isIndirect()) {
unsigned NumRegs = numRegsForType(Ty);
NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
}
return ArgInfo;
}
class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}
void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
CodeGenModule &CGM) const;
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T, QualType QT) const override;
LangAS getASTAllocaAddressSpace() const override {
return getLangASFromTargetAS(
getABIInfo().getDataLayout().getAllocaAddrSpace());
}
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
SyncScope Scope,
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const override;
llvm::Function *
createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *BlockInvokeFunc,
llvm::Type *BlockTy) const override;
bool shouldEmitStaticExternCAliases() const override;
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
}
static bool requiresAMDGPUProtectedVisibility(const Decl *D,
llvm::GlobalValue *GV) {
if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
return false;
return D->hasAttr<OpenCLKernelAttr>() ||
(isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
(isa<VarDecl>(D) &&
(D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()));
}
void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
const auto *ReqdWGS =
M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
const bool IsOpenCLKernel =
M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
if (ReqdWGS || FlatWGS) {
unsigned Min = 0;
unsigned Max = 0;
if (FlatWGS) {
Min = FlatWGS->getMin()
->EvaluateKnownConstInt(M.getContext())
.getExtValue();
Max = FlatWGS->getMax()
->EvaluateKnownConstInt(M.getContext())
.getExtValue();
}
if (ReqdWGS && Min == 0 && Max == 0)
Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
if (Min != 0) {
assert(Min <= Max && "Min must be less than or equal Max");
std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
} else if (IsOpenCLKernel || IsHIPKernel) {
// By default, restrict the maximum size to a value specified by
// --gpu-max-threads-per-block=n or its default value for HIP.
const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
const unsigned DefaultMaxWorkGroupSize =
IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
: M.getLangOpts().GPUMaxThreadsPerBlock;
std::string AttrVal =
std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
}
if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
unsigned Min =
Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue();
unsigned Max = Attr->getMax() ? Attr->getMax()
->EvaluateKnownConstInt(M.getContext())
.getExtValue()
: 0;
if (Min != 0) {
assert((Max == 0 || Min <= Max) && "Min must be less than or equal to Max");
std::string AttrVal = llvm::utostr(Min);
if (Max != 0)
AttrVal = AttrVal + "," + llvm::utostr(Max);
F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
}
if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
unsigned NumSGPR = Attr->getNumSGPR();
if (NumSGPR != 0)
F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
}
if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
uint32_t NumVGPR = Attr->getNumVGPR();
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
}
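// Example of the resulting attributes (illustrative): an OpenCL kernel
// declared with reqd_work_group_size(8, 8, 4) and no explicit flat-work-group
// or waves-per-eu attributes gets Min = Max = 8 * 8 * 4 = 256, i.e.
// "amdgpu-flat-work-group-size"="256,256"; a plain HIP kernel with no
// attributes instead gets "1,<GPUMaxThreadsPerBlock>".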
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (requiresAMDGPUProtectedVisibility(D, GV)) {
GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
GV->setDSOLocal(true);
}
if (GV->isDeclaration())
return;
llvm::Function *F = dyn_cast<llvm::Function>(GV);
if (!F)
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (FD)
setFunctionDeclAttributes(FD, F, M);
const bool IsHIPKernel =
M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
if (IsHIPKernel)
F->addFnAttr("uniform-work-group-size", "true");
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
F->addFnAttr("amdgpu-ieee", "false");
}
unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
return llvm::CallingConv::AMDGPU_KERNEL;
}
// Currently LLVM assumes null pointers always have value 0,
// which results in incorrectly transformed IR. Therefore, instead of
// emitting null pointers in the private and local address spaces, a null
// pointer in the generic address space is emitted and then address-space
// cast to a pointer in the local or private address space.
llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
QualType QT) const {
if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
return llvm::ConstantPointerNull::get(PT);
auto &Ctx = CGM.getContext();
auto NPT = llvm::PointerType::getWithSamePointeeType(
PT, Ctx.getTargetAddressSpace(LangAS::opencl_generic));
return llvm::ConstantExpr::getAddrSpaceCast(
llvm::ConstantPointerNull::get(NPT), PT);
}
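// Illustrative IR for the cast above (a sketch assuming the usual AMDGPU
// numbering where generic is address space 0 and private is address space 5;
// the actual non-zero null value comes from getTargetNullPointerValue): a null
// 'private' pointer is emitted as
//   addrspacecast (i8* null to i8 addrspace(5)*)
// rather than as 'i8 addrspace(5)* null', so it is not folded to the value 0.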
LangAS
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
assert(!CGM.getLangOpts().OpenCL &&
!(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
"Address space agnostic languages only");
LangAS DefaultGlobalAS = getLangASFromTargetAS(
CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
if (!D)
return DefaultGlobalAS;
LangAS AddrSpace = D->getType().getAddressSpace();
assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace));
if (AddrSpace != LangAS::Default)
return AddrSpace;
// Only promote to address space 4 if VarDecl has constant initialization.
if (CGM.isTypeConstant(D->getType(), false) &&
D->hasConstantInitialization()) {
if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
return *ConstAS;
}
return DefaultGlobalAS;
}
llvm::SyncScope::ID
AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
SyncScope Scope,
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const {
std::string Name;
switch (Scope) {
case SyncScope::HIPSingleThread:
Name = "singlethread";
break;
case SyncScope::HIPWavefront:
case SyncScope::OpenCLSubGroup:
Name = "wavefront";
break;
case SyncScope::HIPWorkgroup:
case SyncScope::OpenCLWorkGroup:
Name = "workgroup";
break;
case SyncScope::HIPAgent:
case SyncScope::OpenCLDevice:
Name = "agent";
break;
case SyncScope::HIPSystem:
case SyncScope::OpenCLAllSVMDevices:
Name = "";
break;
}
if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
if (!Name.empty())
Name = Twine(Twine(Name) + Twine("-")).str();
Name = Twine(Twine(Name) + Twine("one-as")).str();
}
return Ctx.getOrInsertSyncScopeID(Name);
}
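// Example mappings produced above (illustrative): an acquire atomic at
// SyncScope::HIPWorkgroup yields the sync scope name "workgroup-one-as", a
// sequentially consistent atomic at the same scope yields "workgroup", and a
// sequentially consistent SyncScope::HIPSystem atomic yields "" (the default
// system scope).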
bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
const FunctionType *&FT) const {
FT = getABIInfo().getContext().adjustFunctionType(
FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
}
//===----------------------------------------------------------------------===//
// SPARC v8 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.
//
// Ensures that complex values are passed in registers.
//
namespace {
class SparcV8ABIInfo : public DefaultABIInfo {
public:
SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
void computeInfo(CGFunctionInfo &FI) const override;
};
} // end anonymous namespace
ABIArgInfo
SparcV8ABIInfo::classifyReturnType(QualType Ty) const {
if (Ty->isAnyComplexType()) {
return ABIArgInfo::getDirect();
}
else {
return DefaultABIInfo::classifyReturnType(Ty);
}
}
void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &Arg : FI.arguments())
Arg.info = classifyArgumentType(Arg.type);
}
namespace {
class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo {
public:
SparcV8TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<SparcV8ABIInfo>(CGT)) {}
llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
int Offset;
if (isAggregateTypeForABI(CGF.CurFnInfo->getReturnType()))
Offset = 12;
else
Offset = 8;
return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
llvm::ConstantInt::get(CGF.Int32Ty, Offset));
}
llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
int Offset;
if (isAggregateTypeForABI(CGF.CurFnInfo->getReturnType()))
Offset = -12;
else
Offset = -8;
return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
llvm::ConstantInt::get(CGF.Int32Ty, Offset));
}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// SPARC v9 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.
//
// Function arguments are mapped to a nominal "parameter array" and promoted to
// registers depending on their type. Each argument occupies 8 or 16 bytes in
// the array; structs larger than 16 bytes are passed indirectly.
//
// One case requires special care:
//
// struct mixed {
// int i;
// float f;
// };
//
// When a struct mixed is passed by value, it only occupies 8 bytes in the
// parameter array, but the int is passed in an integer register, and the float
// is passed in a floating point register. This is represented as two arguments
// with the LLVM IR inreg attribute:
//
// declare void f(i32 inreg %i, float inreg %f)
//
// The code generator will only allocate 4 bytes from the parameter array for
// the inreg arguments. All other arguments are allocated a multiple of 8
// bytes.
//
namespace {
class SparcV9ABIInfo : public ABIInfo {
public:
SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
private:
ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
// Coercion type builder for structs passed in registers. The coercion type
// serves two purposes:
//
// 1. Pad structs to a multiple of 64 bits, so they are passed 'left-aligned'
// in registers.
// 2. Expose aligned floating point elements as first-level elements, so the
// code generator knows to pass them in floating point registers.
//
// We also compute the InReg flag which indicates that the struct contains
// aligned 32-bit floats.
//
struct CoerceBuilder {
llvm::LLVMContext &Context;
const llvm::DataLayout &DL;
SmallVector<llvm::Type*, 8> Elems;
uint64_t Size;
bool InReg;
CoerceBuilder(llvm::LLVMContext &c, const llvm::DataLayout &dl)
: Context(c), DL(dl), Size(0), InReg(false) {}
// Pad Elems with integers until Size is ToSize.
void pad(uint64_t ToSize) {
assert(ToSize >= Size && "Cannot remove elements");
if (ToSize == Size)
return;
// Finish the current 64-bit word.
uint64_t Aligned = llvm::alignTo(Size, 64);
if (Aligned > Size && Aligned <= ToSize) {
Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size));
Size = Aligned;
}
// Add whole 64-bit words.
while (Size + 64 <= ToSize) {
Elems.push_back(llvm::Type::getInt64Ty(Context));
Size += 64;
}
// Final in-word padding.
if (Size < ToSize) {
Elems.push_back(llvm::IntegerType::get(Context, ToSize - Size));
Size = ToSize;
}
}
// Add a floating point element at Offset.
void addFloat(uint64_t Offset, llvm::Type *Ty, unsigned Bits) {
// Unaligned floats are treated as integers.
if (Offset % Bits)
return;
// The InReg flag is only required if there are any floats < 64 bits.
if (Bits < 64)
InReg = true;
pad(Offset);
Elems.push_back(Ty);
Size = Offset + Bits;
}
// Add a struct type to the coercion type, starting at Offset (in bits).
void addStruct(uint64_t Offset, llvm::StructType *StrTy) {
const llvm::StructLayout *Layout = DL.getStructLayout(StrTy);
for (unsigned i = 0, e = StrTy->getNumElements(); i != e; ++i) {
llvm::Type *ElemTy = StrTy->getElementType(i);
uint64_t ElemOffset = Offset + Layout->getElementOffsetInBits(i);
switch (ElemTy->getTypeID()) {
case llvm::Type::StructTyID:
addStruct(ElemOffset, cast<llvm::StructType>(ElemTy));
break;
case llvm::Type::FloatTyID:
addFloat(ElemOffset, ElemTy, 32);
break;
case llvm::Type::DoubleTyID:
addFloat(ElemOffset, ElemTy, 64);
break;
case llvm::Type::FP128TyID:
addFloat(ElemOffset, ElemTy, 128);
break;
case llvm::Type::PointerTyID:
if (ElemOffset % 64 == 0) {
pad(ElemOffset);
Elems.push_back(ElemTy);
Size += 64;
}
break;
default:
break;
}
}
}
// Check if Ty is a usable substitute for the coercion type.
bool isUsableType(llvm::StructType *Ty) const {
return llvm::makeArrayRef(Elems) == Ty->elements();
}
// Get the coercion type as a literal struct type.
llvm::Type *getType() const {
if (Elems.size() == 1)
return Elems.front();
else
return llvm::StructType::get(Context, Elems);
}
};
};
} // end anonymous namespace
ABIArgInfo
SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const {
if (Ty->isVoidType())
return ABIArgInfo::getIgnore();
uint64_t Size = getContext().getTypeSize(Ty);
// Anything too big to fit in registers is passed with an explicit indirect
// pointer / sret pointer.
if (Size > SizeLimit)
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Integer types smaller than a register are extended.
if (Size < 64 && Ty->isIntegerType())
return ABIArgInfo::getExtend(Ty);
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() < 64)
return ABIArgInfo::getExtend(Ty);
// Other non-aggregates go in registers.
if (!isAggregateTypeForABI(Ty))
return ABIArgInfo::getDirect();
// If a C++ object has either a non-trivial copy constructor or a non-trivial
// destructor, it is passed with an explicit indirect pointer / sret pointer.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// This is a small aggregate type that should be passed in registers.
// Build a coercion type from the LLVM struct type.
llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty));
if (!StrTy)
return ABIArgInfo::getDirect();
CoerceBuilder CB(getVMContext(), getDataLayout());
CB.addStruct(0, StrTy);
CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64));
// Try to use the original type for coercion.
llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType();
if (CB.InReg)
return ABIArgInfo::getDirectInReg(CoerceTy);
else
return ABIArgInfo::getDirect(CoerceTy);
}
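// Worked coercion example (an informal trace of CoerceBuilder above, not
// additional ABI logic): for 'struct { double d; int i; }' the double is added
// as a first-level element at offset 0, the int falls through the default
// case, and the final pad() fills the second 64-bit word with an i64, giving
// the coercion type { double, i64 } passed directly (InReg stays false because
// the only float member is 64 bits wide).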
Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
ABIArgInfo AI = classifyType(Ty, 16 * 8);
llvm::Type *ArgTy = CGT.ConvertType(Ty);
if (AI.canHaveCoerceToType() && !AI.getCoerceToType())
AI.setCoerceToType(ArgTy);
CharUnits SlotSize = CharUnits::fromQuantity(8);
CGBuilderTy &Builder = CGF.Builder;
Address Addr = Address(Builder.CreateLoad(VAListAddr, "ap.cur"),
getVAListElementType(CGF), SlotSize);
llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy);
auto TypeInfo = getContext().getTypeInfoInChars(Ty);
Address ArgAddr = Address::invalid();
CharUnits Stride;
switch (AI.getKind()) {
case ABIArgInfo::Expand:
case ABIArgInfo::CoerceAndExpand:
case ABIArgInfo::InAlloca:
llvm_unreachable("Unsupported ABI kind for va_arg");
case ABIArgInfo::Extend: {
Stride = SlotSize;
CharUnits Offset = SlotSize - TypeInfo.Width;
ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend");
break;
}
case ABIArgInfo::Direct: {
auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType());
Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize);
ArgAddr = Addr;
break;
}
case ABIArgInfo::Indirect:
case ABIArgInfo::IndirectAliased:
Stride = SlotSize;
ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect");
ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), ArgTy,
TypeInfo.Align);
break;
case ABIArgInfo::Ignore:
return Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeInfo.Align);
}
// Update VAList.
Address NextPtr = Builder.CreateConstInBoundsByteGEP(Addr, Stride, "ap.next");
Builder.CreateStore(NextPtr.getPointer(), VAListAddr);
return Builder.CreateElementBitCast(ArgAddr, ArgTy, "arg.addr");
}
void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8);
for (auto &I : FI.arguments())
I.info = classifyType(I.type, 16 * 8);
}
namespace {
class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo {
public:
SparcV9TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<SparcV9ABIInfo>(CGT)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 14;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
llvm::ConstantInt::get(CGF.Int32Ty, 8));
}
llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
llvm::ConstantInt::get(CGF.Int32Ty, -8));
}
};
} // end anonymous namespace
bool
SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
// This is calculated from the LLVM and GCC tables and verified
// against gcc output. AFAIK all ABIs use the same encoding.
CodeGen::CGBuilderTy &Builder = CGF.Builder;
llvm::IntegerType *i8 = CGF.Int8Ty;
llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
// 0-31: the 8-byte general-purpose registers
AssignToArrayRange(Builder, Address, Eight8, 0, 31);
// 32-63: f0-31, the 4-byte floating-point registers
AssignToArrayRange(Builder, Address, Four8, 32, 63);
// Y = 64
// PSR = 65
// WIM = 66
// TBR = 67
// PC = 68
// NPC = 69
// FSR = 70
// CSR = 71
AssignToArrayRange(Builder, Address, Eight8, 64, 71);
// 72-87: d0-15, the 8-byte floating-point registers
AssignToArrayRange(Builder, Address, Eight8, 72, 87);
return false;
}
// ARC ABI implementation.
namespace {
class ARCABIInfo : public DefaultABIInfo {
public:
using DefaultABIInfo::DefaultABIInfo;
private:
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const {
if (!State.FreeRegs)
return;
if (Info.isIndirect() && Info.getInReg())
State.FreeRegs--;
else if (Info.isDirect() && Info.getInReg()) {
unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32;
if (sz < State.FreeRegs)
State.FreeRegs -= sz;
else
State.FreeRegs = 0;
}
}
void computeInfo(CGFunctionInfo &FI) const override {
CCState State(FI);
// ARC uses 8 registers to pass arguments.
State.FreeRegs = 8;
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
updateState(FI.getReturnInfo(), FI.getReturnType(), State);
for (auto &I : FI.arguments()) {
I.info = classifyArgumentType(I.type, State.FreeRegs);
updateState(I.info, I.type, State);
}
}
ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const;
ABIArgInfo getIndirectByValue(QualType Ty) const;
ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
};
class ARCTargetCodeGenInfo : public TargetCodeGenInfo {
public:
ARCTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<ARCABIInfo>(CGT)) {}
};
ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const {
return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) :
getNaturalAlignIndirect(Ty, false);
}
ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const {
// Compute the byval alignment.
const unsigned MinABIStackAlignInBytes = 4;
unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
TypeAlign > MinABIStackAlignInBytes);
}
Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(4), true);
}
ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty,
uint8_t FreeRegs) const {
// Handle the generic C++ ABI.
const RecordType *RT = Ty->getAs<RecordType>();
if (RT) {
CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
if (RAA == CGCXXABI::RAA_Indirect)
return getIndirectByRef(Ty, FreeRegs > 0);
if (RAA == CGCXXABI::RAA_DirectInMemory)
return getIndirectByValue(Ty);
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32;
if (isAggregateTypeForABI(Ty)) {
// Structures with flexible arrays are always indirect.
if (RT && RT->getDecl()->hasFlexibleArrayMember())
return getIndirectByValue(Ty);
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
llvm::LLVMContext &LLVMContext = getVMContext();
llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
return FreeRegs >= SizeInRegs ?
ABIArgInfo::getDirectInReg(Result) :
ABIArgInfo::getDirect(Result, 0, nullptr, false);
}
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 64)
return getIndirectByValue(Ty);
return isPromotableIntegerTypeForABI(Ty)
? (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty)
: ABIArgInfo::getExtend(Ty))
: (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg()
: ABIArgInfo::getDirect());
}
ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isAnyComplexType())
return ABIArgInfo::getDirectInReg();
// Arguments of size > 4 registers are indirect.
auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32;
if (RetSize > 4)
return getIndirectByRef(RetTy, /*HasFreeRegs*/ true);
return DefaultABIInfo::classifyReturnType(RetTy);
}
} // End anonymous namespace.
//===----------------------------------------------------------------------===//
// XCore ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
/// A SmallStringEnc instance is used to build up the TypeString by passing
/// it by reference between functions that append to it.
typedef llvm::SmallString<128> SmallStringEnc;
/// TypeStringCache caches the meta encodings of Types.
///
/// The reason for caching TypeStrings is twofold:
/// 1. To cache a type's encoding for later uses;
/// 2. As a means to break recursive member type inclusion.
///
/// A cache Entry can have a Status of:
/// NonRecursive: The type encoding is not recursive;
/// Recursive: The type encoding is recursive;
/// Incomplete: An incomplete TypeString;
/// IncompleteUsed: An incomplete TypeString that has been used in a
/// Recursive type encoding.
///
/// A NonRecursive entry will have all of its sub-members expanded as fully
/// as possible. Whilst it may contain types which are recursive, the type
/// itself is not recursive and thus its encoding may be safely used whenever
/// the type is encountered.
///
/// A Recursive entry will have all of its sub-members expanded as fully as
/// possible. The type itself is recursive and it may contain other types which
/// are recursive. The Recursive encoding must not be used during the expansion
/// of a recursive type's recursive branch. For simplicity the code uses
/// IncompleteCount to reject all usage of Recursive encodings for member types.
///
/// An Incomplete entry is always a RecordType and only encodes its
/// identifier e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and
/// are placed into the cache during type expansion as a means to identify and
/// handle recursive inclusion of types as sub-members. If there is recursion
/// the entry becomes IncompleteUsed.
///
/// During the expansion of a RecordType's members:
///
/// If the cache contains a NonRecursive encoding for the member type, the
/// cached encoding is used;
///
/// If the cache contains a Recursive encoding for the member type, the
/// cached encoding is 'Swapped' out, as it may be incorrect, and...
///
/// If the member is a RecordType, an Incomplete encoding is placed into the
/// cache to break potential recursive inclusion of itself as a sub-member;
///
/// Once a member RecordType has been expanded, its temporary incomplete
/// entry is removed from the cache. If a Recursive encoding was swapped out
/// it is swapped back in;
///
/// If an incomplete entry is used to expand a sub-member, the incomplete
/// entry is marked as IncompleteUsed. The cache keeps count of how many
/// IncompleteUsed entries it currently contains in IncompleteUsedCount;
///
/// If a member's encoding is found to be a NonRecursive or Recursive viz:
/// IncompleteUsedCount==0, the member's encoding is added to the cache.
/// Else the member is part of a recursive type and thus the recursion has
/// been exited too soon for the encoding to be correct for the member.
///
class TypeStringCache {
enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed};
struct Entry {
std::string Str; // The encoded TypeString for the type.
enum Status State; // Information about the encoding in 'Str'.
std::string Swapped; // A temporary place holder for a Recursive encoding
// during the expansion of RecordType's members.
};
std::map<const IdentifierInfo *, struct Entry> Map;
unsigned IncompleteCount; // Number of Incomplete entries in the Map.
unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map.
public:
TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {}
void addIncomplete(const IdentifierInfo *ID, std::string StubEnc);
bool removeIncomplete(const IdentifierInfo *ID);
void addIfComplete(const IdentifierInfo *ID, StringRef Str,
bool IsRecursive);
StringRef lookupStr(const IdentifierInfo *ID);
};
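// Informal example of the recursion handling described above (the encoding
// shown is schematic, not exact TypeString syntax): while expanding the
// members of 'struct S { struct S *next; };' an Incomplete stub for S is
// placed in the cache; when the member pointer refers back to S the stub is
// used and marked IncompleteUsed, so the finished encoding of S is cached
// with Recursive status rather than NonRecursive.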
/// TypeString encodings for enum & union fields must be ordered.
/// FieldEncoding is a helper for this ordering process.
class FieldEncoding {
bool HasName;
std::string Enc;
public:
FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {}
StringRef str() { return Enc; }
bool operator<(const FieldEncoding &rhs) const {
if (HasName != rhs.HasName) return HasName;
return Enc < rhs.Enc;
}
};
class XCoreABIInfo : public DefaultABIInfo {
public:
XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
};
class XCoreTargetCodeGenInfo : public TargetCodeGenInfo {
mutable TypeStringCache TSC;
void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
const CodeGen::CodeGenModule &M) const;
public:
XCoreTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<XCoreABIInfo>(CGT)) {}
void emitTargetMetadata(CodeGen::CodeGenModule &CGM,
const llvm::MapVector<GlobalDecl, StringRef>
&MangledDeclNames) const override;
};
} // End anonymous namespace.
// TODO: this implementation is likely now redundant with the default
// EmitVAArg.
Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
CGBuilderTy &Builder = CGF.Builder;
// Get the VAList.
CharUnits SlotSize = CharUnits::fromQuantity(4);
Address AP = Address(Builder.CreateLoad(VAListAddr),
getVAListElementType(CGF), SlotSize);
// Handle the argument.
ABIArgInfo AI = classifyArgumentType(Ty);
CharUnits TypeAlign = getContext().getTypeAlignInChars(Ty);
llvm::Type *ArgTy = CGT.ConvertType(Ty);
if (AI.canHaveCoerceToType() && !AI.getCoerceToType())
AI.setCoerceToType(ArgTy);
llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy);
Address Val = Address::invalid();
CharUnits ArgSize = CharUnits::Zero();
switch (AI.getKind()) {
case ABIArgInfo::Expand:
case ABIArgInfo::CoerceAndExpand:
case ABIArgInfo::InAlloca:
llvm_unreachable("Unsupported ABI kind for va_arg");
case ABIArgInfo::Ignore:
Val = Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeAlign);
ArgSize = CharUnits::Zero();
break;
case ABIArgInfo::Extend:
case ABIArgInfo::Direct:
Val = Builder.CreateElementBitCast(AP, ArgTy);
ArgSize = CharUnits::fromQuantity(
getDataLayout().getTypeAllocSize(AI.getCoerceToType()));
ArgSize = ArgSize.alignTo(SlotSize);
break;
case ABIArgInfo::Indirect:
case ABIArgInfo::IndirectAliased:
Val = Builder.CreateElementBitCast(AP, ArgPtrTy);
Val = Address(Builder.CreateLoad(Val), ArgTy, TypeAlign);
ArgSize = SlotSize;
break;
}
// Increment the VAList.
if (!ArgSize.isZero()) {
Address APN = Builder.CreateConstInBoundsByteGEP(AP, ArgSize);
Builder.CreateStore(APN.getPointer(), VAListAddr);
}
return Val;
}
/// During the expansion of a RecordType, an incomplete TypeString is placed
/// into the cache as a means to identify and break recursion.
/// If there is a Recursive encoding in the cache, it is swapped out and will
/// be reinserted by removeIncomplete().
/// All other types of encoding should have been used rather than arriving here.
void TypeStringCache::addIncomplete(const IdentifierInfo *ID,
std::string StubEnc) {
if (!ID)
return;
Entry &E = Map[ID];
assert((E.Str.empty() || E.State == Recursive) &&
"Incorrect use of addIncomplete");
assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()");
E.Swapped.swap(E.Str); // swap out the Recursive
E.Str.swap(StubEnc);
E.State = Incomplete;
++IncompleteCount;
}
/// Once the RecordType has been expanded, the temporary incomplete TypeString
/// must be removed from the cache.
/// If a Recursive was swapped out by addIncomplete(), it will be replaced.
/// Returns true if the RecordType was defined recursively.
bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) {
if (!ID)
return false;
auto I = Map.find(ID);
assert(I != Map.end() && "Entry not present");
Entry &E = I->second;
assert( (E.State == Incomplete ||
E.State == IncompleteUsed) &&
"Entry must be an incomplete type");
bool IsRecursive = false;
if (E.State == IncompleteUsed) {
// We made use of our Incomplete encoding, thus we are recursive.
IsRecursive = true;
--IncompleteUsedCount;
}
if (E.Swapped.empty())
Map.erase(I);
else {
// Swap the Recursive back.
E.Swapped.swap(E.Str);
E.Swapped.clear();
E.State = Recursive;
}
--IncompleteCount;
return IsRecursive;
}
/// Add the encoded TypeString to the cache only if it is NonRecursive or
/// Recursive (viz: all sub-members were expanded as fully as possible).
void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str,
bool IsRecursive) {
if (!ID || IncompleteUsedCount)
return; // No key, or it is an incomplete sub-type, so don't add.
Entry &E = Map[ID];
if (IsRecursive && !E.Str.empty()) {
assert(E.State==Recursive && E.Str.size() == Str.size() &&
"This is not the same Recursive entry");
// The parent container was not recursive after all, so we could have used
// this Recursive sub-member entry, but we assumed the worst when we
// started, viz: IncompleteCount != 0.
return;
}
assert(E.Str.empty() && "Entry already present");
E.Str = Str.str();
E.State = IsRecursive? Recursive : NonRecursive;
}
/// Return a cached TypeString encoding for the ID. If there isn't one, or we
/// are recursively expanding a type (IncompleteCount != 0) and the cached
/// encoding is Recursive, return an empty StringRef.
StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) {
if (!ID)
return StringRef(); // We have no key.
auto I = Map.find(ID);
if (I == Map.end())
return StringRef(); // We have no encoding.
Entry &E = I->second;
if (E.State == Recursive && IncompleteCount)
return StringRef(); // We don't use Recursive encodings for member types.
if (E.State == Incomplete) {
// The incomplete type is being used to break out of recursion.
E.State = IncompleteUsed;
++IncompleteUsedCount;
}
return E.Str;
}
/// The XCore ABI includes a type information section that communicates symbol
/// type information to the linker. The linker uses this information to verify
/// safety/correctness of things such as array bounds and pointers et al.
/// The ABI only requires C (and XC) language modules to emit TypeStrings.
/// This type information (TypeString) is emitted into meta data for all global
/// symbols: definitions, declarations, functions & variables.
///
/// The TypeString carries type, qualifier, name, size & value details.
/// Please see 'Tools Development Guide' section 2.16.2 for format details:
/// https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf
/// The output is tested by test/CodeGen/xcore-stringtype.c.
///
static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC);
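//
// Illustrative sketch (not normative; the function name is hypothetical and
// the encoding is derived from the appendType() helpers below): for a C
// function declared as
//   int f(float x);
// emitTargetMD() would add an operand to the "xcore.typestrings" named
// metadata pairing the llvm::Function with the MDString "f{si}(ft)".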
/// XCore uses emitTargetMD to emit TypeString metadata for global symbols.
void XCoreTargetCodeGenInfo::emitTargetMD(
const Decl *D, llvm::GlobalValue *GV,
const CodeGen::CodeGenModule &CGM) const {
SmallStringEnc Enc;
if (getTypeString(Enc, D, CGM, TSC)) {
llvm::LLVMContext &Ctx = CGM.getModule().getContext();
llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV),
llvm::MDString::get(Ctx, Enc.str())};
llvm::NamedMDNode *MD =
CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings");
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
}
void XCoreTargetCodeGenInfo::emitTargetMetadata(
CodeGen::CodeGenModule &CGM,
const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {
// Warning, new MangledDeclNames may be appended within this loop.
// We rely on MapVector insertions adding new elements to the end
// of the container.
for (unsigned I = 0; I != MangledDeclNames.size(); ++I) {
auto Val = *(MangledDeclNames.begin() + I);
llvm::GlobalValue *GV = CGM.GetGlobalValue(Val.second);
if (GV) {
const Decl *D = Val.first.getDecl()->getMostRecentDecl();
emitTargetMD(D, GV, CGM);
}
}
}
//===----------------------------------------------------------------------===//
// Base ABI and target codegen info implementation common between SPIR and
// SPIR-V.
//===----------------------------------------------------------------------===//
namespace {
class CommonSPIRABIInfo : public DefaultABIInfo {
public:
CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
private:
void setCCs();
};
class SPIRVABIInfo : public CommonSPIRABIInfo {
public:
SPIRVABIInfo(CodeGenTypes &CGT) : CommonSPIRABIInfo(CGT) {}
void computeInfo(CGFunctionInfo &FI) const override;
private:
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
};
} // end anonymous namespace
namespace {
class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(CGT)) {}
CommonSPIRTargetCodeGenInfo(std::unique_ptr<ABIInfo> ABIInfo)
: TargetCodeGenInfo(std::move(ABIInfo)) {}
LangAS getASTAllocaAddressSpace() const override {
return getLangASFromTargetAS(
getABIInfo().getDataLayout().getAllocaAddrSpace());
}
unsigned getOpenCLKernelCallingConv() const override;
};
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
public:
SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
} // End anonymous namespace.
void CommonSPIRABIInfo::setCCs() {
assert(getRuntimeCC() == llvm::CallingConv::C);
RuntimeCC = llvm::CallingConv::SPIR_FUNC;
}
ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
if (getContext().getLangOpts().CUDAIsDevice) {
// Coerce pointer arguments with default address space to CrossWorkGroup
// pointers for HIPSPV/CUDASPV. When the language mode is HIP/CUDA, the
// SPIRTargetInfo maps cuda_device to SPIR-V's CrossWorkGroup address space.
llvm::Type *LTy = CGT.ConvertType(Ty);
auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(LTy);
if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) {
LTy = llvm::PointerType::getWithSamePointeeType(PtrTy, GlobalAS);
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
// Force copying aggregate type in kernel arguments by value when
// compiling CUDA targeting SPIR-V. This is required for the object
// copied to be valid on the device.
// This behavior follows the CUDA spec
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
// and matches the NVPTX implementation.
if (isAggregateTypeForABI(Ty))
return getNaturalAlignIndirect(Ty, /* byval */ true);
}
return classifyArgumentType(Ty);
}
void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
// The logic is the same as in DefaultABIInfo, with an exception for kernel
// argument handling.
llvm::CallingConv::ID CC = FI.getCallingConvention();
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments()) {
if (CC == llvm::CallingConv::SPIR_KERNEL) {
I.info = classifyKernelArgumentType(I.type);
} else {
I.info = classifyArgumentType(I.type);
}
}
}
namespace clang {
namespace CodeGen {
void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
if (CGM.getTarget().getTriple().isSPIRV())
SPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
else
CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI);
}
}
}
unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
return llvm::CallingConv::SPIR_KERNEL;
}
void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
const FunctionType *&FT) const {
// Convert HIP kernels to SPIR-V kernels.
if (getABIInfo().getContext().getLangOpts().HIP) {
FT = getABIInfo().getContext().adjustFunctionType(
FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
return;
}
}
static bool appendType(SmallStringEnc &Enc, QualType QType,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC);
/// Helper function for appendRecordType().
/// Builds a SmallVector containing the encoded field types in declaration
/// order.
static bool extractFieldType(SmallVectorImpl<FieldEncoding> &FE,
const RecordDecl *RD,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC) {
for (const auto *Field : RD->fields()) {
SmallStringEnc Enc;
Enc += "m(";
Enc += Field->getName();
Enc += "){";
if (Field->isBitField()) {
Enc += "b(";
llvm::raw_svector_ostream OS(Enc);
OS << Field->getBitWidthValue(CGM.getContext());
Enc += ':';
}
if (!appendType(Enc, Field->getType(), CGM, TSC))
return false;
if (Field->isBitField())
Enc += ')';
Enc += '}';
FE.emplace_back(!Field->getName().empty(), Enc);
}
return true;
}
/// Appends structure and union types to Enc and adds encoding to cache.
/// Recursively calls appendType (via extractFieldType) for each field.
/// Union types have their fields ordered according to the ABI.
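/// Illustrative example (derived from the encoding logic below, not from the
/// ABI document): a C struct
///   struct S { int a; float f; };
/// would be encoded as "s(S){m(a){si},m(f){ft}}".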
static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC, const IdentifierInfo *ID) {
// Append the cached TypeString if we have one.
StringRef TypeString = TSC.lookupStr(ID);
if (!TypeString.empty()) {
Enc += TypeString;
return true;
}
// Start to emit an incomplete TypeString.
size_t Start = Enc.size();
Enc += (RT->isUnionType()? 'u' : 's');
Enc += '(';
if (ID)
Enc += ID->getName();
Enc += "){";
// We collect all encoded fields and order as necessary.
bool IsRecursive = false;
const RecordDecl *RD = RT->getDecl()->getDefinition();
if (RD && !RD->field_empty()) {
// An incomplete TypeString stub is placed in the cache for this RecordType
// so that recursive calls to this RecordType will use it whilst building a
// complete TypeString for this RecordType.
SmallVector<FieldEncoding, 16> FE;
std::string StubEnc(Enc.substr(Start).str());
StubEnc += '}'; // StubEnc now holds a valid incomplete TypeString.
TSC.addIncomplete(ID, std::move(StubEnc));
if (!extractFieldType(FE, RD, CGM, TSC)) {
(void) TSC.removeIncomplete(ID);
return false;
}
IsRecursive = TSC.removeIncomplete(ID);
// The ABI requires unions to be sorted but not structures.
// See FieldEncoding::operator< for sort algorithm.
if (RT->isUnionType())
llvm::sort(FE);
// We can now complete the TypeString.
unsigned E = FE.size();
for (unsigned I = 0; I != E; ++I) {
if (I)
Enc += ',';
Enc += FE[I].str();
}
}
Enc += '}';
TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive);
return true;
}
/// Appends enum types to Enc and adds the encoding to the cache.
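/// Illustrative example (derived from the code below): an enum
///   enum E { A = 1, B = 2 };
/// would be encoded as "e(E){m(A){1},m(B){2}}".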
static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET,
TypeStringCache &TSC,
const IdentifierInfo *ID) {
// Append the cached TypeString if we have one.
StringRef TypeString = TSC.lookupStr(ID);
if (!TypeString.empty()) {
Enc += TypeString;
return true;
}
size_t Start = Enc.size();
Enc += "e(";
if (ID)
Enc += ID->getName();
Enc += "){";
// We collect all encoded enumerations and order them alphanumerically.
if (const EnumDecl *ED = ET->getDecl()->getDefinition()) {
SmallVector<FieldEncoding, 16> FE;
for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E;
++I) {
SmallStringEnc EnumEnc;
EnumEnc += "m(";
EnumEnc += I->getName();
EnumEnc += "){";
I->getInitVal().toString(EnumEnc);
EnumEnc += '}';
FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc));
}
llvm::sort(FE);
unsigned E = FE.size();
for (unsigned I = 0; I != E; ++I) {
if (I)
Enc += ',';
Enc += FE[I].str();
}
}
Enc += '}';
TSC.addIfComplete(ID, Enc.substr(Start), false);
return true;
}
/// Appends type's qualifier to Enc.
/// This is done prior to appending the type's encoding.
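/// Illustrative example (derived from the table below): a "const volatile int"
/// is encoded as "cv:si"; an unqualified type contributes no prefix.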
static void appendQualifier(SmallStringEnc &Enc, QualType QT) {
// Qualifiers are emitted in alphabetical order.
static const char *const Table[]={"","c:","r:","cr:","v:","cv:","rv:","crv:"};
int Lookup = 0;
if (QT.isConstQualified())
Lookup += 1<<0;
if (QT.isRestrictQualified())
Lookup += 1<<1;
if (QT.isVolatileQualified())
Lookup += 1<<2;
Enc += Table[Lookup];
}
/// Appends built-in types to Enc.
static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) {
const char *EncType;
switch (BT->getKind()) {
case BuiltinType::Void:
EncType = "0";
break;
case BuiltinType::Bool:
EncType = "b";
break;
case BuiltinType::Char_U:
EncType = "uc";
break;
case BuiltinType::UChar:
EncType = "uc";
break;
case BuiltinType::SChar:
EncType = "sc";
break;
case BuiltinType::UShort:
EncType = "us";
break;
case BuiltinType::Short:
EncType = "ss";
break;
case BuiltinType::UInt:
EncType = "ui";
break;
case BuiltinType::Int:
EncType = "si";
break;
case BuiltinType::ULong:
EncType = "ul";
break;
case BuiltinType::Long:
EncType = "sl";
break;
case BuiltinType::ULongLong:
EncType = "ull";
break;
case BuiltinType::LongLong:
EncType = "sll";
break;
case BuiltinType::Float:
EncType = "ft";
break;
case BuiltinType::Double:
EncType = "d";
break;
case BuiltinType::LongDouble:
EncType = "ld";
break;
default:
return false;
}
Enc += EncType;
return true;
}
/// Appends a pointer encoding to Enc before calling appendType for the pointee.
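/// Illustrative example: "int *" encodes as "p(si)" and "const int *" as
/// "p(c:si)" (the qualifier belongs to the pointee).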
static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC) {
Enc += "p(";
if (!appendType(Enc, PT->getPointeeType(), CGM, TSC))
return false;
Enc += ')';
return true;
}
/// Appends array encoding to Enc before calling appendType for the element.
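/// Illustrative example: "int a[4]" encodes as "a(4:si)"; a global array of
/// unknown size gets "*" for the size, e.g. "a(*:si)" (see getTypeString()).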
static bool appendArrayType(SmallStringEnc &Enc, QualType QT,
const ArrayType *AT,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC, StringRef NoSizeEnc) {
if (AT->getSizeModifier() != ArrayType::Normal)
return false;
Enc += "a(";
if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
CAT->getSize().toStringUnsigned(Enc);
else
Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "".
Enc += ':';
// The Qualifiers should be attached to the type rather than the array.
appendQualifier(Enc, QT);
if (!appendType(Enc, AT->getElementType(), CGM, TSC))
return false;
Enc += ')';
return true;
}
/// Appends a function encoding to Enc, calling appendType for the return type
/// and the arguments.
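/// Illustrative example: "int f(float, unsigned)" encodes as "f{si}(ft,ui)",
/// a variadic "int g(int, ...)" as "f{si}(si,va)", and a prototyped function
/// taking no arguments as "f{si}(0)".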
static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC) {
Enc += "f{";
if (!appendType(Enc, FT->getReturnType(), CGM, TSC))
return false;
Enc += "}(";
if (const FunctionProtoType *FPT = FT->getAs<FunctionProtoType>()) {
// N.B. we are only interested in the adjusted param types.
auto I = FPT->param_type_begin();
auto E = FPT->param_type_end();
if (I != E) {
do {
if (!appendType(Enc, *I, CGM, TSC))
return false;
++I;
if (I != E)
Enc += ',';
} while (I != E);
if (FPT->isVariadic())
Enc += ",va";
} else {
if (FPT->isVariadic())
Enc += "va";
else
Enc += '0';
}
}
Enc += ')';
return true;
}
/// Handles the type's qualifier before dispatching a call to handle specific
/// type encodings.
static bool appendType(SmallStringEnc &Enc, QualType QType,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC) {
QualType QT = QType.getCanonicalType();
if (const ArrayType *AT = QT->getAsArrayTypeUnsafe())
// The Qualifiers should be attached to the type rather than the array.
// Thus we don't call appendQualifier() here.
return appendArrayType(Enc, QT, AT, CGM, TSC, "");
appendQualifier(Enc, QT);
if (const BuiltinType *BT = QT->getAs<BuiltinType>())
return appendBuiltinType(Enc, BT);
if (const PointerType *PT = QT->getAs<PointerType>())
return appendPointerType(Enc, PT, CGM, TSC);
if (const EnumType *ET = QT->getAs<EnumType>())
return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier());
if (const RecordType *RT = QT->getAsStructureType())
return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());
if (const RecordType *RT = QT->getAsUnionType())
return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());
if (const FunctionType *FT = QT->getAs<FunctionType>())
return appendFunctionType(Enc, FT, CGM, TSC);
return false;
}
static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC) {
if (!D)
return false;
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->getLanguageLinkage() != CLanguageLinkage)
return false;
return appendType(Enc, FD->getType(), CGM, TSC);
}
if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
if (VD->getLanguageLinkage() != CLanguageLinkage)
return false;
QualType QT = VD->getType().getCanonicalType();
if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) {
// Global ArrayTypes are given a size of '*' if the size is unknown.
// The Qualifiers should be attached to the type rather than the array.
// Thus we don't call appendQualifier() here.
return appendArrayType(Enc, QT, AT, CGM, TSC, "*");
}
return appendType(Enc, QT, CGM, TSC);
}
return false;
}
//===----------------------------------------------------------------------===//
// RISCV ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class RISCVABIInfo : public DefaultABIInfo {
private:
// Size of the integer ('x') registers in bits.
unsigned XLen;
// Size of the floating point ('f') registers in bits. Note that the target
// ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target
// with soft float ABI has FLen==0).
unsigned FLen;
static const int NumArgGPRs = 8;
static const int NumArgFPRs = 8;
bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
llvm::Type *&Field1Ty,
CharUnits &Field1Off,
llvm::Type *&Field2Ty,
CharUnits &Field2Off) const;
public:
RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen)
: DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {}
// DefaultABIInfo's classifyReturnType and classifyArgumentType are
// non-virtual, but computeInfo is virtual, so we override it.
void computeInfo(CGFunctionInfo &FI) const override;
ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft,
int &ArgFPRsLeft) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
ABIArgInfo extendType(QualType Ty) const;
bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
CharUnits &Field1Off, llvm::Type *&Field2Ty,
CharUnits &Field2Off, int &NeededArgGPRs,
int &NeededArgFPRs) const;
ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty,
CharUnits Field1Off,
llvm::Type *Field2Ty,
CharUnits Field2Off) const;
};
} // end anonymous namespace
void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
QualType RetTy = FI.getReturnType();
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(RetTy);
// IsRetIndirect is true if classifyArgumentType indicated the value should
// be passed indirect, or if the type is a scalar larger than 2*XLen and not a
// complex type with elements <= FLen. e.g. fp128 is passed direct in LLVM IR,
// relying on the backend lowering code to rewrite the argument list and pass
// it indirectly on RV32.
bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
if (!IsRetIndirect && RetTy->isScalarType() &&
getContext().getTypeSize(RetTy) > (2 * XLen)) {
if (RetTy->isComplexType() && FLen) {
QualType EltTy = RetTy->castAs<ComplexType>()->getElementType();
IsRetIndirect = getContext().getTypeSize(EltTy) > FLen;
} else {
// This is a normal scalar > 2*XLen, such as fp128 on RV32.
IsRetIndirect = true;
}
}
// We must track the number of GPRs used in order to conform to the RISC-V
// ABI, as integer scalars passed in registers should have signext/zeroext
// when promoted, but are anyext if passed on the stack. As GPR usage is
// different for variadic arguments, we must also track whether we are
// examining a vararg or not.
int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
int NumFixedArgs = FI.getNumRequiredArgs();
int ArgNum = 0;
for (auto &ArgInfo : FI.arguments()) {
bool IsFixed = ArgNum < NumFixedArgs;
ArgInfo.info =
classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft);
ArgNum++;
}
}
// Returns true if the struct is a potential candidate for the floating point
// calling convention. If this function returns true, the caller is
// responsible for checking that if there is only a single field then that
// field is a float.
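// Illustrative example (assuming a hard-double ABI, i.e. FLen >= 64):
//   struct { double d; int i; };  // eligible: one FPR + one GPR
//   struct { int a; int b; };     // not eligible: int+int pairs are rejected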
bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
llvm::Type *&Field1Ty,
CharUnits &Field1Off,
llvm::Type *&Field2Ty,
CharUnits &Field2Off) const {
bool IsInt = Ty->isIntegralOrEnumerationType();
bool IsFloat = Ty->isRealFloatingType();
if (IsInt || IsFloat) {
uint64_t Size = getContext().getTypeSize(Ty);
if (IsInt && Size > XLen)
return false;
// Can't be eligible if larger than the FP registers. Half precision isn't
// currently supported on RISC-V and the ABI hasn't been confirmed, so
// default to the integer ABI in that case.
if (IsFloat && (Size > FLen || Size < 32))
return false;
// Can't be eligible if an integer type was already found (int+int pairs
// are not eligible).
if (IsInt && Field1Ty && Field1Ty->isIntegerTy())
return false;
if (!Field1Ty) {
Field1Ty = CGT.ConvertType(Ty);
Field1Off = CurOff;
return true;
}
if (!Field2Ty) {
Field2Ty = CGT.ConvertType(Ty);
Field2Off = CurOff;
return true;
}
return false;
}
if (auto CTy = Ty->getAs<ComplexType>()) {
if (Field1Ty)
return false;
QualType EltTy = CTy->getElementType();
if (getContext().getTypeSize(EltTy) > FLen)
return false;
Field1Ty = CGT.ConvertType(EltTy);
Field1Off = CurOff;
Field2Ty = Field1Ty;
Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy);
return true;
}
if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) {
uint64_t ArraySize = ATy->getSize().getZExtValue();
QualType EltTy = ATy->getElementType();
CharUnits EltSize = getContext().getTypeSizeInChars(EltTy);
for (uint64_t i = 0; i < ArraySize; ++i) {
bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty,
Field1Off, Field2Ty, Field2Off);
if (!Ret)
return false;
CurOff += EltSize;
}
return true;
}
if (const auto *RTy = Ty->getAs<RecordType>()) {
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are not eligible for the FP calling convention.
if (getRecordArgABI(Ty, CGT.getCXXABI()))
return false;
if (isEmptyRecord(getContext(), Ty, true))
return true;
const RecordDecl *RD = RTy->getDecl();
// Unions aren't eligible unless they're empty (which is caught above).
if (RD->isUnion())
return false;
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
for (const CXXBaseSpecifier &B : CXXRD->bases()) {
const auto *BDecl =
cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl());
CharUnits BaseOff = Layout.getBaseClassOffset(BDecl);
bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff,
Field1Ty, Field1Off, Field2Ty,
Field2Off);
if (!Ret)
return false;
}
}
int ZeroWidthBitFieldCount = 0;
for (const FieldDecl *FD : RD->fields()) {
uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex());
QualType QTy = FD->getType();
if (FD->isBitField()) {
unsigned BitWidth = FD->getBitWidthValue(getContext());
// Allow a bitfield with a type greater than XLen as long as the
// bitwidth is XLen or less.
if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen)
QTy = getContext().getIntTypeForBitwidth(XLen, false);
if (BitWidth == 0) {
ZeroWidthBitFieldCount++;
continue;
}
}
bool Ret = detectFPCCEligibleStructHelper(
QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits),
Field1Ty, Field1Off, Field2Ty, Field2Off);
if (!Ret)
return false;
// As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp
// or int+fp structs, but are ignored for a struct with an fp field and
// any number of zero-width bitfields.
if (Field2Ty && ZeroWidthBitFieldCount > 0)
return false;
}
return Field1Ty != nullptr;
}
return false;
}
// Determine if a struct is eligible for passing according to the floating
// point calling convention (i.e., when flattened it contains a single fp
// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and
// NeededArgGPRs are incremented appropriately.
bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
CharUnits &Field1Off,
llvm::Type *&Field2Ty,
CharUnits &Field2Off,
int &NeededArgGPRs,
int &NeededArgFPRs) const {
Field1Ty = nullptr;
Field2Ty = nullptr;
NeededArgGPRs = 0;
NeededArgFPRs = 0;
bool IsCandidate = detectFPCCEligibleStructHelper(
Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off);
// Not really a candidate if we have a single int but no float.
if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
return false;
if (!IsCandidate)
return false;
if (Field1Ty && Field1Ty->isFloatingPointTy())
NeededArgFPRs++;
else if (Field1Ty)
NeededArgGPRs++;
if (Field2Ty && Field2Ty->isFloatingPointTy())
NeededArgFPRs++;
else if (Field2Ty)
NeededArgGPRs++;
return true;
}
// Call getCoerceAndExpand for the two-element flattened struct described by
// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an
// appropriate coerceToType and unpaddedCoerceToType.
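// Illustrative sketch (hypothetical field layout): for a flattened struct
// with Field1Ty = i8 at offset 0 and Field2Ty = float at offset 4, the
// coerceToType is { i8, float } and the two elements are then expanded into
// separate arguments (one GPR, one FPR).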
ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct(
llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty,
CharUnits Field2Off) const {
SmallVector<llvm::Type *, 3> CoerceElts;
SmallVector<llvm::Type *, 2> UnpaddedCoerceElts;
if (!Field1Off.isZero())
CoerceElts.push_back(llvm::ArrayType::get(
llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity()));
CoerceElts.push_back(Field1Ty);
UnpaddedCoerceElts.push_back(Field1Ty);
if (!Field2Ty) {
return ABIArgInfo::getCoerceAndExpand(
llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()),
UnpaddedCoerceElts[0]);
}
CharUnits Field2Align =
CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty));
CharUnits Field1End = Field1Off +
CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty));
CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align);
CharUnits Padding = CharUnits::Zero();
if (Field2Off > Field2OffNoPadNoPack)
Padding = Field2Off - Field2OffNoPadNoPack;
else if (Field2Off != Field2Align && Field2Off > Field1End)
Padding = Field2Off - Field1End;
bool IsPacked = !Field2Off.isMultipleOf(Field2Align);
if (!Padding.isZero())
CoerceElts.push_back(llvm::ArrayType::get(
llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity()));
CoerceElts.push_back(Field2Ty);
UnpaddedCoerceElts.push_back(Field2Ty);
auto CoerceToType =
llvm::StructType::get(getVMContext(), CoerceElts, IsPacked);
auto UnpaddedCoerceToType =
llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked);
return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
}
ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
int &ArgGPRsLeft,
int &ArgFPRsLeft) const {
assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
Ty = useFirstFieldIfTransparentUnion(Ty);
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always passed indirectly.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
if (ArgGPRsLeft)
ArgGPRsLeft -= 1;
return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
CGCXXABI::RAA_DirectInMemory);
}
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
uint64_t Size = getContext().getTypeSize(Ty);
// Pass floating point values via FPRs if possible.
if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() &&
FLen >= Size && ArgFPRsLeft) {
ArgFPRsLeft--;
return ABIArgInfo::getDirect();
}
// Complex types for the hard float ABI must be passed direct rather than
// using CoerceAndExpand.
if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) {
QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
if (getContext().getTypeSize(EltTy) <= FLen) {
ArgFPRsLeft -= 2;
return ABIArgInfo::getDirect();
}
}
if (IsFixed && FLen && Ty->isStructureOrClassType()) {
llvm::Type *Field1Ty = nullptr;
llvm::Type *Field2Ty = nullptr;
CharUnits Field1Off = CharUnits::Zero();
CharUnits Field2Off = CharUnits::Zero();
int NeededArgGPRs = 0;
int NeededArgFPRs = 0;
bool IsCandidate =
detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off,
NeededArgGPRs, NeededArgFPRs);
if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft &&
NeededArgFPRs <= ArgFPRsLeft) {
ArgGPRsLeft -= NeededArgGPRs;
ArgFPRsLeft -= NeededArgFPRs;
return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty,
Field2Off);
}
}
uint64_t NeededAlign = getContext().getTypeAlign(Ty);
bool MustUseStack = false;
// Determine the number of GPRs needed to pass the current argument
// according to the ABI. 2*XLen-aligned varargs are passed in "aligned"
// register pairs, so may consume 3 registers.
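// Worked example (illustrative): on RV32, a variadic "long long" has 2*XLen
// alignment, so if an odd number of argument GPRs remain, one register is
// skipped to form an aligned pair and NeededArgGPRs becomes 3; with an even
// number remaining it is 2.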
int NeededArgGPRs = 1;
if (!IsFixed && NeededAlign == 2 * XLen)
NeededArgGPRs = 2 + (ArgGPRsLeft % 2);
else if (Size > XLen && Size <= 2 * XLen)
NeededArgGPRs = 2;
if (NeededArgGPRs > ArgGPRsLeft) {
MustUseStack = true;
NeededArgGPRs = ArgGPRsLeft;
}
ArgGPRsLeft -= NeededArgGPRs;
if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// All integral types are promoted to XLen width, unless passed on the
// stack.
if (Size < XLen && Ty->isIntegralOrEnumerationType() && !MustUseStack) {
return extendType(Ty);
}
if (const auto *EIT = Ty->getAs<BitIntType>()) {
if (EIT->getNumBits() < XLen && !MustUseStack)
return extendType(Ty);
if (EIT->getNumBits() > 128 ||
(!getContext().getTargetInfo().hasInt128Type() &&
EIT->getNumBits() > 64))
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
return ABIArgInfo::getDirect();
}
// Aggregates which are <= 2*XLen will be passed in registers if possible,
// so coerce to integers.
if (Size <= 2 * XLen) {
unsigned Alignment = getContext().getTypeAlign(Ty);
// Use a single XLen int if possible, 2*XLen if 2*XLen alignment is
// required, and a 2-element XLen array if only XLen alignment is required.
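// Worked example (illustrative): on RV64 (XLen == 64), a 16-byte struct with
// 8-byte alignment (e.g. struct { long a; int b; }) becomes [2 x i64], while
// a 16-byte struct with 16-byte (2*XLen) alignment becomes a single i128.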
if (Size <= XLen) {
return ABIArgInfo::getDirect(
llvm::IntegerType::get(getVMContext(), XLen));
} else if (Alignment == 2 * XLen) {
return ABIArgInfo::getDirect(
llvm::IntegerType::get(getVMContext(), 2 * XLen));
} else {
return ABIArgInfo::getDirect(llvm::ArrayType::get(
llvm::IntegerType::get(getVMContext(), XLen), 2));
}
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
int ArgGPRsLeft = 2;
int ArgFPRsLeft = FLen ? 2 : 0;
// The rules for return and argument types are the same, so defer to
// classifyArgumentType.
return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft,
ArgFPRsLeft);
}
Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8);
// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
getVAListElementType(CGF), SlotSize);
Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
return Addr;
}
auto TInfo = getContext().getTypeInfoInChars(Ty);
// Arguments bigger than 2*XLen bits are passed indirectly.
bool IsIndirect = TInfo.Width > 2 * SlotSize;
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo,
SlotSize, /*AllowHigherAlign=*/true);
}
ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const {
int TySize = getContext().getTypeSize(Ty);
// RV64 ABI requires unsigned 32 bit integers to be sign extended.
if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
return ABIArgInfo::getSignExtend(Ty);
return ABIArgInfo::getExtend(Ty);
}
namespace {
class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
public:
RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen,
unsigned FLen)
: TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
const auto *Attr = FD->getAttr<RISCVInterruptAttr>();
if (!Attr)
return;
const char *Kind;
switch (Attr->getInterrupt()) {
case RISCVInterruptAttr::user: Kind = "user"; break;
case RISCVInterruptAttr::supervisor: Kind = "supervisor"; break;
case RISCVInterruptAttr::machine: Kind = "machine"; break;
}
auto *Fn = cast<llvm::Function>(GV);
Fn->addFnAttr("interrupt", Kind);
}
};
} // namespace
//===----------------------------------------------------------------------===//
// VE ABI Implementation.
//
namespace {
class VEABIInfo : public DefaultABIInfo {
public:
VEABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
void computeInfo(CGFunctionInfo &FI) const override;
};
} // end anonymous namespace
ABIArgInfo VEABIInfo::classifyReturnType(QualType Ty) const {
if (Ty->isAnyComplexType())
return ABIArgInfo::getDirect();
uint64_t Size = getContext().getTypeSize(Ty);
if (Size < 64 && Ty->isIntegerType())
return ABIArgInfo::getExtend(Ty);
return DefaultABIInfo::classifyReturnType(Ty);
}
ABIArgInfo VEABIInfo::classifyArgumentType(QualType Ty) const {
if (Ty->isAnyComplexType())
return ABIArgInfo::getDirect();
uint64_t Size = getContext().getTypeSize(Ty);
if (Size < 64 && Ty->isIntegerType())
return ABIArgInfo::getExtend(Ty);
return DefaultABIInfo::classifyArgumentType(Ty);
}
void VEABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &Arg : FI.arguments())
Arg.info = classifyArgumentType(Arg.type);
}
namespace {
class VETargetCodeGenInfo : public TargetCodeGenInfo {
public:
VETargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<VEABIInfo>(CGT)) {}
// The VE ABI requires that arguments of variadic and prototype-less functions
// be passed in both registers and memory.
bool isNoProtoCallVariadic(const CallArgList &args,
const FunctionNoProtoType *fnType) const override {
return true;
}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// CSKY ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class CSKYABIInfo : public DefaultABIInfo {
static const int NumArgGPRs = 4;
static const int NumArgFPRs = 4;
static const unsigned XLen = 32;
unsigned FLen;
public:
CSKYABIInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen)
: DefaultABIInfo(CGT), FLen(FLen) {}
void computeInfo(CGFunctionInfo &FI) const override;
ABIArgInfo classifyArgumentType(QualType Ty, int &ArgGPRsLeft,
int &ArgFPRsLeft,
bool isReturnType = false) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
};
} // end anonymous namespace
void CSKYABIInfo::computeInfo(CGFunctionInfo &FI) const {
QualType RetTy = FI.getReturnType();
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(RetTy);
bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
// We must track the number of GPRs used in order to conform to the CSKY
// ABI, as integer scalars passed in registers should have signext/zeroext
// when promoted.
int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
for (auto &ArgInfo : FI.arguments()) {
ArgInfo.info = classifyArgumentType(ArgInfo.type, ArgGPRsLeft, ArgFPRsLeft);
}
}
Address CSKYABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8);
// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
getVAListElementType(CGF), SlotSize);
Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
return Addr;
}
auto TInfo = getContext().getTypeInfoInChars(Ty);
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, false, TInfo, SlotSize,
/*AllowHigherAlign=*/true);
}
ABIArgInfo CSKYABIInfo::classifyArgumentType(QualType Ty, int &ArgGPRsLeft,
int &ArgFPRsLeft,
bool isReturnType) const {
assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
Ty = useFirstFieldIfTransparentUnion(Ty);
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always passed indirectly.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
if (ArgGPRsLeft)
ArgGPRsLeft -= 1;
return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
CGCXXABI::RAA_DirectInMemory);
}
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
if (!Ty->getAsUnionType())
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
uint64_t Size = getContext().getTypeSize(Ty);
// Pass floating point values via FPRs if possible.
if (Ty->isFloatingType() && !Ty->isComplexType() && FLen >= Size &&
ArgFPRsLeft) {
ArgFPRsLeft--;
return ABIArgInfo::getDirect();
}
// Complex types for the hard float ABI must be passed direct rather than
// using CoerceAndExpand.
if (Ty->isComplexType() && FLen && !isReturnType) {
QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
if (getContext().getTypeSize(EltTy) <= FLen) {
ArgFPRsLeft -= 2;
return ABIArgInfo::getDirect();
}
}
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// All integral types are promoted to XLen width, unless passed on the
// stack.
if (Size < XLen && Ty->isIntegralOrEnumerationType())
return ABIArgInfo::getExtend(Ty);
if (const auto *EIT = Ty->getAs<BitIntType>()) {
if (EIT->getNumBits() < XLen)
return ABIArgInfo::getExtend(Ty);
}
return ABIArgInfo::getDirect();
}
// For an argument type, the first 4*XLen bits of an aggregate are passed in
// registers and the rest is passed on the stack, so we can coerce to integers
// directly and let the backend handle it correctly.
// For a return type, an aggregate of <= 2*XLen is returned in registers;
// otherwise it is returned indirectly.
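// Illustrative example: on CSKY (XLen == 32) a 12-byte struct argument is
// coerced to [3 x i32], and a 4-byte struct to a single i32.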
if (!isReturnType || Size <= 2 * XLen) {
if (Size <= XLen) {
return ABIArgInfo::getDirect(
llvm::IntegerType::get(getVMContext(), XLen));
} else {
return ABIArgInfo::getDirect(llvm::ArrayType::get(
llvm::IntegerType::get(getVMContext(), XLen), (Size + 31) / XLen));
}
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
ABIArgInfo CSKYABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
int ArgGPRsLeft = 2;
int ArgFPRsLeft = FLen ? 1 : 0;
// The rules for return and argument types are the same, so defer to
// classifyArgumentType.
return classifyArgumentType(RetTy, ArgGPRsLeft, ArgFPRsLeft, true);
}
namespace {
class CSKYTargetCodeGenInfo : public TargetCodeGenInfo {
public:
CSKYTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen)
: TargetCodeGenInfo(std::make_unique<CSKYABIInfo>(CGT, FLen)) {}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Driver code
//===----------------------------------------------------------------------===//
bool CodeGenModule::supportsCOMDAT() const {
return getTriple().supportsCOMDAT();
}
const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
if (TheTargetCodeGenInfo)
return *TheTargetCodeGenInfo;
// Helper to set the unique_ptr while still keeping the return value.
auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & {
this->TheTargetCodeGenInfo.reset(P);
return *P;
};
const llvm::Triple &Triple = getTarget().getTriple();
switch (Triple.getArch()) {
default:
return SetCGInfo(new DefaultTargetCodeGenInfo(Types));
case llvm::Triple::le32:
return SetCGInfo(new PNaClTargetCodeGenInfo(Types));
case llvm::Triple::m68k:
return SetCGInfo(new M68kTargetCodeGenInfo(Types));
case llvm::Triple::mips:
case llvm::Triple::mipsel:
if (Triple.getOS() == llvm::Triple::NaCl)
return SetCGInfo(new PNaClTargetCodeGenInfo(Types));
return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true));
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false));
case llvm::Triple::avr: {
// For passing parameters, R8~R25 are used on avr, and R18~R25 are used
// on avrtiny. For passing return value, R18~R25 are used on avr, and
// R22~R25 are used on avrtiny.
unsigned NPR = getTarget().getABI() == "avrtiny" ? 6 : 18;
unsigned NRR = getTarget().getABI() == "avrtiny" ? 4 : 8;
return SetCGInfo(new AVRTargetCodeGenInfo(Types, NPR, NRR));
}
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be: {
AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS;
if (getTarget().getABI() == "darwinpcs")
Kind = AArch64ABIInfo::DarwinPCS;
else if (Triple.isOSWindows())
return SetCGInfo(
new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIInfo::Win64));
return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind));
}
case llvm::Triple::wasm32:
case llvm::Triple::wasm64: {
WebAssemblyABIInfo::ABIKind Kind = WebAssemblyABIInfo::MVP;
if (getTarget().getABI() == "experimental-mv")
Kind = WebAssemblyABIInfo::ExperimentalMV;
return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types, Kind));
}
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb: {
if (Triple.getOS() == llvm::Triple::Win32) {
return SetCGInfo(
new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP));
}
ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS;
StringRef ABIStr = getTarget().getABI();
if (ABIStr == "apcs-gnu")
Kind = ARMABIInfo::APCS;
else if (ABIStr == "aapcs16")
Kind = ARMABIInfo::AAPCS16_VFP;
else if (CodeGenOpts.FloatABI == "hard" ||
(CodeGenOpts.FloatABI != "soft" &&
(Triple.getEnvironment() == llvm::Triple::GNUEABIHF ||
Triple.getEnvironment() == llvm::Triple::MuslEABIHF ||
Triple.getEnvironment() == llvm::Triple::EABIHF)))
Kind = ARMABIInfo::AAPCS_VFP;
return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind));
}
case llvm::Triple::ppc: {
if (Triple.isOSAIX())
return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ false));
bool IsSoftFloat =
CodeGenOpts.FloatABI == "soft" || getTarget().hasFeature("spe");
bool RetSmallStructInRegABI =
PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
return SetCGInfo(
new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI));
}
case llvm::Triple::ppcle: {
bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
bool RetSmallStructInRegABI =
PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
return SetCGInfo(
new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI));
}
case llvm::Triple::ppc64:
if (Triple.isOSAIX())
return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ true));
if (Triple.isOSBinFormatELF()) {
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
if (getTarget().getABI() == "elfv2")
Kind = PPC64_SVR4_ABIInfo::ELFv2;
bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
return SetCGInfo(
new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, IsSoftFloat));
}
return SetCGInfo(new PPC64TargetCodeGenInfo(Types));
case llvm::Triple::ppc64le: {
assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!");
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2;
if (getTarget().getABI() == "elfv1")
Kind = PPC64_SVR4_ABIInfo::ELFv1;
bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
return SetCGInfo(
new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, IsSoftFloat));
}
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
return SetCGInfo(new NVPTXTargetCodeGenInfo(Types));
case llvm::Triple::msp430:
return SetCGInfo(new MSP430TargetCodeGenInfo(Types));
case llvm::Triple::riscv32:
case llvm::Triple::riscv64: {
StringRef ABIStr = getTarget().getABI();
unsigned XLen = getTarget().getPointerWidth(0);
unsigned ABIFLen = 0;
if (ABIStr.endswith("f"))
ABIFLen = 32;
else if (ABIStr.endswith("d"))
ABIFLen = 64;
return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen));
}
case llvm::Triple::systemz: {
bool SoftFloat = CodeGenOpts.FloatABI == "soft";
bool HasVector = !SoftFloat && getTarget().getABI() == "vector";
return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector, SoftFloat));
}
case llvm::Triple::tce:
case llvm::Triple::tcele:
return SetCGInfo(new TCETargetCodeGenInfo(Types));
case llvm::Triple::x86: {
bool IsDarwinVectorABI = Triple.isOSDarwin();
bool RetSmallStructInRegABI =
X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing();
if (Triple.getOS() == llvm::Triple::Win32) {
return SetCGInfo(new WinX86_32TargetCodeGenInfo(
Types, IsDarwinVectorABI, RetSmallStructInRegABI,
IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters));
} else {
return SetCGInfo(new X86_32TargetCodeGenInfo(
Types, IsDarwinVectorABI, RetSmallStructInRegABI,
IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters,
CodeGenOpts.FloatABI == "soft"));
}
}
case llvm::Triple::x86_64: {
StringRef ABI = getTarget().getABI();
X86AVXABILevel AVXLevel =
(ABI == "avx512"
? X86AVXABILevel::AVX512
: ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None);
switch (Triple.getOS()) {
case llvm::Triple::Win32:
return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel));
default:
return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel));
}
}
case llvm::Triple::hexagon:
return SetCGInfo(new HexagonTargetCodeGenInfo(Types));
case llvm::Triple::lanai:
return SetCGInfo(new LanaiTargetCodeGenInfo(Types));
case llvm::Triple::r600:
return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
case llvm::Triple::amdgcn:
return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
case llvm::Triple::sparc:
return SetCGInfo(new SparcV8TargetCodeGenInfo(Types));
case llvm::Triple::sparcv9:
return SetCGInfo(new SparcV9TargetCodeGenInfo(Types));
case llvm::Triple::xcore:
return SetCGInfo(new XCoreTargetCodeGenInfo(Types));
case llvm::Triple::arc:
return SetCGInfo(new ARCTargetCodeGenInfo(Types));
case llvm::Triple::spir:
case llvm::Triple::spir64:
return SetCGInfo(new CommonSPIRTargetCodeGenInfo(Types));
case llvm::Triple::spirv32:
case llvm::Triple::spirv64:
return SetCGInfo(new SPIRVTargetCodeGenInfo(Types));
case llvm::Triple::ve:
return SetCGInfo(new VETargetCodeGenInfo(Types));
case llvm::Triple::csky: {
bool IsSoftFloat = !getTarget().hasFeature("hard-float-abi");
bool hasFP64 = getTarget().hasFeature("fpuv2_df") ||
getTarget().hasFeature("fpuv3_df");
return SetCGInfo(new CSKYTargetCodeGenInfo(Types, IsSoftFloat ? 0
: hasFP64 ? 64
: 32));
}
}
}
/// Create an OpenCL kernel for an enqueued block.
///
/// The kernel has the same function type as the block invoke function. Its
/// name is the name of the block invoke function postfixed with "_kernel".
/// It simply calls the block invoke function then returns.
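///
/// Illustrative sketch (hypothetical name): for a block invoke function named
/// "__foo_block_invoke", this emits a kernel "__foo_block_invoke_kernel" with
/// the same parameter types that forwards its arguments to the invoke
/// function and returns void.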
llvm::Function *
TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *Invoke,
llvm::Type *BlockTy) const {
auto *InvokeFT = Invoke->getFunctionType();
auto &C = CGF.getLLVMContext();
std::string Name = Invoke->getName().str() + "_kernel";
auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C),
InvokeFT->params(), false);
auto *F = llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, Name,
&CGF.CGM.getModule());
auto IP = CGF.Builder.saveIP();
auto *BB = llvm::BasicBlock::Create(C, "entry", F);
auto &Builder = CGF.Builder;
Builder.SetInsertPoint(BB);
llvm::SmallVector<llvm::Value *, 2> Args(llvm::make_pointer_range(F->args()));
llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
call->setCallingConv(Invoke->getCallingConv());
Builder.CreateRetVoid();
Builder.restoreIP(IP);
return F;
}
/// Create an OpenCL kernel for an enqueued block.
///
/// The type of the first argument (the block literal) is the struct type
/// of the block literal instead of a pointer type. The first argument
/// (block literal) is passed directly by value to the kernel. The kernel
/// allocates the same type of struct on stack and stores the block literal
/// to it and passes its pointer to the block invoke function. The kernel
/// has "enqueued-block" function attribute and kernel argument metadata.
llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
CodeGenFunction &CGF, llvm::Function *Invoke,
llvm::Type *BlockTy) const {
auto &Builder = CGF.Builder;
auto &C = CGF.getLLVMContext();
auto *InvokeFT = Invoke->getFunctionType();
llvm::SmallVector<llvm::Type *, 2> ArgTys;
llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
llvm::SmallVector<llvm::Metadata *, 8> ArgNames;
ArgTys.push_back(BlockTy);
ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
AccessQuals.push_back(llvm::MDString::get(C, "none"));
ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
ArgTys.push_back(InvokeFT->getParamType(I));
ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
AccessQuals.push_back(llvm::MDString::get(C, "none"));
ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
ArgNames.push_back(
llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
}
std::string Name = Invoke->getName().str() + "_kernel";
auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
&CGF.CGM.getModule());
F->addFnAttr("enqueued-block");
auto IP = CGF.Builder.saveIP();
auto *BB = llvm::BasicBlock::Create(C, "entry", F);
Builder.SetInsertPoint(BB);
const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy);
auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
BlockPtr->setAlignment(BlockAlign);
Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
llvm::SmallVector<llvm::Value *, 2> Args;
Args.push_back(Cast);
for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I)
Args.push_back(I);
llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
call->setCallingConv(Invoke->getCallingConv());
Builder.CreateRetVoid();
Builder.restoreIP(IP);
F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
F->setMetadata("kernel_arg_base_type",
llvm::MDNode::get(C, ArgBaseTypeNames));
F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));
return F;
}
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index f203cae1d329..665cdc3132fb 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -1,3211 +1,3201 @@
//===--- Gnu.cpp - Gnu Tool and ToolChain Implementations -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Gnu.h"
#include "Arch/ARM.h"
#include "Arch/CSKY.h"
#include "Arch/Mips.h"
#include "Arch/PPC.h"
#include "Arch/RISCV.h"
#include "Arch/Sparc.h"
#include "Arch/SystemZ.h"
#include "CommonArgs.h"
#include "Linux.h"
#include "clang/Config/config.h" // for GCC_INSTALL_PREFIX
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <system_error>
using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
using tools::addMultilibFlag;
using tools::addPathIfExists;
static bool forwardToGCC(const Option &O) {
// LinkerInput options have been forwarded. Don't duplicate.
if (O.hasFlag(options::LinkerInput))
return false;
return O.matches(options::OPT_Link_Group) || O.hasFlag(options::LinkOption);
}
// Switch CPU names not recognized by the GNU assembler to a close CPU that it
// does recognize, so that a lower -march is not picked in the absence of a
// -mcpu flag.
static void normalizeCPUNamesForAssembler(const ArgList &Args,
ArgStringList &CmdArgs) {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef CPUArg(A->getValue());
if (CPUArg.equals_insensitive("krait"))
CmdArgs.push_back("-mcpu=cortex-a15");
else if (CPUArg.equals_insensitive("kryo"))
CmdArgs.push_back("-mcpu=cortex-a57");
else
Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
}
}
void tools::gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
for (const auto &A : Args) {
if (forwardToGCC(A->getOption())) {
// It is unfortunate that we have to claim here, as this means
// we will basically never report anything interesting for
// platforms using a generic gcc, even if we are just using gcc
// to get to the assembler.
A->claim();
A->render(Args, CmdArgs);
}
}
RenderExtraToolArgs(JA, CmdArgs);
// If using a driver driver, force the arch.
if (getToolChain().getTriple().isOSDarwin()) {
CmdArgs.push_back("-arch");
CmdArgs.push_back(
Args.MakeArgString(getToolChain().getDefaultUniversalArchName()));
}
// Try to force gcc to match the tool chain we want, if we recognize
// the arch.
//
// FIXME: The triple class should directly provide the information we want
// here.
switch (getToolChain().getArch()) {
default:
break;
case llvm::Triple::x86:
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
CmdArgs.push_back("-m32");
break;
case llvm::Triple::x86_64:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
CmdArgs.push_back("-m64");
break;
case llvm::Triple::sparcel:
CmdArgs.push_back("-EL");
break;
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Unexpected output");
CmdArgs.push_back("-fsyntax-only");
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
// Only pass -x if gcc will understand it; otherwise hope gcc
// understands the suffix correctly. The main use case this would go
// wrong in is for linker inputs if they happened to have an odd
// suffix; really the only way to get this to happen is a command
// like '-x foobar a.c' which will treat a.c like a linker input.
//
// FIXME: For the linker case specifically, can we safely convert
// inputs into '-Wl,' options?
for (const auto &II : Inputs) {
// Don't try to pass LLVM or AST inputs to a generic gcc.
if (types::isLLVMIR(II.getType()))
D.Diag(clang::diag::err_drv_no_linker_llvm_support)
<< getToolChain().getTripleString();
else if (II.getType() == types::TY_AST)
D.Diag(diag::err_drv_no_ast_support) << getToolChain().getTripleString();
else if (II.getType() == types::TY_ModuleFile)
D.Diag(diag::err_drv_no_module_support)
<< getToolChain().getTripleString();
if (types::canTypeBeUserSpecified(II.getType())) {
CmdArgs.push_back("-x");
CmdArgs.push_back(types::getTypeName(II.getType()));
}
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else {
const Arg &A = II.getInputArg();
// Reverse translate some rewritten options.
if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) {
CmdArgs.push_back("-lstdc++");
continue;
}
// Don't render as input, we need gcc to do the translations.
A.render(Args, CmdArgs);
}
}
const std::string &customGCCName = D.getCCCGenericGCCName();
const char *GCCName;
if (!customGCCName.empty())
GCCName = customGCCName.c_str();
else if (D.CCCIsCXX()) {
GCCName = "g++";
} else
GCCName = "gcc";
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName));
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
void tools::gcc::Preprocessor::RenderExtraToolArgs(
const JobAction &JA, ArgStringList &CmdArgs) const {
CmdArgs.push_back("-E");
}
void tools::gcc::Compiler::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
switch (JA.getType()) {
// If -flto, etc. are present then make sure not to force assembly output.
case types::TY_LLVM_IR:
case types::TY_LTO_IR:
case types::TY_LLVM_BC:
case types::TY_LTO_BC:
CmdArgs.push_back("-c");
break;
// We assume we've got an "integrated" assembler in that gcc will produce an
// object file itself.
case types::TY_Object:
CmdArgs.push_back("-c");
break;
case types::TY_PP_Asm:
CmdArgs.push_back("-S");
break;
case types::TY_Nothing:
CmdArgs.push_back("-fsyntax-only");
break;
default:
D.Diag(diag::err_drv_invalid_gcc_output_type) << getTypeName(JA.getType());
}
}
void tools::gcc::Linker::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
// The types are (hopefully) good enough.
}
// On Arm the endianness of the output file is determined by the target and
// can be overridden by the pseudo-target flags '-mlittle-endian'/'-EL' and
// '-mbig-endian'/'-EB'. Unlike other targets the flag does not result in a
// normalized triple so we must handle the flag here.
static bool isArmBigEndian(const llvm::Triple &Triple,
const ArgList &Args) {
bool IsBigEndian = false;
switch (Triple.getArch()) {
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
IsBigEndian = true;
LLVM_FALLTHROUGH;
case llvm::Triple::arm:
case llvm::Triple::thumb:
if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
options::OPT_mbig_endian))
IsBigEndian = !A->getOption().matches(options::OPT_mlittle_endian);
break;
default:
break;
}
return IsBigEndian;
}
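// Return the GNU ld emulation name (the value passed to the linker via -m)
// for the given target, or nullptr if the target is not handled here.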
static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) {
switch (T.getArch()) {
case llvm::Triple::x86:
if (T.isOSIAMCU())
return "elf_iamcu";
return "elf_i386";
case llvm::Triple::aarch64:
return "aarch64linux";
case llvm::Triple::aarch64_be:
return "aarch64linuxb";
case llvm::Triple::arm:
case llvm::Triple::thumb:
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
return isArmBigEndian(T, Args) ? "armelfb_linux_eabi" : "armelf_linux_eabi";
case llvm::Triple::m68k:
return "m68kelf";
case llvm::Triple::ppc:
if (T.isOSLinux())
return "elf32ppclinux";
return "elf32ppc";
case llvm::Triple::ppcle:
if (T.isOSLinux())
return "elf32lppclinux";
return "elf32lppc";
case llvm::Triple::ppc64:
return "elf64ppc";
case llvm::Triple::ppc64le:
return "elf64lppc";
case llvm::Triple::riscv32:
return "elf32lriscv";
case llvm::Triple::riscv64:
return "elf64lriscv";
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
return "elf32_sparc";
case llvm::Triple::sparcv9:
return "elf64_sparc";
case llvm::Triple::mips:
return "elf32btsmip";
case llvm::Triple::mipsel:
return "elf32ltsmip";
case llvm::Triple::mips64:
if (tools::mips::hasMipsAbiArg(Args, "n32") ||
T.getEnvironment() == llvm::Triple::GNUABIN32)
return "elf32btsmipn32";
return "elf64btsmip";
case llvm::Triple::mips64el:
if (tools::mips::hasMipsAbiArg(Args, "n32") ||
T.getEnvironment() == llvm::Triple::GNUABIN32)
return "elf32ltsmipn32";
return "elf64ltsmip";
case llvm::Triple::systemz:
return "elf64_s390";
case llvm::Triple::x86_64:
if (T.isX32())
return "elf32_x86_64";
return "elf_x86_64";
case llvm::Triple::ve:
return "elf64ve";
case llvm::Triple::csky:
return "cskyelf_linux";
default:
return nullptr;
}
}
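// Decide whether the link should produce a position-independent executable.
// Explicit -shared, -static, -r, or -static-pie disables PIE; otherwise the
// last of -pie/-no-pie/-nopie wins, falling back to the toolchain default.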
static bool getPIE(const ArgList &Args, const ToolChain &TC) {
if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_r) || Args.hasArg(options::OPT_static_pie))
return false;
Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie,
options::OPT_nopie);
if (!A)
return TC.isPIEDefault(Args);
return A->getOption().matches(options::OPT_pie);
}
static bool getStaticPIE(const ArgList &Args, const ToolChain &TC) {
bool HasStaticPIE = Args.hasArg(options::OPT_static_pie);
// -no-pie is an alias for -nopie. So, handling -nopie takes care of
// -no-pie as well.
if (HasStaticPIE && Args.hasArg(options::OPT_nopie)) {
const Driver &D = TC.getDriver();
const llvm::opt::OptTable &Opts = D.getOpts();
const char *StaticPIEName = Opts.getOptionName(options::OPT_static_pie);
const char *NoPIEName = Opts.getOptionName(options::OPT_nopie);
D.Diag(diag::err_drv_cannot_mix_options) << StaticPIEName << NoPIEName;
}
return HasStaticPIE;
}
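// A fully static (non-PIE) link is requested only when -static is given
// without -static-pie.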
static bool getStatic(const ArgList &Args) {
return Args.hasArg(options::OPT_static) &&
!Args.hasArg(options::OPT_static_pie);
}
void tools::gnutools::StaticLibTool::ConstructJob(
Compilation &C, const JobAction &JA, const InputInfo &Output,
const InputInfoList &Inputs, const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
// Silence warning for "clang -g foo.o -o foo"
Args.ClaimAllArgs(options::OPT_g_Group);
// and "clang -emit-llvm foo.o -o foo"
Args.ClaimAllArgs(options::OPT_emit_llvm);
// and for "clang -w foo.o -o foo". Other warning options are already
// handled somewhere else.
Args.ClaimAllArgs(options::OPT_w);
// Silence warnings when linking C code with a C++ '-stdlib' argument.
Args.ClaimAllArgs(options::OPT_stdlib_EQ);
// The ar tool command line is "llvm-ar <options> <output_file> <input_files>".
ArgStringList CmdArgs;
// Create and insert file members with a deterministic index.
CmdArgs.push_back("rcsD");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs) {
if (II.isFilename()) {
CmdArgs.push_back(II.getFilename());
}
}
// Delete the old output archive file, if it already exists, before generating
// a new archive file.
auto OutputFileName = Output.getFilename();
if (Output.isFilename() && llvm::sys::fs::exists(OutputFileName)) {
if (std::error_code EC = llvm::sys::fs::remove(OutputFileName)) {
D.Diag(diag::err_drv_unable_to_remove_file) << EC.message();
return;
}
}
const char *Exec = Args.MakeArgString(getToolChain().GetStaticLibToolPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
// FIXME: The Linker class constructor takes a ToolChain and not a
// Generic_ELF, so the static_cast might return a reference to an invalid
// instance (see PR45061). Ideally, the Linker constructor needs to take a
// Generic_ELF instead.
const toolchains::Generic_ELF &ToolChain =
static_cast<const toolchains::Generic_ELF &>(getToolChain());
const Driver &D = ToolChain.getDriver();
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
const llvm::Triple::ArchType Arch = ToolChain.getArch();
const bool isAndroid = ToolChain.getTriple().isAndroid();
const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU();
const bool IsVE = ToolChain.getTriple().isVE();
const bool IsPIE = getPIE(Args, ToolChain);
const bool IsStaticPIE = getStaticPIE(Args, ToolChain);
const bool IsStatic = getStatic(Args);
const bool HasCRTBeginEndFiles =
ToolChain.getTriple().hasEnvironment() ||
(ToolChain.getTriple().getVendor() != llvm::Triple::MipsTechnologies);
ArgStringList CmdArgs;
// Silence warning for "clang -g foo.o -o foo"
Args.ClaimAllArgs(options::OPT_g_Group);
// and "clang -emit-llvm foo.o -o foo"
Args.ClaimAllArgs(options::OPT_emit_llvm);
// and for "clang -w foo.o -o foo". Other warning options are already
// handled somewhere else.
Args.ClaimAllArgs(options::OPT_w);
if (!D.SysRoot.empty())
CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
if (IsPIE)
CmdArgs.push_back("-pie");
if (IsStaticPIE) {
CmdArgs.push_back("-static");
CmdArgs.push_back("-pie");
CmdArgs.push_back("--no-dynamic-linker");
CmdArgs.push_back("-z");
CmdArgs.push_back("text");
}
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
if (Args.hasArg(options::OPT_s))
CmdArgs.push_back("-s");
if (Triple.isARM() || Triple.isThumb() || Triple.isAArch64()) {
bool IsBigEndian = isArmBigEndian(Triple, Args);
if (IsBigEndian)
arm::appendBE8LinkFlag(Args, CmdArgs, Triple);
IsBigEndian = IsBigEndian || Arch == llvm::Triple::aarch64_be;
CmdArgs.push_back(IsBigEndian ? "-EB" : "-EL");
}
// Most Android ARM64 targets should enable the linker fix for erratum
// 843419. Only non-Cortex-A53 devices are allowed to skip this flag.
if (Arch == llvm::Triple::aarch64 && isAndroid) {
std::string CPU = getCPUName(D, Args, Triple);
if (CPU.empty() || CPU == "generic" || CPU == "cortex-a53")
CmdArgs.push_back("--fix-cortex-a53-843419");
}
ToolChain.addExtraOpts(CmdArgs);
CmdArgs.push_back("--eh-frame-hdr");
if (const char *LDMOption = getLDMOption(ToolChain.getTriple(), Args)) {
CmdArgs.push_back("-m");
CmdArgs.push_back(LDMOption);
} else {
D.Diag(diag::err_target_unknown_triple) << Triple.str();
return;
}
if (Triple.isRISCV())
CmdArgs.push_back("-X");
if (Args.hasArg(options::OPT_shared))
CmdArgs.push_back("-shared");
if (IsStatic) {
CmdArgs.push_back("-static");
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
if (!Args.hasArg(options::OPT_shared) && !IsStaticPIE &&
!Args.hasArg(options::OPT_r)) {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back(Args.MakeArgString(Twine(D.DyldPrefix) +
ToolChain.getDynamicLinker(Args)));
}
}
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
options::OPT_r)) {
if (!isAndroid && !IsIAMCU) {
const char *crt1 = nullptr;
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
crt1 = "gcrt1.o";
else if (IsPIE)
crt1 = "Scrt1.o";
else if (IsStaticPIE)
crt1 = "rcrt1.o";
else
crt1 = "crt1.o";
}
if (crt1)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1)));
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
}
if (IsVE) {
CmdArgs.push_back("-z");
CmdArgs.push_back("max-page-size=0x4000000");
}
if (IsIAMCU)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
else if (HasCRTBeginEndFiles) {
std::string P;
if (ToolChain.GetRuntimeLibType(Args) == ToolChain::RLT_CompilerRT &&
!isAndroid) {
std::string crtbegin = ToolChain.getCompilerRT(Args, "crtbegin",
ToolChain::FT_Object);
if (ToolChain.getVFS().exists(crtbegin))
P = crtbegin;
}
if (P.empty()) {
const char *crtbegin;
if (Args.hasArg(options::OPT_shared))
crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o";
else if (IsStatic)
crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o";
else if (IsPIE || IsStaticPIE)
crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o";
else
crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbegin.o";
P = ToolChain.GetFilePath(crtbegin);
}
CmdArgs.push_back(Args.MakeArgString(P));
}
// Add crtfastmath.o if available and fast math is enabled.
ToolChain.addFastMathRuntimeIfAvailable(Args, CmdArgs);
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_u);
ToolChain.AddFilePathLibArgs(Args, CmdArgs);
if (D.isUsingLTO()) {
assert(!Inputs.empty() && "Must have at least one input.");
addLTOOptions(ToolChain, Args, CmdArgs, Output, Inputs[0],
D.getLTOMode() == LTOK_Thin);
}
if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
CmdArgs.push_back("--no-demangle");
bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
addLinkerCompressDebugSectionsOption(ToolChain, Args, CmdArgs);
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
addHIPRuntimeLibArgs(ToolChain, Args, CmdArgs);
// The profile runtime also needs access to system libraries.
getToolChain().addProfileRTLibs(Args, CmdArgs);
if (D.CCCIsCXX() &&
!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs,
options::OPT_r)) {
if (ToolChain.ShouldLinkCXXStdlib(Args)) {
bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
!Args.hasArg(options::OPT_static);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bstatic");
ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bdynamic");
}
CmdArgs.push_back("-lm");
}
// If we are linking for the device, all symbols should be bound locally. The
// symbols are already protected, which makes this redundant; it is only
// necessary to work around a problem in bfd.
// TODO: Remove this once 'lld' becomes the only linker for offloading.
if (JA.isDeviceOffloading(Action::OFK_OpenMP))
CmdArgs.push_back("-Bsymbolic");
// Silence warnings when linking C code with a C++ '-stdlib' argument.
Args.ClaimAllArgs(options::OPT_stdlib_EQ);
// Additional linker setup and flags for Fortran. This is required in order
// to generate executables. As the Fortran runtime depends on the C runtime,
// these dependencies need to be listed before the C runtime below (i.e.
// AddRunTimeLibs).
if (D.IsFlangMode()) {
addFortranRuntimeLibraryPath(ToolChain, Args, CmdArgs);
addFortranRuntimeLibs(ToolChain, CmdArgs);
CmdArgs.push_back("-lm");
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_r)) {
if (!Args.hasArg(options::OPT_nodefaultlibs)) {
if (IsStatic || IsStaticPIE)
CmdArgs.push_back("--start-group");
if (NeedsSanitizerDeps)
linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
if (NeedsXRayDeps)
linkXRayRuntimeDeps(ToolChain, CmdArgs);
bool WantPthread = Args.hasArg(options::OPT_pthread) ||
Args.hasArg(options::OPT_pthreads);
// Use the static OpenMP runtime with -static-openmp
bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) &&
!Args.hasArg(options::OPT_static);
// FIXME: Only pass GompNeedsRT = true for platforms with libgomp that
// require librt. Most modern Linux platforms do, but some may not.
if (addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP,
JA.isHostOffloading(Action::OFK_OpenMP),
/* GompNeedsRT= */ true))
// The OpenMP runtime implies pthreads when using the GNU toolchain.
// FIXME: Does this really make sense for all GNU toolchains?
WantPthread = true;
AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
// LLVM support for atomics on 32-bit SPARC V8+ is incomplete, so
// forcibly link with libatomic as a workaround.
// TODO: Issue #41880 and D118021.
if (getToolChain().getTriple().getArch() == llvm::Triple::sparc) {
CmdArgs.push_back("--push-state");
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-latomic");
CmdArgs.push_back("--pop-state");
}
if (WantPthread && !isAndroid)
CmdArgs.push_back("-lpthread");
if (Args.hasArg(options::OPT_fsplit_stack))
CmdArgs.push_back("--wrap=pthread_create");
if (!Args.hasArg(options::OPT_nolibc))
CmdArgs.push_back("-lc");
// Add IAMCU-specific libs, if needed.
if (IsIAMCU)
CmdArgs.push_back("-lgloss");
if (IsStatic || IsStaticPIE)
CmdArgs.push_back("--end-group");
else
AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
// Add IAMCU-specific libs (outside the group), if needed.
if (IsIAMCU) {
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-lsoftfp");
CmdArgs.push_back("--no-as-needed");
}
}
if (!Args.hasArg(options::OPT_nostartfiles) && !IsIAMCU) {
if (HasCRTBeginEndFiles) {
std::string P;
if (ToolChain.GetRuntimeLibType(Args) == ToolChain::RLT_CompilerRT &&
!isAndroid) {
std::string crtend = ToolChain.getCompilerRT(Args, "crtend",
ToolChain::FT_Object);
if (ToolChain.getVFS().exists(crtend))
P = crtend;
}
if (P.empty()) {
const char *crtend;
if (Args.hasArg(options::OPT_shared))
crtend = isAndroid ? "crtend_so.o" : "crtendS.o";
else if (IsPIE || IsStaticPIE)
crtend = isAndroid ? "crtend_android.o" : "crtendS.o";
else
crtend = isAndroid ? "crtend_android.o" : "crtend.o";
P = ToolChain.GetFilePath(crtend);
}
CmdArgs.push_back(Args.MakeArgString(P));
}
if (!isAndroid)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
}
}
Args.AddAllArgs(CmdArgs, options::OPT_T);
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
void tools::gnutools::Assembler::ConstructJob(Compilation &C,
const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const auto &D = getToolChain().getDriver();
claimNoWarnArgs(Args);
ArgStringList CmdArgs;
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
const char *DefaultAssembler = "as";
std::tie(RelocationModel, PICLevel, IsPIE) =
ParsePICArgs(getToolChain(), Args);
if (const Arg *A = Args.getLastArg(options::OPT_gz, options::OPT_gz_EQ)) {
if (A->getOption().getID() == options::OPT_gz) {
CmdArgs.push_back("--compress-debug-sections");
} else {
StringRef Value = A->getValue();
if (Value == "none" || Value == "zlib") {
CmdArgs.push_back(
Args.MakeArgString("--compress-debug-sections=" + Twine(Value)));
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
switch (getToolChain().getArch()) {
default:
break;
// Add --32/--64 to make sure we get the format we want.
// This list is incomplete.
case llvm::Triple::x86:
CmdArgs.push_back("--32");
break;
case llvm::Triple::x86_64:
if (getToolChain().getTriple().isX32())
CmdArgs.push_back("--x32");
else
CmdArgs.push_back("--64");
break;
case llvm::Triple::ppc: {
CmdArgs.push_back("-a32");
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-mbig-endian");
CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::ppcle: {
CmdArgs.push_back("-a32");
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-mlittle-endian");
CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::ppc64: {
CmdArgs.push_back("-a64");
CmdArgs.push_back("-mppc64");
CmdArgs.push_back("-mbig-endian");
CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::ppc64le: {
CmdArgs.push_back("-a64");
CmdArgs.push_back("-mppc64");
CmdArgs.push_back("-mlittle-endian");
CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::riscv32:
case llvm::Triple::riscv64: {
StringRef ABIName = riscv::getRISCVABI(Args, getToolChain().getTriple());
CmdArgs.push_back("-mabi");
CmdArgs.push_back(ABIName.data());
StringRef MArchName = riscv::getRISCVArch(Args, getToolChain().getTriple());
CmdArgs.push_back("-march");
CmdArgs.push_back(MArchName.data());
if (!Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, true))
Args.addOptOutFlag(CmdArgs, options::OPT_mrelax, options::OPT_mno_relax);
break;
}
case llvm::Triple::sparc:
case llvm::Triple::sparcel: {
CmdArgs.push_back("-32");
std::string CPU = getCPUName(D, Args, getToolChain().getTriple());
CmdArgs.push_back(
sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
break;
}
case llvm::Triple::sparcv9: {
CmdArgs.push_back("-64");
std::string CPU = getCPUName(D, Args, getToolChain().getTriple());
CmdArgs.push_back(
sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
break;
}
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb: {
const llvm::Triple &Triple2 = getToolChain().getTriple();
CmdArgs.push_back(isArmBigEndian(Triple2, Args) ? "-EB" : "-EL");
switch (Triple2.getSubArch()) {
case llvm::Triple::ARMSubArch_v7:
CmdArgs.push_back("-mfpu=neon");
break;
case llvm::Triple::ARMSubArch_v8:
CmdArgs.push_back("-mfpu=crypto-neon-fp-armv8");
break;
default:
break;
}
switch (arm::getARMFloatABI(getToolChain(), Args)) {
case arm::FloatABI::Invalid: llvm_unreachable("must have an ABI!");
case arm::FloatABI::Soft:
CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=soft"));
break;
case arm::FloatABI::SoftFP:
CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=softfp"));
break;
case arm::FloatABI::Hard:
CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=hard"));
break;
}
Args.AddLastArg(CmdArgs, options::OPT_march_EQ);
normalizeCPUNamesForAssembler(Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ);
break;
}
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be: {
CmdArgs.push_back(
getToolChain().getArch() == llvm::Triple::aarch64_be ? "-EB" : "-EL");
Args.AddLastArg(CmdArgs, options::OPT_march_EQ);
normalizeCPUNamesForAssembler(Args, CmdArgs);
break;
}
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
ABIName = mips::getGnuCompatibleMipsABIName(ABIName);
CmdArgs.push_back("-march");
CmdArgs.push_back(CPUName.data());
CmdArgs.push_back("-mabi");
CmdArgs.push_back(ABIName.data());
// -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE,
// or -mshared (not implemented) is in effect.
if (RelocationModel == llvm::Reloc::Static)
CmdArgs.push_back("-mno-shared");
// LLVM doesn't support -mplt yet and acts as if it is always given.
// However, -mplt has no effect with the N64 ABI.
if (ABIName != "64" && !Args.hasArg(options::OPT_mno_abicalls))
CmdArgs.push_back("-call_nonpic");
if (getToolChain().getTriple().isLittleEndian())
CmdArgs.push_back("-EL");
else
CmdArgs.push_back("-EB");
if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) {
if (StringRef(A->getValue()) == "2008")
CmdArgs.push_back(Args.MakeArgString("-mnan=2008"));
}
// Add the last -mfp32/-mfpxx/-mfp64 or -mfpxx if it is enabled by default.
if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx,
options::OPT_mfp64)) {
A->claim();
A->render(Args, CmdArgs);
} else if (mips::shouldUseFPXX(
Args, getToolChain().getTriple(), CPUName, ABIName,
mips::getMipsFloatABI(getToolChain().getDriver(), Args,
getToolChain().getTriple())))
CmdArgs.push_back("-mfpxx");
// Pass on -mmips16 or -mno-mips16. However, the assembler equivalent of
// -mno-mips16 is actually -no-mips16.
if (Arg *A =
Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16)) {
if (A->getOption().matches(options::OPT_mips16)) {
A->claim();
A->render(Args, CmdArgs);
} else {
A->claim();
CmdArgs.push_back("-no-mips16");
}
}
Args.AddLastArg(CmdArgs, options::OPT_mmicromips,
options::OPT_mno_micromips);
Args.AddLastArg(CmdArgs, options::OPT_mdsp, options::OPT_mno_dsp);
Args.AddLastArg(CmdArgs, options::OPT_mdspr2, options::OPT_mno_dspr2);
if (Arg *A = Args.getLastArg(options::OPT_mmsa, options::OPT_mno_msa)) {
// Do not use AddLastArg because not all versions of the MIPS assembler
// support the -mmsa / -mno-msa options.
if (A->getOption().matches(options::OPT_mmsa))
CmdArgs.push_back(Args.MakeArgString("-mmsa"));
}
Args.AddLastArg(CmdArgs, options::OPT_mhard_float,
options::OPT_msoft_float);
Args.AddLastArg(CmdArgs, options::OPT_mdouble_float,
options::OPT_msingle_float);
Args.AddLastArg(CmdArgs, options::OPT_modd_spreg,
options::OPT_mno_odd_spreg);
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
break;
}
case llvm::Triple::systemz: {
// Always pass an -march option, since our default of z10 is later
// than the GNU assembler's default.
std::string CPUName = systemz::getSystemZTargetCPU(Args);
CmdArgs.push_back(Args.MakeArgString("-march=" + CPUName));
break;
}
case llvm::Triple::ve:
DefaultAssembler = "nas";
}
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fdebug_prefix_map_EQ)) {
StringRef Map = A->getValue();
if (!Map.contains('='))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else {
CmdArgs.push_back(Args.MakeArgString("--debug-prefix-map"));
CmdArgs.push_back(Args.MakeArgString(Map));
}
A->claim();
}
Args.AddAllArgs(CmdArgs, options::OPT_I);
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs)
CmdArgs.push_back(II.getFilename());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath(DefaultAssembler));
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
// Handle the debug info splitting at object creation time if we're
// creating an object.
// TODO: Currently this only works on Linux with a newer objcopy.
if (Args.hasArg(options::OPT_gsplit_dwarf) &&
getToolChain().getTriple().isOSLinux())
SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
SplitDebugName(JA, Args, Inputs[0], Output));
}
namespace {
// Filter to remove Multilibs that don't exist as a suffix to Path
class FilterNonExistent {
StringRef Base, File;
llvm::vfs::FileSystem &VFS;
public:
FilterNonExistent(StringRef Base, StringRef File, llvm::vfs::FileSystem &VFS)
: Base(Base), File(File), VFS(VFS) {}
bool operator()(const Multilib &M) {
return !VFS.exists(Base + M.gccSuffix() + File);
}
};
} // end anonymous namespace
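// The soft-float ABI is in effect when the last relevant flag is
// -msoft-float or -mfloat-abi=soft.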
static bool isSoftFloatABI(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
options::OPT_mfloat_abi_EQ);
if (!A)
return false;
return A->getOption().matches(options::OPT_msoft_float) ||
(A->getOption().matches(options::OPT_mfloat_abi_EQ) &&
A->getValue() == StringRef("soft"));
}
static bool isArmOrThumbArch(llvm::Triple::ArchType Arch) {
return Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb;
}
static bool isMipsEL(llvm::Triple::ArchType Arch) {
return Arch == llvm::Triple::mipsel || Arch == llvm::Triple::mips64el;
}
static bool isMips16(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16);
return A && A->getOption().matches(options::OPT_mips16);
}
static bool isMicroMips(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_mmicromips, options::OPT_mno_micromips);
return A && A->getOption().matches(options::OPT_mmicromips);
}
static bool isMSP430(llvm::Triple::ArchType Arch) {
return Arch == llvm::Triple::msp430;
}
static Multilib makeMultilib(StringRef commonSuffix) {
return Multilib(commonSuffix, commonSuffix, commonSuffix);
}
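// Detect MIPS multilibs laid out in the Code Sourcery or Debian style under
// the GCC installation and pick the candidate set that best matches the
// directory tree and the command-line flags.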
static bool findMipsCsMultilibs(const Multilib::flags_list &Flags,
FilterNonExistent &NonExistent,
DetectedMultilibs &Result) {
// Check for Code Sourcery toolchain multilibs
MultilibSet CSMipsMultilibs;
{
auto MArchMips16 = makeMultilib("/mips16").flag("+m32").flag("+mips16");
auto MArchMicroMips =
makeMultilib("/micromips").flag("+m32").flag("+mmicromips");
auto MArchDefault = makeMultilib("").flag("-mips16").flag("-mmicromips");
auto UCLibc = makeMultilib("/uclibc").flag("+muclibc");
auto SoftFloat = makeMultilib("/soft-float").flag("+msoft-float");
auto Nan2008 = makeMultilib("/nan2008").flag("+mnan=2008");
auto DefaultFloat =
makeMultilib("").flag("-msoft-float").flag("-mnan=2008");
auto BigEndian = makeMultilib("").flag("+EB").flag("-EL");
auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
// Note that this one's osSuffix is ""
auto MAbi64 = makeMultilib("")
.gccSuffix("/64")
.includeSuffix("/64")
.flag("+mabi=n64")
.flag("-mabi=n32")
.flag("-m32");
CSMipsMultilibs =
MultilibSet()
.Either(MArchMips16, MArchMicroMips, MArchDefault)
.Maybe(UCLibc)
.Either(SoftFloat, Nan2008, DefaultFloat)
.FilterOut("/micromips/nan2008")
.FilterOut("/mips16/nan2008")
.Either(BigEndian, LittleEndian)
.Maybe(MAbi64)
.FilterOut("/mips16.*/64")
.FilterOut("/micromips.*/64")
.FilterOut(NonExistent)
.setIncludeDirsCallback([](const Multilib &M) {
std::vector<std::string> Dirs({"/include"});
if (StringRef(M.includeSuffix()).startswith("/uclibc"))
Dirs.push_back(
"/../../../../mips-linux-gnu/libc/uclibc/usr/include");
else
Dirs.push_back("/../../../../mips-linux-gnu/libc/usr/include");
return Dirs;
});
}
MultilibSet DebianMipsMultilibs;
{
Multilib MAbiN32 =
Multilib().gccSuffix("/n32").includeSuffix("/n32").flag("+mabi=n32");
Multilib M64 = Multilib()
.gccSuffix("/64")
.includeSuffix("/64")
.flag("+m64")
.flag("-m32")
.flag("-mabi=n32");
Multilib M32 =
Multilib().gccSuffix("/32").flag("-m64").flag("+m32").flag("-mabi=n32");
DebianMipsMultilibs =
MultilibSet().Either(M32, M64, MAbiN32).FilterOut(NonExistent);
}
// Sort candidates. The toolchain that best matches the directory tree goes
// first. Then select the first toolchain that matches the command-line flags.
MultilibSet *Candidates[] = {&CSMipsMultilibs, &DebianMipsMultilibs};
if (CSMipsMultilibs.size() < DebianMipsMultilibs.size())
std::iter_swap(Candidates, Candidates + 1);
for (const MultilibSet *Candidate : Candidates) {
if (Candidate->select(Flags, Result.SelectedMultilib)) {
if (Candidate == &DebianMipsMultilibs)
Result.BiarchSibling = Multilib();
Result.Multilibs = *Candidate;
return true;
}
}
return false;
}
static bool findMipsAndroidMultilibs(llvm::vfs::FileSystem &VFS, StringRef Path,
const Multilib::flags_list &Flags,
FilterNonExistent &NonExistent,
DetectedMultilibs &Result) {
MultilibSet AndroidMipsMultilibs =
MultilibSet()
.Maybe(Multilib("/mips-r2").flag("+march=mips32r2"))
.Maybe(Multilib("/mips-r6").flag("+march=mips32r6"))
.FilterOut(NonExistent);
MultilibSet AndroidMipselMultilibs =
MultilibSet()
.Either(Multilib().flag("+march=mips32"),
Multilib("/mips-r2", "", "/mips-r2").flag("+march=mips32r2"),
Multilib("/mips-r6", "", "/mips-r6").flag("+march=mips32r6"))
.FilterOut(NonExistent);
MultilibSet AndroidMips64elMultilibs =
MultilibSet()
.Either(
Multilib().flag("+march=mips64r6"),
Multilib("/32/mips-r1", "", "/mips-r1").flag("+march=mips32"),
Multilib("/32/mips-r2", "", "/mips-r2").flag("+march=mips32r2"),
Multilib("/32/mips-r6", "", "/mips-r6").flag("+march=mips32r6"))
.FilterOut(NonExistent);
MultilibSet *MS = &AndroidMipsMultilibs;
if (VFS.exists(Path + "/mips-r6"))
MS = &AndroidMipselMultilibs;
else if (VFS.exists(Path + "/32"))
MS = &AndroidMips64elMultilibs;
if (MS->select(Flags, Result.SelectedMultilib)) {
Result.Multilibs = *MS;
return true;
}
return false;
}
static bool findMipsMuslMultilibs(const Multilib::flags_list &Flags,
FilterNonExistent &NonExistent,
DetectedMultilibs &Result) {
// Musl toolchain multilibs
MultilibSet MuslMipsMultilibs;
{
auto MArchMipsR2 = makeMultilib("")
.osSuffix("/mips-r2-hard-musl")
.flag("+EB")
.flag("-EL")
.flag("+march=mips32r2");
auto MArchMipselR2 = makeMultilib("/mipsel-r2-hard-musl")
.flag("-EB")
.flag("+EL")
.flag("+march=mips32r2");
MuslMipsMultilibs = MultilibSet().Either(MArchMipsR2, MArchMipselR2);
// Specify the callback that computes the include directories.
MuslMipsMultilibs.setIncludeDirsCallback([](const Multilib &M) {
return std::vector<std::string>(
{"/../sysroot" + M.osSuffix() + "/usr/include"});
});
}
if (MuslMipsMultilibs.select(Flags, Result.SelectedMultilib)) {
Result.Multilibs = MuslMipsMultilibs;
return true;
}
return false;
}
static bool findMipsMtiMultilibs(const Multilib::flags_list &Flags,
FilterNonExistent &NonExistent,
DetectedMultilibs &Result) {
// CodeScape MTI toolchain v1.2 and earlier.
MultilibSet MtiMipsMultilibsV1;
{
auto MArchMips32 = makeMultilib("/mips32")
.flag("+m32")
.flag("-m64")
.flag("-mmicromips")
.flag("+march=mips32");
auto MArchMicroMips = makeMultilib("/micromips")
.flag("+m32")
.flag("-m64")
.flag("+mmicromips");
auto MArchMips64r2 = makeMultilib("/mips64r2")
.flag("-m32")
.flag("+m64")
.flag("+march=mips64r2");
auto MArchMips64 = makeMultilib("/mips64").flag("-m32").flag("+m64").flag(
"-march=mips64r2");
auto MArchDefault = makeMultilib("")
.flag("+m32")
.flag("-m64")
.flag("-mmicromips")
.flag("+march=mips32r2");
auto Mips16 = makeMultilib("/mips16").flag("+mips16");
auto UCLibc = makeMultilib("/uclibc").flag("+muclibc");
auto MAbi64 =
makeMultilib("/64").flag("+mabi=n64").flag("-mabi=n32").flag("-m32");
auto BigEndian = makeMultilib("").flag("+EB").flag("-EL");
auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
auto SoftFloat = makeMultilib("/sof").flag("+msoft-float");
auto Nan2008 = makeMultilib("/nan2008").flag("+mnan=2008");
MtiMipsMultilibsV1 =
MultilibSet()
.Either(MArchMips32, MArchMicroMips, MArchMips64r2, MArchMips64,
MArchDefault)
.Maybe(UCLibc)
.Maybe(Mips16)
.FilterOut("/mips64/mips16")
.FilterOut("/mips64r2/mips16")
.FilterOut("/micromips/mips16")
.Maybe(MAbi64)
.FilterOut("/micromips/64")
.FilterOut("/mips32/64")
.FilterOut("^/64")
.FilterOut("/mips16/64")
.Either(BigEndian, LittleEndian)
.Maybe(SoftFloat)
.Maybe(Nan2008)
.FilterOut(".*sof/nan2008")
.FilterOut(NonExistent)
.setIncludeDirsCallback([](const Multilib &M) {
std::vector<std::string> Dirs({"/include"});
if (StringRef(M.includeSuffix()).startswith("/uclibc"))
Dirs.push_back("/../../../../sysroot/uclibc/usr/include");
else
Dirs.push_back("/../../../../sysroot/usr/include");
return Dirs;
});
}
// CodeScape MTI toolchain starting from v1.3.
MultilibSet MtiMipsMultilibsV2;
{
auto BeHard = makeMultilib("/mips-r2-hard")
.flag("+EB")
.flag("-msoft-float")
.flag("-mnan=2008")
.flag("-muclibc");
auto BeSoft = makeMultilib("/mips-r2-soft")
.flag("+EB")
.flag("+msoft-float")
.flag("-mnan=2008");
auto ElHard = makeMultilib("/mipsel-r2-hard")
.flag("+EL")
.flag("-msoft-float")
.flag("-mnan=2008")
.flag("-muclibc");
auto ElSoft = makeMultilib("/mipsel-r2-soft")
.flag("+EL")
.flag("+msoft-float")
.flag("-mnan=2008")
.flag("-mmicromips");
auto BeHardNan = makeMultilib("/mips-r2-hard-nan2008")
.flag("+EB")
.flag("-msoft-float")
.flag("+mnan=2008")
.flag("-muclibc");
auto ElHardNan = makeMultilib("/mipsel-r2-hard-nan2008")
.flag("+EL")
.flag("-msoft-float")
.flag("+mnan=2008")
.flag("-muclibc")
.flag("-mmicromips");
auto BeHardNanUclibc = makeMultilib("/mips-r2-hard-nan2008-uclibc")
.flag("+EB")
.flag("-msoft-float")
.flag("+mnan=2008")
.flag("+muclibc");
auto ElHardNanUclibc = makeMultilib("/mipsel-r2-hard-nan2008-uclibc")
.flag("+EL")
.flag("-msoft-float")
.flag("+mnan=2008")
.flag("+muclibc");
auto BeHardUclibc = makeMultilib("/mips-r2-hard-uclibc")
.flag("+EB")
.flag("-msoft-float")
.flag("-mnan=2008")
.flag("+muclibc");
auto ElHardUclibc = makeMultilib("/mipsel-r2-hard-uclibc")
.flag("+EL")
.flag("-msoft-float")
.flag("-mnan=2008")
.flag("+muclibc");
auto ElMicroHardNan = makeMultilib("/micromipsel-r2-hard-nan2008")
.flag("+EL")
.flag("-msoft-float")
.flag("+mnan=2008")
.flag("+mmicromips");
auto ElMicroSoft = makeMultilib("/micromipsel-r2-soft")
.flag("+EL")
.flag("+msoft-float")
.flag("-mnan=2008")
.flag("+mmicromips");
auto O32 =
makeMultilib("/lib").osSuffix("").flag("-mabi=n32").flag("-mabi=n64");
auto N32 =
makeMultilib("/lib32").osSuffix("").flag("+mabi=n32").flag("-mabi=n64");
auto N64 =
makeMultilib("/lib64").osSuffix("").flag("-mabi=n32").flag("+mabi=n64");
MtiMipsMultilibsV2 =
MultilibSet()
.Either({BeHard, BeSoft, ElHard, ElSoft, BeHardNan, ElHardNan,
BeHardNanUclibc, ElHardNanUclibc, BeHardUclibc,
ElHardUclibc, ElMicroHardNan, ElMicroSoft})
.Either(O32, N32, N64)
.FilterOut(NonExistent)
.setIncludeDirsCallback([](const Multilib &M) {
return std::vector<std::string>({"/../../../../sysroot" +
M.includeSuffix() +
"/../usr/include"});
})
.setFilePathsCallback([](const Multilib &M) {
return std::vector<std::string>(
{"/../../../../mips-mti-linux-gnu/lib" + M.gccSuffix()});
});
}
for (auto Candidate : {&MtiMipsMultilibsV1, &MtiMipsMultilibsV2}) {
if (Candidate->select(Flags, Result.SelectedMultilib)) {
Result.Multilibs = *Candidate;
return true;
}
}
return false;
}
static bool findMipsImgMultilibs(const Multilib::flags_list &Flags,
FilterNonExistent &NonExistent,
DetectedMultilibs &Result) {
// CodeScape IMG toolchain v1.2 and earlier.
MultilibSet ImgMultilibsV1;
{
auto Mips64r6 = makeMultilib("/mips64r6").flag("+m64").flag("-m32");
auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
auto MAbi64 =
makeMultilib("/64").flag("+mabi=n64").flag("-mabi=n32").flag("-m32");
ImgMultilibsV1 =
MultilibSet()
.Maybe(Mips64r6)
.Maybe(MAbi64)
.Maybe(LittleEndian)
.FilterOut(NonExistent)
.setIncludeDirsCallback([](const Multilib &M) {
return std::vector<std::string>(
{"/include", "/../../../../sysroot/usr/include"});
});
}
// CodeScape IMG toolchain starting from v1.3.
MultilibSet ImgMultilibsV2;
{
auto BeHard = makeMultilib("/mips-r6-hard")
.flag("+EB")
.flag("-msoft-float")
.flag("-mmicromips");
auto BeSoft = makeMultilib("/mips-r6-soft")
.flag("+EB")
.flag("+msoft-float")
.flag("-mmicromips");
auto ElHard = makeMultilib("/mipsel-r6-hard")
.flag("+EL")
.flag("-msoft-float")
.flag("-mmicromips");
auto ElSoft = makeMultilib("/mipsel-r6-soft")
.flag("+EL")
.flag("+msoft-float")
.flag("-mmicromips");
auto BeMicroHard = makeMultilib("/micromips-r6-hard")
.flag("+EB")
.flag("-msoft-float")
.flag("+mmicromips");
auto BeMicroSoft = makeMultilib("/micromips-r6-soft")
.flag("+EB")
.flag("+msoft-float")
.flag("+mmicromips");
auto ElMicroHard = makeMultilib("/micromipsel-r6-hard")
.flag("+EL")
.flag("-msoft-float")
.flag("+mmicromips");
auto ElMicroSoft = makeMultilib("/micromipsel-r6-soft")
.flag("+EL")
.flag("+msoft-float")
.flag("+mmicromips");
auto O32 =
makeMultilib("/lib").osSuffix("").flag("-mabi=n32").flag("-mabi=n64");
auto N32 =
makeMultilib("/lib32").osSuffix("").flag("+mabi=n32").flag("-mabi=n64");
auto N64 =
makeMultilib("/lib64").osSuffix("").flag("-mabi=n32").flag("+mabi=n64");
ImgMultilibsV2 =
MultilibSet()
.Either({BeHard, BeSoft, ElHard, ElSoft, BeMicroHard, BeMicroSoft,
ElMicroHard, ElMicroSoft})
.Either(O32, N32, N64)
.FilterOut(NonExistent)
.setIncludeDirsCallback([](const Multilib &M) {
return std::vector<std::string>({"/../../../../sysroot" +
M.includeSuffix() +
"/../usr/include"});
})
.setFilePathsCallback([](const Multilib &M) {
return std::vector<std::string>(
{"/../../../../mips-img-linux-gnu/lib" + M.gccSuffix()});
});
}
for (auto Candidate : {&ImgMultilibsV1, &ImgMultilibsV2}) {
if (Candidate->select(Flags, Result.SelectedMultilib)) {
Result.Multilibs = *Candidate;
return true;
}
}
return false;
}
bool clang::driver::findMIPSMultilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
DetectedMultilibs &Result) {
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
StringRef CPUName;
StringRef ABIName;
tools::mips::getMipsCPUAndABI(Args, TargetTriple, CPUName, ABIName);
llvm::Triple::ArchType TargetArch = TargetTriple.getArch();
Multilib::flags_list Flags;
addMultilibFlag(TargetTriple.isMIPS32(), "m32", Flags);
addMultilibFlag(TargetTriple.isMIPS64(), "m64", Flags);
addMultilibFlag(isMips16(Args), "mips16", Flags);
addMultilibFlag(CPUName == "mips32", "march=mips32", Flags);
addMultilibFlag(CPUName == "mips32r2" || CPUName == "mips32r3" ||
CPUName == "mips32r5" || CPUName == "p5600",
"march=mips32r2", Flags);
addMultilibFlag(CPUName == "mips32r6", "march=mips32r6", Flags);
addMultilibFlag(CPUName == "mips64", "march=mips64", Flags);
addMultilibFlag(CPUName == "mips64r2" || CPUName == "mips64r3" ||
CPUName == "mips64r5" || CPUName == "octeon" ||
CPUName == "octeon+",
"march=mips64r2", Flags);
addMultilibFlag(CPUName == "mips64r6", "march=mips64r6", Flags);
addMultilibFlag(isMicroMips(Args), "mmicromips", Flags);
addMultilibFlag(tools::mips::isUCLibc(Args), "muclibc", Flags);
addMultilibFlag(tools::mips::isNaN2008(D, Args, TargetTriple), "mnan=2008",
Flags);
addMultilibFlag(ABIName == "n32", "mabi=n32", Flags);
addMultilibFlag(ABIName == "n64", "mabi=n64", Flags);
addMultilibFlag(isSoftFloatABI(Args), "msoft-float", Flags);
addMultilibFlag(!isSoftFloatABI(Args), "mhard-float", Flags);
addMultilibFlag(isMipsEL(TargetArch), "EL", Flags);
addMultilibFlag(!isMipsEL(TargetArch), "EB", Flags);
if (TargetTriple.isAndroid())
return findMipsAndroidMultilibs(D.getVFS(), Path, Flags, NonExistent,
Result);
if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies &&
TargetTriple.getOS() == llvm::Triple::Linux &&
TargetTriple.getEnvironment() == llvm::Triple::UnknownEnvironment)
return findMipsMuslMultilibs(Flags, NonExistent, Result);
if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies &&
TargetTriple.getOS() == llvm::Triple::Linux &&
TargetTriple.isGNUEnvironment())
return findMipsMtiMultilibs(Flags, NonExistent, Result);
if (TargetTriple.getVendor() == llvm::Triple::ImaginationTechnologies &&
TargetTriple.getOS() == llvm::Triple::Linux &&
TargetTriple.isGNUEnvironment())
return findMipsImgMultilibs(Flags, NonExistent, Result);
if (findMipsCsMultilibs(Flags, NonExistent, Result))
return true;
// Fall back to the regular toolchain-tree structure.
Multilib Default;
Result.Multilibs.push_back(Default);
Result.Multilibs.FilterOut(NonExistent);
if (Result.Multilibs.select(Flags, Result.SelectedMultilib)) {
Result.BiarchSibling = Multilib();
return true;
}
return false;
}
static void findAndroidArmMultilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
DetectedMultilibs &Result) {
// Find multilibs with subdirectories like armv7-a, thumb, armv7-a/thumb.
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
Multilib ArmV7Multilib = makeMultilib("/armv7-a")
.flag("+march=armv7-a")
.flag("-mthumb");
Multilib ThumbMultilib = makeMultilib("/thumb")
.flag("-march=armv7-a")
.flag("+mthumb");
Multilib ArmV7ThumbMultilib = makeMultilib("/armv7-a/thumb")
.flag("+march=armv7-a")
.flag("+mthumb");
Multilib DefaultMultilib = makeMultilib("")
.flag("-march=armv7-a")
.flag("-mthumb");
MultilibSet AndroidArmMultilibs =
MultilibSet()
.Either(ThumbMultilib, ArmV7Multilib,
ArmV7ThumbMultilib, DefaultMultilib)
.FilterOut(NonExistent);
Multilib::flags_list Flags;
llvm::StringRef Arch = Args.getLastArgValue(options::OPT_march_EQ);
bool IsArmArch = TargetTriple.getArch() == llvm::Triple::arm;
bool IsThumbArch = TargetTriple.getArch() == llvm::Triple::thumb;
bool IsV7SubArch = TargetTriple.getSubArch() == llvm::Triple::ARMSubArch_v7;
bool IsThumbMode = IsThumbArch ||
Args.hasFlag(options::OPT_mthumb, options::OPT_mno_thumb, false) ||
(IsArmArch && llvm::ARM::parseArchISA(Arch) == llvm::ARM::ISAKind::THUMB);
bool IsArmV7Mode = (IsArmArch || IsThumbArch) &&
(llvm::ARM::parseArchVersion(Arch) == 7 ||
(IsArmArch && Arch == "" && IsV7SubArch));
addMultilibFlag(IsArmV7Mode, "march=armv7-a", Flags);
addMultilibFlag(IsThumbMode, "mthumb", Flags);
if (AndroidArmMultilibs.select(Flags, Result.SelectedMultilib))
Result.Multilibs = AndroidArmMultilibs;
}
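// MSP430 multilibs currently differ only in exception support
// (/430 vs /430/exceptions).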
static bool findMSP430Multilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
DetectedMultilibs &Result) {
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
Multilib WithoutExceptions = makeMultilib("/430").flag("-exceptions");
Multilib WithExceptions = makeMultilib("/430/exceptions").flag("+exceptions");
// FIXME: When clang starts to support the msp430x ISA, additional logic
// to select between multilibs must be implemented.
// Multilib MSP430xMultilib = makeMultilib("/large");
Result.Multilibs.push_back(WithoutExceptions);
Result.Multilibs.push_back(WithExceptions);
Result.Multilibs.FilterOut(NonExistent);
Multilib::flags_list Flags;
addMultilibFlag(Args.hasFlag(options::OPT_fexceptions,
options::OPT_fno_exceptions, false),
"exceptions", Flags);
if (Result.Multilibs.select(Flags, Result.SelectedMultilib))
return true;
return false;
}
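// Detect CSKY multilibs, keyed on the selected float ABI, the CPU
// architecture name, and big-endian mode.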
static void findCSKYMultilibs(const Driver &D, const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
DetectedMultilibs &Result) {
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
tools::csky::FloatABI TheFloatABI = tools::csky::getCSKYFloatABI(D, Args);
llvm::Optional<llvm::StringRef> Res = tools::csky::getCSKYArchName(D, Args, TargetTriple);
if (!Res)
return;
auto ARCHName = *Res;
Multilib::flags_list Flags;
addMultilibFlag(TheFloatABI == tools::csky::FloatABI::Hard, "hard-fp", Flags);
addMultilibFlag(TheFloatABI == tools::csky::FloatABI::SoftFP, "soft-fp",
Flags);
addMultilibFlag(TheFloatABI == tools::csky::FloatABI::Soft, "soft", Flags);
addMultilibFlag(ARCHName == "ck801", "march=ck801", Flags);
addMultilibFlag(ARCHName == "ck802", "march=ck802", Flags);
addMultilibFlag(ARCHName == "ck803", "march=ck803", Flags);
addMultilibFlag(ARCHName == "ck804", "march=ck804", Flags);
addMultilibFlag(ARCHName == "ck805", "march=ck805", Flags);
addMultilibFlag(ARCHName == "ck807", "march=ck807", Flags);
addMultilibFlag(ARCHName == "ck810", "march=ck810", Flags);
addMultilibFlag(ARCHName == "ck810v", "march=ck810v", Flags);
addMultilibFlag(ARCHName == "ck860", "march=ck860", Flags);
addMultilibFlag(ARCHName == "ck860v", "march=ck860v", Flags);
bool isBigEndian = false;
if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
options::OPT_mbig_endian))
isBigEndian = !A->getOption().matches(options::OPT_mlittle_endian);
addMultilibFlag(isBigEndian, "EB", Flags);
auto HardFloat = makeMultilib("/hard-fp").flag("+hard-fp");
auto SoftFpFloat = makeMultilib("/soft-fp").flag("+soft-fp");
auto SoftFloat = makeMultilib("").flag("+soft");
auto Arch801 = makeMultilib("/ck801").flag("+march=ck801");
auto Arch802 = makeMultilib("/ck802").flag("+march=ck802");
auto Arch803 = makeMultilib("/ck803").flag("+march=ck803");
// CK804 uses the same libraries as CK803.
auto Arch804 = makeMultilib("/ck803").flag("+march=ck804");
auto Arch805 = makeMultilib("/ck805").flag("+march=ck805");
auto Arch807 = makeMultilib("/ck807").flag("+march=ck807");
auto Arch810 = makeMultilib("").flag("+march=ck810");
auto Arch810v = makeMultilib("/ck810v").flag("+march=ck810v");
auto Arch860 = makeMultilib("/ck860").flag("+march=ck860");
auto Arch860v = makeMultilib("/ck860v").flag("+march=ck860v");
auto BigEndian = makeMultilib("/big").flag("+EB");
MultilibSet CSKYMultilibs =
MultilibSet()
.Maybe(BigEndian)
.Either({Arch801, Arch802, Arch803, Arch804, Arch805, Arch807,
Arch810, Arch810v, Arch860, Arch860v})
.Either(HardFloat, SoftFpFloat, SoftFloat)
.FilterOut(NonExistent);
if (CSKYMultilibs.select(Flags, Result.SelectedMultilib))
Result.Multilibs = CSKYMultilibs;
}
static void findRISCVBareMetalMultilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
DetectedMultilibs &Result) {
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
struct RiscvMultilib {
StringRef march;
StringRef mabi;
};
// Currently, only the set of multilibs that riscv-gnu-toolchain provides is supported.
// TODO: support MULTILIB_REUSE
constexpr RiscvMultilib RISCVMultilibSet[] = {
{"rv32i", "ilp32"}, {"rv32im", "ilp32"}, {"rv32iac", "ilp32"},
{"rv32imac", "ilp32"}, {"rv32imafc", "ilp32f"}, {"rv64imac", "lp64"},
{"rv64imafdc", "lp64d"}};
std::vector<Multilib> Ms;
for (auto Element : RISCVMultilibSet) {
// multilib path rule is ${march}/${mabi}
Ms.emplace_back(
makeMultilib((Twine(Element.march) + "/" + Twine(Element.mabi)).str())
.flag(Twine("+march=", Element.march).str())
.flag(Twine("+mabi=", Element.mabi).str()));
}
MultilibSet RISCVMultilibs =
MultilibSet()
.Either(ArrayRef<Multilib>(Ms))
.FilterOut(NonExistent)
.setFilePathsCallback([](const Multilib &M) {
return std::vector<std::string>(
{M.gccSuffix(),
"/../../../../riscv64-unknown-elf/lib" + M.gccSuffix(),
"/../../../../riscv32-unknown-elf/lib" + M.gccSuffix()});
});
Multilib::flags_list Flags;
llvm::StringSet<> Added_ABIs;
StringRef ABIName = tools::riscv::getRISCVABI(Args, TargetTriple);
StringRef MArch = tools::riscv::getRISCVArch(Args, TargetTriple);
for (auto Element : RISCVMultilibSet) {
addMultilibFlag(MArch == Element.march,
Twine("march=", Element.march).str().c_str(), Flags);
if (!Added_ABIs.count(Element.mabi)) {
Added_ABIs.insert(Element.mabi);
addMultilibFlag(ABIName == Element.mabi,
Twine("mabi=", Element.mabi).str().c_str(), Flags);
}
}
if (RISCVMultilibs.select(Flags, Result.SelectedMultilib))
Result.Multilibs = RISCVMultilibs;
}
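// Detect RISC-V Linux-style multilibs (lib32/lib64 per ABI). Bare-metal
// (unknown-OS) targets are handled by findRISCVBareMetalMultilibs above.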
static void findRISCVMultilibs(const Driver &D,
const llvm::Triple &TargetTriple, StringRef Path,
const ArgList &Args, DetectedMultilibs &Result) {
if (TargetTriple.getOS() == llvm::Triple::UnknownOS)
return findRISCVBareMetalMultilibs(D, TargetTriple, Path, Args, Result);
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
Multilib Ilp32 = makeMultilib("lib32/ilp32").flag("+m32").flag("+mabi=ilp32");
Multilib Ilp32f =
makeMultilib("lib32/ilp32f").flag("+m32").flag("+mabi=ilp32f");
Multilib Ilp32d =
makeMultilib("lib32/ilp32d").flag("+m32").flag("+mabi=ilp32d");
Multilib Lp64 = makeMultilib("lib64/lp64").flag("+m64").flag("+mabi=lp64");
Multilib Lp64f = makeMultilib("lib64/lp64f").flag("+m64").flag("+mabi=lp64f");
Multilib Lp64d = makeMultilib("lib64/lp64d").flag("+m64").flag("+mabi=lp64d");
MultilibSet RISCVMultilibs =
MultilibSet()
.Either({Ilp32, Ilp32f, Ilp32d, Lp64, Lp64f, Lp64d})
.FilterOut(NonExistent);
Multilib::flags_list Flags;
bool IsRV64 = TargetTriple.getArch() == llvm::Triple::riscv64;
StringRef ABIName = tools::riscv::getRISCVABI(Args, TargetTriple);
addMultilibFlag(!IsRV64, "m32", Flags);
addMultilibFlag(IsRV64, "m64", Flags);
addMultilibFlag(ABIName == "ilp32", "mabi=ilp32", Flags);
addMultilibFlag(ABIName == "ilp32f", "mabi=ilp32f", Flags);
addMultilibFlag(ABIName == "ilp32d", "mabi=ilp32d", Flags);
addMultilibFlag(ABIName == "lp64", "mabi=lp64", Flags);
addMultilibFlag(ABIName == "lp64f", "mabi=lp64f", Flags);
addMultilibFlag(ABIName == "lp64d", "mabi=lp64d", Flags);
if (RISCVMultilibs.select(Flags, Result.SelectedMultilib))
Result.Multilibs = RISCVMultilibs;
}
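// Detect the 32-bit/64-bit/x32 biarch layout of a GCC installation and
// select the variant matching the target, recording a biarch sibling when an
// alternate suffix is chosen.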
static bool findBiarchMultilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
bool NeedsBiarchSuffix,
DetectedMultilibs &Result) {
Multilib Default;
// Some versions of SUSE and Fedora on ppc64 put 32-bit libs
// in what would normally be GCCInstallPath and put the 64-bit
// libs in a subdirectory named 64. The simple logic we follow is that
// *if* there is a subdirectory of the right name with crtbegin.o in it,
// we use that. If not, and if not a biarch triple alias, we look for
// crtbegin.o without the subdirectory.
StringRef Suff64 = "/64";
// Solaris uses platform-specific suffixes instead of /64.
if (TargetTriple.getOS() == llvm::Triple::Solaris) {
switch (TargetTriple.getArch()) {
case llvm::Triple::x86:
case llvm::Triple::x86_64:
Suff64 = "/amd64";
break;
case llvm::Triple::sparc:
case llvm::Triple::sparcv9:
Suff64 = "/sparcv9";
break;
default:
break;
}
}
Multilib Alt64 = Multilib()
.gccSuffix(Suff64)
.includeSuffix(Suff64)
.flag("-m32")
.flag("+m64")
.flag("-mx32");
Multilib Alt32 = Multilib()
.gccSuffix("/32")
.includeSuffix("/32")
.flag("+m32")
.flag("-m64")
.flag("-mx32");
Multilib Altx32 = Multilib()
.gccSuffix("/x32")
.includeSuffix("/x32")
.flag("-m32")
.flag("-m64")
.flag("+mx32");
// GCC toolchain for IAMCU doesn't have crtbegin.o, so look for libgcc.a.
FilterNonExistent NonExistent(
Path, TargetTriple.isOSIAMCU() ? "/libgcc.a" : "/crtbegin.o", D.getVFS());
// Determine default multilib from: 32, 64, x32
// Also handle cases such as 64 on 32, 32 on 64, etc.
enum { UNKNOWN, WANT32, WANT64, WANTX32 } Want = UNKNOWN;
const bool IsX32 = TargetTriple.isX32();
if (TargetTriple.isArch32Bit() && !NonExistent(Alt32))
Want = WANT64;
else if (TargetTriple.isArch64Bit() && IsX32 && !NonExistent(Altx32))
Want = WANT64;
else if (TargetTriple.isArch64Bit() && !IsX32 && !NonExistent(Alt64))
Want = WANT32;
else {
if (TargetTriple.isArch32Bit())
Want = NeedsBiarchSuffix ? WANT64 : WANT32;
else if (IsX32)
Want = NeedsBiarchSuffix ? WANT64 : WANTX32;
else
Want = NeedsBiarchSuffix ? WANT32 : WANT64;
}
if (Want == WANT32)
Default.flag("+m32").flag("-m64").flag("-mx32");
else if (Want == WANT64)
Default.flag("-m32").flag("+m64").flag("-mx32");
else if (Want == WANTX32)
Default.flag("-m32").flag("-m64").flag("+mx32");
else
return false;
Result.Multilibs.push_back(Default);
Result.Multilibs.push_back(Alt64);
Result.Multilibs.push_back(Alt32);
Result.Multilibs.push_back(Altx32);
Result.Multilibs.FilterOut(NonExistent);
Multilib::flags_list Flags;
addMultilibFlag(TargetTriple.isArch64Bit() && !IsX32, "m64", Flags);
addMultilibFlag(TargetTriple.isArch32Bit(), "m32", Flags);
addMultilibFlag(TargetTriple.isArch64Bit() && IsX32, "mx32", Flags);
if (!Result.Multilibs.select(Flags, Result.SelectedMultilib))
return false;
if (Result.SelectedMultilib == Alt64 || Result.SelectedMultilib == Alt32 ||
Result.SelectedMultilib == Altx32)
Result.BiarchSibling = Default;
return true;
}
/// Generic_GCC - A tool chain using the 'gcc' command to perform
/// all subcommands; this relies on gcc translating the majority of
/// command line options.
/// Less-than for GCCVersion, implementing a Strict Weak Ordering.
bool Generic_GCC::GCCVersion::isOlderThan(int RHSMajor, int RHSMinor,
int RHSPatch,
StringRef RHSPatchSuffix) const {
if (Major != RHSMajor)
return Major < RHSMajor;
if (Minor != RHSMinor)
return Minor < RHSMinor;
if (Patch != RHSPatch) {
// Note that versions without a specified patch sort higher than those with
// a patch.
if (RHSPatch == -1)
return true;
if (Patch == -1)
return false;
// Otherwise just sort on the patch itself.
return Patch < RHSPatch;
}
if (PatchSuffix != RHSPatchSuffix) {
// Sort empty suffixes higher.
if (RHSPatchSuffix.empty())
return true;
if (PatchSuffix.empty())
return false;
// Provide a lexicographic sort to make this a total ordering.
return PatchSuffix < RHSPatchSuffix;
}
// The versions are equal.
return false;
}
/// Parse a GCCVersion object out of a string of text.
///
/// This is the primary means of forming GCCVersion objects.
/*static*/
Generic_GCC::GCCVersion Generic_GCC::GCCVersion::Parse(StringRef VersionText) {
const GCCVersion BadVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
std::pair<StringRef, StringRef> First = VersionText.split('.');
std::pair<StringRef, StringRef> Second = First.second.split('.');
GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
if (First.first.getAsInteger(10, GoodVersion.Major) || GoodVersion.Major < 0)
return BadVersion;
GoodVersion.MajorStr = First.first.str();
if (First.second.empty())
return GoodVersion;
StringRef MinorStr = Second.first;
if (Second.second.empty()) {
if (size_t EndNumber = MinorStr.find_first_not_of("0123456789")) {
GoodVersion.PatchSuffix = std::string(MinorStr.substr(EndNumber));
MinorStr = MinorStr.slice(0, EndNumber);
}
}
if (MinorStr.getAsInteger(10, GoodVersion.Minor) || GoodVersion.Minor < 0)
return BadVersion;
GoodVersion.MinorStr = MinorStr.str();
// First look for a number prefix and parse that if present. Otherwise just
// stash the entire patch string in the suffix, and leave the number
// unspecified. This covers version strings such as:
// 5 (handled above)
// 4.4
// 4.4-patched
// 4.4.0
// 4.4.x
// 4.4.2-rc4
// 4.4.x-patched
// And retains any patch number it finds.
StringRef PatchText = Second.second;
if (!PatchText.empty()) {
if (size_t EndNumber = PatchText.find_first_not_of("0123456789")) {
// Try to parse the number and any suffix.
if (PatchText.slice(0, EndNumber).getAsInteger(10, GoodVersion.Patch) ||
GoodVersion.Patch < 0)
return BadVersion;
GoodVersion.PatchSuffix = std::string(PatchText.substr(EndNumber));
}
}
return GoodVersion;
}
static llvm::StringRef getGCCToolchainDir(const ArgList &Args,
llvm::StringRef SysRoot) {
const Arg *A = Args.getLastArg(clang::driver::options::OPT_gcc_toolchain);
if (A)
return A->getValue();
// If we have a SysRoot, ignore GCC_INSTALL_PREFIX.
// GCC_INSTALL_PREFIX specifies the gcc installation for the default
// sysroot and is likely not valid with a different sysroot.
if (!SysRoot.empty())
return "";
return GCC_INSTALL_PREFIX;
}
/// Initialize a GCCInstallationDetector from the driver.
///
/// This performs all of the autodetection and sets up the various paths.
/// Once constructed, a GCCInstallationDetector is essentially immutable.
///
/// FIXME: We shouldn't need an explicit TargetTriple parameter here, and
/// should instead pull the target out of the driver. This is currently
/// necessary because the driver doesn't store the final version of the target
/// triple.
void Generic_GCC::GCCInstallationDetector::init(
const llvm::Triple &TargetTriple, const ArgList &Args,
ArrayRef<std::string> ExtraTripleAliases) {
llvm::Triple BiarchVariantTriple = TargetTriple.isArch32Bit()
? TargetTriple.get64BitArchVariant()
: TargetTriple.get32BitArchVariant();
// The library directories which may contain GCC installations.
SmallVector<StringRef, 4> CandidateLibDirs, CandidateBiarchLibDirs;
// The compatible GCC triples for this particular architecture.
SmallVector<StringRef, 16> CandidateTripleAliases;
SmallVector<StringRef, 16> CandidateBiarchTripleAliases;
CollectLibDirsAndTriples(TargetTriple, BiarchVariantTriple, CandidateLibDirs,
CandidateTripleAliases, CandidateBiarchLibDirs,
CandidateBiarchTripleAliases);
// Compute the set of prefixes for our search.
SmallVector<std::string, 8> Prefixes;
StringRef GCCToolchainDir = getGCCToolchainDir(Args, D.SysRoot);
if (GCCToolchainDir != "") {
if (GCCToolchainDir.back() == '/')
GCCToolchainDir = GCCToolchainDir.drop_back(); // remove the /
Prefixes.push_back(std::string(GCCToolchainDir));
} else {
// If we have a SysRoot, try that first.
if (!D.SysRoot.empty()) {
Prefixes.push_back(D.SysRoot);
AddDefaultGCCPrefixes(TargetTriple, Prefixes, D.SysRoot);
}
// Then look for gcc installed alongside clang.
Prefixes.push_back(D.InstalledDir + "/..");
// Next, look for prefix(es) that correspond to distribution-supplied gcc
// installations.
if (D.SysRoot.empty()) {
// Typically /usr.
AddDefaultGCCPrefixes(TargetTriple, Prefixes, D.SysRoot);
}
// Try to respect gcc-config on Gentoo if --gcc-toolchain is not provided.
// This avoids accidentally enforcing the system GCC version when using a
// custom toolchain.
SmallVector<StringRef, 16> GentooTestTriples;
// Try to match the exact target triple first.
// e.g. crossdev -S x86_64-gentoo-linux-gnu will install gcc libs for
// x86_64-gentoo-linux-gnu. But "clang -target x86_64-gentoo-linux-gnu"
// may pick the libraries for x86_64-pc-linux-gnu even when the exactly
// matching triple x86_64-gentoo-linux-gnu is present.
GentooTestTriples.push_back(TargetTriple.str());
// Check rest of triples.
GentooTestTriples.append(ExtraTripleAliases.begin(),
ExtraTripleAliases.end());
GentooTestTriples.append(CandidateTripleAliases.begin(),
CandidateTripleAliases.end());
if (ScanGentooConfigs(TargetTriple, Args, GentooTestTriples,
CandidateBiarchTripleAliases))
return;
}
// Loop over the various components which exist and select the best GCC
// installation available. GCC installs are ranked by version number.
const GCCVersion VersionZero = GCCVersion::Parse("0.0.0");
Version = VersionZero;
for (const std::string &Prefix : Prefixes) {
auto &VFS = D.getVFS();
if (!VFS.exists(Prefix))
continue;
for (StringRef Suffix : CandidateLibDirs) {
const std::string LibDir = concat(Prefix, Suffix);
if (!VFS.exists(LibDir))
continue;
// Maybe filter out <libdir>/gcc and <libdir>/gcc-cross.
bool GCCDirExists = VFS.exists(LibDir + "/gcc");
bool GCCCrossDirExists = VFS.exists(LibDir + "/gcc-cross");
// Try to match the exact target triple first.
ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, TargetTriple.str(),
false, GCCDirExists, GCCCrossDirExists);
// Try rest of possible triples.
for (StringRef Candidate : ExtraTripleAliases) // Try these first.
ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, Candidate, false,
GCCDirExists, GCCCrossDirExists);
for (StringRef Candidate : CandidateTripleAliases)
ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, Candidate, false,
GCCDirExists, GCCCrossDirExists);
}
for (StringRef Suffix : CandidateBiarchLibDirs) {
const std::string LibDir = Prefix + Suffix.str();
if (!VFS.exists(LibDir))
continue;
bool GCCDirExists = VFS.exists(LibDir + "/gcc");
bool GCCCrossDirExists = VFS.exists(LibDir + "/gcc-cross");
for (StringRef Candidate : CandidateBiarchTripleAliases)
ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, Candidate, true,
GCCDirExists, GCCCrossDirExists);
}
// Skip other prefixes once a GCC installation is found.
if (Version > VersionZero)
break;
}
}
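// Illustrative sketch (not part of the detection logic above; paths are
// hypothetical): on an x86_64 glibc host with no --sysroot and no
// --gcc-toolchain, the loops in init() end up probing directories of the form
//   <prefix><libdir>/gcc[-cross]/<triple>/<version>
// for example
//   /usr/lib64/gcc/x86_64-linux-gnu/12
//   /usr/lib/gcc/x86_64-redhat-linux/11
// and keep the candidate whose parsed GCCVersion compares highest within the
// first prefix that contains any valid installation.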
void Generic_GCC::GCCInstallationDetector::print(raw_ostream &OS) const {
for (const auto &InstallPath : CandidateGCCInstallPaths)
OS << "Found candidate GCC installation: " << InstallPath << "\n";
if (!GCCInstallPath.empty())
OS << "Selected GCC installation: " << GCCInstallPath << "\n";
for (const auto &Multilib : Multilibs)
OS << "Candidate multilib: " << Multilib << "\n";
if (Multilibs.size() != 0 || !SelectedMultilib.isDefault())
OS << "Selected multilib: " << SelectedMultilib << "\n";
}
bool Generic_GCC::GCCInstallationDetector::getBiarchSibling(Multilib &M) const {
if (BiarchSibling) {
M = BiarchSibling.value();
return true;
}
return false;
}
void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
const llvm::Triple &TargetTriple, SmallVectorImpl<std::string> &Prefixes,
StringRef SysRoot) {
if (TargetTriple.getOS() == llvm::Triple::Solaris) {
// Solaris is a special case.
// The GCC installation is under
// /usr/gcc/<major>.<minor>/lib/gcc/<triple>/<major>.<minor>.<patch>/
// so we need to find those /usr/gcc/*/lib/gcc libdirs and go with
// /usr/gcc/<version> as a prefix.
std::string PrefixDir = concat(SysRoot, "/usr/gcc");
std::error_code EC;
for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(PrefixDir, EC),
LE;
!EC && LI != LE; LI = LI.increment(EC)) {
StringRef VersionText = llvm::sys::path::filename(LI->path());
GCCVersion CandidateVersion = GCCVersion::Parse(VersionText);
// Filter out obviously bad entries.
if (CandidateVersion.Major == -1 || CandidateVersion.isOlderThan(4, 1, 1))
continue;
std::string CandidatePrefix = PrefixDir + "/" + VersionText.str();
std::string CandidateLibPath = CandidatePrefix + "/lib/gcc";
if (!D.getVFS().exists(CandidateLibPath))
continue;
Prefixes.push_back(CandidatePrefix);
}
return;
}
// For Linux, if --sysroot is not specified, look for RHEL/CentOS devtoolsets
// and gcc-toolsets.
if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux &&
D.getVFS().exists("/opt/rh")) {
- // Find the directory in /opt/rh/ starting with gcc-toolset-* or
- // devtoolset-* with the highest version number and add that
- // one to our prefixes.
- std::string ChosenToolsetDir;
- unsigned ChosenToolsetVersion = 0;
- std::error_code EC;
- for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin("/opt/rh", EC),
- LE;
- !EC && LI != LE; LI = LI.increment(EC)) {
- StringRef ToolsetDir = llvm::sys::path::filename(LI->path());
- unsigned ToolsetVersion;
- if ((!ToolsetDir.startswith("gcc-toolset-") &&
- !ToolsetDir.startswith("devtoolset-")) ||
- ToolsetDir.substr(ToolsetDir.rfind('-') + 1)
- .getAsInteger(10, ToolsetVersion))
- continue;
-
- if (ToolsetVersion > ChosenToolsetVersion) {
- ChosenToolsetVersion = ToolsetVersion;
- ChosenToolsetDir = "/opt/rh/" + ToolsetDir.str();
- }
- }
-
- if (ChosenToolsetVersion > 0)
- Prefixes.push_back(ChosenToolsetDir + "/root/usr");
+ // TODO: We may want to remove this, since the functionality
+ // can be achieved using config files.
+ Prefixes.push_back("/opt/rh/gcc-toolset-12/root/usr");
+ Prefixes.push_back("/opt/rh/gcc-toolset-11/root/usr");
+ Prefixes.push_back("/opt/rh/gcc-toolset-10/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-12/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-11/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-10/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-9/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-8/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-7/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-6/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-4/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-3/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-2/root/usr");
}
// Fall back to /usr which is used by most non-Solaris systems.
Prefixes.push_back(concat(SysRoot, "/usr"));
}
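// Illustrative note (assumed host layout, not taken from this patch): for a
// plain x86_64 Linux invocation with an empty sysroot on a host that has
// /opt/rh, the function above appends the hard-coded
// /opt/rh/gcc-toolset-*/root/usr and /opt/rh/devtoolset-*/root/usr prefixes
// (newest first) and finally <sysroot>/usr, so init() will prefer a toolset
// GCC when one is installed, because it stops at the first prefix that yields
// a valid installation.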
/*static*/ void Generic_GCC::GCCInstallationDetector::CollectLibDirsAndTriples(
const llvm::Triple &TargetTriple, const llvm::Triple &BiarchTriple,
SmallVectorImpl<StringRef> &LibDirs,
SmallVectorImpl<StringRef> &TripleAliases,
SmallVectorImpl<StringRef> &BiarchLibDirs,
SmallVectorImpl<StringRef> &BiarchTripleAliases) {
// Declare a bunch of static data sets that we'll select between below. These
// are specifically designed to always refer to string literals to avoid any
// lifetime or initialization issues.
//
// The *Triples variables hard code some triples so that, for example,
// --target=aarch64 (incomplete triple) can detect lib/aarch64-linux-gnu.
// They are not needed when the user has correct LLVM_DEFAULT_TARGET_TRIPLE
// and always uses the full --target (e.g. --target=aarch64-linux-gnu). The
// lists should shrink over time. Please don't add more elements to *Triples.
static const char *const AArch64LibDirs[] = {"/lib64", "/lib"};
static const char *const AArch64Triples[] = {
"aarch64-none-linux-gnu", "aarch64-linux-gnu", "aarch64-redhat-linux",
"aarch64-suse-linux"};
static const char *const AArch64beLibDirs[] = {"/lib"};
static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu",
"aarch64_be-linux-gnu"};
static const char *const ARMLibDirs[] = {"/lib"};
static const char *const ARMTriples[] = {"arm-linux-gnueabi"};
static const char *const ARMHFTriples[] = {"arm-linux-gnueabihf",
"armv7hl-redhat-linux-gnueabi",
"armv6hl-suse-linux-gnueabi",
"armv7hl-suse-linux-gnueabi"};
static const char *const ARMebLibDirs[] = {"/lib"};
static const char *const ARMebTriples[] = {"armeb-linux-gnueabi"};
static const char *const ARMebHFTriples[] = {
"armeb-linux-gnueabihf", "armebv7hl-redhat-linux-gnueabi"};
static const char *const AVRLibDirs[] = {"/lib"};
static const char *const AVRTriples[] = {"avr"};
static const char *const CSKYLibDirs[] = {"/lib"};
static const char *const CSKYTriples[] = {
"csky-linux-gnuabiv2", "csky-linux-uclibcabiv2", "csky-elf-noneabiv2"};
static const char *const X86_64LibDirs[] = {"/lib64", "/lib"};
static const char *const X86_64Triples[] = {
"x86_64-linux-gnu", "x86_64-unknown-linux-gnu",
"x86_64-pc-linux-gnu", "x86_64-redhat-linux6E",
"x86_64-redhat-linux", "x86_64-suse-linux",
"x86_64-manbo-linux-gnu", "x86_64-linux-gnu",
"x86_64-slackware-linux", "x86_64-unknown-linux",
"x86_64-amazon-linux"};
static const char *const X32Triples[] = {"x86_64-linux-gnux32",
"x86_64-pc-linux-gnux32"};
static const char *const X32LibDirs[] = {"/libx32", "/lib"};
static const char *const X86LibDirs[] = {"/lib32", "/lib"};
static const char *const X86Triples[] = {
"i586-linux-gnu", "i686-linux-gnu", "i686-pc-linux-gnu",
"i386-redhat-linux6E", "i686-redhat-linux", "i386-redhat-linux",
"i586-suse-linux", "i686-montavista-linux", "i686-gnu",
};
static const char *const M68kLibDirs[] = {"/lib"};
static const char *const M68kTriples[] = {
"m68k-linux-gnu", "m68k-unknown-linux-gnu", "m68k-suse-linux"};
static const char *const MIPSLibDirs[] = {"/libo32", "/lib"};
static const char *const MIPSTriples[] = {
"mips-linux-gnu", "mips-mti-linux", "mips-mti-linux-gnu",
"mips-img-linux-gnu", "mipsisa32r6-linux-gnu"};
static const char *const MIPSELLibDirs[] = {"/libo32", "/lib"};
static const char *const MIPSELTriples[] = {
"mipsel-linux-gnu", "mips-img-linux-gnu", "mipsisa32r6el-linux-gnu"};
static const char *const MIPS64LibDirs[] = {"/lib64", "/lib"};
static const char *const MIPS64Triples[] = {
"mips64-linux-gnu", "mips-mti-linux-gnu",
"mips-img-linux-gnu", "mips64-linux-gnuabi64",
"mipsisa64r6-linux-gnu", "mipsisa64r6-linux-gnuabi64"};
static const char *const MIPS64ELLibDirs[] = {"/lib64", "/lib"};
static const char *const MIPS64ELTriples[] = {
"mips64el-linux-gnu", "mips-mti-linux-gnu",
"mips-img-linux-gnu", "mips64el-linux-gnuabi64",
"mipsisa64r6el-linux-gnu", "mipsisa64r6el-linux-gnuabi64"};
static const char *const MIPSN32LibDirs[] = {"/lib32"};
static const char *const MIPSN32Triples[] = {"mips64-linux-gnuabin32",
"mipsisa64r6-linux-gnuabin32"};
static const char *const MIPSN32ELLibDirs[] = {"/lib32"};
static const char *const MIPSN32ELTriples[] = {
"mips64el-linux-gnuabin32", "mipsisa64r6el-linux-gnuabin32"};
static const char *const MSP430LibDirs[] = {"/lib"};
static const char *const MSP430Triples[] = {"msp430-elf"};
static const char *const PPCLibDirs[] = {"/lib32", "/lib"};
static const char *const PPCTriples[] = {
"powerpc-linux-gnu", "powerpc-unknown-linux-gnu", "powerpc-linux-gnuspe",
// On 32-bit PowerPC systems running SUSE Linux, gcc is configured as a
// 64-bit compiler which defaults to "-m32", hence "powerpc64-suse-linux".
"powerpc64-suse-linux", "powerpc-montavista-linuxspe"};
static const char *const PPCLELibDirs[] = {"/lib32", "/lib"};
static const char *const PPCLETriples[] = {"powerpcle-linux-gnu",
"powerpcle-unknown-linux-gnu",
"powerpcle-linux-musl"};
static const char *const PPC64LibDirs[] = {"/lib64", "/lib"};
static const char *const PPC64Triples[] = {
"powerpc64-linux-gnu", "powerpc64-unknown-linux-gnu",
"powerpc64-suse-linux", "ppc64-redhat-linux"};
static const char *const PPC64LELibDirs[] = {"/lib64", "/lib"};
static const char *const PPC64LETriples[] = {
"powerpc64le-linux-gnu", "powerpc64le-unknown-linux-gnu",
"powerpc64le-none-linux-gnu", "powerpc64le-suse-linux",
"ppc64le-redhat-linux"};
static const char *const RISCV32LibDirs[] = {"/lib32", "/lib"};
static const char *const RISCV32Triples[] = {"riscv32-unknown-linux-gnu",
"riscv32-linux-gnu",
"riscv32-unknown-elf"};
static const char *const RISCV64LibDirs[] = {"/lib64", "/lib"};
static const char *const RISCV64Triples[] = {"riscv64-unknown-linux-gnu",
"riscv64-linux-gnu",
"riscv64-unknown-elf"};
static const char *const SPARCv8LibDirs[] = {"/lib32", "/lib"};
static const char *const SPARCv8Triples[] = {"sparc-linux-gnu",
"sparcv8-linux-gnu"};
static const char *const SPARCv9LibDirs[] = {"/lib64", "/lib"};
static const char *const SPARCv9Triples[] = {"sparc64-linux-gnu",
"sparcv9-linux-gnu"};
static const char *const SystemZLibDirs[] = {"/lib64", "/lib"};
static const char *const SystemZTriples[] = {
"s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu",
"s390x-suse-linux", "s390x-redhat-linux"};
using std::begin;
using std::end;
if (TargetTriple.getOS() == llvm::Triple::Solaris) {
static const char *const SolarisLibDirs[] = {"/lib"};
static const char *const SolarisSparcV8Triples[] = {
"sparc-sun-solaris2.11", "sparc-sun-solaris2.12"};
static const char *const SolarisSparcV9Triples[] = {
"sparcv9-sun-solaris2.11", "sparcv9-sun-solaris2.12"};
static const char *const SolarisX86Triples[] = {"i386-pc-solaris2.11",
"i386-pc-solaris2.12"};
static const char *const SolarisX86_64Triples[] = {"x86_64-pc-solaris2.11",
"x86_64-pc-solaris2.12"};
LibDirs.append(begin(SolarisLibDirs), end(SolarisLibDirs));
BiarchLibDirs.append(begin(SolarisLibDirs), end(SolarisLibDirs));
switch (TargetTriple.getArch()) {
case llvm::Triple::x86:
TripleAliases.append(begin(SolarisX86Triples), end(SolarisX86Triples));
BiarchTripleAliases.append(begin(SolarisX86_64Triples),
end(SolarisX86_64Triples));
break;
case llvm::Triple::x86_64:
TripleAliases.append(begin(SolarisX86_64Triples),
end(SolarisX86_64Triples));
BiarchTripleAliases.append(begin(SolarisX86Triples),
end(SolarisX86Triples));
break;
case llvm::Triple::sparc:
TripleAliases.append(begin(SolarisSparcV8Triples),
end(SolarisSparcV8Triples));
BiarchTripleAliases.append(begin(SolarisSparcV9Triples),
end(SolarisSparcV9Triples));
break;
case llvm::Triple::sparcv9:
TripleAliases.append(begin(SolarisSparcV9Triples),
end(SolarisSparcV9Triples));
BiarchTripleAliases.append(begin(SolarisSparcV8Triples),
end(SolarisSparcV8Triples));
break;
default:
break;
}
return;
}
// Android targets should not use GNU/Linux tools or libraries.
if (TargetTriple.isAndroid()) {
static const char *const AArch64AndroidTriples[] = {
"aarch64-linux-android"};
static const char *const ARMAndroidTriples[] = {"arm-linux-androideabi"};
static const char *const MIPSELAndroidTriples[] = {"mipsel-linux-android"};
static const char *const MIPS64ELAndroidTriples[] = {
"mips64el-linux-android"};
static const char *const X86AndroidTriples[] = {"i686-linux-android"};
static const char *const X86_64AndroidTriples[] = {"x86_64-linux-android"};
switch (TargetTriple.getArch()) {
case llvm::Triple::aarch64:
LibDirs.append(begin(AArch64LibDirs), end(AArch64LibDirs));
TripleAliases.append(begin(AArch64AndroidTriples),
end(AArch64AndroidTriples));
break;
case llvm::Triple::arm:
case llvm::Triple::thumb:
LibDirs.append(begin(ARMLibDirs), end(ARMLibDirs));
TripleAliases.append(begin(ARMAndroidTriples), end(ARMAndroidTriples));
break;
case llvm::Triple::mipsel:
LibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs));
TripleAliases.append(begin(MIPSELAndroidTriples),
end(MIPSELAndroidTriples));
BiarchLibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs));
BiarchTripleAliases.append(begin(MIPS64ELAndroidTriples),
end(MIPS64ELAndroidTriples));
break;
case llvm::Triple::mips64el:
LibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs));
TripleAliases.append(begin(MIPS64ELAndroidTriples),
end(MIPS64ELAndroidTriples));
BiarchLibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs));
BiarchTripleAliases.append(begin(MIPSELAndroidTriples),
end(MIPSELAndroidTriples));
break;
case llvm::Triple::x86_64:
LibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
TripleAliases.append(begin(X86_64AndroidTriples),
end(X86_64AndroidTriples));
BiarchLibDirs.append(begin(X86LibDirs), end(X86LibDirs));
BiarchTripleAliases.append(begin(X86AndroidTriples),
end(X86AndroidTriples));
break;
case llvm::Triple::x86:
LibDirs.append(begin(X86LibDirs), end(X86LibDirs));
TripleAliases.append(begin(X86AndroidTriples), end(X86AndroidTriples));
BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
BiarchTripleAliases.append(begin(X86_64AndroidTriples),
end(X86_64AndroidTriples));
break;
default:
break;
}
return;
}
switch (TargetTriple.getArch()) {
case llvm::Triple::aarch64:
LibDirs.append(begin(AArch64LibDirs), end(AArch64LibDirs));
TripleAliases.append(begin(AArch64Triples), end(AArch64Triples));
BiarchLibDirs.append(begin(AArch64LibDirs), end(AArch64LibDirs));
BiarchTripleAliases.append(begin(AArch64Triples), end(AArch64Triples));
break;
case llvm::Triple::aarch64_be:
LibDirs.append(begin(AArch64beLibDirs), end(AArch64beLibDirs));
TripleAliases.append(begin(AArch64beTriples), end(AArch64beTriples));
BiarchLibDirs.append(begin(AArch64beLibDirs), end(AArch64beLibDirs));
BiarchTripleAliases.append(begin(AArch64beTriples), end(AArch64beTriples));
break;
case llvm::Triple::arm:
case llvm::Triple::thumb:
LibDirs.append(begin(ARMLibDirs), end(ARMLibDirs));
if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) {
TripleAliases.append(begin(ARMHFTriples), end(ARMHFTriples));
} else {
TripleAliases.append(begin(ARMTriples), end(ARMTriples));
}
break;
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
LibDirs.append(begin(ARMebLibDirs), end(ARMebLibDirs));
if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) {
TripleAliases.append(begin(ARMebHFTriples), end(ARMebHFTriples));
} else {
TripleAliases.append(begin(ARMebTriples), end(ARMebTriples));
}
break;
case llvm::Triple::avr:
LibDirs.append(begin(AVRLibDirs), end(AVRLibDirs));
TripleAliases.append(begin(AVRTriples), end(AVRTriples));
break;
case llvm::Triple::csky:
LibDirs.append(begin(CSKYLibDirs), end(CSKYLibDirs));
TripleAliases.append(begin(CSKYTriples), end(CSKYTriples));
break;
case llvm::Triple::x86_64:
if (TargetTriple.isX32()) {
LibDirs.append(begin(X32LibDirs), end(X32LibDirs));
TripleAliases.append(begin(X32Triples), end(X32Triples));
BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
BiarchTripleAliases.append(begin(X86_64Triples), end(X86_64Triples));
} else {
LibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
TripleAliases.append(begin(X86_64Triples), end(X86_64Triples));
BiarchLibDirs.append(begin(X32LibDirs), end(X32LibDirs));
BiarchTripleAliases.append(begin(X32Triples), end(X32Triples));
}
BiarchLibDirs.append(begin(X86LibDirs), end(X86LibDirs));
BiarchTripleAliases.append(begin(X86Triples), end(X86Triples));
break;
case llvm::Triple::x86:
LibDirs.append(begin(X86LibDirs), end(X86LibDirs));
// MCU toolchain is 32 bit only and its triple alias is TargetTriple
// itself, which will be appended below.
if (!TargetTriple.isOSIAMCU()) {
TripleAliases.append(begin(X86Triples), end(X86Triples));
BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
BiarchTripleAliases.append(begin(X86_64Triples), end(X86_64Triples));
BiarchLibDirs.append(begin(X32LibDirs), end(X32LibDirs));
BiarchTripleAliases.append(begin(X32Triples), end(X32Triples));
}
break;
case llvm::Triple::m68k:
LibDirs.append(begin(M68kLibDirs), end(M68kLibDirs));
TripleAliases.append(begin(M68kTriples), end(M68kTriples));
break;
case llvm::Triple::mips:
LibDirs.append(begin(MIPSLibDirs), end(MIPSLibDirs));
TripleAliases.append(begin(MIPSTriples), end(MIPSTriples));
BiarchLibDirs.append(begin(MIPS64LibDirs), end(MIPS64LibDirs));
BiarchTripleAliases.append(begin(MIPS64Triples), end(MIPS64Triples));
BiarchLibDirs.append(begin(MIPSN32LibDirs), end(MIPSN32LibDirs));
BiarchTripleAliases.append(begin(MIPSN32Triples), end(MIPSN32Triples));
break;
case llvm::Triple::mipsel:
LibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs));
TripleAliases.append(begin(MIPSELTriples), end(MIPSELTriples));
TripleAliases.append(begin(MIPSTriples), end(MIPSTriples));
BiarchLibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs));
BiarchTripleAliases.append(begin(MIPS64ELTriples), end(MIPS64ELTriples));
BiarchLibDirs.append(begin(MIPSN32ELLibDirs), end(MIPSN32ELLibDirs));
BiarchTripleAliases.append(begin(MIPSN32ELTriples), end(MIPSN32ELTriples));
break;
case llvm::Triple::mips64:
LibDirs.append(begin(MIPS64LibDirs), end(MIPS64LibDirs));
TripleAliases.append(begin(MIPS64Triples), end(MIPS64Triples));
BiarchLibDirs.append(begin(MIPSLibDirs), end(MIPSLibDirs));
BiarchTripleAliases.append(begin(MIPSTriples), end(MIPSTriples));
BiarchLibDirs.append(begin(MIPSN32LibDirs), end(MIPSN32LibDirs));
BiarchTripleAliases.append(begin(MIPSN32Triples), end(MIPSN32Triples));
break;
case llvm::Triple::mips64el:
LibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs));
TripleAliases.append(begin(MIPS64ELTriples), end(MIPS64ELTriples));
BiarchLibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs));
BiarchTripleAliases.append(begin(MIPSELTriples), end(MIPSELTriples));
BiarchLibDirs.append(begin(MIPSN32ELLibDirs), end(MIPSN32ELLibDirs));
BiarchTripleAliases.append(begin(MIPSN32ELTriples), end(MIPSN32ELTriples));
BiarchTripleAliases.append(begin(MIPSTriples), end(MIPSTriples));
break;
case llvm::Triple::msp430:
LibDirs.append(begin(MSP430LibDirs), end(MSP430LibDirs));
TripleAliases.append(begin(MSP430Triples), end(MSP430Triples));
break;
case llvm::Triple::ppc:
LibDirs.append(begin(PPCLibDirs), end(PPCLibDirs));
TripleAliases.append(begin(PPCTriples), end(PPCTriples));
BiarchLibDirs.append(begin(PPC64LibDirs), end(PPC64LibDirs));
BiarchTripleAliases.append(begin(PPC64Triples), end(PPC64Triples));
break;
case llvm::Triple::ppcle:
LibDirs.append(begin(PPCLELibDirs), end(PPCLELibDirs));
TripleAliases.append(begin(PPCLETriples), end(PPCLETriples));
BiarchLibDirs.append(begin(PPC64LELibDirs), end(PPC64LELibDirs));
BiarchTripleAliases.append(begin(PPC64LETriples), end(PPC64LETriples));
break;
case llvm::Triple::ppc64:
LibDirs.append(begin(PPC64LibDirs), end(PPC64LibDirs));
TripleAliases.append(begin(PPC64Triples), end(PPC64Triples));
BiarchLibDirs.append(begin(PPCLibDirs), end(PPCLibDirs));
BiarchTripleAliases.append(begin(PPCTriples), end(PPCTriples));
break;
case llvm::Triple::ppc64le:
LibDirs.append(begin(PPC64LELibDirs), end(PPC64LELibDirs));
TripleAliases.append(begin(PPC64LETriples), end(PPC64LETriples));
BiarchLibDirs.append(begin(PPCLELibDirs), end(PPCLELibDirs));
BiarchTripleAliases.append(begin(PPCLETriples), end(PPCLETriples));
break;
case llvm::Triple::riscv32:
LibDirs.append(begin(RISCV32LibDirs), end(RISCV32LibDirs));
TripleAliases.append(begin(RISCV32Triples), end(RISCV32Triples));
BiarchLibDirs.append(begin(RISCV64LibDirs), end(RISCV64LibDirs));
BiarchTripleAliases.append(begin(RISCV64Triples), end(RISCV64Triples));
break;
case llvm::Triple::riscv64:
LibDirs.append(begin(RISCV64LibDirs), end(RISCV64LibDirs));
TripleAliases.append(begin(RISCV64Triples), end(RISCV64Triples));
BiarchLibDirs.append(begin(RISCV32LibDirs), end(RISCV32LibDirs));
BiarchTripleAliases.append(begin(RISCV32Triples), end(RISCV32Triples));
break;
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
LibDirs.append(begin(SPARCv8LibDirs), end(SPARCv8LibDirs));
TripleAliases.append(begin(SPARCv8Triples), end(SPARCv8Triples));
BiarchLibDirs.append(begin(SPARCv9LibDirs), end(SPARCv9LibDirs));
BiarchTripleAliases.append(begin(SPARCv9Triples), end(SPARCv9Triples));
break;
case llvm::Triple::sparcv9:
LibDirs.append(begin(SPARCv9LibDirs), end(SPARCv9LibDirs));
TripleAliases.append(begin(SPARCv9Triples), end(SPARCv9Triples));
BiarchLibDirs.append(begin(SPARCv8LibDirs), end(SPARCv8LibDirs));
BiarchTripleAliases.append(begin(SPARCv8Triples), end(SPARCv8Triples));
break;
case llvm::Triple::systemz:
LibDirs.append(begin(SystemZLibDirs), end(SystemZLibDirs));
TripleAliases.append(begin(SystemZTriples), end(SystemZTriples));
break;
default:
// By default, just rely on the standard lib directories and the original
// triple.
break;
}
// Always append the driver's target triple to the end, in case it doesn't
// match any of our aliases.
TripleAliases.push_back(TargetTriple.str());
// Also include the multiarch variant if it's different.
if (TargetTriple.str() != BiarchTriple.str())
BiarchTripleAliases.push_back(BiarchTriple.str());
}
bool Generic_GCC::GCCInstallationDetector::ScanGCCForMultilibs(
const llvm::Triple &TargetTriple, const ArgList &Args,
StringRef Path, bool NeedsBiarchSuffix) {
llvm::Triple::ArchType TargetArch = TargetTriple.getArch();
DetectedMultilibs Detected;
// An Android standalone toolchain could have multilibs for ARM and Thumb.
// Debian MIPS multilibs behave more like the rest of the biarch ones, so
// handle them there.
if (isArmOrThumbArch(TargetArch) && TargetTriple.isAndroid()) {
// It should also work without multilibs in a simplified toolchain.
findAndroidArmMultilibs(D, TargetTriple, Path, Args, Detected);
} else if (TargetTriple.isCSKY()) {
findCSKYMultilibs(D, TargetTriple, Path, Args, Detected);
} else if (TargetTriple.isMIPS()) {
if (!findMIPSMultilibs(D, TargetTriple, Path, Args, Detected))
return false;
} else if (TargetTriple.isRISCV()) {
findRISCVMultilibs(D, TargetTriple, Path, Args, Detected);
} else if (isMSP430(TargetArch)) {
findMSP430Multilibs(D, TargetTriple, Path, Args, Detected);
} else if (TargetArch == llvm::Triple::avr) {
// AVR has no multilibs.
} else if (!findBiarchMultilibs(D, TargetTriple, Path, Args,
NeedsBiarchSuffix, Detected)) {
return false;
}
Multilibs = Detected.Multilibs;
SelectedMultilib = Detected.SelectedMultilib;
BiarchSibling = Detected.BiarchSibling;
return true;
}
void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
const llvm::Triple &TargetTriple, const ArgList &Args,
const std::string &LibDir, StringRef CandidateTriple,
bool NeedsBiarchSuffix, bool GCCDirExists, bool GCCCrossDirExists) {
// Locations relative to the system lib directory where GCC's triple-specific
// directories might reside.
struct GCCLibSuffix {
// Path from system lib directory to GCC triple-specific directory.
std::string LibSuffix;
// Path from GCC triple-specific directory back to system lib directory.
// This is one '..' component per component in LibSuffix.
StringRef ReversePath;
// Whether this library suffix is relevant for the triple.
bool Active;
} Suffixes[] = {
// This is the normal place.
{"gcc/" + CandidateTriple.str(), "../..", GCCDirExists},
// Debian puts cross-compilers in gcc-cross.
{"gcc-cross/" + CandidateTriple.str(), "../..", GCCCrossDirExists},
// The Freescale PPC SDK has the gcc libraries in
// <sysroot>/usr/lib/<triple>/x.y.z so have a look there as well. Only do
// this on Freescale triples, though, since some systems put a *lot* of
// files in that location, not just GCC installation data.
{CandidateTriple.str(), "..",
TargetTriple.getVendor() == llvm::Triple::Freescale ||
TargetTriple.getVendor() == llvm::Triple::OpenEmbedded}};
for (auto &Suffix : Suffixes) {
if (!Suffix.Active)
continue;
StringRef LibSuffix = Suffix.LibSuffix;
std::error_code EC;
for (llvm::vfs::directory_iterator
LI = D.getVFS().dir_begin(LibDir + "/" + LibSuffix, EC),
LE;
!EC && LI != LE; LI = LI.increment(EC)) {
StringRef VersionText = llvm::sys::path::filename(LI->path());
GCCVersion CandidateVersion = GCCVersion::Parse(VersionText);
if (CandidateVersion.Major != -1) // Filter obviously bad entries.
if (!CandidateGCCInstallPaths.insert(std::string(LI->path())).second)
continue; // Saw this path before; no need to look at it again.
if (CandidateVersion.isOlderThan(4, 1, 1))
continue;
if (CandidateVersion <= Version)
continue;
if (!ScanGCCForMultilibs(TargetTriple, Args, LI->path(),
NeedsBiarchSuffix))
continue;
Version = CandidateVersion;
GCCTriple.setTriple(CandidateTriple);
// FIXME: We hack together the directory name here instead of
// using LI to ensure stable path separators across Windows and
// Linux.
GCCInstallPath = (LibDir + "/" + LibSuffix + "/" + VersionText).str();
GCCParentLibPath = (GCCInstallPath + "/../" + Suffix.ReversePath).str();
IsValid = true;
}
}
}
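// Illustrative sketch (hypothetical values, for orientation only): with
//   LibDir      = "/usr/lib"
//   LibSuffix   = "gcc/x86_64-linux-gnu"
//   VersionText = "12"
//   Suffix.ReversePath = "../.."
// the assignments above produce
//   GCCInstallPath   = "/usr/lib/gcc/x86_64-linux-gnu/12"
//   GCCParentLibPath = "/usr/lib/gcc/x86_64-linux-gnu/12/../../.." (i.e. /usr/lib)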
bool Generic_GCC::GCCInstallationDetector::ScanGentooConfigs(
const llvm::Triple &TargetTriple, const ArgList &Args,
const SmallVectorImpl<StringRef> &CandidateTriples,
const SmallVectorImpl<StringRef> &CandidateBiarchTriples) {
if (!D.getVFS().exists(concat(D.SysRoot, GentooConfigDir)))
return false;
for (StringRef CandidateTriple : CandidateTriples) {
if (ScanGentooGccConfig(TargetTriple, Args, CandidateTriple))
return true;
}
for (StringRef CandidateTriple : CandidateBiarchTriples) {
if (ScanGentooGccConfig(TargetTriple, Args, CandidateTriple, true))
return true;
}
return false;
}
bool Generic_GCC::GCCInstallationDetector::ScanGentooGccConfig(
const llvm::Triple &TargetTriple, const ArgList &Args,
StringRef CandidateTriple, bool NeedsBiarchSuffix) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File =
D.getVFS().getBufferForFile(concat(D.SysRoot, GentooConfigDir,
"/config-" + CandidateTriple.str()));
if (File) {
SmallVector<StringRef, 2> Lines;
File.get()->getBuffer().split(Lines, "\n");
for (StringRef Line : Lines) {
Line = Line.trim();
// CURRENT=triple-version
if (!Line.consume_front("CURRENT="))
continue;
// Process the config file pointed to by CURRENT.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ConfigFile =
D.getVFS().getBufferForFile(
concat(D.SysRoot, GentooConfigDir, "/" + Line));
std::pair<StringRef, StringRef> ActiveVersion = Line.rsplit('-');
// List of paths to scan for libraries.
SmallVector<StringRef, 4> GentooScanPaths;
// Scan the config file to find the installed GCC library paths.
// Typical content of the GCC config file:
// LDPATH="/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.x:/usr/lib/gcc/
// (continued from previous line) x86_64-pc-linux-gnu/4.9.x/32"
// MANPATH="/usr/share/gcc-data/x86_64-pc-linux-gnu/4.9.x/man"
// INFOPATH="/usr/share/gcc-data/x86_64-pc-linux-gnu/4.9.x/info"
// STDCXX_INCDIR="/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.x/include/g++-v4"
// We are looking for the paths listed in LDPATH=... .
if (ConfigFile) {
SmallVector<StringRef, 2> ConfigLines;
ConfigFile.get()->getBuffer().split(ConfigLines, "\n");
for (StringRef ConfLine : ConfigLines) {
ConfLine = ConfLine.trim();
if (ConfLine.consume_front("LDPATH=")) {
// Drop '"' from front and back if present.
ConfLine.consume_back("\"");
ConfLine.consume_front("\"");
// Get all paths separated by ':'.
ConfLine.split(GentooScanPaths, ':', -1, /*AllowEmpty*/ false);
}
}
}
// Test the path based on the version in /etc/env.d/gcc/config-{tuple}.
std::string basePath = "/usr/lib/gcc/" + ActiveVersion.first.str() + "/"
+ ActiveVersion.second.str();
GentooScanPaths.push_back(StringRef(basePath));
// Scan all paths for GCC libraries.
for (const auto &GentooScanPath : GentooScanPaths) {
std::string GentooPath = concat(D.SysRoot, GentooScanPath);
if (D.getVFS().exists(GentooPath + "/crtbegin.o")) {
if (!ScanGCCForMultilibs(TargetTriple, Args, GentooPath,
NeedsBiarchSuffix))
continue;
Version = GCCVersion::Parse(ActiveVersion.second);
GCCInstallPath = GentooPath;
GCCParentLibPath = GentooPath + std::string("/../../..");
GCCTriple.setTriple(ActiveVersion.first);
IsValid = true;
return true;
}
}
}
}
return false;
}
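// Illustrative sketch (assumed file contents, not from this patch): given a
// Gentoo config file <sysroot>/etc/env.d/gcc/config-x86_64-pc-linux-gnu with
//   CURRENT=x86_64-pc-linux-gnu-12
// the code above reads <sysroot>/etc/env.d/gcc/x86_64-pc-linux-gnu-12,
// collects the LDPATH entries plus /usr/lib/gcc/x86_64-pc-linux-gnu/12, and
// accepts the first of those directories that contains a crtbegin.o.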
Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: ToolChain(D, Triple, Args), GCCInstallation(D),
CudaInstallation(D, Triple, Args), RocmInstallation(D, Triple, Args) {
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
}
Generic_GCC::~Generic_GCC() {}
Tool *Generic_GCC::getTool(Action::ActionClass AC) const {
switch (AC) {
case Action::PreprocessJobClass:
if (!Preprocess)
Preprocess.reset(new clang::driver::tools::gcc::Preprocessor(*this));
return Preprocess.get();
case Action::CompileJobClass:
if (!Compile)
Compile.reset(new tools::gcc::Compiler(*this));
return Compile.get();
default:
return ToolChain::getTool(AC);
}
}
Tool *Generic_GCC::buildAssembler() const {
return new tools::gnutools::Assembler(*this);
}
Tool *Generic_GCC::buildLinker() const { return new tools::gcc::Linker(*this); }
void Generic_GCC::printVerboseInfo(raw_ostream &OS) const {
// Print the information about how we detected the GCC installation.
GCCInstallation.print(OS);
CudaInstallation.print(OS);
RocmInstallation.print(OS);
}
bool Generic_GCC::IsUnwindTablesDefault(const ArgList &Args) const {
switch (getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return true;
default:
return false;
}
}
bool Generic_GCC::isPICDefault() const {
switch (getArch()) {
case llvm::Triple::x86_64:
return getTriple().isOSWindows();
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
return true;
default:
return false;
}
}
bool Generic_GCC::isPIEDefault(const llvm::opt::ArgList &Args) const {
return false;
}
bool Generic_GCC::isPICDefaultForced() const {
return getArch() == llvm::Triple::x86_64 && getTriple().isOSWindows();
}
bool Generic_GCC::IsIntegratedAssemblerDefault() const {
switch (getTriple().getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::avr:
case llvm::Triple::bpfel:
case llvm::Triple::bpfeb:
case llvm::Triple::csky:
case llvm::Triple::hexagon:
case llvm::Triple::lanai:
case llvm::Triple::m68k:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::msp430:
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
case llvm::Triple::systemz:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::ve:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return true;
default:
return false;
}
}
void Generic_GCC::PushPPaths(ToolChain::path_list &PPaths) {
// Cross-compiling binutils and GCC installations (vanilla and openSUSE at
// least) put various tools in a triple-prefixed directory off of the parent
// of the GCC installation. We use the GCC triple here to ensure that we end
// up with tools that support the same amount of cross compiling as the
// detected GCC installation. For example, if we find a GCC installation
// targeting x86_64, but it is a bi-arch GCC installation, it can also be
// used to target i386.
if (GCCInstallation.isValid()) {
PPaths.push_back(Twine(GCCInstallation.getParentLibPath() + "/../" +
GCCInstallation.getTriple().str() + "/bin")
.str());
}
}
void Generic_GCC::AddMultilibPaths(const Driver &D,
const std::string &SysRoot,
const std::string &OSLibDir,
const std::string &MultiarchTriple,
path_list &Paths) {
// Add the multilib suffixed paths where they are available.
if (GCCInstallation.isValid()) {
const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
const std::string &LibPath =
std::string(GCCInstallation.getParentLibPath());
// Sourcery CodeBench MIPS toolchain holds some libraries under
// a biarch-like suffix of the GCC installation.
if (const auto &PathsCallback = Multilibs.filePathsCallback())
for (const auto &Path : PathsCallback(SelectedMultilib))
addPathIfExists(D, GCCInstallation.getInstallPath() + Path, Paths);
// Add lib/gcc/$triple/$version, with an optional /multilib suffix.
addPathIfExists(
D, GCCInstallation.getInstallPath() + SelectedMultilib.gccSuffix(),
Paths);
// Add lib/gcc/$triple/$libdir
// For GCC built with --enable-version-specific-runtime-libs.
addPathIfExists(D, GCCInstallation.getInstallPath() + "/../" + OSLibDir,
Paths);
// GCC cross compiling toolchains will install target libraries which ship
// as part of the toolchain under <prefix>/<triple>/<libdir> rather than as
// any part of the GCC installation in
// <prefix>/<libdir>/gcc/<triple>/<version>. This decision is somewhat
// debatable, but is the reality today. We need to search this tree even
// when we have a sysroot somewhere else. It is the responsibility of
// whoever is doing the cross build targeting a sysroot using a GCC
// installation that is *not* within the system root to ensure two things:
//
// 1) Any DSOs that are linked in from this tree or from the install path
// above must be present on the system root and found via an
// appropriate rpath.
// 2) There must not be libraries installed into
// <prefix>/<triple>/<libdir> unless they should be preferred over
// those within the system root.
//
// Note that this matches the GCC behavior. See the below comment for where
// Clang diverges from GCC's behavior.
addPathIfExists(D,
LibPath + "/../" + GCCTriple.str() + "/lib/../" + OSLibDir +
SelectedMultilib.osSuffix(),
Paths);
// If the GCC installation we found is inside of the sysroot, we want to
// prefer libraries installed in the parent prefix of the GCC installation.
// It is important to *not* use these paths when the GCC installation is
// outside of the system root as that can pick up unintended libraries.
// This usually happens when there is an external cross compiler on the
// host system, and a more minimal sysroot available that is the target of
// the cross. Note that GCC does include some of these directories in some
// configurations but this seems somewhere between questionable and simply
// a bug.
if (StringRef(LibPath).startswith(SysRoot))
addPathIfExists(D, LibPath + "/../" + OSLibDir, Paths);
}
}
void Generic_GCC::AddMultiarchPaths(const Driver &D,
const std::string &SysRoot,
const std::string &OSLibDir,
path_list &Paths) {
if (GCCInstallation.isValid()) {
const std::string &LibPath =
std::string(GCCInstallation.getParentLibPath());
const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
const Multilib &Multilib = GCCInstallation.getMultilib();
addPathIfExists(
D, LibPath + "/../" + GCCTriple.str() + "/lib" + Multilib.osSuffix(),
Paths);
}
}
void Generic_GCC::AddMultilibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
// Add include directories specific to the selected multilib set and multilib.
if (!GCCInstallation.isValid())
return;
// gcc TOOL_INCLUDE_DIR.
const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
std::string LibPath(GCCInstallation.getParentLibPath());
addSystemInclude(DriverArgs, CC1Args,
Twine(LibPath) + "/../" + GCCTriple.str() + "/include");
const auto &Callback = Multilibs.includeDirsCallback();
if (Callback) {
for (const auto &Path : Callback(GCCInstallation.getMultilib()))
addExternCSystemIncludeIfExists(DriverArgs, CC1Args,
GCCInstallation.getInstallPath() + Path);
}
}
void Generic_GCC::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdinc, options::OPT_nostdincxx,
options::OPT_nostdlibinc))
return;
switch (GetCXXStdlibType(DriverArgs)) {
case ToolChain::CST_Libcxx:
addLibCxxIncludePaths(DriverArgs, CC1Args);
break;
case ToolChain::CST_Libstdcxx:
addLibStdCxxIncludePaths(DriverArgs, CC1Args);
break;
}
}
void
Generic_GCC::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
const Driver &D = getDriver();
std::string SysRoot = computeSysRoot();
std::string Target = getTripleString();
auto AddIncludePath = [&](std::string Path) {
std::string Version = detectLibcxxVersion(Path);
if (Version.empty())
return false;
// First add the per-target include path if it exists.
std::string TargetDir = Path + "/" + Target + "/c++/" + Version;
if (D.getVFS().exists(TargetDir))
addSystemInclude(DriverArgs, CC1Args, TargetDir);
// Second add the generic one.
addSystemInclude(DriverArgs, CC1Args, Path + "/c++/" + Version);
return true;
};
// Android never uses the libc++ headers installed alongside the toolchain,
// which are generally incompatible with the NDK libraries anyway.
if (!getTriple().isAndroid())
if (AddIncludePath(getDriver().Dir + "/../include"))
return;
// If this is a development, non-installed, clang, libcxx will
// not be found at ../include/c++, but it is likely to be found at
// one of the following two locations:
if (AddIncludePath(concat(SysRoot, "/usr/local/include")))
return;
if (AddIncludePath(concat(SysRoot, "/usr/include")))
return;
}
bool Generic_GCC::addLibStdCXXIncludePaths(Twine IncludeDir, StringRef Triple,
Twine IncludeSuffix,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
bool DetectDebian) const {
if (!getVFS().exists(IncludeDir))
return false;
// Debian native gcc uses g++-multiarch-incdir.diff which uses
// include/x86_64-linux-gnu/c++/10$IncludeSuffix instead of
// include/c++/10/x86_64-linux-gnu$IncludeSuffix.
std::string Dir = IncludeDir.str();
StringRef Include =
llvm::sys::path::parent_path(llvm::sys::path::parent_path(Dir));
std::string Path =
(Include + "/" + Triple + Dir.substr(Include.size()) + IncludeSuffix)
.str();
if (DetectDebian && !getVFS().exists(Path))
return false;
// GPLUSPLUS_INCLUDE_DIR
addSystemInclude(DriverArgs, CC1Args, IncludeDir);
// GPLUSPLUS_TOOL_INCLUDE_DIR. If Triple is not empty, add a target-dependent
// include directory.
if (DetectDebian)
addSystemInclude(DriverArgs, CC1Args, Path);
else if (!Triple.empty())
addSystemInclude(DriverArgs, CC1Args,
IncludeDir + "/" + Triple + IncludeSuffix);
// GPLUSPLUS_BACKWARD_INCLUDE_DIR
addSystemInclude(DriverArgs, CC1Args, IncludeDir + "/backward");
return true;
}
bool Generic_GCC::addGCCLibStdCxxIncludePaths(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
StringRef DebianMultiarch) const {
assert(GCCInstallation.isValid());
// By default, look for the C++ headers in an include directory adjacent to
// the lib directory of the GCC installation. Note that this is expected to be
// equivalent to '/usr/include/c++/X.Y' in almost all cases.
StringRef LibDir = GCCInstallation.getParentLibPath();
StringRef InstallDir = GCCInstallation.getInstallPath();
StringRef TripleStr = GCCInstallation.getTriple().str();
const Multilib &Multilib = GCCInstallation.getMultilib();
const GCCVersion &Version = GCCInstallation.getVersion();
// Try /../$triple/include/c++/$version (gcc --print-multiarch is not empty).
if (addLibStdCXXIncludePaths(
LibDir.str() + "/../" + TripleStr + "/include/c++/" + Version.Text,
TripleStr, Multilib.includeSuffix(), DriverArgs, CC1Args))
return true;
// Try /gcc/$triple/$version/include/c++/ (gcc --print-multiarch is not
// empty). Like above but for GCC built with
// --enable-version-specific-runtime-libs.
if (addLibStdCXXIncludePaths(LibDir.str() + "/gcc/" + TripleStr + "/" +
Version.Text + "/include/c++/",
TripleStr, Multilib.includeSuffix(), DriverArgs,
CC1Args))
return true;
// Detect Debian g++-multiarch-incdir.diff.
if (addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text,
DebianMultiarch, Multilib.includeSuffix(),
DriverArgs, CC1Args, /*Debian=*/true))
return true;
// Try /../include/c++/$version (gcc --print-multiarch is empty).
if (addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text,
TripleStr, Multilib.includeSuffix(), DriverArgs,
CC1Args))
return true;
// Otherwise, fall back on a bunch of options which don't use multiarch
// layouts for simplicity.
const std::string LibStdCXXIncludePathCandidates[] = {
// Gentoo is weird and places its headers inside the GCC install,
// so if the first attempt to find the headers fails, try these patterns.
InstallDir.str() + "/include/g++-v" + Version.Text,
InstallDir.str() + "/include/g++-v" + Version.MajorStr + "." +
Version.MinorStr,
InstallDir.str() + "/include/g++-v" + Version.MajorStr,
};
for (const auto &IncludePath : LibStdCXXIncludePathCandidates) {
if (addLibStdCXXIncludePaths(IncludePath, TripleStr,
Multilib.includeSuffix(), DriverArgs, CC1Args))
return true;
}
return false;
}
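// Illustrative sketch (hypothetical install, assuming getParentLibPath() is
// "/usr/lib" and the installation is /usr/lib/gcc/x86_64-linux-gnu/12.2.0):
// the calls above try, in order,
//   /usr/lib/../x86_64-linux-gnu/include/c++/12.2.0
//   /usr/lib/gcc/x86_64-linux-gnu/12.2.0/include/c++/
//   /usr/lib/../include/c++/12.2.0   (Debian multiarch variant, then plain)
//   /usr/lib/gcc/x86_64-linux-gnu/12.2.0/include/g++-v12.2.0, -v12.2, -v12
// and return as soon as one of the candidates is accepted.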
void
Generic_GCC::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
if (GCCInstallation.isValid()) {
addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args,
GCCInstallation.getTriple().str());
}
}
llvm::opt::DerivedArgList *
Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef,
Action::OffloadKind DeviceOffloadKind) const {
// If this tool chain is used for an OpenMP offloading device we have to make
// sure we always generate a shared library regardless of the commands the
// user passed to the host. This is required because the runtime library must
// load the device image dynamically at run time.
if (DeviceOffloadKind == Action::OFK_OpenMP) {
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
// Request the shared library. Given that these options are decided
// implicitly, they do not refer to any base argument.
DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_shared));
DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_fPIC));
// Filter out all the arguments we don't want to pass to the offloading
// toolchain, as they can interfere with the creation of a shared library.
for (auto *A : Args) {
switch ((options::ID)A->getOption().getID()) {
default:
DAL->append(A);
break;
case options::OPT_shared:
case options::OPT_dynamic:
case options::OPT_static:
case options::OPT_fPIC:
case options::OPT_fno_PIC:
case options::OPT_fpic:
case options::OPT_fno_pic:
case options::OPT_fPIE:
case options::OPT_fno_PIE:
case options::OPT_fpie:
case options::OPT_fno_pie:
break;
}
}
return DAL;
}
return nullptr;
}
void Generic_ELF::anchor() {}
void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs,
ArgStringList &CC1Args,
Action::OffloadKind) const {
if (!DriverArgs.hasFlag(options::OPT_fuse_init_array,
options::OPT_fno_use_init_array, true))
CC1Args.push_back("-fno-use-init-array");
}
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 68158ec977cf..5d0d87fd2422 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -1,5838 +1,5836 @@
//===--------------------- SemaLookup.cpp - Name Lookup ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements name lookup for C, C++, Objective-C, and
// Objective-C++.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclLookups.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/RISCVIntrinsicManager.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/TemplateDeduction.h"
#include "clang/Sema/TypoCorrection.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/edit_distance.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <iterator>
#include <list>
#include <set>
#include <utility>
#include <vector>
#include "OpenCLBuiltins.inc"
using namespace clang;
using namespace sema;
namespace {
class UnqualUsingEntry {
const DeclContext *Nominated;
const DeclContext *CommonAncestor;
public:
UnqualUsingEntry(const DeclContext *Nominated,
const DeclContext *CommonAncestor)
: Nominated(Nominated), CommonAncestor(CommonAncestor) {
}
const DeclContext *getCommonAncestor() const {
return CommonAncestor;
}
const DeclContext *getNominatedNamespace() const {
return Nominated;
}
// Sort by the pointer value of the common ancestor.
struct Comparator {
bool operator()(const UnqualUsingEntry &L, const UnqualUsingEntry &R) {
return L.getCommonAncestor() < R.getCommonAncestor();
}
bool operator()(const UnqualUsingEntry &E, const DeclContext *DC) {
return E.getCommonAncestor() < DC;
}
bool operator()(const DeclContext *DC, const UnqualUsingEntry &E) {
return DC < E.getCommonAncestor();
}
};
};
/// A collection of using directives, as used by C++ unqualified
/// lookup.
class UnqualUsingDirectiveSet {
Sema &SemaRef;
typedef SmallVector<UnqualUsingEntry, 8> ListTy;
ListTy list;
llvm::SmallPtrSet<DeclContext*, 8> visited;
public:
UnqualUsingDirectiveSet(Sema &SemaRef) : SemaRef(SemaRef) {}
void visitScopeChain(Scope *S, Scope *InnermostFileScope) {
// C++ [namespace.udir]p1:
// During unqualified name lookup, the names appear as if they
// were declared in the nearest enclosing namespace which contains
// both the using-directive and the nominated namespace.
DeclContext *InnermostFileDC = InnermostFileScope->getEntity();
assert(InnermostFileDC && InnermostFileDC->isFileContext());
for (; S; S = S->getParent()) {
// C++ [namespace.udir]p1:
// A using-directive shall not appear in class scope, but may
// appear in namespace scope or in block scope.
DeclContext *Ctx = S->getEntity();
if (Ctx && Ctx->isFileContext()) {
visit(Ctx, Ctx);
} else if (!Ctx || Ctx->isFunctionOrMethod()) {
for (auto *I : S->using_directives())
if (SemaRef.isVisible(I))
visit(I, InnermostFileDC);
}
}
}
// Visits a context and collects all of its using directives
// recursively. Treats all using directives as if they were
// declared in the context.
//
// A given context is only ever visited once, so it is important
// that contexts be visited from the inside out in order to get
// the effective DCs right.
void visit(DeclContext *DC, DeclContext *EffectiveDC) {
if (!visited.insert(DC).second)
return;
addUsingDirectives(DC, EffectiveDC);
}
// Visits a using directive and collects all of its using
// directives recursively. Treats all using directives as if they
// were declared in the effective DC.
void visit(UsingDirectiveDecl *UD, DeclContext *EffectiveDC) {
DeclContext *NS = UD->getNominatedNamespace();
if (!visited.insert(NS).second)
return;
addUsingDirective(UD, EffectiveDC);
addUsingDirectives(NS, EffectiveDC);
}
// Adds all the using directives in a context (and those nominated
// by its using directives, transitively) as if they appeared in
// the given effective context.
void addUsingDirectives(DeclContext *DC, DeclContext *EffectiveDC) {
SmallVector<DeclContext*, 4> queue;
while (true) {
for (auto UD : DC->using_directives()) {
DeclContext *NS = UD->getNominatedNamespace();
if (SemaRef.isVisible(UD) && visited.insert(NS).second) {
addUsingDirective(UD, EffectiveDC);
queue.push_back(NS);
}
}
if (queue.empty())
return;
DC = queue.pop_back_val();
}
}
// Add a using directive as if it had been declared in the given
// context. This helps implement C++ [namespace.udir]p3:
// The using-directive is transitive: if a scope contains a
// using-directive that nominates a second namespace that itself
// contains using-directives, the effect is as if the
// using-directives from the second namespace also appeared in
// the first.
void addUsingDirective(UsingDirectiveDecl *UD, DeclContext *EffectiveDC) {
// Find the common ancestor between the effective context and
// the nominated namespace.
DeclContext *Common = UD->getNominatedNamespace();
while (!Common->Encloses(EffectiveDC))
Common = Common->getParent();
Common = Common->getPrimaryContext();
list.push_back(UnqualUsingEntry(UD->getNominatedNamespace(), Common));
}
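// Illustrative example (hypothetical translation unit, not from this patch)
// of the transitivity handled above:
//
//   namespace A { namespace B { int x; } }
//   namespace C { using namespace A::B; }
//   void f() {
//     using namespace C;  // nominates C, and transitively A::B
//     x = 1;              // finds A::B::x; both directives are recorded with
//   }                     // the global namespace as their common ancestor
//
// getNamespacesFor() later uses that common ancestor to decide in which
// DeclContext the nominated namespaces' names should be considered.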
void done() { llvm::sort(list, UnqualUsingEntry::Comparator()); }
typedef ListTy::const_iterator const_iterator;
const_iterator begin() const { return list.begin(); }
const_iterator end() const { return list.end(); }
llvm::iterator_range<const_iterator>
getNamespacesFor(DeclContext *DC) const {
return llvm::make_range(std::equal_range(begin(), end(),
DC->getPrimaryContext(),
UnqualUsingEntry::Comparator()));
}
};
} // end anonymous namespace
// Retrieve the set of identifier namespaces that correspond to a
// specific kind of name lookup.
static inline unsigned getIDNS(Sema::LookupNameKind NameKind,
bool CPlusPlus,
bool Redeclaration) {
unsigned IDNS = 0;
switch (NameKind) {
case Sema::LookupObjCImplicitSelfParam:
case Sema::LookupOrdinaryName:
case Sema::LookupRedeclarationWithLinkage:
case Sema::LookupLocalFriendName:
case Sema::LookupDestructorName:
IDNS = Decl::IDNS_Ordinary;
if (CPlusPlus) {
IDNS |= Decl::IDNS_Tag | Decl::IDNS_Member | Decl::IDNS_Namespace;
if (Redeclaration)
IDNS |= Decl::IDNS_TagFriend | Decl::IDNS_OrdinaryFriend;
}
if (Redeclaration)
IDNS |= Decl::IDNS_LocalExtern;
break;
case Sema::LookupOperatorName:
// Operator lookup is its own crazy thing; it is not the same
// as (e.g.) looking up an operator name for redeclaration.
assert(!Redeclaration && "cannot do redeclaration operator lookup");
IDNS = Decl::IDNS_NonMemberOperator;
break;
case Sema::LookupTagName:
if (CPlusPlus) {
IDNS = Decl::IDNS_Type;
// When looking for a redeclaration of a tag name, we add:
// 1) TagFriend to find undeclared friend decls
// 2) Namespace because they can't "overload" with tag decls.
// 3) Tag because it includes class templates, which can't
// "overload" with tag decls.
if (Redeclaration)
IDNS |= Decl::IDNS_Tag | Decl::IDNS_TagFriend | Decl::IDNS_Namespace;
} else {
IDNS = Decl::IDNS_Tag;
}
break;
case Sema::LookupLabel:
IDNS = Decl::IDNS_Label;
break;
case Sema::LookupMemberName:
IDNS = Decl::IDNS_Member;
if (CPlusPlus)
IDNS |= Decl::IDNS_Tag | Decl::IDNS_Ordinary;
break;
case Sema::LookupNestedNameSpecifierName:
IDNS = Decl::IDNS_Type | Decl::IDNS_Namespace;
break;
case Sema::LookupNamespaceName:
IDNS = Decl::IDNS_Namespace;
break;
case Sema::LookupUsingDeclName:
assert(Redeclaration && "should only be used for redecl lookup");
IDNS = Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Member |
Decl::IDNS_Using | Decl::IDNS_TagFriend | Decl::IDNS_OrdinaryFriend |
Decl::IDNS_LocalExtern;
break;
case Sema::LookupObjCProtocolName:
IDNS = Decl::IDNS_ObjCProtocol;
break;
case Sema::LookupOMPReductionName:
IDNS = Decl::IDNS_OMPReduction;
break;
case Sema::LookupOMPMapperName:
IDNS = Decl::IDNS_OMPMapper;
break;
case Sema::LookupAnyName:
IDNS = Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Member
| Decl::IDNS_Using | Decl::IDNS_Namespace | Decl::IDNS_ObjCProtocol
| Decl::IDNS_Type;
break;
}
return IDNS;
}
void LookupResult::configure() {
IDNS = getIDNS(LookupKind, getSema().getLangOpts().CPlusPlus,
isForRedeclaration());
// If we're looking for one of the allocation or deallocation
// operators, make sure that the implicitly-declared new and delete
// operators can be found.
switch (NameInfo.getName().getCXXOverloadedOperator()) {
case OO_New:
case OO_Delete:
case OO_Array_New:
case OO_Array_Delete:
getSema().DeclareGlobalNewDelete();
break;
default:
break;
}
// Compiler builtins are always visible, regardless of where they end
// up being declared.
if (IdentifierInfo *Id = NameInfo.getName().getAsIdentifierInfo()) {
if (unsigned BuiltinID = Id->getBuiltinID()) {
if (!getSema().Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
AllowHidden = true;
}
}
}
bool LookupResult::checkDebugAssumptions() const {
// This function is never called in NDEBUG builds.
assert(ResultKind != NotFound || Decls.size() == 0);
assert(ResultKind != Found || Decls.size() == 1);
assert(ResultKind != FoundOverloaded || Decls.size() > 1 ||
(Decls.size() == 1 &&
isa<FunctionTemplateDecl>((*begin())->getUnderlyingDecl())));
assert(ResultKind != FoundUnresolvedValue || checkUnresolved());
assert(ResultKind != Ambiguous || Decls.size() > 1 ||
(Decls.size() == 1 && (Ambiguity == AmbiguousBaseSubobjects ||
Ambiguity == AmbiguousBaseSubobjectTypes)));
assert((Paths != nullptr) == (ResultKind == Ambiguous &&
(Ambiguity == AmbiguousBaseSubobjectTypes ||
Ambiguity == AmbiguousBaseSubobjects)));
return true;
}
// Necessary because CXXBasePaths is not complete in Sema.h
void LookupResult::deletePaths(CXXBasePaths *Paths) {
delete Paths;
}
/// Get a representative context for a declaration such that two declarations
/// will have the same context if they were found within the same scope.
static DeclContext *getContextForScopeMatching(Decl *D) {
// For function-local declarations, use that function as the context. This
// doesn't account for scopes within the function; the caller must deal with
// those.
DeclContext *DC = D->getLexicalDeclContext();
if (DC->isFunctionOrMethod())
return DC;
// Otherwise, look at the semantic context of the declaration. The
// declaration must have been found there.
return D->getDeclContext()->getRedeclContext();
}
/// Determine whether \p D is a better lookup result than \p Existing,
/// given that they declare the same entity.
static bool isPreferredLookupResult(Sema &S, Sema::LookupNameKind Kind,
NamedDecl *D, NamedDecl *Existing) {
// When looking up redeclarations of a using declaration, prefer a using
// shadow declaration over any other declaration of the same entity.
if (Kind == Sema::LookupUsingDeclName && isa<UsingShadowDecl>(D) &&
!isa<UsingShadowDecl>(Existing))
return true;
auto *DUnderlying = D->getUnderlyingDecl();
auto *EUnderlying = Existing->getUnderlyingDecl();
// If they have different underlying declarations, prefer a typedef over the
// original type (this happens when two type declarations denote the same
// type), per a generous reading of C++ [dcl.typedef]p3 and p4. The typedef
// might carry additional semantic information, such as an alignment override.
// However, per C++ [dcl.typedef]p5, when looking up a tag name, prefer a tag
// declaration over a typedef. Also prefer a tag over a typedef for
// destructor name lookup because in some contexts we only accept a
// class-name in a destructor declaration.
if (DUnderlying->getCanonicalDecl() != EUnderlying->getCanonicalDecl()) {
assert(isa<TypeDecl>(DUnderlying) && isa<TypeDecl>(EUnderlying));
bool HaveTag = isa<TagDecl>(EUnderlying);
bool WantTag =
Kind == Sema::LookupTagName || Kind == Sema::LookupDestructorName;
return HaveTag != WantTag;
}
// Pick the function with more default arguments.
// FIXME: In the presence of ambiguous default arguments, we should keep both,
// so we can diagnose the ambiguity if the default argument is needed.
// See C++ [over.match.best]p3.
if (auto *DFD = dyn_cast<FunctionDecl>(DUnderlying)) {
auto *EFD = cast<FunctionDecl>(EUnderlying);
unsigned DMin = DFD->getMinRequiredArguments();
unsigned EMin = EFD->getMinRequiredArguments();
// If D has more default arguments, it is preferred.
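// (A smaller minimum-required-argument count means more trailing parameters
// have default arguments.)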
if (DMin != EMin)
return DMin < EMin;
// FIXME: When we track visibility for default function arguments, check
// that we pick the declaration with more visible default arguments.
}
// Pick the template with more default template arguments.
if (auto *DTD = dyn_cast<TemplateDecl>(DUnderlying)) {
auto *ETD = cast<TemplateDecl>(EUnderlying);
unsigned DMin = DTD->getTemplateParameters()->getMinRequiredArguments();
unsigned EMin = ETD->getTemplateParameters()->getMinRequiredArguments();
// If D has more default arguments, it is preferred. Note that the set of
// default arguments (and their visibility) grows monotonically across the
// redeclaration chain, so this is a quick proxy for "is more recent".
if (DMin != EMin)
return DMin < EMin;
// If D has more *visible* default arguments, it is preferred. Note, an
// earlier default argument being visible does not imply that a later
// default argument is visible, so we can't just check the first one.
for (unsigned I = DMin, N = DTD->getTemplateParameters()->size();
I != N; ++I) {
if (!S.hasVisibleDefaultArgument(
ETD->getTemplateParameters()->getParam(I)) &&
S.hasVisibleDefaultArgument(
DTD->getTemplateParameters()->getParam(I)))
return true;
}
}
// A VarDecl can have an incomplete array type; prefer the declaration with
// the more complete array type.
if (VarDecl *DVD = dyn_cast<VarDecl>(DUnderlying)) {
VarDecl *EVD = cast<VarDecl>(EUnderlying);
if (EVD->getType()->isIncompleteType() &&
!DVD->getType()->isIncompleteType()) {
// Prefer the decl with a more complete type if visible.
return S.isVisible(DVD);
}
return false; // Avoid picking up a newer decl, just because it was newer.
}
// For most kinds of declaration, it doesn't really matter which one we pick.
if (!isa<FunctionDecl>(DUnderlying) && !isa<VarDecl>(DUnderlying)) {
// If the existing declaration is hidden, prefer the new one. Otherwise,
// keep what we've got.
return !S.isVisible(Existing);
}
// Pick the newer declaration; it might have a more precise type.
for (Decl *Prev = DUnderlying->getPreviousDecl(); Prev;
Prev = Prev->getPreviousDecl())
if (Prev == EUnderlying)
return true;
return false;
}
/// Determine whether \p D can hide a tag declaration.
static bool canHideTag(NamedDecl *D) {
// C++ [basic.scope.declarative]p4:
// Given a set of declarations in a single declarative region [...]
// exactly one declaration shall declare a class name or enumeration name
// that is not a typedef name and the other declarations shall all refer to
// the same variable, non-static data member, or enumerator, or all refer
// to functions and function templates; in this case the class name or
// enumeration name is hidden.
// C++ [basic.scope.hiding]p2:
// A class name or enumeration name can be hidden by the name of a
// variable, data member, function, or enumerator declared in the same
// scope.
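// For example, in "struct stat {}; int stat(const char *, struct stat *);",
// the function hides the struct in ordinary lookup, while an
// elaborated-type-specifier ("struct stat") still finds the tag.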
// An UnresolvedUsingValueDecl always instantiates to one of these.
D = D->getUnderlyingDecl();
return isa<VarDecl>(D) || isa<EnumConstantDecl>(D) || isa<FunctionDecl>(D) ||
isa<FunctionTemplateDecl>(D) || isa<FieldDecl>(D) ||
isa<UnresolvedUsingValueDecl>(D);
}
/// Resolves the result kind of this lookup.
void LookupResult::resolveKind() {
unsigned N = Decls.size();
// Fast case: no possible ambiguity.
if (N == 0) {
assert(ResultKind == NotFound ||
ResultKind == NotFoundInCurrentInstantiation);
return;
}
// If there's a single decl, we need to examine it to decide what
// kind of lookup this is.
if (N == 1) {
NamedDecl *D = (*Decls.begin())->getUnderlyingDecl();
if (isa<FunctionTemplateDecl>(D))
ResultKind = FoundOverloaded;
else if (isa<UnresolvedUsingValueDecl>(D))
ResultKind = FoundUnresolvedValue;
return;
}
// Don't do any extra resolution if we've already resolved as ambiguous.
if (ResultKind == Ambiguous) return;
llvm::SmallDenseMap<NamedDecl*, unsigned, 16> Unique;
llvm::SmallDenseMap<QualType, unsigned, 16> UniqueTypes;
bool Ambiguous = false;
bool HasTag = false, HasFunction = false;
bool HasFunctionTemplate = false, HasUnresolved = false;
NamedDecl *HasNonFunction = nullptr;
llvm::SmallVector<NamedDecl*, 4> EquivalentNonFunctions;
unsigned UniqueTagIndex = 0;
unsigned I = 0;
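// The loop below compacts Decls in place: a dropped entry is overwritten
// with the last live entry and N is decremented, and the element moved into
// the current slot is re-examined on the next iteration.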
while (I < N) {
NamedDecl *D = Decls[I]->getUnderlyingDecl();
D = cast<NamedDecl>(D->getCanonicalDecl());
// Ignore an invalid declaration unless it's the only one left.
if (D->isInvalidDecl() && !(I == 0 && N == 1)) {
Decls[I] = Decls[--N];
continue;
}
llvm::Optional<unsigned> ExistingI;
// Redeclarations of types via typedef can occur both within a scope
// and, through using declarations and directives, across scopes. There is
// no ambiguity if they all refer to the same type, so unique based on the
// canonical type.
if (TypeDecl *TD = dyn_cast<TypeDecl>(D)) {
QualType T = getSema().Context.getTypeDeclType(TD);
auto UniqueResult = UniqueTypes.insert(
std::make_pair(getSema().Context.getCanonicalType(T), I));
if (!UniqueResult.second) {
// The type is not unique.
ExistingI = UniqueResult.first->second;
}
}
// For non-type declarations, check for a prior lookup result naming this
// canonical declaration.
if (!ExistingI) {
auto UniqueResult = Unique.insert(std::make_pair(D, I));
if (!UniqueResult.second) {
// We've seen this entity before.
ExistingI = UniqueResult.first->second;
}
}
if (ExistingI) {
// This is not a unique lookup result. Pick one of the results and
// discard the other.
if (isPreferredLookupResult(getSema(), getLookupKind(), Decls[I],
Decls[*ExistingI]))
Decls[*ExistingI] = Decls[I];
Decls[I] = Decls[--N];
continue;
}
// Otherwise, do some decl type analysis and then continue.
if (isa<UnresolvedUsingValueDecl>(D)) {
HasUnresolved = true;
} else if (isa<TagDecl>(D)) {
if (HasTag)
Ambiguous = true;
UniqueTagIndex = I;
HasTag = true;
} else if (isa<FunctionTemplateDecl>(D)) {
HasFunction = true;
HasFunctionTemplate = true;
} else if (isa<FunctionDecl>(D)) {
HasFunction = true;
} else {
if (HasNonFunction) {
// If we're about to create an ambiguity between two declarations that
// are equivalent, but one is an internal linkage declaration from one
// module and the other is an internal linkage declaration from another
// module, just skip it.
if (getSema().isEquivalentInternalLinkageDeclaration(HasNonFunction,
D)) {
EquivalentNonFunctions.push_back(D);
Decls[I] = Decls[--N];
continue;
}
Ambiguous = true;
}
HasNonFunction = D;
}
I++;
}
// C++ [basic.scope.hiding]p2:
// A class name or enumeration name can be hidden by the name of
// an object, function, or enumerator declared in the same
// scope. If a class or enumeration name and an object, function,
// or enumerator are declared in the same scope (in any order)
// with the same name, the class or enumeration name is hidden
// wherever the object, function, or enumerator name is visible.
// But it's still an error if there are distinct tag types found,
// even if they're not visible. (ref?)
if (N > 1 && HideTags && HasTag && !Ambiguous &&
(HasFunction || HasNonFunction || HasUnresolved)) {
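// Pick some other lookup result to compare scopes against: the first decl
// if the tag is not at index 0, otherwise the last one.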
NamedDecl *OtherDecl = Decls[UniqueTagIndex ? 0 : N - 1];
if (isa<TagDecl>(Decls[UniqueTagIndex]->getUnderlyingDecl()) &&
getContextForScopeMatching(Decls[UniqueTagIndex])->Equals(
getContextForScopeMatching(OtherDecl)) &&
canHideTag(OtherDecl))
Decls[UniqueTagIndex] = Decls[--N];
else
Ambiguous = true;
}
// FIXME: This diagnostic should really be delayed until we're done with
// the lookup result, in case the ambiguity is resolved by the caller.
if (!EquivalentNonFunctions.empty() && !Ambiguous)
getSema().diagnoseEquivalentInternalLinkageDeclarations(
getNameLoc(), HasNonFunction, EquivalentNonFunctions);
Decls.truncate(N);
if (HasNonFunction && (HasFunction || HasUnresolved))
Ambiguous = true;
if (Ambiguous)
setAmbiguous(LookupResult::AmbiguousReference);
else if (HasUnresolved)
ResultKind = LookupResult::FoundUnresolvedValue;
else if (N > 1 || HasFunctionTemplate)
ResultKind = LookupResult::FoundOverloaded;
else
ResultKind = LookupResult::Found;
}
void LookupResult::addDeclsFromBasePaths(const CXXBasePaths &P) {
CXXBasePaths::const_paths_iterator I, E;
for (I = P.begin(), E = P.end(); I != E; ++I)
for (DeclContext::lookup_iterator DI = I->Decls, DE = DI.end(); DI != DE;
++DI)
addDecl(*DI);
}
void LookupResult::setAmbiguousBaseSubobjects(CXXBasePaths &P) {
Paths = new CXXBasePaths;
Paths->swap(P);
addDeclsFromBasePaths(*Paths);
resolveKind();
setAmbiguous(AmbiguousBaseSubobjects);
}
void LookupResult::setAmbiguousBaseSubobjectTypes(CXXBasePaths &P) {
Paths = new CXXBasePaths;
Paths->swap(P);
addDeclsFromBasePaths(*Paths);
resolveKind();
setAmbiguous(AmbiguousBaseSubobjectTypes);
}
void LookupResult::print(raw_ostream &Out) {
Out << Decls.size() << " result(s)";
if (isAmbiguous()) Out << ", ambiguous";
if (Paths) Out << ", base paths present";
for (iterator I = begin(), E = end(); I != E; ++I) {
Out << "\n";
(*I)->print(Out, 2);
}
}
LLVM_DUMP_METHOD void LookupResult::dump() {
llvm::errs() << "lookup results for " << getLookupName().getAsString()
<< ":\n";
for (NamedDecl *D : *this)
D->dump();
}
/// Diagnose a missing OpenCL builtin type.
static QualType diagOpenCLBuiltinTypeError(Sema &S, llvm::StringRef TypeClass,
llvm::StringRef Name) {
S.Diag(SourceLocation(), diag::err_opencl_type_not_found)
<< TypeClass << Name;
return S.Context.VoidTy;
}
/// Look up an OpenCL enum type.
static QualType getOpenCLEnumType(Sema &S, llvm::StringRef Name) {
LookupResult Result(S, &S.Context.Idents.get(Name), SourceLocation(),
Sema::LookupTagName);
S.LookupName(Result, S.TUScope);
if (Result.empty())
return diagOpenCLBuiltinTypeError(S, "enum", Name);
EnumDecl *Decl = Result.getAsSingle<EnumDecl>();
if (!Decl)
return diagOpenCLBuiltinTypeError(S, "enum", Name);
return S.Context.getEnumType(Decl);
}
/// Look up an OpenCL typedef type.
static QualType getOpenCLTypedefType(Sema &S, llvm::StringRef Name) {
LookupResult Result(S, &S.Context.Idents.get(Name), SourceLocation(),
Sema::LookupOrdinaryName);
S.LookupName(Result, S.TUScope);
if (Result.empty())
return diagOpenCLBuiltinTypeError(S, "typedef", Name);
TypedefNameDecl *Decl = Result.getAsSingle<TypedefNameDecl>();
if (!Decl)
return diagOpenCLBuiltinTypeError(S, "typedef", Name);
return S.Context.getTypedefType(Decl);
}
/// Get the QualType instances of the return type and arguments for an OpenCL
/// builtin function signature.
/// \param S (in) The Sema instance.
/// \param OpenCLBuiltin (in) The signature currently handled.
/// \param GenTypeMaxCnt (out) Maximum number of types contained in a generic
/// type used as return type or as argument.
/// Only meaningful for generic types, otherwise equals 1.
/// \param RetTypes (out) List of the possible return types.
/// \param ArgTypes (out) List of the possible argument types. For each
/// argument, ArgTypes contains QualTypes for the Cartesian product
/// of (vector sizes) x (types).
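/// For example (illustrative), a gentype argument that expands to two
/// element types in two vector widths contributes four QualTypes for that
/// argument.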
static void GetQualTypesForOpenCLBuiltin(
Sema &S, const OpenCLBuiltinStruct &OpenCLBuiltin, unsigned &GenTypeMaxCnt,
SmallVector<QualType, 1> &RetTypes,
SmallVector<SmallVector<QualType, 1>, 5> &ArgTypes) {
// Get the QualType instances of the return types.
unsigned Sig = SignatureTable[OpenCLBuiltin.SigTableIndex];
OCL2Qual(S, TypeTable[Sig], RetTypes);
GenTypeMaxCnt = RetTypes.size();
// Get the QualType instances of the arguments.
// First type is the return type, skip it.
for (unsigned Index = 1; Index < OpenCLBuiltin.NumTypes; Index++) {
SmallVector<QualType, 1> Ty;
OCL2Qual(S, TypeTable[SignatureTable[OpenCLBuiltin.SigTableIndex + Index]],
Ty);
GenTypeMaxCnt = (Ty.size() > GenTypeMaxCnt) ? Ty.size() : GenTypeMaxCnt;
ArgTypes.push_back(std::move(Ty));
}
}
/// Create a list of the candidate function overloads for an OpenCL builtin
/// function.
/// \param Context (in) The ASTContext instance.
/// \param GenTypeMaxCnt (in) Maximum number of types contained in a generic
/// type used as return type or as argument.
/// Only meaningful for generic types, otherwise equals 1.
/// \param FunctionList (out) List of FunctionTypes.
/// \param RetTypes (in) List of the possible return types.
/// \param ArgTypes (in) List of the possible types for the arguments.
static void GetOpenCLBuiltinFctOverloads(
ASTContext &Context, unsigned GenTypeMaxCnt,
std::vector<QualType> &FunctionList, SmallVector<QualType, 1> &RetTypes,
SmallVector<SmallVector<QualType, 1>, 5> &ArgTypes) {
FunctionProtoType::ExtProtoInfo PI(
Context.getDefaultCallingConvention(false, false, true));
PI.Variadic = false;
// Do not attempt to create any FunctionTypes if there are no return types,
// which happens when a type belongs to a disabled extension.
if (RetTypes.size() == 0)
return;
// Create FunctionTypes for each (gen)type.
for (unsigned IGenType = 0; IGenType < GenTypeMaxCnt; IGenType++) {
SmallVector<QualType, 5> ArgList;
for (unsigned A = 0; A < ArgTypes.size(); A++) {
// Bail out if there is an argument that has no available types.
if (ArgTypes[A].size() == 0)
return;
// Builtins such as "max" have an "sgentype" argument that represents
// the corresponding scalar type of a gentype. The number of gentypes
// must be a multiple of the number of sgentypes.
assert(GenTypeMaxCnt % ArgTypes[A].size() == 0 &&
"argument type count not compatible with gentype type count");
unsigned Idx = IGenType % ArgTypes[A].size();
ArgList.push_back(ArgTypes[A][Idx]);
}
FunctionList.push_back(Context.getFunctionType(
RetTypes[(RetTypes.size() != 1) ? IGenType : 0], ArgList, PI));
}
}
/// When trying to resolve a function name, if isOpenCLBuiltin() returns a
/// non-null <Index, Len> pair, then the name is referencing an OpenCL
/// builtin function. Add all candidate signatures to the LookupResult.
///
/// \param S (in) The Sema instance.
/// \param LR (inout) The LookupResult instance.
/// \param II (in) The identifier being resolved.
/// \param FctIndex (in) Starting index in the BuiltinTable.
/// \param Len (in) The signature list has Len elements.
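/// For example (illustrative), with FctIndex = 10 and Len = 3, the candidate
/// signatures come from BuiltinTable[10], [11] and [12].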
static void InsertOCLBuiltinDeclarationsFromTable(Sema &S, LookupResult &LR,
IdentifierInfo *II,
const unsigned FctIndex,
const unsigned Len) {
// Whether the builtin function declaration uses generic types (gentype).
bool HasGenType = false;
// Maximum number of types contained in a generic type used as return type or
// as argument. Only meaningful for generic types, otherwise equals 1.
unsigned GenTypeMaxCnt;
ASTContext &Context = S.Context;
for (unsigned SignatureIndex = 0; SignatureIndex < Len; SignatureIndex++) {
const OpenCLBuiltinStruct &OpenCLBuiltin =
BuiltinTable[FctIndex + SignatureIndex];
// Ignore this builtin function if it is not available in the currently
// selected language version.
if (!isOpenCLVersionContainedInMask(Context.getLangOpts(),
OpenCLBuiltin.Versions))
continue;
// Ignore this builtin function if it carries an extension macro that is
// not defined. This indicates that the extension is not supported by the
// target, so the builtin function should not be available.
StringRef Extensions = FunctionExtensionTable[OpenCLBuiltin.Extension];
if (!Extensions.empty()) {
SmallVector<StringRef, 2> ExtVec;
Extensions.split(ExtVec, " ");
bool AllExtensionsDefined = true;
for (StringRef Ext : ExtVec) {
if (!S.getPreprocessor().isMacroDefined(Ext)) {
AllExtensionsDefined = false;
break;
}
}
if (!AllExtensionsDefined)
continue;
}
SmallVector<QualType, 1> RetTypes;
SmallVector<SmallVector<QualType, 1>, 5> ArgTypes;
// Obtain QualType lists for the function signature.
GetQualTypesForOpenCLBuiltin(S, OpenCLBuiltin, GenTypeMaxCnt, RetTypes,
ArgTypes);
if (GenTypeMaxCnt > 1) {
HasGenType = true;
}
// Create function overload for each type combination.
std::vector<QualType> FunctionList;
GetOpenCLBuiltinFctOverloads(Context, GenTypeMaxCnt, FunctionList, RetTypes,
ArgTypes);
SourceLocation Loc = LR.getNameLoc();
DeclContext *Parent = Context.getTranslationUnitDecl();
FunctionDecl *NewOpenCLBuiltin;
for (const auto &FTy : FunctionList) {
NewOpenCLBuiltin = FunctionDecl::Create(
Context, Parent, Loc, Loc, II, FTy, /*TInfo=*/nullptr, SC_Extern,
S.getCurFPFeatures().isFPConstrained(), false,
FTy->isFunctionProtoType());
NewOpenCLBuiltin->setImplicit();
// Create Decl objects for each parameter, adding them to the
// FunctionDecl.
const auto *FP = cast<FunctionProtoType>(FTy);
SmallVector<ParmVarDecl *, 4> ParmList;
for (unsigned IParm = 0, e = FP->getNumParams(); IParm != e; ++IParm) {
ParmVarDecl *Parm = ParmVarDecl::Create(
Context, NewOpenCLBuiltin, SourceLocation(), SourceLocation(),
nullptr, FP->getParamType(IParm), nullptr, SC_None, nullptr);
Parm->setScopeInfo(0, IParm);
ParmList.push_back(Parm);
}
NewOpenCLBuiltin->setParams(ParmList);
// Add function attributes.
if (OpenCLBuiltin.IsPure)
NewOpenCLBuiltin->addAttr(PureAttr::CreateImplicit(Context));
if (OpenCLBuiltin.IsConst)
NewOpenCLBuiltin->addAttr(ConstAttr::CreateImplicit(Context));
if (OpenCLBuiltin.IsConv)
NewOpenCLBuiltin->addAttr(ConvergentAttr::CreateImplicit(Context));
if (!S.getLangOpts().OpenCLCPlusPlus)
NewOpenCLBuiltin->addAttr(OverloadableAttr::CreateImplicit(Context));
LR.addDecl(NewOpenCLBuiltin);
}
}
// If we added overloads, we need to resolve the lookup result.
if (Len > 1 || HasGenType)
LR.resolveKind();
}
/// Look up a builtin function when name lookup would otherwise
/// fail.
bool Sema::LookupBuiltin(LookupResult &R) {
Sema::LookupNameKind NameKind = R.getLookupKind();
// If we didn't find a use of this identifier, and if the identifier
// corresponds to a compiler builtin, create the decl object for the builtin
// now, injecting it into translation unit scope, and return it.
if (NameKind == Sema::LookupOrdinaryName ||
NameKind == Sema::LookupRedeclarationWithLinkage) {
IdentifierInfo *II = R.getLookupName().getAsIdentifierInfo();
if (II) {
if (getLangOpts().CPlusPlus && NameKind == Sema::LookupOrdinaryName) {
if (II == getASTContext().getMakeIntegerSeqName()) {
R.addDecl(getASTContext().getMakeIntegerSeqDecl());
return true;
} else if (II == getASTContext().getTypePackElementName()) {
R.addDecl(getASTContext().getTypePackElementDecl());
return true;
}
}
// Check if this is an OpenCL Builtin, and if so, insert its overloads.
if (getLangOpts().OpenCL && getLangOpts().DeclareOpenCLBuiltins) {
auto Index = isOpenCLBuiltin(II->getName());
if (Index.first) {
InsertOCLBuiltinDeclarationsFromTable(*this, R, II, Index.first - 1,
Index.second);
return true;
}
}
if (DeclareRISCVVBuiltins) {
if (!RVIntrinsicManager)
RVIntrinsicManager = CreateRISCVIntrinsicManager(*this);
if (RVIntrinsicManager->CreateIntrinsicIfFound(R, II, PP))
return true;
}
// If this is a builtin on this (or all) targets, create the decl.
if (unsigned BuiltinID = II->getBuiltinID()) {
- // In C++, C2x, and OpenCL (spec v1.2 s6.9.f), we don't have any
- // predefined library functions like 'malloc'. Instead, we'll just
- // error.
- if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL ||
- getLangOpts().C2x) &&
+ // In C++ and OpenCL (spec v1.2 s6.9.f), we don't have any predefined
+ // library functions like 'malloc'. Instead, we'll just error.
+ if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL) &&
Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
return false;
if (NamedDecl *D =
LazilyCreateBuiltin(II, BuiltinID, TUScope,
R.isForRedeclaration(), R.getNameLoc())) {
R.addDecl(D);
return true;
}
}
}
}
return false;
}
/// Looks up the declaration of "struct objc_super" and
/// saves it for later use in building the builtin declarations of
/// objc_msgSendSuper and objc_msgSendSuper_stret.
static void LookupPredefedObjCSuperType(Sema &Sema, Scope *S) {
ASTContext &Context = Sema.Context;
LookupResult Result(Sema, &Context.Idents.get("objc_super"), SourceLocation(),
Sema::LookupTagName);
Sema.LookupName(Result, S);
if (Result.getResultKind() == LookupResult::Found)
if (const TagDecl *TD = Result.getAsSingle<TagDecl>())
Context.setObjCSuperType(Context.getTagDeclType(TD));
}
void Sema::LookupNecessaryTypesForBuiltin(Scope *S, unsigned ID) {
if (ID == Builtin::BIobjc_msgSendSuper)
LookupPredefedObjCSuperType(*this, S);
}
/// Determine whether we can declare a special member function within
/// the class at this point.
static bool CanDeclareSpecialMemberFunction(const CXXRecordDecl *Class) {
// We need to have a definition for the class.
if (!Class->getDefinition() || Class->isDependentContext())
return false;
// We can't be in the middle of defining the class.
return !Class->isBeingDefined();
}
void Sema::ForceDeclarationOfImplicitMembers(CXXRecordDecl *Class) {
if (!CanDeclareSpecialMemberFunction(Class))
return;
// If the default constructor has not yet been declared, do so now.
if (Class->needsImplicitDefaultConstructor())
DeclareImplicitDefaultConstructor(Class);
// If the copy constructor has not yet been declared, do so now.
if (Class->needsImplicitCopyConstructor())
DeclareImplicitCopyConstructor(Class);
// If the copy assignment operator has not yet been declared, do so now.
if (Class->needsImplicitCopyAssignment())
DeclareImplicitCopyAssignment(Class);
if (getLangOpts().CPlusPlus11) {
// If the move constructor has not yet been declared, do so now.
if (Class->needsImplicitMoveConstructor())
DeclareImplicitMoveConstructor(Class);
// If the move assignment operator has not yet been declared, do so now.
if (Class->needsImplicitMoveAssignment())
DeclareImplicitMoveAssignment(Class);
}
// If the destructor has not yet been declared, do so now.
if (Class->needsImplicitDestructor())
DeclareImplicitDestructor(Class);
}
/// Determine whether this is the name of an implicitly-declared
/// special member function.
static bool isImplicitlyDeclaredMemberFunctionName(DeclarationName Name) {
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
return true;
case DeclarationName::CXXOperatorName:
return Name.getCXXOverloadedOperator() == OO_Equal;
default:
break;
}
return false;
}
/// If there are any implicit member functions with the given name
/// that need to be declared in the given declaration context, do so.
static void DeclareImplicitMemberFunctionsWithName(Sema &S,
DeclarationName Name,
SourceLocation Loc,
const DeclContext *DC) {
if (!DC)
return;
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC))
if (Record->getDefinition() && CanDeclareSpecialMemberFunction(Record)) {
CXXRecordDecl *Class = const_cast<CXXRecordDecl *>(Record);
if (Record->needsImplicitDefaultConstructor())
S.DeclareImplicitDefaultConstructor(Class);
if (Record->needsImplicitCopyConstructor())
S.DeclareImplicitCopyConstructor(Class);
if (S.getLangOpts().CPlusPlus11 &&
Record->needsImplicitMoveConstructor())
S.DeclareImplicitMoveConstructor(Class);
}
break;
case DeclarationName::CXXDestructorName:
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC))
if (Record->getDefinition() && Record->needsImplicitDestructor() &&
CanDeclareSpecialMemberFunction(Record))
S.DeclareImplicitDestructor(const_cast<CXXRecordDecl *>(Record));
break;
case DeclarationName::CXXOperatorName:
if (Name.getCXXOverloadedOperator() != OO_Equal)
break;
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC)) {
if (Record->getDefinition() && CanDeclareSpecialMemberFunction(Record)) {
CXXRecordDecl *Class = const_cast<CXXRecordDecl *>(Record);
if (Record->needsImplicitCopyAssignment())
S.DeclareImplicitCopyAssignment(Class);
if (S.getLangOpts().CPlusPlus11 &&
Record->needsImplicitMoveAssignment())
S.DeclareImplicitMoveAssignment(Class);
}
}
break;
case DeclarationName::CXXDeductionGuideName:
S.DeclareImplicitDeductionGuides(Name.getCXXDeductionGuideTemplate(), Loc);
break;
default:
break;
}
}
// Adds all qualifying matches for a name within a decl context to the
// given lookup result. Returns true if any matches were found.
static bool LookupDirect(Sema &S, LookupResult &R, const DeclContext *DC) {
bool Found = false;
// Lazily declare C++ special member functions.
if (S.getLangOpts().CPlusPlus)
DeclareImplicitMemberFunctionsWithName(S, R.getLookupName(), R.getNameLoc(),
DC);
// Perform lookup into this declaration context.
DeclContext::lookup_result DR = DC->lookup(R.getLookupName());
for (NamedDecl *D : DR) {
if ((D = R.getAcceptableDecl(D))) {
R.addDecl(D);
Found = true;
}
}
if (!Found && DC->isTranslationUnit() && S.LookupBuiltin(R))
return true;
if (R.getLookupName().getNameKind()
!= DeclarationName::CXXConversionFunctionName ||
R.getLookupName().getCXXNameType()->isDependentType() ||
!isa<CXXRecordDecl>(DC))
return Found;
// C++ [temp.mem]p6:
// A specialization of a conversion function template is not found by
// name lookup. Instead, any conversion function templates visible in the
// context of the use are considered. [...]
const CXXRecordDecl *Record = cast<CXXRecordDecl>(DC);
if (!Record->isCompleteDefinition())
return Found;
// For conversion operators, 'operator auto' should only match
// 'operator auto'. Since 'auto' is not a type, it shouldn't be considered
// as a candidate for template substitution.
auto *ContainedDeducedType =
R.getLookupName().getCXXNameType()->getContainedDeducedType();
if (R.getLookupName().getNameKind() ==
DeclarationName::CXXConversionFunctionName &&
ContainedDeducedType && ContainedDeducedType->isUndeducedType())
return Found;
for (CXXRecordDecl::conversion_iterator U = Record->conversion_begin(),
UEnd = Record->conversion_end(); U != UEnd; ++U) {
FunctionTemplateDecl *ConvTemplate = dyn_cast<FunctionTemplateDecl>(*U);
if (!ConvTemplate)
continue;
// When we're performing lookup for the purposes of redeclaration, just
// add the conversion function template. When we deduce template
// arguments for specializations, we'll end up unifying the return
// type of the new declaration with the type of the function template.
if (R.isForRedeclaration()) {
R.addDecl(ConvTemplate);
Found = true;
continue;
}
// C++ [temp.mem]p6:
// [...] For each such operator, if argument deduction succeeds
// (14.9.2.3), the resulting specialization is used as if found by
// name lookup.
//
// When referencing a conversion function for any purpose other than
// a redeclaration (such that we'll be building an expression with the
// result), perform template argument deduction and place the
// specialization into the result set. We do this to avoid forcing all
// callers to perform special deduction for conversion functions.
TemplateDeductionInfo Info(R.getNameLoc());
FunctionDecl *Specialization = nullptr;
const FunctionProtoType *ConvProto
= ConvTemplate->getTemplatedDecl()->getType()->getAs<FunctionProtoType>();
assert(ConvProto && "Nonsensical conversion function template type");
// Compute the type of the function that we would expect the conversion
// function to have, if it were to match the name given.
// FIXME: Calling convention!
FunctionProtoType::ExtProtoInfo EPI = ConvProto->getExtProtoInfo();
EPI.ExtInfo = EPI.ExtInfo.withCallingConv(CC_C);
EPI.ExceptionSpec = EST_None;
QualType ExpectedType
= R.getSema().Context.getFunctionType(R.getLookupName().getCXXNameType(),
None, EPI);
// Perform template argument deduction against the type that we would
// expect the function to have.
if (R.getSema().DeduceTemplateArguments(ConvTemplate, nullptr, ExpectedType,
Specialization, Info)
== Sema::TDK_Success) {
R.addDecl(Specialization);
Found = true;
}
}
return Found;
}
// Performs C++ unqualified lookup into the given file context.
static bool
CppNamespaceLookup(Sema &S, LookupResult &R, ASTContext &Context,
DeclContext *NS, UnqualUsingDirectiveSet &UDirs) {
assert(NS && NS->isFileContext() && "CppNamespaceLookup() requires namespace!");
// Perform direct name lookup into the LookupCtx.
bool Found = LookupDirect(S, R, NS);
// Perform direct name lookup into the namespaces nominated by the
// using directives whose common ancestor is this namespace.
for (const UnqualUsingEntry &UUE : UDirs.getNamespacesFor(NS))
if (LookupDirect(S, R, UUE.getNominatedNamespace()))
Found = true;
R.resolveKind();
return Found;
}
static bool isNamespaceOrTranslationUnitScope(Scope *S) {
if (DeclContext *Ctx = S->getEntity())
return Ctx->isFileContext();
return false;
}
/// Find the outer declaration context from this scope. This indicates the
/// context that we should search up to (exclusive) before considering the
/// parent of the specified scope.
static DeclContext *findOuterContext(Scope *S) {
for (Scope *OuterS = S->getParent(); OuterS; OuterS = OuterS->getParent())
if (DeclContext *DC = OuterS->getLookupEntity())
return DC;
return nullptr;
}
namespace {
/// An RAII object to specify that we want to find block scope extern
/// declarations.
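/// For example, in "void f() { extern int n; }" the declaration of 'n' is
/// placed in the IDNS_LocalExtern identifier namespace rather than
/// IDNS_Ordinary, so it is only found when a lookup opts in, as this RAII
/// object does for block-scope lookups.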
struct FindLocalExternScope {
FindLocalExternScope(LookupResult &R)
: R(R), OldFindLocalExtern(R.getIdentifierNamespace() &
Decl::IDNS_LocalExtern) {
R.setFindLocalExtern(R.getIdentifierNamespace() &
(Decl::IDNS_Ordinary | Decl::IDNS_NonMemberOperator));
}
void restore() {
R.setFindLocalExtern(OldFindLocalExtern);
}
~FindLocalExternScope() {
restore();
}
LookupResult &R;
bool OldFindLocalExtern;
};
} // end anonymous namespace
bool Sema::CppLookupName(LookupResult &R, Scope *S) {
assert(getLangOpts().CPlusPlus && "Can perform only C++ lookup");
DeclarationName Name = R.getLookupName();
Sema::LookupNameKind NameKind = R.getLookupKind();
// If this is the name of an implicitly-declared special member function,
// go through the scope stack and implicitly declare such members where needed.
if (isImplicitlyDeclaredMemberFunctionName(Name)) {
for (Scope *PreS = S; PreS; PreS = PreS->getParent())
if (DeclContext *DC = PreS->getEntity())
DeclareImplicitMemberFunctionsWithName(*this, Name, R.getNameLoc(), DC);
}
// Implicitly declare member functions with the name we're looking for, if in
// fact we are in a scope where it matters.
Scope *Initial = S;
IdentifierResolver::iterator
I = IdResolver.begin(Name),
IEnd = IdResolver.end();
// First we look up the local scope.
// We don't consider using-directives, as per 7.3.4.p1 [namespace.udir]
// ...During unqualified name lookup (3.4.1), the names appear as if
// they were declared in the nearest enclosing namespace which contains
// both the using-directive and the nominated namespace.
// [Note: in this context, "contains" means "contains directly or
// indirectly".
//
// For example:
// namespace A { int i; }
// void foo() {
// int i;
// {
// using namespace A;
// ++i; // finds local 'i', A::i appears at global scope
// }
// }
//
UnqualUsingDirectiveSet UDirs(*this);
bool VisitedUsingDirectives = false;
bool LeftStartingScope = false;
// When performing a scope lookup, we want to find local extern decls.
FindLocalExternScope FindLocals(R);
for (; S && !isNamespaceOrTranslationUnitScope(S); S = S->getParent()) {
bool SearchNamespaceScope = true;
// Check whether the IdResolver has anything in this scope.
for (; I != IEnd && S->isDeclScope(*I); ++I) {
if (NamedDecl *ND = R.getAcceptableDecl(*I)) {
if (NameKind == LookupRedeclarationWithLinkage &&
!(*I)->isTemplateParameter()) {
// If it's a template parameter, we still find it, so we can diagnose
// the invalid redeclaration.
// Determine whether this (or a previous) declaration is
// out-of-scope.
if (!LeftStartingScope && !Initial->isDeclScope(*I))
LeftStartingScope = true;
// If we found something outside of our starting scope that
// does not have linkage, skip it.
if (LeftStartingScope && !((*I)->hasLinkage())) {
R.setShadowed();
continue;
}
} else {
// We found something in this scope, so we should not look at the
// namespace scope.
SearchNamespaceScope = false;
}
R.addDecl(ND);
}
}
if (!SearchNamespaceScope) {
R.resolveKind();
if (S->isClassScope())
if (CXXRecordDecl *Record =
dyn_cast_or_null<CXXRecordDecl>(S->getEntity()))
R.setNamingClass(Record);
return true;
}
if (NameKind == LookupLocalFriendName && !S->isClassScope()) {
// C++11 [class.friend]p11:
// If a friend declaration appears in a local class and the name
// specified is an unqualified name, a prior declaration is
// looked up without considering scopes that are outside the
// innermost enclosing non-class scope.
return false;
}
if (DeclContext *Ctx = S->getLookupEntity()) {
DeclContext *OuterCtx = findOuterContext(S);
for (; Ctx && !Ctx->Equals(OuterCtx); Ctx = Ctx->getLookupParent()) {
// We do not directly look into transparent contexts, since
// those entities will be found in the nearest enclosing
// non-transparent context.
if (Ctx->isTransparentContext())
continue;
// We do not look directly into function or method contexts,
// since all of the local variables and parameters of the
// function/method are present within the Scope.
if (Ctx->isFunctionOrMethod()) {
// If we have an Objective-C instance method, look for ivars
// in the corresponding interface.
if (ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(Ctx)) {
if (Method->isInstanceMethod() && Name.getAsIdentifierInfo())
if (ObjCInterfaceDecl *Class = Method->getClassInterface()) {
ObjCInterfaceDecl *ClassDeclared;
if (ObjCIvarDecl *Ivar = Class->lookupInstanceVariable(
Name.getAsIdentifierInfo(),
ClassDeclared)) {
if (NamedDecl *ND = R.getAcceptableDecl(Ivar)) {
R.addDecl(ND);
R.resolveKind();
return true;
}
}
}
}
continue;
}
// If this is a file context, we need to perform unqualified name
// lookup considering using directives.
if (Ctx->isFileContext()) {
// If we haven't handled using directives yet, do so now.
if (!VisitedUsingDirectives) {
// Add using directives from this context up to the top level.
for (DeclContext *UCtx = Ctx; UCtx; UCtx = UCtx->getParent()) {
if (UCtx->isTransparentContext())
continue;
UDirs.visit(UCtx, UCtx);
}
// Find the innermost file scope, so we can add using directives
// from local scopes.
Scope *InnermostFileScope = S;
while (InnermostFileScope &&
!isNamespaceOrTranslationUnitScope(InnermostFileScope))
InnermostFileScope = InnermostFileScope->getParent();
UDirs.visitScopeChain(Initial, InnermostFileScope);
UDirs.done();
VisitedUsingDirectives = true;
}
if (CppNamespaceLookup(*this, R, Context, Ctx, UDirs)) {
R.resolveKind();
return true;
}
continue;
}
// Perform qualified name lookup into this context.
// FIXME: In some cases, we know that every name that could be found by
// this qualified name lookup will also be on the identifier chain. For
// example, inside a class without any base classes, we never need to
// perform qualified lookup because all of the members are on top of the
// identifier chain.
if (LookupQualifiedName(R, Ctx, /*InUnqualifiedLookup=*/true))
return true;
}
}
}
// Stop if we ran out of scopes.
// FIXME: This really, really shouldn't be happening.
if (!S) return false;
// If we are looking for members, no need to look into global/namespace scope.
if (NameKind == LookupMemberName)
return false;
// Collect UsingDirectiveDecls in all scopes and, recursively, all
// namespaces nominated by those using-directives.
//
// FIXME: Cache this sorted list in Scope structure, and DeclContext, so we
// don't build it for each lookup!
if (!VisitedUsingDirectives) {
UDirs.visitScopeChain(Initial, S);
UDirs.done();
}
// If we're not performing redeclaration lookup, do not look for local
// extern declarations outside of a function scope.
if (!R.isForRedeclaration())
FindLocals.restore();
// Lookup namespace scope, and global scope.
// Unqualified name lookup in C++ requires looking into scopes
// that aren't strictly lexical, and therefore we walk through the
// context as well as walking through the scopes.
for (; S; S = S->getParent()) {
// Check whether the IdResolver has anything in this scope.
bool Found = false;
for (; I != IEnd && S->isDeclScope(*I); ++I) {
if (NamedDecl *ND = R.getAcceptableDecl(*I)) {
// We found something. Look for anything else in our scope
// with this same name and in an acceptable identifier
// namespace, so that we can construct an overload set if we
// need to.
Found = true;
R.addDecl(ND);
}
}
if (Found && S->isTemplateParamScope()) {
R.resolveKind();
return true;
}
DeclContext *Ctx = S->getLookupEntity();
if (Ctx) {
DeclContext *OuterCtx = findOuterContext(S);
for (; Ctx && !Ctx->Equals(OuterCtx); Ctx = Ctx->getLookupParent()) {
// We do not directly look into transparent contexts, since
// those entities will be found in the nearest enclosing
// non-transparent context.
if (Ctx->isTransparentContext())
continue;
// If we have a context, and it's not a context stashed in the
// template parameter scope for an out-of-line definition, also
// look into that context.
if (!(Found && S->isTemplateParamScope())) {
assert(Ctx->isFileContext() &&
"We should have been looking only at file context here already.");
// Look into context considering using-directives.
if (CppNamespaceLookup(*this, R, Context, Ctx, UDirs))
Found = true;
}
if (Found) {
R.resolveKind();
return true;
}
if (R.isForRedeclaration() && !Ctx->isTransparentContext())
return false;
}
}
if (R.isForRedeclaration() && Ctx && !Ctx->isTransparentContext())
return false;
}
return !R.empty();
}
void Sema::makeMergedDefinitionVisible(NamedDecl *ND) {
if (auto *M = getCurrentModule())
Context.mergeDefinitionIntoModule(ND, M);
else
// We're not building a module; just make the definition visible.
ND->setVisibleDespiteOwningModule();
// If ND is a template declaration, make the template parameters
// visible too. They're not (necessarily) within a mergeable DeclContext.
if (auto *TD = dyn_cast<TemplateDecl>(ND))
for (auto *Param : *TD->getTemplateParameters())
makeMergedDefinitionVisible(Param);
}
/// Find the module in which the given declaration was defined.
static Module *getDefiningModule(Sema &S, Decl *Entity) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Entity)) {
// If this function was instantiated from a template, the defining module is
// the module containing the pattern.
if (FunctionDecl *Pattern = FD->getTemplateInstantiationPattern())
Entity = Pattern;
} else if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(Entity)) {
if (CXXRecordDecl *Pattern = RD->getTemplateInstantiationPattern())
Entity = Pattern;
} else if (EnumDecl *ED = dyn_cast<EnumDecl>(Entity)) {
if (auto *Pattern = ED->getTemplateInstantiationPattern())
Entity = Pattern;
} else if (VarDecl *VD = dyn_cast<VarDecl>(Entity)) {
if (VarDecl *Pattern = VD->getTemplateInstantiationPattern())
Entity = Pattern;
}
// Walk up to the containing context. That might also have been instantiated
// from a template.
DeclContext *Context = Entity->getLexicalDeclContext();
if (Context->isFileContext())
return S.getOwningModule(Entity);
return getDefiningModule(S, cast<Decl>(Context));
}
llvm::DenseSet<Module*> &Sema::getLookupModules() {
unsigned N = CodeSynthesisContexts.size();
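// Lazily extend the per-context list: record the defining module for each
// code synthesis context not yet processed, using nullptr when that module
// is already in the cache.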
for (unsigned I = CodeSynthesisContextLookupModules.size();
I != N; ++I) {
Module *M = CodeSynthesisContexts[I].Entity ?
getDefiningModule(*this, CodeSynthesisContexts[I].Entity) :
nullptr;
if (M && !LookupModulesCache.insert(M).second)
M = nullptr;
CodeSynthesisContextLookupModules.push_back(M);
}
return LookupModulesCache;
}
/// Determine whether we can use all the declarations in the module.
bool Sema::isUsableModule(const Module *M) {
assert(M && "We shouldn't check nullness for module here");
// Return quickly if we cached the result.
if (UsableModuleUnitsCache.count(M))
return true;
// If M is the global module fragment of the current translation unit, it
// should be usable.
// [module.global.frag]p1:
// The global module fragment can be used to provide declarations that are
// attached to the global module and usable within the module unit.
if (M == GlobalModuleFragment ||
// If M is the module we're parsing, it should be usable. This covers the
// private module fragment: the private module fragment is usable only
// within the current module unit, and by the grammar it can only appear in
// the module unit currently being parsed. NOTE: This case is also covered
// by the following condition; checking it first avoids the string
// comparison below where possible.
M == getCurrentModule() ||
// The module unit which is in the same module with the current module
// unit is usable.
//
// FIXME: Here we judge whether they are in the same module by comparing
// strings. Is there a better solution?
M->getPrimaryModuleInterfaceName() ==
llvm::StringRef(getLangOpts().CurrentModule).split(':').first) {
UsableModuleUnitsCache.insert(M);
return true;
}
return false;
}
bool Sema::hasVisibleMergedDefinition(NamedDecl *Def) {
for (const Module *Merged : Context.getModulesWithMergedDefinition(Def))
if (isModuleVisible(Merged))
return true;
return false;
}
bool Sema::hasMergedDefinitionInCurrentModule(NamedDecl *Def) {
for (const Module *Merged : Context.getModulesWithMergedDefinition(Def))
if (isUsableModule(Merged))
return true;
return false;
}
template <typename ParmDecl>
static bool
hasAcceptableDefaultArgument(Sema &S, const ParmDecl *D,
llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
if (!D->hasDefaultArgument())
return false;
llvm::SmallDenseSet<const ParmDecl *, 4> Visited;
while (D && !Visited.count(D)) {
Visited.insert(D);
auto &DefaultArg = D->getDefaultArgStorage();
if (!DefaultArg.isInherited() && S.isAcceptable(D, Kind))
return true;
if (!DefaultArg.isInherited() && Modules) {
auto *NonConstD = const_cast<ParmDecl*>(D);
Modules->push_back(S.getOwningModule(NonConstD));
}
// If there was a previous default argument, maybe its parameter is
// acceptable.
D = DefaultArg.getInheritedFrom();
}
return false;
}
bool Sema::hasAcceptableDefaultArgument(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
if (auto *P = dyn_cast<TemplateTypeParmDecl>(D))
return ::hasAcceptableDefaultArgument(*this, P, Modules, Kind);
if (auto *P = dyn_cast<NonTypeTemplateParmDecl>(D))
return ::hasAcceptableDefaultArgument(*this, P, Modules, Kind);
return ::hasAcceptableDefaultArgument(
*this, cast<TemplateTemplateParmDecl>(D), Modules, Kind);
}
bool Sema::hasVisibleDefaultArgument(const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableDefaultArgument(D, Modules,
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableDefaultArgument(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableDefaultArgument(D, Modules,
Sema::AcceptableKind::Reachable);
}
template <typename Filter>
static bool
hasAcceptableDeclarationImpl(Sema &S, const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules, Filter F,
Sema::AcceptableKind Kind) {
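// Walk D's redeclaration chain, considering only redeclarations accepted by
// the filter F. Return true if any of them is acceptable; if every
// redeclaration is filtered out, the answer is vacuously true.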
bool HasFilteredRedecls = false;
for (auto *Redecl : D->redecls()) {
auto *R = cast<NamedDecl>(Redecl);
if (!F(R))
continue;
if (S.isAcceptable(R, Kind))
return true;
HasFilteredRedecls = true;
if (Modules)
Modules->push_back(R->getOwningModule());
}
// Only return false if there is at least one redecl that is not filtered out.
if (HasFilteredRedecls)
return false;
return true;
}
static bool
hasAcceptableExplicitSpecialization(Sema &S, const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
return hasAcceptableDeclarationImpl(
S, D, Modules,
[](const NamedDecl *D) {
if (auto *RD = dyn_cast<CXXRecordDecl>(D))
return RD->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization;
if (auto *FD = dyn_cast<FunctionDecl>(D))
return FD->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization;
if (auto *VD = dyn_cast<VarDecl>(D))
return VD->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization;
llvm_unreachable("unknown explicit specialization kind");
},
Kind);
}
bool Sema::hasVisibleExplicitSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return ::hasAcceptableExplicitSpecialization(*this, D, Modules,
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableExplicitSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return ::hasAcceptableExplicitSpecialization(*this, D, Modules,
Sema::AcceptableKind::Reachable);
}
static bool
hasAcceptableMemberSpecialization(Sema &S, const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
assert(isa<CXXRecordDecl>(D->getDeclContext()) &&
"not a member specialization");
return hasAcceptableDeclarationImpl(
S, D, Modules,
[](const NamedDecl *D) {
// If the specialization is declared at namespace scope, then it's a
// member specialization declaration. If it's lexically inside the class
// definition then it was instantiated.
//
// FIXME: This is a hack. There should be a better way to determine
// this.
// FIXME: What about MS-style explicit specializations declared within a
// class definition?
return D->getLexicalDeclContext()->isFileContext();
},
Kind);
}
bool Sema::hasVisibleMemberSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableMemberSpecialization(*this, D, Modules,
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableMemberSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableMemberSpecialization(*this, D, Modules,
Sema::AcceptableKind::Reachable);
}
/// Determine whether a declaration is acceptable to name lookup.
///
/// This routine determines whether the declaration D is acceptable in the
/// current lookup context, taking into account the current template
/// instantiation stack. During template instantiation, a declaration is
/// acceptable if it is acceptable from a module containing any entity on the
/// template instantiation path (by instantiating a template, you allow it to
/// see the declarations that your module can see, including those later on in
/// your module).
bool LookupResult::isAcceptableSlow(Sema &SemaRef, NamedDecl *D,
Sema::AcceptableKind Kind) {
assert(!D->isUnconditionallyVisible() &&
"should not call this: not in slow case");
Module *DeclModule = SemaRef.getOwningModule(D);
assert(DeclModule && "hidden decl has no owning module");
// If the owning module is visible, the decl is acceptable.
if (SemaRef.isModuleVisible(DeclModule,
D->isInvisibleOutsideTheOwningModule()))
return true;
// Determine whether a decl context is a file context for the purpose of
// visibility/reachability. This looks through some (export and linkage spec)
// transparent contexts, but not others (enums).
auto IsEffectivelyFileContext = [](const DeclContext *DC) {
return DC->isFileContext() || isa<LinkageSpecDecl>(DC) ||
isa<ExportDecl>(DC);
};
// If this declaration is not at namespace scope
// then it is acceptable if its lexical parent has an acceptable definition.
DeclContext *DC = D->getLexicalDeclContext();
if (DC && !IsEffectivelyFileContext(DC)) {
// For a parameter, check whether our current template declaration's
// lexical context is acceptable, not whether there's some other acceptable
// definition of it, because parameters aren't "within" the definition.
//
// In C++ we need to check for an acceptable definition due to ODR merging,
// and in C we must not because each declaration of a function gets its own
// set of declarations for tags in prototype scope.
bool AcceptableWithinParent;
if (D->isTemplateParameter()) {
bool SearchDefinitions = true;
if (const auto *DCD = dyn_cast<Decl>(DC)) {
if (const auto *TD = DCD->getDescribedTemplate()) {
TemplateParameterList *TPL = TD->getTemplateParameters();
auto Index = getDepthAndIndex(D).second;
SearchDefinitions = Index >= TPL->size() || TPL->getParam(Index) != D;
}
}
if (SearchDefinitions)
AcceptableWithinParent =
SemaRef.hasAcceptableDefinition(cast<NamedDecl>(DC), Kind);
else
AcceptableWithinParent =
isAcceptable(SemaRef, cast<NamedDecl>(DC), Kind);
} else if (isa<ParmVarDecl>(D) ||
(isa<FunctionDecl>(DC) && !SemaRef.getLangOpts().CPlusPlus))
AcceptableWithinParent = isAcceptable(SemaRef, cast<NamedDecl>(DC), Kind);
else if (D->isModulePrivate()) {
// A module-private declaration is only acceptable if an enclosing lexical
// parent was merged with another definition in the current module.
AcceptableWithinParent = false;
do {
if (SemaRef.hasMergedDefinitionInCurrentModule(cast<NamedDecl>(DC))) {
AcceptableWithinParent = true;
break;
}
DC = DC->getLexicalParent();
} while (!IsEffectivelyFileContext(DC));
} else {
AcceptableWithinParent =
SemaRef.hasAcceptableDefinition(cast<NamedDecl>(DC), Kind);
}
if (AcceptableWithinParent && SemaRef.CodeSynthesisContexts.empty() &&
Kind == Sema::AcceptableKind::Visible &&
// FIXME: Do something better in this case.
!SemaRef.getLangOpts().ModulesLocalVisibility) {
// Cache the fact that this declaration is implicitly visible because
// its parent has a visible definition.
D->setVisibleDespiteOwningModule();
}
return AcceptableWithinParent;
}
if (Kind == Sema::AcceptableKind::Visible)
return false;
assert(Kind == Sema::AcceptableKind::Reachable &&
"Additional Sema::AcceptableKind?");
return isReachableSlow(SemaRef, D);
}
bool Sema::isModuleVisible(const Module *M, bool ModulePrivate) {
// [module.global.frag]p2:
// A global-module-fragment specifies the contents of the global module
// fragment for a module unit. The global module fragment can be used to
// provide declarations that are attached to the global module and usable
// within the module unit.
//
// The global module fragment is special: it is only usable within the
// module unit in which it is defined ([module.global.frag]p2). So here we
// check whether the module is the global module fragment of the current
// translation unit.
if (M->isGlobalModule() && M != this->GlobalModuleFragment)
return false;
// The module might be ordinarily visible. For a module-private query, that
// means it is part of the current module.
if (ModulePrivate && isUsableModule(M))
return true;
// For a query which is not module-private, that means it is in our visible
// module set.
if (!ModulePrivate && VisibleModules.isVisible(M))
return true;
// Otherwise, it might be visible by virtue of the query being within a
// template instantiation or similar that is permitted to look inside M.
// Find the extra places where we need to look.
const auto &LookupModules = getLookupModules();
if (LookupModules.empty())
return false;
// If our lookup set contains the module, it's visible.
if (LookupModules.count(M))
return true;
// For a module-private query, that's everywhere we get to look.
if (ModulePrivate)
return false;
// Check whether M is transitively exported to an import of the lookup set.
return llvm::any_of(LookupModules, [&](const Module *LookupM) {
return LookupM->isModuleVisible(M);
});
}
// FIXME: Return false directly if we don't have an interface dependency on the
// translation unit containing D.
bool LookupResult::isReachableSlow(Sema &SemaRef, NamedDecl *D) {
assert(!isVisible(SemaRef, D) && "Shouldn't call the slow case.\n");
Module *DeclModule = SemaRef.getOwningModule(D);
assert(DeclModule && "hidden decl has no owning module");
// Entities in module map modules are reachable only if they're visible.
if (DeclModule->isModuleMapModule())
return false;
// If D comes from a module and SemaRef doesn't own a module, then D must
// come from another TU. If SemaRef does own a module, we can tell whether D
// comes from another TU by comparing the module units.
//
// FIXME: It would be cleaner to have a direct way to ask whether D is in
// another TU.
if (SemaRef.getCurrentModule() &&
SemaRef.getCurrentModule()->getTopLevelModule() ==
DeclModule->getTopLevelModule())
return true;
// [module.reach]/p3:
// A declaration D is reachable from a point P if:
// ...
// - D is not discarded ([module.global.frag]), appears in a translation unit
// that is reachable from P, and does not appear within a private module
// fragment.
//
// A declaration that's discarded in the GMF should be module-private.
if (D->isModulePrivate())
return false;
// [module.reach]/p1
// A translation unit U is necessarily reachable from a point P if U is a
// module interface unit on which the translation unit containing P has an
// interface dependency, or the translation unit containing P imports U, in
// either case prior to P ([module.import]).
//
// [module.import]/p10
// A translation unit has an interface dependency on a translation unit U if
// it contains a declaration (possibly a module-declaration) that imports U
// or if it has an interface dependency on a translation unit that has an
// interface dependency on U.
//
// So we could conclude the module unit U is necessarily reachable if:
// (1) The module unit U is module interface unit.
// (2) The current unit has an interface dependency on the module unit U.
//
// Here we only check the first condition, since we couldn't see
// DeclModule at all if it weren't (transitively) imported.
if (DeclModule->getTopLevelModule()->isModuleInterfaceUnit())
return true;
// [module.reach]/p2
// Additional translation units on
// which the point within the program has an interface dependency may be
// considered reachable, but it is unspecified which are and under what
// circumstances.
//
// The decision here is to treat all additional translation units as
// unreachable.
return false;
}
bool Sema::isAcceptableSlow(const NamedDecl *D, Sema::AcceptableKind Kind) {
return LookupResult::isAcceptable(*this, const_cast<NamedDecl *>(D), Kind);
}
bool Sema::shouldLinkPossiblyHiddenDecl(LookupResult &R, const NamedDecl *New) {
// FIXME: If there are both visible and hidden declarations, we need to take
// into account whether redeclaration is possible. Example:
//
// Non-imported module:
// int f(T); // #1
// Some TU:
// static int f(U); // #2, not a redeclaration of #1
// int f(T); // #3, finds both, should link with #1 if T != U, but
// // with #2 if T == U; neither should be ambiguous.
for (auto *D : R) {
if (isVisible(D))
return true;
assert(D->isExternallyDeclarable() &&
"should not have hidden, non-externally-declarable result here");
}
// This function is called once "New" is essentially complete, but before a
// previous declaration is attached. We can't query the linkage of "New" in
// general, because attaching the previous declaration can change the
// linkage of New to match the previous declaration.
//
// However, because we've just determined that there is no *visible* prior
// declaration, we can compute the linkage here. There are two possibilities:
//
// * This is not a redeclaration; it's safe to compute the linkage now.
//
// * This is a redeclaration of a prior declaration that is externally
// redeclarable. In that case, the linkage of the declaration is not
// changed by attaching the prior declaration, because both are externally
// declarable (and thus ExternalLinkage or VisibleNoLinkage).
//
// FIXME: This is subtle and fragile.
return New->isExternallyDeclarable();
}
/// Retrieve the visible declaration corresponding to D, if any.
///
/// This routine determines whether the declaration D is visible in the current
/// module, with the current imports. If not, it checks whether any
/// redeclaration of D is visible, and if so, returns that declaration.
///
/// \returns D, or a visible previous declaration of D, whichever is more recent
/// and visible. If no declaration of D is visible, returns null.
static NamedDecl *findAcceptableDecl(Sema &SemaRef, NamedDecl *D,
unsigned IDNS) {
assert(!LookupResult::isAvailableForLookup(SemaRef, D) && "not in slow case");
for (auto RD : D->redecls()) {
// Don't bother with extra checks if we already know this one isn't visible.
if (RD == D)
continue;
auto ND = cast<NamedDecl>(RD);
// FIXME: This is wrong in the case where the previous declaration is not
// visible in the same scope as D. This needs to be done much more
// carefully.
if (ND->isInIdentifierNamespace(IDNS) &&
LookupResult::isAvailableForLookup(SemaRef, ND))
return ND;
}
return nullptr;
}
bool Sema::hasVisibleDeclarationSlow(const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules) {
assert(!isVisible(D) && "not in slow case");
return hasAcceptableDeclarationImpl(
*this, D, Modules, [](const NamedDecl *) { return true; },
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableDeclarationSlow(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
assert(!isReachable(D) && "not in slow case");
return hasAcceptableDeclarationImpl(
*this, D, Modules, [](const NamedDecl *) { return true; },
Sema::AcceptableKind::Reachable);
}
NamedDecl *LookupResult::getAcceptableDeclSlow(NamedDecl *D) const {
if (auto *ND = dyn_cast<NamespaceDecl>(D)) {
// Namespaces are a bit of a special case: we expect there to be a lot of
// redeclarations of some namespaces, all declarations of a namespace are
// essentially interchangeable, all declarations are found by name lookup
// if any is, and namespaces are never looked up during template
// instantiation. So we benefit from caching the check in this case, and
// it is correct to do so.
auto *Key = ND->getCanonicalDecl();
if (auto *Acceptable = getSema().VisibleNamespaceCache.lookup(Key))
return Acceptable;
auto *Acceptable = isVisible(getSema(), Key)
? Key
: findAcceptableDecl(getSema(), Key, IDNS);
if (Acceptable)
getSema().VisibleNamespaceCache.insert(std::make_pair(Key, Acceptable));
return Acceptable;
}
return findAcceptableDecl(getSema(), D, IDNS);
}
bool LookupResult::isVisible(Sema &SemaRef, NamedDecl *D) {
// If this declaration is already visible, return it directly.
if (D->isUnconditionallyVisible())
return true;
// During template instantiation, we can refer to hidden declarations, if
// they were visible in any module along the path of instantiation.
return isAcceptableSlow(SemaRef, D, Sema::AcceptableKind::Visible);
}
bool LookupResult::isReachable(Sema &SemaRef, NamedDecl *D) {
if (D->isUnconditionallyVisible())
return true;
return isAcceptableSlow(SemaRef, D, Sema::AcceptableKind::Reachable);
}
bool LookupResult::isAvailableForLookup(Sema &SemaRef, NamedDecl *ND) {
// Visibility should already have been checked at the call site.
if (isVisible(SemaRef, ND))
return true;
// Deduction guides are declared at namespace scope generally, but they are
// only a hint to the compiler. What we actually look up is the generated
// member of the corresponding template, so it is sufficient to check the
// reachability of the template declaration.
if (auto *DeductionGuide = ND->getDeclName().getCXXDeductionGuideTemplate())
return SemaRef.hasReachableDefinition(DeductionGuide);
auto *DC = ND->getDeclContext();
// If ND is not visible and it is at namespace scope, it shouldn't be found
// by name lookup.
if (DC->isFileContext())
return false;
// [module.interface]p7
// Class and enumeration member names can be found by name lookup in any
// context in which a definition of the type is reachable.
//
// FIXME: The current implementation doesn't take scope into account. For example,
// ```
// // m.cppm
// export module m;
// enum E1 { e1 };
// // Use.cpp
// import m;
// void test() {
// auto a = E1::e1; // Error as expected.
// auto b = e1; // Should be an error: the namespace-scope name e1 is not visible
// }
// ```
// For the above example, the current implementation correctly emits an
// error for `a`. However, it doesn't diagnose `b`, because we only check
// the reachability of the parent context.
// See clang/test/CXX/module/module.interface/p7.cpp for example.
if (auto *TD = dyn_cast<TagDecl>(DC))
return SemaRef.hasReachableDefinition(TD);
return false;
}
/// Perform unqualified name lookup starting from a given
/// scope.
///
/// Unqualified name lookup (C++ [basic.lookup.unqual], C99 6.2.1) is
/// used to find names within the current scope. For example, 'x' in
/// @code
/// int x;
/// int f() {
/// return x; // unqualified name lookup finds 'x' in the global scope
/// }
/// @endcode
///
/// Different lookup criteria can find different names. For example, a
/// particular scope can have both a struct and a function of the same
/// name, and each can be found by certain lookup criteria. For more
/// information about lookup criteria, see the documentation for the
/// class LookupCriteria.
///
/// @param S The scope from which unqualified name lookup will
/// begin. If the lookup criteria permits, name lookup may also search
/// in the parent scopes.
///
/// @param [in,out] R Specifies the lookup to perform (e.g., the name to
/// look up and the lookup kind), and is updated with the results of lookup
/// including zero or more declarations and possibly additional information
/// used to diagnose ambiguities.
///
/// @returns \c true if lookup succeeded and false otherwise.
bool Sema::LookupName(LookupResult &R, Scope *S, bool AllowBuiltinCreation,
bool ForceNoCPlusPlus) {
DeclarationName Name = R.getLookupName();
if (!Name) return false;
LookupNameKind NameKind = R.getLookupKind();
if (!getLangOpts().CPlusPlus || ForceNoCPlusPlus) {
// Unqualified name lookup in C/Objective-C is purely lexical, so
// search in the declarations attached to the name.
if (NameKind == Sema::LookupRedeclarationWithLinkage) {
// Find the nearest non-transparent declaration scope.
while (!(S->getFlags() & Scope::DeclScope) ||
(S->getEntity() && S->getEntity()->isTransparentContext()))
S = S->getParent();
}
// When performing a scope lookup, we want to find local extern decls.
FindLocalExternScope FindLocals(R);
// Scan up the scope chain looking for a decl that matches this
// identifier that is in the appropriate namespace. This search
// should not take long, as shadowing of names is uncommon, and
// deep shadowing is extremely uncommon.
bool LeftStartingScope = false;
for (IdentifierResolver::iterator I = IdResolver.begin(Name),
IEnd = IdResolver.end();
I != IEnd; ++I)
if (NamedDecl *D = R.getAcceptableDecl(*I)) {
if (NameKind == LookupRedeclarationWithLinkage) {
// Determine whether this (or a previous) declaration is
// out-of-scope.
if (!LeftStartingScope && !S->isDeclScope(*I))
LeftStartingScope = true;
// If we found something outside of our starting scope that
// does not have linkage, skip it.
if (LeftStartingScope && !((*I)->hasLinkage())) {
R.setShadowed();
continue;
}
}
else if (NameKind == LookupObjCImplicitSelfParam &&
!isa<ImplicitParamDecl>(*I))
continue;
R.addDecl(D);
// Check whether there are any other declarations with the same name
// and in the same scope.
if (I != IEnd) {
// Find the scope in which this declaration was declared (if it
// actually exists in a Scope).
while (S && !S->isDeclScope(D))
S = S->getParent();
// If the scope containing the declaration is the translation unit,
// then we'll need to perform our checks based on the matching
// DeclContexts rather than matching scopes.
if (S && isNamespaceOrTranslationUnitScope(S))
S = nullptr;
// Compute the DeclContext, if we need it.
DeclContext *DC = nullptr;
if (!S)
DC = (*I)->getDeclContext()->getRedeclContext();
IdentifierResolver::iterator LastI = I;
for (++LastI; LastI != IEnd; ++LastI) {
if (S) {
// Match based on scope.
if (!S->isDeclScope(*LastI))
break;
} else {
// Match based on DeclContext.
DeclContext *LastDC
= (*LastI)->getDeclContext()->getRedeclContext();
if (!LastDC->Equals(DC))
break;
}
// If the declaration is in the right namespace and visible, add it.
if (NamedDecl *LastD = R.getAcceptableDecl(*LastI))
R.addDecl(LastD);
}
R.resolveKind();
}
return true;
}
} else {
// Perform C++ unqualified name lookup.
if (CppLookupName(R, S))
return true;
}
// If we didn't find a use of this identifier, and if the identifier
// corresponds to a compiler builtin, create the decl object for the builtin
// now, injecting it into translation unit scope, and return it.
if (AllowBuiltinCreation && LookupBuiltin(R))
return true;
// If we didn't find a use of this identifier, the ExternalSource
// may be able to handle the situation.
// Note: some lookup failures are expected!
// See e.g. R.isForRedeclaration().
return (ExternalSource && ExternalSource->LookupUnqualified(R, S));
}
/// Perform qualified name lookup in the namespaces nominated by
/// using directives by the given context.
///
/// C++98 [namespace.qual]p2:
/// Given X::m (where X is a user-declared namespace), or given \::m
/// (where X is the global namespace), let S be the set of all
/// declarations of m in X and in the transitive closure of all
/// namespaces nominated by using-directives in X and its used
/// namespaces, except that using-directives are ignored in any
/// namespace, including X, directly containing one or more
/// declarations of m. No namespace is searched more than once in
/// the lookup of a name. If S is the empty set, the program is
/// ill-formed. Otherwise, if S has exactly one member, or if the
/// context of the reference is a using-declaration
/// (namespace.udecl), S is the required set of declarations of
/// m. Otherwise if the use of m is not one that allows a unique
/// declaration to be chosen from S, the program is ill-formed.
///
/// C++98 [namespace.qual]p5:
/// During the lookup of a qualified namespace member name, if the
/// lookup finds more than one declaration of the member, and if one
/// declaration introduces a class name or enumeration name and the
/// other declarations either introduce the same object, the same
/// enumerator or a set of functions, the non-type name hides the
/// class or enumeration name if and only if the declarations are
/// from the same namespace; otherwise (the declarations are from
/// different namespaces), the program is ill-formed.
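///
/// For example (an illustrative sketch, not text from the standard):
/// @code
/// namespace A { struct X {}; }
/// namespace B { void X(); }
/// namespace N { using namespace A; using namespace B; }
/// N::X *p; // ill-formed: the tag A::X and the function B::X are found
///          // from different namespaces, so the function does not hide
///          // the tag and the lookup is ambiguous
/// @endcode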
static bool LookupQualifiedNameInUsingDirectives(Sema &S, LookupResult &R,
DeclContext *StartDC) {
assert(StartDC->isFileContext() && "start context is not a file context");
// We have not yet looked into these namespaces, much less added
// their "using-children" to the queue.
SmallVector<NamespaceDecl*, 8> Queue;
// We have at least added all these contexts to the queue.
llvm::SmallPtrSet<DeclContext*, 8> Visited;
Visited.insert(StartDC);
// We have already looked into the initial namespace; seed the queue
// with its using-children.
for (auto *I : StartDC->using_directives()) {
NamespaceDecl *ND = I->getNominatedNamespace()->getOriginalNamespace();
if (S.isVisible(I) && Visited.insert(ND).second)
Queue.push_back(ND);
}
// The easiest way to implement the restriction in [namespace.qual]p5
// is to check whether any of the individual results found a tag
// and, if so, to declare an ambiguity if the final result is not
// a tag.
bool FoundTag = false;
bool FoundNonTag = false;
LookupResult LocalR(LookupResult::Temporary, R);
bool Found = false;
while (!Queue.empty()) {
NamespaceDecl *ND = Queue.pop_back_val();
// We go through some convolutions here to avoid copying results
// between LookupResults.
bool UseLocal = !R.empty();
LookupResult &DirectR = UseLocal ? LocalR : R;
bool FoundDirect = LookupDirect(S, DirectR, ND);
if (FoundDirect) {
// First do any local hiding.
DirectR.resolveKind();
// If the local result is a tag, remember that.
if (DirectR.isSingleTagDecl())
FoundTag = true;
else
FoundNonTag = true;
// Append the local results to the total results if necessary.
if (UseLocal) {
R.addAllDecls(LocalR);
LocalR.clear();
}
}
// If we find names in this namespace, ignore its using directives.
if (FoundDirect) {
Found = true;
continue;
}
for (auto I : ND->using_directives()) {
NamespaceDecl *Nom = I->getNominatedNamespace();
if (S.isVisible(I) && Visited.insert(Nom).second)
Queue.push_back(Nom);
}
}
if (Found) {
if (FoundTag && FoundNonTag)
R.setAmbiguousQualifiedTagHiding();
else
R.resolveKind();
}
return Found;
}
/// Perform qualified name lookup into a given context.
///
/// Qualified name lookup (C++ [basic.lookup.qual]) is used to find
/// names when the context of those names is explicitly specified, e.g.,
/// "std::vector" or "x->member", or as part of unqualified name lookup.
///
/// Different lookup criteria can find different names. For example, a
/// particular scope can have both a struct and a function of the same
/// name, and each can be found by certain lookup criteria. For more
/// information about lookup criteria, see the documentation for the
/// class LookupCriteria.
///
/// \param R captures both the lookup criteria and any lookup results found.
///
/// \param LookupCtx The context in which qualified name lookup will
/// search. If the lookup criteria permits, name lookup may also search
/// in the parent contexts or (for C++ classes) base classes.
///
/// \param InUnqualifiedLookup true if this is qualified name lookup that
/// occurs as part of unqualified name lookup.
///
/// \returns true if lookup succeeded, false if it failed.
bool Sema::LookupQualifiedName(LookupResult &R, DeclContext *LookupCtx,
bool InUnqualifiedLookup) {
assert(LookupCtx && "Sema::LookupQualifiedName requires a lookup context");
if (!R.getLookupName())
return false;
// Make sure that the declaration context is complete.
assert((!isa<TagDecl>(LookupCtx) ||
LookupCtx->isDependentContext() ||
cast<TagDecl>(LookupCtx)->isCompleteDefinition() ||
cast<TagDecl>(LookupCtx)->isBeingDefined()) &&
"Declaration context must already be complete!");
struct QualifiedLookupInScope {
bool oldVal;
DeclContext *Context;
// Set a flag in the DeclContext informing the debugger that we're doing a qualified-name lookup
QualifiedLookupInScope(DeclContext *ctx) : Context(ctx) {
oldVal = ctx->setUseQualifiedLookup();
}
~QualifiedLookupInScope() {
Context->setUseQualifiedLookup(oldVal);
}
} QL(LookupCtx);
if (LookupDirect(*this, R, LookupCtx)) {
R.resolveKind();
if (isa<CXXRecordDecl>(LookupCtx))
R.setNamingClass(cast<CXXRecordDecl>(LookupCtx));
return true;
}
// Don't descend into implied contexts for redeclarations.
// C++98 [namespace.qual]p6:
// In a declaration for a namespace member in which the
// declarator-id is a qualified-id, given that the qualified-id
// for the namespace member has the form
// nested-name-specifier unqualified-id
// the unqualified-id shall name a member of the namespace
// designated by the nested-name-specifier.
// See also [class.mfct]p5 and [class.static.data]p2.
if (R.isForRedeclaration())
return false;
// If this is a namespace, look it up in the implied namespaces.
if (LookupCtx->isFileContext())
return LookupQualifiedNameInUsingDirectives(*this, R, LookupCtx);
// If this isn't a C++ class, we aren't allowed to look into base
// classes; we're done.
CXXRecordDecl *LookupRec = dyn_cast<CXXRecordDecl>(LookupCtx);
if (!LookupRec || !LookupRec->getDefinition())
return false;
// We're done for lookups that can never succeed for C++ classes.
if (R.getLookupKind() == LookupOperatorName ||
R.getLookupKind() == LookupNamespaceName ||
R.getLookupKind() == LookupObjCProtocolName ||
R.getLookupKind() == LookupLabel)
return false;
// If we're performing qualified name lookup into a dependent class,
// then we are actually looking into a current instantiation. If we have any
// dependent base classes, then we either have to delay lookup until
// template instantiation time (at which point all bases will be available)
// or we have to fail.
if (!InUnqualifiedLookup && LookupRec->isDependentContext() &&
LookupRec->hasAnyDependentBases()) {
R.setNotFoundInCurrentInstantiation();
return false;
}
// Perform lookup into our base classes.
DeclarationName Name = R.getLookupName();
unsigned IDNS = R.getIdentifierNamespace();
// Look for this member in our base classes.
auto BaseCallback = [Name, IDNS](const CXXBaseSpecifier *Specifier,
CXXBasePath &Path) -> bool {
CXXRecordDecl *BaseRecord = Specifier->getType()->getAsCXXRecordDecl();
// Drop leading non-matching lookup results from the declaration list so
// we don't need to consider them again below.
for (Path.Decls = BaseRecord->lookup(Name).begin();
Path.Decls != Path.Decls.end(); ++Path.Decls) {
if ((*Path.Decls)->isInIdentifierNamespace(IDNS))
return true;
}
return false;
};
CXXBasePaths Paths;
Paths.setOrigin(LookupRec);
if (!LookupRec->lookupInBases(BaseCallback, Paths))
return false;
R.setNamingClass(LookupRec);
// C++ [class.member.lookup]p2:
// [...] If the resulting set of declarations are not all from
// sub-objects of the same type, or the set has a nonstatic member
// and includes members from distinct sub-objects, there is an
// ambiguity and the program is ill-formed. Otherwise that set is
// the result of the lookup.
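//
// For example (an illustrative sketch):
//   struct A { int m; };
//   struct B : A {};
//   struct C : A {};
//   struct D : B, C {};
//   int f(D d) { return d.m; } // error: 'm' is found in two distinct
//                              // A subobjects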
QualType SubobjectType;
int SubobjectNumber = 0;
AccessSpecifier SubobjectAccess = AS_none;
// Check whether the given lookup result contains only static members.
auto HasOnlyStaticMembers = [&](DeclContext::lookup_iterator Result) {
for (DeclContext::lookup_iterator I = Result, E = I.end(); I != E; ++I)
if ((*I)->isInIdentifierNamespace(IDNS) && (*I)->isCXXInstanceMember())
return false;
return true;
};
bool TemplateNameLookup = R.isTemplateNameLookup();
// Determine whether two sets of members contain the same members, as
// required by C++ [class.member.lookup]p6.
auto HasSameDeclarations = [&](DeclContext::lookup_iterator A,
DeclContext::lookup_iterator B) {
using Iterator = DeclContextLookupResult::iterator;
using Result = const void *;
auto Next = [&](Iterator &It, Iterator End) -> Result {
while (It != End) {
NamedDecl *ND = *It++;
if (!ND->isInIdentifierNamespace(IDNS))
continue;
// C++ [temp.local]p3:
// A lookup that finds an injected-class-name (10.2) can result in
// an ambiguity in certain cases (for example, if it is found in
// more than one base class). If all of the injected-class-names
// that are found refer to specializations of the same class
// template, and if the name is used as a template-name, the
// reference refers to the class template itself and not a
// specialization thereof, and is not ambiguous.
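// For example (an illustrative sketch):
//   template <typename T> struct Base {};
//   struct D : Base<int>, Base<float> {};
// Lookup of 'Base' as a template-name inside D finds two
// injected-class-names, both referring to specializations of the same
// template, so the result is the template Base itself, unambiguously.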
if (TemplateNameLookup)
if (auto *TD = getAsTemplateNameDecl(ND))
ND = TD;
// C++ [class.member.lookup]p3:
// type declarations (including injected-class-names) are replaced by
// the types they designate
if (const TypeDecl *TD = dyn_cast<TypeDecl>(ND->getUnderlyingDecl())) {
QualType T = Context.getTypeDeclType(TD);
return T.getCanonicalType().getAsOpaquePtr();
}
return ND->getUnderlyingDecl()->getCanonicalDecl();
}
return nullptr;
};
// We'll often find the declarations are in the same order. Handle this
// case (and the special case of only one declaration) efficiently.
Iterator AIt = A, BIt = B, AEnd, BEnd;
while (true) {
Result AResult = Next(AIt, AEnd);
Result BResult = Next(BIt, BEnd);
if (!AResult && !BResult)
return true;
if (!AResult || !BResult)
return false;
if (AResult != BResult) {
// Found a mismatch; carefully check both lists, accounting for the
// possibility of declarations appearing more than once.
llvm::SmallDenseMap<Result, bool, 32> AResults;
for (; AResult; AResult = Next(AIt, AEnd))
AResults.insert({AResult, /*FoundInB*/false});
unsigned Found = 0;
for (; BResult; BResult = Next(BIt, BEnd)) {
auto It = AResults.find(BResult);
if (It == AResults.end())
return false;
if (!It->second) {
It->second = true;
++Found;
}
}
return AResults.size() == Found;
}
}
};
for (CXXBasePaths::paths_iterator Path = Paths.begin(), PathEnd = Paths.end();
Path != PathEnd; ++Path) {
const CXXBasePathElement &PathElement = Path->back();
// Pick the best (i.e., most permissive, i.e., numerically lowest) access
// across all paths.
SubobjectAccess = std::min(SubobjectAccess, Path->Access);
// Determine whether we're looking at a distinct sub-object or not.
if (SubobjectType.isNull()) {
// This is the first subobject we've looked at. Record its type.
SubobjectType = Context.getCanonicalType(PathElement.Base->getType());
SubobjectNumber = PathElement.SubobjectNumber;
continue;
}
if (SubobjectType !=
Context.getCanonicalType(PathElement.Base->getType())) {
// We found members of the given name in two subobjects of
// different types. If the declaration sets aren't the same, this
// lookup is ambiguous.
//
// FIXME: The language rule says that this applies irrespective of
// whether the sets contain only static members.
if (HasOnlyStaticMembers(Path->Decls) &&
HasSameDeclarations(Paths.begin()->Decls, Path->Decls))
continue;
R.setAmbiguousBaseSubobjectTypes(Paths);
return true;
}
// FIXME: This language rule no longer exists. Checking for ambiguous base
// subobjects should be done as part of formation of a class member access
// expression (when converting the object parameter to the member's type).
if (SubobjectNumber != PathElement.SubobjectNumber) {
// We have a different subobject of the same type.
// C++ [class.member.lookup]p5:
// A static member, a nested type or an enumerator defined in
// a base class T can unambiguously be found even if an object
// has more than one base class subobject of type T.
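//
// For example (an illustrative sketch):
//   struct A { static int s; };
//   struct B : A {};
//   struct C : A {};
//   struct D : B, C {};
//   int g(D d) { return d.s; } // OK: A::s is static, so the two A
//                              // subobjects introduce no ambiguity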
if (HasOnlyStaticMembers(Path->Decls))
continue;
// We have found a nonstatic member name in multiple, distinct
// subobjects. Name lookup is ambiguous.
R.setAmbiguousBaseSubobjects(Paths);
return true;
}
}
// Lookup in a base class succeeded; return these results.
for (DeclContext::lookup_iterator I = Paths.front().Decls, E = I.end();
I != E; ++I) {
AccessSpecifier AS = CXXRecordDecl::MergeAccess(SubobjectAccess,
(*I)->getAccess());
if (NamedDecl *ND = R.getAcceptableDecl(*I))
R.addDecl(ND, AS);
}
R.resolveKind();
return true;
}
/// Performs qualified name lookup, or a special kind of lookup for the
/// "__super::" scope specifier.
///
/// This routine is a convenience overload meant to be called from contexts
/// that need to perform a qualified name lookup with an optional C++ scope
/// specifier that might require a special kind of lookup.
///
/// \param R captures both the lookup criteria and any lookup results found.
///
/// \param LookupCtx The context in which qualified name lookup will
/// search.
///
/// \param SS An optional C++ scope-specifier.
///
/// \returns true if lookup succeeded, false if it failed.
bool Sema::LookupQualifiedName(LookupResult &R, DeclContext *LookupCtx,
CXXScopeSpec &SS) {
auto *NNS = SS.getScopeRep();
if (NNS && NNS->getKind() == NestedNameSpecifier::Super)
return LookupInSuper(R, NNS->getAsRecordDecl());
else
return LookupQualifiedName(R, LookupCtx);
}
/// Performs name lookup for a name that was parsed in the
/// source code, and may contain a C++ scope specifier.
///
/// This routine is a convenience routine meant to be called from
/// contexts that receive a name and an optional C++ scope specifier
/// (e.g., "N::M::x"). It will then perform either qualified or
/// unqualified name lookup (with LookupQualifiedName or LookupName,
/// respectively) on the given name and return those results. It will
/// perform a special kind of lookup for the "__super::" scope specifier.
///
/// @param S The scope from which unqualified name lookup will
/// begin.
///
/// @param SS An optional C++ scope-specifier, e.g., "::N::M".
///
/// @param EnteringContext Indicates whether we are going to enter the
/// context of the scope-specifier SS (if present).
///
/// @returns True if any decls were found (but possibly ambiguous)
bool Sema::LookupParsedName(LookupResult &R, Scope *S, CXXScopeSpec *SS,
bool AllowBuiltinCreation, bool EnteringContext) {
if (SS && SS->isInvalid()) {
// When the scope specifier is invalid, don't even look for
// anything.
return false;
}
if (SS && SS->isSet()) {
NestedNameSpecifier *NNS = SS->getScopeRep();
if (NNS->getKind() == NestedNameSpecifier::Super)
return LookupInSuper(R, NNS->getAsRecordDecl());
if (DeclContext *DC = computeDeclContext(*SS, EnteringContext)) {
// We have resolved the scope specifier to a particular declaration
// context, and will perform name lookup in that context.
if (!DC->isDependentContext() && RequireCompleteDeclContext(*SS, DC))
return false;
R.setContextRange(SS->getRange());
return LookupQualifiedName(R, DC);
}
// We could not resolve the scope specifier to a specific declaration
// context, which means that SS refers to an unknown specialization.
// Name lookup can't find anything in this case.
R.setNotFoundInCurrentInstantiation();
R.setContextRange(SS->getRange());
return false;
}
// Perform unqualified name lookup starting in the given scope.
return LookupName(R, S, AllowBuiltinCreation);
}
/// Perform qualified name lookup into all base classes of the given
/// class.
///
/// \param R captures both the lookup criteria and any lookup results found.
///
/// \param Class The context in which qualified name lookup will
/// search. Name lookup will search in all base classes merging the results.
///
/// @returns True if any decls were found (but possibly ambiguous)
bool Sema::LookupInSuper(LookupResult &R, CXXRecordDecl *Class) {
// The access-control rules we use here are essentially the rules for
// doing a lookup in Class that just magically skipped the direct
// members of Class itself. That is, the naming class is Class, and the
// access includes the access of the base.
for (const auto &BaseSpec : Class->bases()) {
CXXRecordDecl *RD = cast<CXXRecordDecl>(
BaseSpec.getType()->castAs<RecordType>()->getDecl());
LookupResult Result(*this, R.getLookupNameInfo(), R.getLookupKind());
Result.setBaseObjectType(Context.getRecordType(Class));
LookupQualifiedName(Result, RD);
// Copy the lookup results into the target, merging the base's access into
// the path access.
for (auto I = Result.begin(), E = Result.end(); I != E; ++I) {
R.addDecl(I.getDecl(),
CXXRecordDecl::MergeAccess(BaseSpec.getAccessSpecifier(),
I.getAccess()));
}
Result.suppressDiagnostics();
}
R.resolveKind();
R.setNamingClass(Class);
return !R.empty();
}
/// Produce a diagnostic describing the ambiguity that resulted
/// from name lookup.
///
/// \param Result The result of the ambiguous lookup to be diagnosed.
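///
/// For example (an illustrative sketch of an ambiguous reference):
/// @code
/// namespace A { int i; }
/// namespace B { int i; }
/// using namespace A;
/// using namespace B;
/// int j = i; // ambiguous: A::i or B::i
/// @endcode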
void Sema::DiagnoseAmbiguousLookup(LookupResult &Result) {
assert(Result.isAmbiguous() && "Lookup result must be ambiguous");
DeclarationName Name = Result.getLookupName();
SourceLocation NameLoc = Result.getNameLoc();
SourceRange LookupRange = Result.getContextRange();
switch (Result.getAmbiguityKind()) {
case LookupResult::AmbiguousBaseSubobjects: {
CXXBasePaths *Paths = Result.getBasePaths();
QualType SubobjectType = Paths->front().back().Base->getType();
Diag(NameLoc, diag::err_ambiguous_member_multiple_subobjects)
<< Name << SubobjectType << getAmbiguousPathsDisplayString(*Paths)
<< LookupRange;
DeclContext::lookup_iterator Found = Paths->front().Decls;
while (isa<CXXMethodDecl>(*Found) &&
cast<CXXMethodDecl>(*Found)->isStatic())
++Found;
Diag((*Found)->getLocation(), diag::note_ambiguous_member_found);
break;
}
case LookupResult::AmbiguousBaseSubobjectTypes: {
Diag(NameLoc, diag::err_ambiguous_member_multiple_subobject_types)
<< Name << LookupRange;
CXXBasePaths *Paths = Result.getBasePaths();
std::set<const NamedDecl *> DeclsPrinted;
for (CXXBasePaths::paths_iterator Path = Paths->begin(),
PathEnd = Paths->end();
Path != PathEnd; ++Path) {
const NamedDecl *D = *Path->Decls;
if (!D->isInIdentifierNamespace(Result.getIdentifierNamespace()))
continue;
if (DeclsPrinted.insert(D).second) {
if (const auto *TD = dyn_cast<TypedefNameDecl>(D->getUnderlyingDecl()))
Diag(D->getLocation(), diag::note_ambiguous_member_type_found)
<< TD->getUnderlyingType();
else if (const auto *TD = dyn_cast<TypeDecl>(D->getUnderlyingDecl()))
Diag(D->getLocation(), diag::note_ambiguous_member_type_found)
<< Context.getTypeDeclType(TD);
else
Diag(D->getLocation(), diag::note_ambiguous_member_found);
}
}
break;
}
case LookupResult::AmbiguousTagHiding: {
Diag(NameLoc, diag::err_ambiguous_tag_hiding) << Name << LookupRange;
llvm::SmallPtrSet<NamedDecl*, 8> TagDecls;
for (auto *D : Result)
if (TagDecl *TD = dyn_cast<TagDecl>(D)) {
TagDecls.insert(TD);
Diag(TD->getLocation(), diag::note_hidden_tag);
}
for (auto *D : Result)
if (!isa<TagDecl>(D))
Diag(D->getLocation(), diag::note_hiding_object);
// For recovery purposes, go ahead and implement the hiding.
LookupResult::Filter F = Result.makeFilter();
while (F.hasNext()) {
if (TagDecls.count(F.next()))
F.erase();
}
F.done();
break;
}
case LookupResult::AmbiguousReference: {
Diag(NameLoc, diag::err_ambiguous_reference) << Name << LookupRange;
for (auto *D : Result)
Diag(D->getLocation(), diag::note_ambiguous_candidate) << D;
break;
}
}
}
namespace {
struct AssociatedLookup {
AssociatedLookup(Sema &S, SourceLocation InstantiationLoc,
Sema::AssociatedNamespaceSet &Namespaces,
Sema::AssociatedClassSet &Classes)
: S(S), Namespaces(Namespaces), Classes(Classes),
InstantiationLoc(InstantiationLoc) {
}
bool addClassTransitive(CXXRecordDecl *RD) {
Classes.insert(RD);
return ClassesTransitive.insert(RD);
}
Sema &S;
Sema::AssociatedNamespaceSet &Namespaces;
Sema::AssociatedClassSet &Classes;
SourceLocation InstantiationLoc;
private:
Sema::AssociatedClassSet ClassesTransitive;
};
} // end anonymous namespace
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType T);
// Given the declaration context \param Ctx of a class, class template or
// enumeration, add the associated namespaces to \param Namespaces as described
// in [basic.lookup.argdep]p2.
static void CollectEnclosingNamespace(Sema::AssociatedNamespaceSet &Namespaces,
DeclContext *Ctx) {
// The exact wording has been changed in C++14 as a result of
// CWG 1691 (see also CWG 1690 and CWG 1692). We apply it unconditionally
// to all language versions since it is possible to return a local type
// from a lambda in C++11.
//
// C++14 [basic.lookup.argdep]p2:
// If T is a class type [...]. Its associated namespaces are the innermost
// enclosing namespaces of its associated classes. [...]
//
// If T is an enumeration type, its associated namespace is the innermost
// enclosing namespace of its declaration. [...]
// We additionally skip inline namespaces. The innermost non-inline namespace
// contains all names of all its nested inline namespaces anyway, so we can
// replace the entire inline namespace tree with its root.
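//
// For example (an illustrative sketch):
//   namespace N { inline namespace V1 { struct S {}; void f(S); } }
// For an argument of type N::V1::S, the associated namespace recorded
// here is N, the innermost non-inline enclosing namespace; N::V1::f is
// still found, since members of V1 are visible in N.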
while (!Ctx->isFileContext() || Ctx->isInlineNamespace())
Ctx = Ctx->getParent();
Namespaces.insert(Ctx->getPrimaryContext());
}
// Add the associated classes and namespaces for argument-dependent
// lookup that involves a template argument (C++ [basic.lookup.argdep]p2).
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result,
const TemplateArgument &Arg) {
// C++ [basic.lookup.argdep]p2, last bullet:
// -- [...] ;
switch (Arg.getKind()) {
case TemplateArgument::Null:
break;
case TemplateArgument::Type:
// [...] the namespaces and classes associated with the types of the
// template arguments provided for template type parameters (excluding
// template template parameters)
addAssociatedClassesAndNamespaces(Result, Arg.getAsType());
break;
case TemplateArgument::Template:
case TemplateArgument::TemplateExpansion: {
// [...] the namespaces in which any template template arguments are
// defined; and the classes in which any member templates used as
// template template arguments are defined.
TemplateName Template = Arg.getAsTemplateOrTemplatePattern();
if (ClassTemplateDecl *ClassTemplate
= dyn_cast<ClassTemplateDecl>(Template.getAsTemplateDecl())) {
DeclContext *Ctx = ClassTemplate->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this class.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
}
break;
}
case TemplateArgument::Declaration:
case TemplateArgument::Integral:
case TemplateArgument::Expression:
case TemplateArgument::NullPtr:
// [Note: non-type template arguments do not contribute to the set of
// associated namespaces. ]
break;
case TemplateArgument::Pack:
for (const auto &P : Arg.pack_elements())
addAssociatedClassesAndNamespaces(Result, P);
break;
}
}
// Add the associated classes and namespaces for argument-dependent lookup
// with an argument of class type (C++ [basic.lookup.argdep]p2).
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result,
CXXRecordDecl *Class) {
// Just silently ignore anything whose name is __va_list_tag.
if (Class->getDeclName() == Result.S.VAListTagName)
return;
// C++ [basic.lookup.argdep]p2:
// [...]
// -- If T is a class type (including unions), its associated
// classes are: the class itself; the class of which it is a
// member, if any; and its direct and indirect base classes.
// Its associated namespaces are the innermost enclosing
// namespaces of its associated classes.
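//
// For example (an illustrative sketch):
//   namespace N { struct S {}; void f(S); }
//   void g() { N::S s; f(s); } // unqualified 'f' is found in N via ADL
//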
// Add the class of which it is a member, if any.
DeclContext *Ctx = Class->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this class.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
// -- If T is a template-id, its associated namespaces and classes are
// the namespace in which the template is defined; for member
// templates, the member template's class; the namespaces and classes
// associated with the types of the template arguments provided for
// template type parameters (excluding template template parameters); the
// namespaces in which any template template arguments are defined; and
// the classes in which any member templates used as template template
// arguments are defined. [Note: non-type template arguments do not
// contribute to the set of associated namespaces. ]
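//
// For example (an illustrative sketch):
//   namespace M { struct A {}; }
//   namespace N { template <typename T> struct V {}; }
// An argument of type N::V<M::A> associates both N (the template's
// namespace) and M (from the template argument).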
if (ClassTemplateSpecializationDecl *Spec
= dyn_cast<ClassTemplateSpecializationDecl>(Class)) {
DeclContext *Ctx = Spec->getSpecializedTemplate()->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this class.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
for (unsigned I = 0, N = TemplateArgs.size(); I != N; ++I)
addAssociatedClassesAndNamespaces(Result, TemplateArgs[I]);
}
// Add the class itself. If we've already transitively visited this class,
// we don't need to visit base classes.
if (!Result.addClassTransitive(Class))
return;
// Only recurse into base classes for complete types.
if (!Result.S.isCompleteType(Result.InstantiationLoc,
Result.S.Context.getRecordType(Class)))
return;
// Add direct and indirect base classes along with their associated
// namespaces.
SmallVector<CXXRecordDecl *, 32> Bases;
Bases.push_back(Class);
while (!Bases.empty()) {
// Pop this class off the stack.
Class = Bases.pop_back_val();
// Visit the base classes.
for (const auto &Base : Class->bases()) {
const RecordType *BaseType = Base.getType()->getAs<RecordType>();
// In dependent contexts, we do ADL twice, and the first time around,
// the base type might be a dependent TemplateSpecializationType, or a
// TemplateTypeParmType. If that happens, simply ignore it.
// FIXME: If we want to support export, we probably need to add the
// namespace of the template in a TemplateSpecializationType, or even
// the classes and namespaces of known non-dependent arguments.
if (!BaseType)
continue;
CXXRecordDecl *BaseDecl = cast<CXXRecordDecl>(BaseType->getDecl());
if (Result.addClassTransitive(BaseDecl)) {
// Find the associated namespace for this base class.
DeclContext *BaseCtx = BaseDecl->getDeclContext();
CollectEnclosingNamespace(Result.Namespaces, BaseCtx);
// Make sure we visit the bases of this base class.
if (BaseDecl->bases_begin() != BaseDecl->bases_end())
Bases.push_back(BaseDecl);
}
}
}
}
// Add the associated classes and namespaces for
// argument-dependent lookup with an argument of type T
// (C++ [basic.lookup.koenig]p2).
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) {
// C++ [basic.lookup.koenig]p2:
//
// For each argument type T in the function call, there is a set
// of zero or more associated namespaces and a set of zero or more
// associated classes to be considered. The sets of namespaces and
// classes is determined entirely by the types of the function
// arguments (and the namespace of any template template
// argument). Typedef names and using-declarations used to specify
// the types do not contribute to this set. The sets of namespaces
// and classes are determined in the following way:
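//
// For example (an illustrative sketch), a typedef used to spell an
// argument type contributes nothing by itself:
//   namespace N { struct S {}; }
//   typedef N::S T;
// An argument of type T still associates namespace N, because the
// canonical underlying type is N::S.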
SmallVector<const Type *, 16> Queue;
const Type *T = Ty->getCanonicalTypeInternal().getTypePtr();
while (true) {
switch (T->getTypeClass()) {
#define TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define ABSTRACT_TYPE(Class, Base)
#include "clang/AST/TypeNodes.inc"
// T is canonical. We can also ignore dependent types because
// we don't need to do ADL at the definition point, but if we
// wanted to implement template export (or if we find some other
// use for associated classes and namespaces...) this would be
// wrong.
break;
// -- If T is a pointer to U or an array of U, its associated
// namespaces and classes are those associated with U.
case Type::Pointer:
T = cast<PointerType>(T)->getPointeeType().getTypePtr();
continue;
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray:
T = cast<ArrayType>(T)->getElementType().getTypePtr();
continue;
// -- If T is a fundamental type, its associated sets of
// namespaces and classes are both empty.
case Type::Builtin:
break;
// -- If T is a class type (including unions), its associated
// classes are: the class itself; the class of which it is
// a member, if any; and its direct and indirect base classes.
// Its associated namespaces are the innermost enclosing
// namespaces of its associated classes.
case Type::Record: {
CXXRecordDecl *Class =
cast<CXXRecordDecl>(cast<RecordType>(T)->getDecl());
addAssociatedClassesAndNamespaces(Result, Class);
break;
}
// -- If T is an enumeration type, its associated namespace
// is the innermost enclosing namespace of its declaration.
// If it is a class member, its associated class is the
// member's class; else it has no associated class.
case Type::Enum: {
EnumDecl *Enum = cast<EnumType>(T)->getDecl();
DeclContext *Ctx = Enum->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this enumeration.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
break;
}
// -- If T is a function type, its associated namespaces and
// classes are those associated with the function parameter
// types and those associated with the return type.
case Type::FunctionProto: {
const FunctionProtoType *Proto = cast<FunctionProtoType>(T);
for (const auto &Arg : Proto->param_types())
Queue.push_back(Arg.getTypePtr());
// fallthrough
LLVM_FALLTHROUGH;
}
case Type::FunctionNoProto: {
const FunctionType *FnType = cast<FunctionType>(T);
T = FnType->getReturnType().getTypePtr();
continue;
}
// -- If T is a pointer to a member function of a class X, its
// associated namespaces and classes are those associated
// with the function parameter types and return type,
// together with those associated with X.
//
// -- If T is a pointer to a data member of class X, its
// associated namespaces and classes are those associated
// with the member type together with those associated with
// X.
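//
// For example (an illustrative sketch):
//   namespace N { struct X {}; struct Y {}; }
// An argument of type 'N::Y (N::X::*)(N::Y)' associates the classes
// X and Y and, through them, the namespace N.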
case Type::MemberPointer: {
const MemberPointerType *MemberPtr = cast<MemberPointerType>(T);
// Queue up the class type into which this points.
Queue.push_back(MemberPtr->getClass());
// And directly continue with the pointee type.
T = MemberPtr->getPointeeType().getTypePtr();
continue;
}
// As an extension, treat this like a normal pointer.
case Type::BlockPointer:
T = cast<BlockPointerType>(T)->getPointeeType().getTypePtr();
continue;
// References aren't covered by the standard, but that's such an
// obvious defect that we cover them anyway.
case Type::LValueReference:
case Type::RValueReference:
T = cast<ReferenceType>(T)->getPointeeType().getTypePtr();
continue;
// These are fundamental types.
case Type::Vector:
case Type::ExtVector:
case Type::ConstantMatrix:
case Type::Complex:
case Type::BitInt:
break;
// Non-deduced auto types only get here for error cases.
case Type::Auto:
case Type::DeducedTemplateSpecialization:
break;
// If T is an Objective-C object or interface type, or a pointer to an
// object or interface type, the associated namespace is the global
// namespace.
case Type::ObjCObject:
case Type::ObjCInterface:
case Type::ObjCObjectPointer:
Result.Namespaces.insert(Result.S.Context.getTranslationUnitDecl());
break;
// Atomic types are just wrappers; use the associations of the
// contained type.
case Type::Atomic:
T = cast<AtomicType>(T)->getValueType().getTypePtr();
continue;
case Type::Pipe:
T = cast<PipeType>(T)->getElementType().getTypePtr();
continue;
}
if (Queue.empty())
break;
T = Queue.pop_back_val();
}
}
/// Find the associated classes and namespaces for
/// argument-dependent lookup for a call with the given set of
/// arguments.
///
/// This routine computes the sets of associated classes and associated
/// namespaces searched by argument-dependent lookup
/// (C++ [basic.lookup.argdep]) for a given set of arguments.
void Sema::FindAssociatedClassesAndNamespaces(
SourceLocation InstantiationLoc, ArrayRef<Expr *> Args,
AssociatedNamespaceSet &AssociatedNamespaces,
AssociatedClassSet &AssociatedClasses) {
AssociatedNamespaces.clear();
AssociatedClasses.clear();
AssociatedLookup Result(*this, InstantiationLoc,
AssociatedNamespaces, AssociatedClasses);
// C++ [basic.lookup.koenig]p2:
// For each argument type T in the function call, there is a set
// of zero or more associated namespaces and a set of zero or more
// associated classes to be considered. The sets of namespaces and
// classes is determined entirely by the types of the function
// arguments (and the namespace of any template template
// argument).
for (unsigned ArgIdx = 0; ArgIdx != Args.size(); ++ArgIdx) {
Expr *Arg = Args[ArgIdx];
if (Arg->getType() != Context.OverloadTy) {
addAssociatedClassesAndNamespaces(Result, Arg->getType());
continue;
}
// [...] In addition, if the argument is the name or address of a
// set of overloaded functions and/or function templates, its
// associated classes and namespaces are the union of those
// associated with each of the members of the set: the namespace
// in which the function or function template is defined and the
// classes and namespaces associated with its (non-dependent)
// parameter types and return type.
OverloadExpr *OE = OverloadExpr::find(Arg).Expression;
for (const NamedDecl *D : OE->decls()) {
// Look through any using declarations to find the underlying function.
const FunctionDecl *FDecl = D->getUnderlyingDecl()->getAsFunction();
// Add the classes and namespaces associated with the parameter
// types and return type of this function.
addAssociatedClassesAndNamespaces(Result, FDecl->getType());
}
}
}
NamedDecl *Sema::LookupSingleName(Scope *S, DeclarationName Name,
SourceLocation Loc,
LookupNameKind NameKind,
RedeclarationKind Redecl) {
LookupResult R(*this, Name, Loc, NameKind, Redecl);
LookupName(R, S);
return R.getAsSingle<NamedDecl>();
}
/// Find the protocol with the given name, if any.
ObjCProtocolDecl *Sema::LookupProtocol(IdentifierInfo *II,
SourceLocation IdLoc,
RedeclarationKind Redecl) {
Decl *D = LookupSingleName(TUScope, II, IdLoc,
LookupObjCProtocolName, Redecl);
return cast_or_null<ObjCProtocolDecl>(D);
}
void Sema::LookupOverloadedOperatorName(OverloadedOperatorKind Op, Scope *S,
UnresolvedSetImpl &Functions) {
// C++ [over.match.oper]p3:
// -- The set of non-member candidates is the result of the
// unqualified lookup of operator@ in the context of the
// expression according to the usual rules for name lookup in
// unqualified function calls (3.4.2) except that all member
// functions are ignored.
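//
// For example (an illustrative sketch): for 'a + b' this collects
//   S operator+(S, S);            // namespace-scope: a candidate
// but not
//   struct T { T operator+(T); }; // member operator: ignored here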
DeclarationName OpName = Context.DeclarationNames.getCXXOperatorName(Op);
LookupResult Operators(*this, OpName, SourceLocation(), LookupOperatorName);
LookupName(Operators, S);
assert(!Operators.isAmbiguous() && "Operator lookup cannot be ambiguous");
Functions.append(Operators.begin(), Operators.end());
}
Sema::SpecialMemberOverloadResult Sema::LookupSpecialMember(CXXRecordDecl *RD,
CXXSpecialMember SM,
bool ConstArg,
bool VolatileArg,
bool RValueThis,
bool ConstThis,
bool VolatileThis) {
assert(CanDeclareSpecialMemberFunction(RD) &&
"doing special member lookup into record that isn't fully complete");
RD = RD->getDefinition();
if (RValueThis || ConstThis || VolatileThis)
assert((SM == CXXCopyAssignment || SM == CXXMoveAssignment) &&
"constructors and destructors always have unqualified lvalue this");
if (ConstArg || VolatileArg)
assert((SM != CXXDefaultConstructor && SM != CXXDestructor) &&
"parameter-less special members can't have qualified arguments");
// FIXME: Get the caller to pass in a location for the lookup.
SourceLocation LookupLoc = RD->getLocation();
llvm::FoldingSetNodeID ID;
ID.AddPointer(RD);
ID.AddInteger(SM);
ID.AddInteger(ConstArg);
ID.AddInteger(VolatileArg);
ID.AddInteger(RValueThis);
ID.AddInteger(ConstThis);
ID.AddInteger(VolatileThis);
void *InsertPoint;
SpecialMemberOverloadResultEntry *Result =
SpecialMemberCache.FindNodeOrInsertPos(ID, InsertPoint);
// This was already cached
if (Result)
return *Result;
Result = BumpAlloc.Allocate<SpecialMemberOverloadResultEntry>();
Result = new (Result) SpecialMemberOverloadResultEntry(ID);
SpecialMemberCache.InsertNode(Result, InsertPoint);
if (SM == CXXDestructor) {
if (RD->needsImplicitDestructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitDestructor(RD);
});
}
CXXDestructorDecl *DD = RD->getDestructor();
Result->setMethod(DD);
Result->setKind(DD && !DD->isDeleted()
? SpecialMemberOverloadResult::Success
: SpecialMemberOverloadResult::NoMemberOrDeleted);
return *Result;
}
// Prepare for overload resolution. Here we construct a synthetic argument
// if necessary and make sure that implicit functions are declared.
CanQualType CanTy = Context.getCanonicalType(Context.getTagDeclType(RD));
DeclarationName Name;
Expr *Arg = nullptr;
unsigned NumArgs;
QualType ArgType = CanTy;
ExprValueKind VK = VK_LValue;
if (SM == CXXDefaultConstructor) {
Name = Context.DeclarationNames.getCXXConstructorName(CanTy);
NumArgs = 0;
if (RD->needsImplicitDefaultConstructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitDefaultConstructor(RD);
});
}
} else {
if (SM == CXXCopyConstructor || SM == CXXMoveConstructor) {
Name = Context.DeclarationNames.getCXXConstructorName(CanTy);
if (RD->needsImplicitCopyConstructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitCopyConstructor(RD);
});
}
if (getLangOpts().CPlusPlus11 && RD->needsImplicitMoveConstructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitMoveConstructor(RD);
});
}
} else {
Name = Context.DeclarationNames.getCXXOperatorName(OO_Equal);
if (RD->needsImplicitCopyAssignment()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitCopyAssignment(RD);
});
}
if (getLangOpts().CPlusPlus11 && RD->needsImplicitMoveAssignment()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitMoveAssignment(RD);
});
}
}
if (ConstArg)
ArgType.addConst();
if (VolatileArg)
ArgType.addVolatile();
// This isn't /really/ specified by the standard, but it's implied that
// we should be working from a PRValue in the case of move to ensure
// that we prefer to bind to rvalue references, and an LValue in the
// case of copy to ensure we don't bind to rvalue references.
// Possibly an XValue is actually correct in the case of move, but
// there is no semantic difference for class types in this restricted
// case.
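// For example (an illustrative sketch): when looking up the move
// constructor of 'struct S { S(S&&); S(const S&); };', classifying the
// synthetic argument as a prvalue lets overload resolution prefer
// S(S&&) over S(const S&).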
if (SM == CXXCopyConstructor || SM == CXXCopyAssignment)
VK = VK_LValue;
else
VK = VK_PRValue;
}
OpaqueValueExpr FakeArg(LookupLoc, ArgType, VK);
if (SM != CXXDefaultConstructor) {
NumArgs = 1;
Arg = &FakeArg;
}
// Create the object argument
QualType ThisTy = CanTy;
if (ConstThis)
ThisTy.addConst();
if (VolatileThis)
ThisTy.addVolatile();
Expr::Classification Classification =
OpaqueValueExpr(LookupLoc, ThisTy, RValueThis ? VK_PRValue : VK_LValue)
.Classify(Context);
// Now we perform lookup on the name we computed earlier and do overload
// resolution. Lookup is only performed directly into the class since there
// will always be a (possibly implicit) declaration to shadow any others.
OverloadCandidateSet OCS(LookupLoc, OverloadCandidateSet::CSK_Normal);
DeclContext::lookup_result R = RD->lookup(Name);
if (R.empty()) {
// We might have no default constructor because we have a lambda's closure
// type, rather than because there's some other declared constructor.
// Every class has a copy/move constructor, copy/move assignment, and
// destructor.
assert(SM == CXXDefaultConstructor &&
"lookup for a constructor or assignment operator was empty");
Result->setMethod(nullptr);
Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted);
return *Result;
}
// Copy the candidates as our processing of them may load new declarations
// from an external source and invalidate lookup_result.
SmallVector<NamedDecl *, 8> Candidates(R.begin(), R.end());
for (NamedDecl *CandDecl : Candidates) {
if (CandDecl->isInvalidDecl())
continue;
DeclAccessPair Cand = DeclAccessPair::make(CandDecl, AS_public);
auto CtorInfo = getConstructorInfo(Cand);
if (CXXMethodDecl *M = dyn_cast<CXXMethodDecl>(Cand->getUnderlyingDecl())) {
if (SM == CXXCopyAssignment || SM == CXXMoveAssignment)
AddMethodCandidate(M, Cand, RD, ThisTy, Classification,
llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
else if (CtorInfo)
AddOverloadCandidate(CtorInfo.Constructor, CtorInfo.FoundDecl,
llvm::makeArrayRef(&Arg, NumArgs), OCS,
/*SuppressUserConversions*/ true);
else
AddOverloadCandidate(M, Cand, llvm::makeArrayRef(&Arg, NumArgs), OCS,
/*SuppressUserConversions*/ true);
} else if (FunctionTemplateDecl *Tmpl =
dyn_cast<FunctionTemplateDecl>(Cand->getUnderlyingDecl())) {
if (SM == CXXCopyAssignment || SM == CXXMoveAssignment)
AddMethodTemplateCandidate(
Tmpl, Cand, RD, nullptr, ThisTy, Classification,
llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
else if (CtorInfo)
AddTemplateOverloadCandidate(
CtorInfo.ConstructorTmpl, CtorInfo.FoundDecl, nullptr,
llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
else
AddTemplateOverloadCandidate(
Tmpl, Cand, nullptr, llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
} else {
assert(isa<UsingDecl>(Cand.getDecl()) &&
"illegal Kind of operator = Decl");
}
}
OverloadCandidateSet::iterator Best;
switch (OCS.BestViableFunction(*this, LookupLoc, Best)) {
case OR_Success:
Result->setMethod(cast<CXXMethodDecl>(Best->Function));
Result->setKind(SpecialMemberOverloadResult::Success);
break;
case OR_Deleted:
Result->setMethod(cast<CXXMethodDecl>(Best->Function));
Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted);
break;
case OR_Ambiguous:
Result->setMethod(nullptr);
Result->setKind(SpecialMemberOverloadResult::Ambiguous);
break;
case OR_No_Viable_Function:
Result->setMethod(nullptr);
Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted);
break;
}
return *Result;
}
/// Look up the default constructor for the given class.
CXXConstructorDecl *Sema::LookupDefaultConstructor(CXXRecordDecl *Class) {
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXDefaultConstructor, false, false, false,
false, false);
return cast_or_null<CXXConstructorDecl>(Result.getMethod());
}
/// Look up the copying constructor for the given class.
CXXConstructorDecl *Sema::LookupCopyingConstructor(CXXRecordDecl *Class,
unsigned Quals) {
assert(!(Quals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy ctor arg");
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXCopyConstructor, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, false, false, false);
return cast_or_null<CXXConstructorDecl>(Result.getMethod());
}
/// Look up the moving constructor for the given class.
CXXConstructorDecl *Sema::LookupMovingConstructor(CXXRecordDecl *Class,
unsigned Quals) {
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXMoveConstructor, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, false, false, false);
return cast_or_null<CXXConstructorDecl>(Result.getMethod());
}
/// Look up the constructors for the given class.
DeclContext::lookup_result Sema::LookupConstructors(CXXRecordDecl *Class) {
// If the implicit constructors have not yet been declared, do so now.
if (CanDeclareSpecialMemberFunction(Class)) {
runWithSufficientStackSpace(Class->getLocation(), [&] {
if (Class->needsImplicitDefaultConstructor())
DeclareImplicitDefaultConstructor(Class);
if (Class->needsImplicitCopyConstructor())
DeclareImplicitCopyConstructor(Class);
if (getLangOpts().CPlusPlus11 && Class->needsImplicitMoveConstructor())
DeclareImplicitMoveConstructor(Class);
});
}
CanQualType T = Context.getCanonicalType(Context.getTypeDeclType(Class));
DeclarationName Name = Context.DeclarationNames.getCXXConstructorName(T);
return Class->lookup(Name);
}
/// Look up the copying assignment operator for the given class.
CXXMethodDecl *Sema::LookupCopyingAssignment(CXXRecordDecl *Class,
unsigned Quals, bool RValueThis,
unsigned ThisQuals) {
assert(!(Quals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy assignment arg");
assert(!(ThisQuals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy assignment this");
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXCopyAssignment, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, RValueThis,
ThisQuals & Qualifiers::Const,
ThisQuals & Qualifiers::Volatile);
return Result.getMethod();
}
/// Look up the moving assignment operator for the given class.
CXXMethodDecl *Sema::LookupMovingAssignment(CXXRecordDecl *Class,
unsigned Quals,
bool RValueThis,
unsigned ThisQuals) {
assert(!(ThisQuals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy assignment this");
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXMoveAssignment, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, RValueThis,
ThisQuals & Qualifiers::Const,
ThisQuals & Qualifiers::Volatile);
return Result.getMethod();
}
/// Look for the destructor of the given class.
///
/// During semantic analysis, this routine should be used in lieu of
/// CXXRecordDecl::getDestructor().
///
/// \returns The destructor for this class.
CXXDestructorDecl *Sema::LookupDestructor(CXXRecordDecl *Class) {
return cast<CXXDestructorDecl>(LookupSpecialMember(Class, CXXDestructor,
false, false, false,
false, false).getMethod());
}
/// LookupLiteralOperator - Determine which literal operator should be used for
/// a user-defined literal, per C++11 [lex.ext].
///
/// Normal overload resolution is not used to select which literal operator to
/// call for a user-defined literal. Look up the provided literal operator name,
/// and filter the results to the appropriate set for the given argument types.
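///
/// For example (an illustrative sketch):
/// @code
/// unsigned long long operator""_w(unsigned long long); // cooked form
/// unsigned long long operator""_w(const char *);       // raw form
/// auto v = 42_w; // the cooked form is preferred for '42', per
///                // C++11 [lex.ext]p3
/// @endcode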
Sema::LiteralOperatorLookupResult
Sema::LookupLiteralOperator(Scope *S, LookupResult &R,
ArrayRef<QualType> ArgTys, bool AllowRaw,
bool AllowTemplate, bool AllowStringTemplatePack,
bool DiagnoseMissing, StringLiteral *StringLit) {
LookupName(R, S);
assert(R.getResultKind() != LookupResult::Ambiguous &&
"literal operator lookup can't be ambiguous");
// Filter the lookup results appropriately.
LookupResult::Filter F = R.makeFilter();
bool AllowCooked = true;
bool FoundRaw = false;
bool FoundTemplate = false;
bool FoundStringTemplatePack = false;
bool FoundCooked = false;
while (F.hasNext()) {
Decl *D = F.next();
if (UsingShadowDecl *USD = dyn_cast<UsingShadowDecl>(D))
D = USD->getTargetDecl();
// If the declaration we found is invalid, skip it.
if (D->isInvalidDecl()) {
F.erase();
continue;
}
bool IsRaw = false;
bool IsTemplate = false;
bool IsStringTemplatePack = false;
bool IsCooked = false;
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->getNumParams() == 1 &&
FD->getParamDecl(0)->getType()->getAs<PointerType>())
IsRaw = true;
else if (FD->getNumParams() == ArgTys.size()) {
IsCooked = true;
for (unsigned ArgIdx = 0; ArgIdx != ArgTys.size(); ++ArgIdx) {
QualType ParamTy = FD->getParamDecl(ArgIdx)->getType();
if (!Context.hasSameUnqualifiedType(ArgTys[ArgIdx], ParamTy)) {
IsCooked = false;
break;
}
}
}
}
if (FunctionTemplateDecl *FD = dyn_cast<FunctionTemplateDecl>(D)) {
TemplateParameterList *Params = FD->getTemplateParameters();
if (Params->size() == 1) {
IsTemplate = true;
if (!Params->getParam(0)->isTemplateParameterPack() && !StringLit) {
// Implied but not stated: user-defined integer and floating literals
// only ever use numeric literal operator templates, not templates
// taking a parameter of class type.
F.erase();
continue;
}
// A string literal template is only considered if the string literal
// is a well-formed template argument for the template parameter.
if (StringLit) {
SFINAETrap Trap(*this);
SmallVector<TemplateArgument, 1> Checked;
TemplateArgumentLoc Arg(TemplateArgument(StringLit), StringLit);
if (CheckTemplateArgument(Params->getParam(0), Arg, FD,
R.getNameLoc(), R.getNameLoc(), 0,
Checked) ||
Trap.hasErrorOccurred())
IsTemplate = false;
}
} else {
IsStringTemplatePack = true;
}
}
if (AllowTemplate && StringLit && IsTemplate) {
FoundTemplate = true;
AllowRaw = false;
AllowCooked = false;
AllowStringTemplatePack = false;
if (FoundRaw || FoundCooked || FoundStringTemplatePack) {
F.restart();
FoundRaw = FoundCooked = FoundStringTemplatePack = false;
}
} else if (AllowCooked && IsCooked) {
FoundCooked = true;
AllowRaw = false;
AllowTemplate = StringLit;
AllowStringTemplatePack = false;
if (FoundRaw || FoundTemplate || FoundStringTemplatePack) {
// Go through again and remove the raw and template decls we've
// already found.
F.restart();
FoundRaw = FoundTemplate = FoundStringTemplatePack = false;
}
} else if (AllowRaw && IsRaw) {
FoundRaw = true;
} else if (AllowTemplate && IsTemplate) {
FoundTemplate = true;
} else if (AllowStringTemplatePack && IsStringTemplatePack) {
FoundStringTemplatePack = true;
} else {
F.erase();
}
}
F.done();
// Per C++20 [lex.ext]p5, we prefer the template form over the non-template
// form for string literal operator templates.
if (StringLit && FoundTemplate)
return LOLR_Template;
// C++11 [lex.ext]p3, p4: If S contains a literal operator with a matching
// parameter type, that is used in preference to a raw literal operator
// or literal operator template.
if (FoundCooked)
return LOLR_Cooked;
// C++11 [lex.ext]p3, p4: S shall contain a raw literal operator or a literal
// operator template, but not both.
if (FoundRaw && FoundTemplate) {
Diag(R.getNameLoc(), diag::err_ovl_ambiguous_call) << R.getLookupName();
for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I)
NoteOverloadCandidate(*I, (*I)->getUnderlyingDecl()->getAsFunction());
return LOLR_Error;
}
if (FoundRaw)
return LOLR_Raw;
if (FoundTemplate)
return LOLR_Template;
if (FoundStringTemplatePack)
return LOLR_StringTemplatePack;
// Didn't find anything we could use.
if (DiagnoseMissing) {
Diag(R.getNameLoc(), diag::err_ovl_no_viable_literal_operator)
<< R.getLookupName() << (int)ArgTys.size() << ArgTys[0]
<< (ArgTys.size() == 2 ? ArgTys[1] : QualType()) << AllowRaw
<< (AllowTemplate || AllowStringTemplatePack);
return LOLR_Error;
}
return LOLR_ErrorNoDiagnostic;
}
void ADLResult::insert(NamedDecl *New) {
NamedDecl *&Old = Decls[cast<NamedDecl>(New->getCanonicalDecl())];
// If we haven't yet seen a decl for this key, or the last decl
// was exactly this one, we're done.
if (Old == nullptr || Old == New) {
Old = New;
return;
}
// Otherwise, decide which is a more recent redeclaration.
FunctionDecl *OldFD = Old->getAsFunction();
FunctionDecl *NewFD = New->getAsFunction();
FunctionDecl *Cursor = NewFD;
while (true) {
Cursor = Cursor->getPreviousDecl();
// If we got to the end without finding OldFD, OldFD is the newer
// declaration; leave things as they are.
if (!Cursor) return;
// If we do find OldFD, then NewFD is newer.
if (Cursor == OldFD) break;
// Otherwise, keep looking.
}
Old = New;
}
void Sema::ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc,
ArrayRef<Expr *> Args, ADLResult &Result) {
// Find all of the associated namespaces and classes based on the
// arguments we have.
AssociatedNamespaceSet AssociatedNamespaces;
AssociatedClassSet AssociatedClasses;
FindAssociatedClassesAndNamespaces(Loc, Args,
AssociatedNamespaces,
AssociatedClasses);
// C++ [basic.lookup.argdep]p3:
// Let X be the lookup set produced by unqualified lookup (3.4.1)
// and let Y be the lookup set produced by argument dependent
// lookup (defined as follows). If X contains [...] then Y is
// empty. Otherwise Y is the set of declarations found in the
// namespaces associated with the argument types as described
// below. The set of declarations found by the lookup of the name
// is the union of X and Y.
//
// Here, we compute Y and add its members to the overloaded
// candidate set.
for (auto *NS : AssociatedNamespaces) {
// When considering an associated namespace, the lookup is the
// same as the lookup performed when the associated namespace is
// used as a qualifier (3.4.3.2) except that:
//
// -- Any using-directives in the associated namespace are
// ignored.
//
// -- Any namespace-scope friend functions declared in
// associated classes are visible within their respective
// namespaces even if they are not visible during an ordinary
// lookup (11.4).
//
// C++20 [basic.lookup.argdep] p4.3
// -- are exported, are attached to a named module M, do not appear
// in the translation unit containing the point of the lookup, and
// have the same innermost enclosing non-inline namespace scope as
// a declaration of an associated entity attached to M.
DeclContext::lookup_result R = NS->lookup(Name);
for (auto *D : R) {
auto *Underlying = D;
if (auto *USD = dyn_cast<UsingShadowDecl>(D))
Underlying = USD->getTargetDecl();
if (!isa<FunctionDecl>(Underlying) &&
!isa<FunctionTemplateDecl>(Underlying))
continue;
// The declaration is visible to argument-dependent lookup if either
// it's ordinarily visible or declared as a friend in an associated
// class.
bool Visible = false;
for (D = D->getMostRecentDecl(); D;
D = cast_or_null<NamedDecl>(D->getPreviousDecl())) {
if (D->getIdentifierNamespace() & Decl::IDNS_Ordinary) {
if (isVisible(D)) {
Visible = true;
break;
} else if (getLangOpts().CPlusPlusModules &&
D->isInExportDeclContext()) {
// C++20 [basic.lookup.argdep] p4.3 .. are exported ...
Module *FM = D->getOwningModule();
// exports are only valid in module purview and outside of any
// PMF (although a PMF should not even be present in a module
// with an import).
assert(FM && FM->isModulePurview() && !FM->isPrivateModule() &&
"bad export context");
// .. are attached to a named module M, do not appear in the
// translation unit containing the point of the lookup..
if (!isModuleUnitOfCurrentTU(FM) &&
llvm::any_of(AssociatedClasses, [&](auto *E) {
// ... and have the same innermost enclosing non-inline
// namespace scope as a declaration of an associated entity
// attached to M
if (!E->hasOwningModule() ||
E->getOwningModule()->getTopLevelModuleName() !=
FM->getTopLevelModuleName())
return false;
// TODO: maybe this could be cached when generating the
// associated namespaces / entities.
DeclContext *Ctx = E->getDeclContext();
while (!Ctx->isFileContext() || Ctx->isInlineNamespace())
Ctx = Ctx->getParent();
return Ctx == NS;
})) {
Visible = true;
break;
}
}
} else if (D->getFriendObjectKind()) {
auto *RD = cast<CXXRecordDecl>(D->getLexicalDeclContext());
// [basic.lookup.argdep]p4:
// Argument-dependent lookup finds all declarations of functions and
// function templates that
// - ...
// - are declared as a friend ([class.friend]) of any class with a
// reachable definition in the set of associated entities,
//
// FIXME: If there's a merged definition of D that is reachable, then
// the friend declaration should be considered.
if (AssociatedClasses.count(RD) && isReachable(D)) {
Visible = true;
break;
}
}
}
// FIXME: Preserve D as the FoundDecl.
if (Visible)
Result.insert(Underlying);
}
}
}
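// Illustrative example of the lookup implemented above (hypothetical code,
// not from the test suite):
//
//   namespace N { struct S {}; void f(S); }
//   void g() { N::S s; f(s); } // the unqualified call finds N::f via ADL
//
// N is an associated namespace of the argument type N::S, so N::f is added
// to the candidate set even though ordinary unqualified lookup from g()
// would not find it.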
//----------------------------------------------------------------------------
// Search for all visible declarations.
//----------------------------------------------------------------------------
VisibleDeclConsumer::~VisibleDeclConsumer() { }
bool VisibleDeclConsumer::includeHiddenDecls() const { return false; }
namespace {
class ShadowContextRAII;
class VisibleDeclsRecord {
public:
/// An entry in the shadow map, which is optimized to store a
/// single declaration (the common case) but can also store a list
/// of declarations.
typedef llvm::TinyPtrVector<NamedDecl*> ShadowMapEntry;
private:
/// A mapping from declaration names to the declarations that have
/// this name within a particular scope.
typedef llvm::DenseMap<DeclarationName, ShadowMapEntry> ShadowMap;
/// A list of shadow maps, which is used to model name hiding.
std::list<ShadowMap> ShadowMaps;
/// The declaration contexts we have already visited.
llvm::SmallPtrSet<DeclContext *, 8> VisitedContexts;
friend class ShadowContextRAII;
public:
/// Determine whether we have already visited this context
/// (and, if not, note that we are going to visit that context now).
bool visitedContext(DeclContext *Ctx) {
return !VisitedContexts.insert(Ctx).second;
}
bool alreadyVisitedContext(DeclContext *Ctx) {
return VisitedContexts.count(Ctx);
}
/// Determine whether the given declaration is hidden in the
/// current scope.
///
/// \returns the declaration that hides the given declaration, or
/// NULL if no such declaration exists.
NamedDecl *checkHidden(NamedDecl *ND);
/// Add a declaration to the current shadow map.
void add(NamedDecl *ND) {
ShadowMaps.back()[ND->getDeclName()].push_back(ND);
}
};
/// RAII object that records when we've entered a shadow context.
class ShadowContextRAII {
VisibleDeclsRecord &Visible;
typedef VisibleDeclsRecord::ShadowMap ShadowMap;
public:
ShadowContextRAII(VisibleDeclsRecord &Visible) : Visible(Visible) {
Visible.ShadowMaps.emplace_back();
}
~ShadowContextRAII() {
Visible.ShadowMaps.pop_back();
}
};
} // end anonymous namespace
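// Typical usage of the machinery above (mirrored by the helpers below):
// each nested lookup context pushes a fresh shadow map for its duration.
//
//   ShadowContextRAII Shadow(Visited);    // push a shadow map
//   lookupInDeclContext(RD, Result, ...); // names found here land in it
//   // the map is popped when Shadow goes out of scope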
NamedDecl *VisibleDeclsRecord::checkHidden(NamedDecl *ND) {
unsigned IDNS = ND->getIdentifierNamespace();
std::list<ShadowMap>::reverse_iterator SM = ShadowMaps.rbegin();
for (std::list<ShadowMap>::reverse_iterator SMEnd = ShadowMaps.rend();
SM != SMEnd; ++SM) {
ShadowMap::iterator Pos = SM->find(ND->getDeclName());
if (Pos == SM->end())
continue;
for (auto *D : Pos->second) {
// A tag declaration does not hide a non-tag declaration.
if (D->hasTagIdentifierNamespace() &&
(IDNS & (Decl::IDNS_Member | Decl::IDNS_Ordinary |
Decl::IDNS_ObjCProtocol)))
continue;
// Protocols are in distinct namespaces from everything else.
if (((D->getIdentifierNamespace() & Decl::IDNS_ObjCProtocol)
|| (IDNS & Decl::IDNS_ObjCProtocol)) &&
D->getIdentifierNamespace() != IDNS)
continue;
// Functions and function templates in the same scope overload
// rather than hide. FIXME: Look for hiding based on function
// signatures!
if (D->getUnderlyingDecl()->isFunctionOrFunctionTemplate() &&
ND->getUnderlyingDecl()->isFunctionOrFunctionTemplate() &&
SM == ShadowMaps.rbegin())
continue;
// A shadow declaration that's created by a resolved using declaration
// is not hidden by the same using declaration.
if (isa<UsingShadowDecl>(ND) && isa<UsingDecl>(D) &&
cast<UsingShadowDecl>(ND)->getIntroducer() == D)
continue;
// We've found a declaration that hides this one.
return D;
}
}
return nullptr;
}
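// Example of the hiding rules above (illustrative only): given
//
//   struct X {}; // tag declaration
//   int X;       // ordinary declaration in an inner scope
//
// the variable hides the tag for ordinary lookup, but a tag never hides an
// ordinary name; checkHidden encodes this asymmetry by consulting the
// identifier namespaces before reporting a hit.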
namespace {
class LookupVisibleHelper {
public:
LookupVisibleHelper(VisibleDeclConsumer &Consumer, bool IncludeDependentBases,
bool LoadExternal)
: Consumer(Consumer), IncludeDependentBases(IncludeDependentBases),
LoadExternal(LoadExternal) {}
void lookupVisibleDecls(Sema &SemaRef, Scope *S, Sema::LookupNameKind Kind,
bool IncludeGlobalScope) {
// Determine the set of using directives available during
// unqualified name lookup.
Scope *Initial = S;
UnqualUsingDirectiveSet UDirs(SemaRef);
if (SemaRef.getLangOpts().CPlusPlus) {
// Find the first namespace or translation-unit scope.
while (S && !isNamespaceOrTranslationUnitScope(S))
S = S->getParent();
UDirs.visitScopeChain(Initial, S);
}
UDirs.done();
// Look for visible declarations.
LookupResult Result(SemaRef, DeclarationName(), SourceLocation(), Kind);
Result.setAllowHidden(Consumer.includeHiddenDecls());
if (!IncludeGlobalScope)
Visited.visitedContext(SemaRef.getASTContext().getTranslationUnitDecl());
ShadowContextRAII Shadow(Visited);
lookupInScope(Initial, Result, UDirs);
}
void lookupVisibleDecls(Sema &SemaRef, DeclContext *Ctx,
Sema::LookupNameKind Kind, bool IncludeGlobalScope) {
LookupResult Result(SemaRef, DeclarationName(), SourceLocation(), Kind);
Result.setAllowHidden(Consumer.includeHiddenDecls());
if (!IncludeGlobalScope)
Visited.visitedContext(SemaRef.getASTContext().getTranslationUnitDecl());
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(Ctx, Result, /*QualifiedNameLookup=*/true,
/*InBaseClass=*/false);
}
private:
void lookupInDeclContext(DeclContext *Ctx, LookupResult &Result,
bool QualifiedNameLookup, bool InBaseClass) {
if (!Ctx)
return;
// Make sure we don't visit the same context twice.
if (Visited.visitedContext(Ctx->getPrimaryContext()))
return;
Consumer.EnteredContext(Ctx);
// Outside C++, lookup results for the TU live on identifiers.
if (isa<TranslationUnitDecl>(Ctx) &&
!Result.getSema().getLangOpts().CPlusPlus) {
auto &S = Result.getSema();
auto &Idents = S.Context.Idents;
// Ensure all external identifiers are in the identifier table.
if (LoadExternal)
if (IdentifierInfoLookup *External =
Idents.getExternalIdentifierLookup()) {
std::unique_ptr<IdentifierIterator> Iter(External->getIdentifiers());
for (StringRef Name = Iter->Next(); !Name.empty();
Name = Iter->Next())
Idents.get(Name);
}
// Walk all lookup results in the TU for each identifier.
for (const auto &Ident : Idents) {
for (auto I = S.IdResolver.begin(Ident.getValue()),
E = S.IdResolver.end();
I != E; ++I) {
if (S.IdResolver.isDeclInScope(*I, Ctx)) {
if (NamedDecl *ND = Result.getAcceptableDecl(*I)) {
Consumer.FoundDecl(ND, Visited.checkHidden(ND), Ctx, InBaseClass);
Visited.add(ND);
}
}
}
}
return;
}
if (CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(Ctx))
Result.getSema().ForceDeclarationOfImplicitMembers(Class);
llvm::SmallVector<NamedDecl *, 4> DeclsToVisit;
// We sometimes skip loading namespace-level results (they tend to be huge).
bool Load = LoadExternal ||
!(isa<TranslationUnitDecl>(Ctx) || isa<NamespaceDecl>(Ctx));
// Enumerate all of the results in this context.
for (DeclContextLookupResult R :
Load ? Ctx->lookups()
: Ctx->noload_lookups(/*PreserveInternalState=*/false)) {
for (auto *D : R) {
if (auto *ND = Result.getAcceptableDecl(D)) {
// Rather than visit immediately, we put ND into a vector and visit
// all decls, in order, outside of this loop. The reason is that
// Consumer.FoundDecl() may invalidate the iterators used in the two
// loops above.
DeclsToVisit.push_back(ND);
}
}
}
for (auto *ND : DeclsToVisit) {
Consumer.FoundDecl(ND, Visited.checkHidden(ND), Ctx, InBaseClass);
Visited.add(ND);
}
DeclsToVisit.clear();
// Traverse using directives for qualified name lookup.
if (QualifiedNameLookup) {
ShadowContextRAII Shadow(Visited);
for (auto I : Ctx->using_directives()) {
if (!Result.getSema().isVisible(I))
continue;
lookupInDeclContext(I->getNominatedNamespace(), Result,
QualifiedNameLookup, InBaseClass);
}
}
// Traverse the contexts of inherited C++ classes.
if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(Ctx)) {
if (!Record->hasDefinition())
return;
for (const auto &B : Record->bases()) {
QualType BaseType = B.getType();
RecordDecl *RD;
if (BaseType->isDependentType()) {
if (!IncludeDependentBases) {
// Don't look into dependent bases, because name lookup can't look
// there anyway.
continue;
}
const auto *TST = BaseType->getAs<TemplateSpecializationType>();
if (!TST)
continue;
TemplateName TN = TST->getTemplateName();
const auto *TD =
dyn_cast_or_null<ClassTemplateDecl>(TN.getAsTemplateDecl());
if (!TD)
continue;
RD = TD->getTemplatedDecl();
} else {
const auto *Record = BaseType->getAs<RecordType>();
if (!Record)
continue;
RD = Record->getDecl();
}
// FIXME: It would be nice to be able to determine whether referencing
// a particular member would be ambiguous. For example, given
//
// struct A { int member; };
// struct B { int member; };
// struct C : A, B { };
//
// void f(C *c) { c->### }
//
// accessing 'member' would result in an ambiguity. However, we
// could be smart enough to qualify the member with the base
// class, e.g.,
//
// c->B::member
//
// or
//
// c->A::member
// Find results in this base class (and its bases).
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(RD, Result, QualifiedNameLookup,
/*InBaseClass=*/true);
}
}
// Traverse the contexts of Objective-C classes.
if (ObjCInterfaceDecl *IFace = dyn_cast<ObjCInterfaceDecl>(Ctx)) {
// Traverse categories.
for (auto *Cat : IFace->visible_categories()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(Cat, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
// Traverse protocols.
for (auto *I : IFace->all_referenced_protocols()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(I, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
// Traverse the superclass.
if (IFace->getSuperClass()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(IFace->getSuperClass(), Result, QualifiedNameLookup,
/*InBaseClass=*/true);
}
// If there is an implementation, traverse it. We do this to find
// synthesized ivars.
if (IFace->getImplementation()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(IFace->getImplementation(), Result,
QualifiedNameLookup, InBaseClass);
}
} else if (ObjCProtocolDecl *Protocol = dyn_cast<ObjCProtocolDecl>(Ctx)) {
for (auto *I : Protocol->protocols()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(I, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
} else if (ObjCCategoryDecl *Category = dyn_cast<ObjCCategoryDecl>(Ctx)) {
for (auto *I : Category->protocols()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(I, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
// If there is an implementation, traverse it.
if (Category->getImplementation()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(Category->getImplementation(), Result,
QualifiedNameLookup, /*InBaseClass=*/true);
}
}
}
void lookupInScope(Scope *S, LookupResult &Result,
UnqualUsingDirectiveSet &UDirs) {
// No clients run in this mode and it's not supported. Please add tests and
// remove the assertion if you start relying on it.
assert(!IncludeDependentBases && "Unsupported flag for lookupInScope");
if (!S)
return;
if (!S->getEntity() ||
(!S->getParent() && !Visited.alreadyVisitedContext(S->getEntity())) ||
(S->getEntity())->isFunctionOrMethod()) {
FindLocalExternScope FindLocals(Result);
// Walk through the declarations in this Scope. The consumer might add new
// decls to the scope as part of deserialization, so make a copy first.
SmallVector<Decl *, 8> ScopeDecls(S->decls().begin(), S->decls().end());
for (Decl *D : ScopeDecls) {
if (NamedDecl *ND = dyn_cast<NamedDecl>(D))
if ((ND = Result.getAcceptableDecl(ND))) {
Consumer.FoundDecl(ND, Visited.checkHidden(ND), nullptr, false);
Visited.add(ND);
}
}
}
DeclContext *Entity = S->getLookupEntity();
if (Entity) {
// Look into this scope's declaration context, along with any of its
// parent lookup contexts (e.g., enclosing classes), up to the point
// where we hit the context stored in the next outer scope.
DeclContext *OuterCtx = findOuterContext(S);
for (DeclContext *Ctx = Entity; Ctx && !Ctx->Equals(OuterCtx);
Ctx = Ctx->getLookupParent()) {
if (ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(Ctx)) {
if (Method->isInstanceMethod()) {
// For instance methods, look for ivars in the method's interface.
LookupResult IvarResult(Result.getSema(), Result.getLookupName(),
Result.getNameLoc(),
Sema::LookupMemberName);
if (ObjCInterfaceDecl *IFace = Method->getClassInterface()) {
lookupInDeclContext(IFace, IvarResult,
/*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
}
// We've already performed all of the name lookup that we need
// to for Objective-C methods; the next context will be the
// outer scope.
break;
}
if (Ctx->isFunctionOrMethod())
continue;
lookupInDeclContext(Ctx, Result, /*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
} else if (!S->getParent()) {
// Look into the translation unit scope. We walk through the translation
// unit's declaration context, because the Scope itself won't have all of
// the declarations if we loaded a precompiled header.
// FIXME: We would like the translation unit's Scope object to point to
// the translation unit, so we don't need this special "if" branch.
// However, doing so would force the normal C++ name-lookup code to look
// into the translation unit decl when the IdentifierInfo chains would
// suffice. Once we fix that problem (which is part of a more general
// "don't look in DeclContexts unless we have to" optimization), we can
// eliminate this.
Entity = Result.getSema().Context.getTranslationUnitDecl();
lookupInDeclContext(Entity, Result, /*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
if (Entity) {
// Lookup visible declarations in any namespaces found by using
// directives.
for (const UnqualUsingEntry &UUE : UDirs.getNamespacesFor(Entity))
lookupInDeclContext(
const_cast<DeclContext *>(UUE.getNominatedNamespace()), Result,
/*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
// Lookup names in the parent scope.
ShadowContextRAII Shadow(Visited);
lookupInScope(S->getParent(), Result, UDirs);
}
private:
VisibleDeclsRecord Visited;
VisibleDeclConsumer &Consumer;
bool IncludeDependentBases;
bool LoadExternal;
};
} // namespace
void Sema::LookupVisibleDecls(Scope *S, LookupNameKind Kind,
VisibleDeclConsumer &Consumer,
bool IncludeGlobalScope, bool LoadExternal) {
LookupVisibleHelper H(Consumer, /*IncludeDependentBases=*/false,
LoadExternal);
H.lookupVisibleDecls(*this, S, Kind, IncludeGlobalScope);
}
void Sema::LookupVisibleDecls(DeclContext *Ctx, LookupNameKind Kind,
VisibleDeclConsumer &Consumer,
bool IncludeGlobalScope,
bool IncludeDependentBases, bool LoadExternal) {
LookupVisibleHelper H(Consumer, IncludeDependentBases, LoadExternal);
H.lookupVisibleDecls(*this, Ctx, Kind, IncludeGlobalScope);
}
/// LookupOrCreateLabel - Do a name lookup of a label with the specified name.
/// If GnuLabelLoc is a valid source location, then this is a definition
/// of an __label__ label name, otherwise it is a normal label definition
/// or use.
LabelDecl *Sema::LookupOrCreateLabel(IdentifierInfo *II, SourceLocation Loc,
SourceLocation GnuLabelLoc) {
// Do a lookup to see if we have a label with this name already.
NamedDecl *Res = nullptr;
if (GnuLabelLoc.isValid()) {
// Local label definitions always shadow existing labels.
Res = LabelDecl::Create(Context, CurContext, Loc, II, GnuLabelLoc);
Scope *S = CurScope;
PushOnScopeChains(Res, S, true);
return cast<LabelDecl>(Res);
}
// Not a GNU local label.
Res = LookupSingleName(CurScope, II, Loc, LookupLabel, NotForRedeclaration);
// If we found a label, check to see if it is in the same context as us.
// When in a Block, we don't want to reuse a label in an enclosing function.
if (Res && Res->getDeclContext() != CurContext)
Res = nullptr;
if (!Res) {
// If not forward referenced or defined already, create the backing decl.
Res = LabelDecl::Create(Context, CurContext, Loc, II);
Scope *S = CurScope->getFnParent();
assert(S && "Not in a function?");
PushOnScopeChains(Res, S, true);
}
return cast<LabelDecl>(Res);
}
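// Illustrative use of the GNU local-label path above (hypothetical code):
//
//   void f() {
//     __label__ retry; // GnuLabelLoc is valid: always a fresh label
//     retry: if (again()) goto retry;
//   }
//
// The __label__ declaration shadows any 'retry' label in an enclosing
// scope, which is why the code above creates the LabelDecl unconditionally.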
//===----------------------------------------------------------------------===//
// Typo correction
//===----------------------------------------------------------------------===//
static bool isCandidateViable(CorrectionCandidateCallback &CCC,
TypoCorrection &Candidate) {
Candidate.setCallbackDistance(CCC.RankCandidate(Candidate));
return Candidate.getEditDistance(false) != TypoCorrection::InvalidDistance;
}
static void LookupPotentialTypoResult(Sema &SemaRef,
LookupResult &Res,
IdentifierInfo *Name,
Scope *S, CXXScopeSpec *SS,
DeclContext *MemberContext,
bool EnteringContext,
bool isObjCIvarLookup,
bool FindHidden);
/// Check whether the declarations found for a typo correction are
/// visible. Set the correction's RequiresImport flag to true if none of the
/// declarations are visible, false otherwise.
static void checkCorrectionVisibility(Sema &SemaRef, TypoCorrection &TC) {
TypoCorrection::decl_iterator DI = TC.begin(), DE = TC.end();
for (/**/; DI != DE; ++DI)
if (!LookupResult::isVisible(SemaRef, *DI))
break;
// No filtering needed if all decls are visible.
if (DI == DE) {
TC.setRequiresImport(false);
return;
}
llvm::SmallVector<NamedDecl*, 4> NewDecls(TC.begin(), DI);
bool AnyVisibleDecls = !NewDecls.empty();
for (/**/; DI != DE; ++DI) {
if (LookupResult::isVisible(SemaRef, *DI)) {
if (!AnyVisibleDecls) {
// Found a visible decl, discard all hidden ones.
AnyVisibleDecls = true;
NewDecls.clear();
}
NewDecls.push_back(*DI);
} else if (!AnyVisibleDecls && !(*DI)->isModulePrivate())
NewDecls.push_back(*DI);
}
if (NewDecls.empty())
TC = TypoCorrection();
else {
TC.setCorrectionDecls(NewDecls);
TC.setRequiresImport(!AnyVisibleDecls);
}
}
// Fill the supplied vector with the IdentifierInfo pointers for each piece of
// the given NestedNameSpecifier (i.e. given a NestedNameSpecifier "foo::bar::",
// fill the vector with the IdentifierInfo pointers for "foo" and "bar").
static void getNestedNameSpecifierIdentifiers(
NestedNameSpecifier *NNS,
SmallVectorImpl<const IdentifierInfo*> &Identifiers) {
if (NestedNameSpecifier *Prefix = NNS->getPrefix())
getNestedNameSpecifierIdentifiers(Prefix, Identifiers);
else
Identifiers.clear();
const IdentifierInfo *II = nullptr;
switch (NNS->getKind()) {
case NestedNameSpecifier::Identifier:
II = NNS->getAsIdentifier();
break;
case NestedNameSpecifier::Namespace:
if (NNS->getAsNamespace()->isAnonymousNamespace())
return;
II = NNS->getAsNamespace()->getIdentifier();
break;
case NestedNameSpecifier::NamespaceAlias:
II = NNS->getAsNamespaceAlias()->getIdentifier();
break;
case NestedNameSpecifier::TypeSpecWithTemplate:
case NestedNameSpecifier::TypeSpec:
II = QualType(NNS->getAsType(), 0).getBaseTypeIdentifier();
break;
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
return;
}
if (II)
Identifiers.push_back(II);
}
void TypoCorrectionConsumer::FoundDecl(NamedDecl *ND, NamedDecl *Hiding,
DeclContext *Ctx, bool InBaseClass) {
// Don't consider hidden names for typo correction.
if (Hiding)
return;
// Only consider entities with identifiers for names, ignoring
// special names (constructors, overloaded operators, selectors,
// etc.).
IdentifierInfo *Name = ND->getIdentifier();
if (!Name)
return;
// Only consider visible declarations and declarations from modules with
// names that exactly match.
if (!LookupResult::isVisible(SemaRef, ND) && Name != Typo)
return;
FoundName(Name->getName());
}
void TypoCorrectionConsumer::FoundName(StringRef Name) {
// Compute the edit distance between the typo and the name of this
// entity, and add the identifier to the list of results.
addName(Name, nullptr);
}
void TypoCorrectionConsumer::addKeywordResult(StringRef Keyword) {
// Compute the edit distance between the typo and this keyword,
// and add the keyword to the list of results.
addName(Keyword, nullptr, nullptr, true);
}
void TypoCorrectionConsumer::addName(StringRef Name, NamedDecl *ND,
NestedNameSpecifier *NNS, bool isKeyword) {
// Use a simple length-based heuristic to determine the minimum possible
// edit distance. If the minimum isn't good enough, bail out early.
StringRef TypoStr = Typo->getName();
unsigned MinED = abs((int)Name.size() - (int)TypoStr.size());
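// Worked example: for the typo "strat" (length 5) and the candidate
// "strategies" (length 10), MinED is 5 and 5 / 5 == 1 < 3, so the check
// below rejects the candidate before any real edit-distance computation.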
if (MinED && TypoStr.size() / MinED < 3)
return;
// Compute an upper bound on the allowable edit distance, so that the
// edit-distance algorithm can short-circuit.
unsigned UpperBound = (TypoStr.size() + 2) / 3;
unsigned ED = TypoStr.edit_distance(Name, true, UpperBound);
if (ED > UpperBound) return;
TypoCorrection TC(&SemaRef.Context.Idents.get(Name), ND, NNS, ED);
if (isKeyword) TC.makeKeyword();
TC.setCorrectionRange(nullptr, Result.getLookupNameInfo());
addCorrection(TC);
}
static const unsigned MaxTypoDistanceResultSets = 5;
void TypoCorrectionConsumer::addCorrection(TypoCorrection Correction) {
StringRef TypoStr = Typo->getName();
StringRef Name = Correction.getCorrectionAsIdentifierInfo()->getName();
// For very short typos, ignore potential corrections that have a different
// base identifier from the typo or which have a normalized edit distance
// longer than the typo itself.
if (TypoStr.size() < 3 &&
(Name != TypoStr || Correction.getEditDistance(true) > TypoStr.size()))
return;
// If the correction is resolved but is not viable, ignore it.
if (Correction.isResolved()) {
checkCorrectionVisibility(SemaRef, Correction);
if (!Correction || !isCandidateViable(*CorrectionValidator, Correction))
return;
}
TypoResultList &CList =
CorrectionResults[Correction.getEditDistance(false)][Name];
if (!CList.empty() && !CList.back().isResolved())
CList.pop_back();
if (NamedDecl *NewND = Correction.getCorrectionDecl()) {
auto RI = llvm::find_if(CList, [NewND](const TypoCorrection &TypoCorr) {
return TypoCorr.getCorrectionDecl() == NewND;
});
if (RI != CList.end()) {
// The Correction refers to a decl already in the list. No insertion is
// necessary and all further cases will return.
auto IsDeprecated = [](Decl *D) {
while (D) {
if (D->isDeprecated())
return true;
D = llvm::dyn_cast_or_null<NamespaceDecl>(D->getDeclContext());
}
return false;
};
// Prefer non-deprecated corrections over deprecated ones, and only then
// order alphabetically.
std::pair<bool, std::string> NewKey = {
IsDeprecated(Correction.getFoundDecl()),
Correction.getAsString(SemaRef.getLangOpts())};
std::pair<bool, std::string> PrevKey = {
IsDeprecated(RI->getFoundDecl()),
RI->getAsString(SemaRef.getLangOpts())};
if (NewKey < PrevKey)
*RI = Correction;
return;
}
}
if (CList.empty() || Correction.isResolved())
CList.push_back(Correction);
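// Keep only the MaxTypoDistanceResultSets best edit-distance buckets;
// CorrectionResults is keyed by edit distance, so erasing from the back
// discards the worst candidates first.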
while (CorrectionResults.size() > MaxTypoDistanceResultSets)
CorrectionResults.erase(std::prev(CorrectionResults.end()));
}
void TypoCorrectionConsumer::addNamespaces(
const llvm::MapVector<NamespaceDecl *, bool> &KnownNamespaces) {
SearchNamespaces = true;
for (auto KNPair : KnownNamespaces)
Namespaces.addNameSpecifier(KNPair.first);
bool SSIsTemplate = false;
if (NestedNameSpecifier *NNS =
(SS && SS->isValid()) ? SS->getScopeRep() : nullptr) {
if (const Type *T = NNS->getAsType())
SSIsTemplate = T->getTypeClass() == Type::TemplateSpecialization;
}
// Do not transform this into an iterator-based loop. The loop body can
// trigger the creation of further types (through lazy deserialization) and
// invalidate iterators into this list.
auto &Types = SemaRef.getASTContext().getTypes();
for (unsigned I = 0; I != Types.size(); ++I) {
const auto *TI = Types[I];
if (CXXRecordDecl *CD = TI->getAsCXXRecordDecl()) {
CD = CD->getCanonicalDecl();
if (!CD->isDependentType() && !CD->isAnonymousStructOrUnion() &&
!CD->isUnion() && CD->getIdentifier() &&
(SSIsTemplate || !isa<ClassTemplateSpecializationDecl>(CD)) &&
(CD->isBeingDefined() || CD->isCompleteDefinition()))
Namespaces.addNameSpecifier(CD);
}
}
}
const TypoCorrection &TypoCorrectionConsumer::getNextCorrection() {
if (++CurrentTCIndex < ValidatedCorrections.size())
return ValidatedCorrections[CurrentTCIndex];
CurrentTCIndex = ValidatedCorrections.size();
while (!CorrectionResults.empty()) {
auto DI = CorrectionResults.begin();
if (DI->second.empty()) {
CorrectionResults.erase(DI);
continue;
}
auto RI = DI->second.begin();
if (RI->second.empty()) {
DI->second.erase(RI);
performQualifiedLookups();
continue;
}
TypoCorrection TC = RI->second.pop_back_val();
if (TC.isResolved() || TC.requiresImport() || resolveCorrection(TC)) {
ValidatedCorrections.push_back(TC);
return ValidatedCorrections[CurrentTCIndex];
}
}
return ValidatedCorrections[0]; // The empty correction.
}
bool TypoCorrectionConsumer::resolveCorrection(TypoCorrection &Candidate) {
IdentifierInfo *Name = Candidate.getCorrectionAsIdentifierInfo();
DeclContext *TempMemberContext = MemberContext;
CXXScopeSpec *TempSS = SS.get();
retry_lookup:
LookupPotentialTypoResult(SemaRef, Result, Name, S, TempSS, TempMemberContext,
EnteringContext,
CorrectionValidator->IsObjCIvarLookup,
Name == Typo && !Candidate.WillReplaceSpecifier());
switch (Result.getResultKind()) {
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::FoundUnresolvedValue:
if (TempSS) {
// Immediately retry the lookup without the given CXXScopeSpec
TempSS = nullptr;
Candidate.WillReplaceSpecifier(true);
goto retry_lookup;
}
if (TempMemberContext) {
if (SS && !TempSS)
TempSS = SS.get();
TempMemberContext = nullptr;
goto retry_lookup;
}
if (SearchNamespaces)
QualifiedResults.push_back(Candidate);
break;
case LookupResult::Ambiguous:
// We don't deal with ambiguities.
break;
case LookupResult::Found:
case LookupResult::FoundOverloaded:
// Store all of the Decls for overloaded symbols
for (auto *TRD : Result)
Candidate.addCorrectionDecl(TRD);
checkCorrectionVisibility(SemaRef, Candidate);
if (!isCandidateViable(*CorrectionValidator, Candidate)) {
if (SearchNamespaces)
QualifiedResults.push_back(Candidate);
break;
}
Candidate.setCorrectionRange(SS.get(), Result.getLookupNameInfo());
return true;
}
return false;
}
void TypoCorrectionConsumer::performQualifiedLookups() {
unsigned TypoLen = Typo->getName().size();
for (const TypoCorrection &QR : QualifiedResults) {
for (const auto &NSI : Namespaces) {
DeclContext *Ctx = NSI.DeclCtx;
const Type *NSType = NSI.NameSpecifier->getAsType();
// If the current NestedNameSpecifier refers to a class and the
// current correction candidate is the name of that class, then skip
// it, as it is unlikely that a qualified version of the class's
// constructor is an appropriate correction.
if (CXXRecordDecl *NSDecl = NSType ? NSType->getAsCXXRecordDecl() :
nullptr) {
if (NSDecl->getIdentifier() == QR.getCorrectionAsIdentifierInfo())
continue;
}
TypoCorrection TC(QR);
TC.ClearCorrectionDecls();
TC.setCorrectionSpecifier(NSI.NameSpecifier);
TC.setQualifierDistance(NSI.EditDistance);
TC.setCallbackDistance(0); // Reset the callback distance
// If the current correction candidate and namespace combination are
// too far away from the original typo based on the normalized edit
// distance, then skip performing a qualified name lookup.
unsigned TmpED = TC.getEditDistance(true);
if (QR.getCorrectionAsIdentifierInfo() != Typo && TmpED &&
TypoLen / TmpED < 3)
continue;
Result.clear();
Result.setLookupName(QR.getCorrectionAsIdentifierInfo());
if (!SemaRef.LookupQualifiedName(Result, Ctx))
continue;
// Any corrections added below will be validated in subsequent
// iterations of the main while() loop over the Consumer's contents.
switch (Result.getResultKind()) {
case LookupResult::Found:
case LookupResult::FoundOverloaded: {
if (SS && SS->isValid()) {
std::string NewQualified = TC.getAsString(SemaRef.getLangOpts());
std::string OldQualified;
llvm::raw_string_ostream OldOStream(OldQualified);
SS->getScopeRep()->print(OldOStream, SemaRef.getPrintingPolicy());
OldOStream << Typo->getName();
// If the correction candidate would be written as an identical qualified
// identifier, then the existing CXXScopeSpec probably included a typedef
// that wasn't accounted for properly.
if (OldOStream.str() == NewQualified)
break;
}
for (LookupResult::iterator TRD = Result.begin(), TRDEnd = Result.end();
TRD != TRDEnd; ++TRD) {
if (SemaRef.CheckMemberAccess(TC.getCorrectionRange().getBegin(),
NSType ? NSType->getAsCXXRecordDecl()
: nullptr,
TRD.getPair()) == Sema::AR_accessible)
TC.addCorrectionDecl(*TRD);
}
if (TC.isResolved()) {
TC.setCorrectionRange(SS.get(), Result.getLookupNameInfo());
addCorrection(TC);
}
break;
}
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::Ambiguous:
case LookupResult::FoundUnresolvedValue:
break;
}
}
}
QualifiedResults.clear();
}
TypoCorrectionConsumer::NamespaceSpecifierSet::NamespaceSpecifierSet(
ASTContext &Context, DeclContext *CurContext, CXXScopeSpec *CurScopeSpec)
: Context(Context), CurContextChain(buildContextChain(CurContext)) {
if (NestedNameSpecifier *NNS =
CurScopeSpec ? CurScopeSpec->getScopeRep() : nullptr) {
llvm::raw_string_ostream SpecifierOStream(CurNameSpecifier);
NNS->print(SpecifierOStream, Context.getPrintingPolicy());
getNestedNameSpecifierIdentifiers(NNS, CurNameSpecifierIdentifiers);
}
// Build the list of identifiers that would be used for an absolute
// (from the global context) NestedNameSpecifier referring to the current
// context.
for (DeclContext *C : llvm::reverse(CurContextChain)) {
if (auto *ND = dyn_cast_or_null<NamespaceDecl>(C))
CurContextIdentifiers.push_back(ND->getIdentifier());
}
// Add the global context as a NestedNameSpecifier
SpecifierInfo SI = {cast<DeclContext>(Context.getTranslationUnitDecl()),
NestedNameSpecifier::GlobalSpecifier(Context), 1};
DistanceMap[1].push_back(SI);
}
auto TypoCorrectionConsumer::NamespaceSpecifierSet::buildContextChain(
DeclContext *Start) -> DeclContextList {
assert(Start && "Building a context chain from a null context");
DeclContextList Chain;
for (DeclContext *DC = Start->getPrimaryContext(); DC != nullptr;
DC = DC->getLookupParent()) {
NamespaceDecl *ND = dyn_cast_or_null<NamespaceDecl>(DC);
if (!DC->isInlineNamespace() && !DC->isTransparentContext() &&
!(ND && ND->isAnonymousNamespace()))
Chain.push_back(DC->getPrimaryContext());
}
return Chain;
}
unsigned
TypoCorrectionConsumer::NamespaceSpecifierSet::buildNestedNameSpecifier(
DeclContextList &DeclChain, NestedNameSpecifier *&NNS) {
unsigned NumSpecifiers = 0;
for (DeclContext *C : llvm::reverse(DeclChain)) {
if (auto *ND = dyn_cast_or_null<NamespaceDecl>(C)) {
NNS = NestedNameSpecifier::Create(Context, NNS, ND);
++NumSpecifiers;
} else if (auto *RD = dyn_cast_or_null<RecordDecl>(C)) {
NNS = NestedNameSpecifier::Create(Context, NNS, RD->isTemplateDecl(),
RD->getTypeForDecl());
++NumSpecifiers;
}
}
return NumSpecifiers;
}
void TypoCorrectionConsumer::NamespaceSpecifierSet::addNameSpecifier(
DeclContext *Ctx) {
NestedNameSpecifier *NNS = nullptr;
unsigned NumSpecifiers = 0;
DeclContextList NamespaceDeclChain(buildContextChain(Ctx));
DeclContextList FullNamespaceDeclChain(NamespaceDeclChain);
// Eliminate common elements from the two DeclContext chains.
for (DeclContext *C : llvm::reverse(CurContextChain)) {
if (NamespaceDeclChain.empty() || NamespaceDeclChain.back() != C)
break;
NamespaceDeclChain.pop_back();
}
// Build the NestedNameSpecifier from what is left of the NamespaceDeclChain
NumSpecifiers = buildNestedNameSpecifier(NamespaceDeclChain, NNS);
// Add an explicit leading '::' specifier if needed.
if (NamespaceDeclChain.empty()) {
// Rebuild the NestedNameSpecifier as a globally-qualified specifier.
NNS = NestedNameSpecifier::GlobalSpecifier(Context);
NumSpecifiers =
buildNestedNameSpecifier(FullNamespaceDeclChain, NNS);
} else if (NamedDecl *ND =
dyn_cast_or_null<NamedDecl>(NamespaceDeclChain.back())) {
IdentifierInfo *Name = ND->getIdentifier();
bool SameNameSpecifier = false;
if (llvm::is_contained(CurNameSpecifierIdentifiers, Name)) {
std::string NewNameSpecifier;
llvm::raw_string_ostream SpecifierOStream(NewNameSpecifier);
SmallVector<const IdentifierInfo *, 4> NewNameSpecifierIdentifiers;
getNestedNameSpecifierIdentifiers(NNS, NewNameSpecifierIdentifiers);
NNS->print(SpecifierOStream, Context.getPrintingPolicy());
SpecifierOStream.flush();
SameNameSpecifier = NewNameSpecifier == CurNameSpecifier;
}
if (SameNameSpecifier || llvm::is_contained(CurContextIdentifiers, Name)) {
// Rebuild the NestedNameSpecifier as a globally-qualified specifier.
NNS = NestedNameSpecifier::GlobalSpecifier(Context);
NumSpecifiers =
buildNestedNameSpecifier(FullNamespaceDeclChain, NNS);
}
}
// If the built NestedNameSpecifier would be replacing an existing
// NestedNameSpecifier, use the number of component identifiers that
// would need to be changed as the edit distance instead of the number
// of components in the built NestedNameSpecifier.
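// E.g. (illustrative): replacing the written specifier "foo::bar::" with
// "foo::baz::" costs 1 (the one changed component) rather than 2 (the
// component count of the new specifier).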
if (NNS && !CurNameSpecifierIdentifiers.empty()) {
SmallVector<const IdentifierInfo*, 4> NewNameSpecifierIdentifiers;
getNestedNameSpecifierIdentifiers(NNS, NewNameSpecifierIdentifiers);
NumSpecifiers = llvm::ComputeEditDistance(
llvm::makeArrayRef(CurNameSpecifierIdentifiers),
llvm::makeArrayRef(NewNameSpecifierIdentifiers));
}
SpecifierInfo SI = {Ctx, NNS, NumSpecifiers};
DistanceMap[NumSpecifiers].push_back(SI);
}
/// Perform name lookup for a possible result for typo correction.
static void LookupPotentialTypoResult(Sema &SemaRef,
LookupResult &Res,
IdentifierInfo *Name,
Scope *S, CXXScopeSpec *SS,
DeclContext *MemberContext,
bool EnteringContext,
bool isObjCIvarLookup,
bool FindHidden) {
Res.suppressDiagnostics();
Res.clear();
Res.setLookupName(Name);
Res.setAllowHidden(FindHidden);
if (MemberContext) {
if (ObjCInterfaceDecl *Class = dyn_cast<ObjCInterfaceDecl>(MemberContext)) {
if (isObjCIvarLookup) {
if (ObjCIvarDecl *Ivar = Class->lookupInstanceVariable(Name)) {
Res.addDecl(Ivar);
Res.resolveKind();
return;
}
}
if (ObjCPropertyDecl *Prop = Class->FindPropertyDeclaration(
Name, ObjCPropertyQueryKind::OBJC_PR_query_instance)) {
Res.addDecl(Prop);
Res.resolveKind();
return;
}
}
SemaRef.LookupQualifiedName(Res, MemberContext);
return;
}
SemaRef.LookupParsedName(Res, S, SS, /*AllowBuiltinCreation=*/false,
EnteringContext);
// Fake ivar lookup; this should really be part of
// LookupParsedName.
if (ObjCMethodDecl *Method = SemaRef.getCurMethodDecl()) {
if (Method->isInstanceMethod() && Method->getClassInterface() &&
(Res.empty() ||
(Res.isSingleResult() &&
Res.getFoundDecl()->isDefinedOutsideFunctionOrMethod()))) {
if (ObjCIvarDecl *IV
= Method->getClassInterface()->lookupInstanceVariable(Name)) {
Res.addDecl(IV);
Res.resolveKind();
}
}
}
}
/// Add keywords to the consumer as possible typo corrections.
static void AddKeywordsToConsumer(Sema &SemaRef,
TypoCorrectionConsumer &Consumer,
Scope *S, CorrectionCandidateCallback &CCC,
bool AfterNestedNameSpecifier) {
if (AfterNestedNameSpecifier) {
// For 'X::', we know exactly which keywords can appear next.
Consumer.addKeywordResult("template");
if (CCC.WantExpressionKeywords)
Consumer.addKeywordResult("operator");
return;
}
if (CCC.WantObjCSuper)
Consumer.addKeywordResult("super");
if (CCC.WantTypeSpecifiers) {
// Add type-specifier keywords to the set of results.
static const char *const CTypeSpecs[] = {
"char", "const", "double", "enum", "float", "int", "long", "short",
"signed", "struct", "union", "unsigned", "void", "volatile",
"_Complex", "_Imaginary",
// storage-specifiers as well
"extern", "inline", "static", "typedef"
};
const unsigned NumCTypeSpecs = llvm::array_lengthof(CTypeSpecs);
for (unsigned I = 0; I != NumCTypeSpecs; ++I)
Consumer.addKeywordResult(CTypeSpecs[I]);
if (SemaRef.getLangOpts().C99)
Consumer.addKeywordResult("restrict");
if (SemaRef.getLangOpts().Bool || SemaRef.getLangOpts().CPlusPlus)
Consumer.addKeywordResult("bool");
else if (SemaRef.getLangOpts().C99)
Consumer.addKeywordResult("_Bool");
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("class");
Consumer.addKeywordResult("typename");
Consumer.addKeywordResult("wchar_t");
if (SemaRef.getLangOpts().CPlusPlus11) {
Consumer.addKeywordResult("char16_t");
Consumer.addKeywordResult("char32_t");
Consumer.addKeywordResult("constexpr");
Consumer.addKeywordResult("decltype");
Consumer.addKeywordResult("thread_local");
}
}
if (SemaRef.getLangOpts().GNUKeywords)
Consumer.addKeywordResult("typeof");
} else if (CCC.WantFunctionLikeCasts) {
static const char *const CastableTypeSpecs[] = {
"char", "double", "float", "int", "long", "short",
"signed", "unsigned", "void"
};
for (auto *kw : CastableTypeSpecs)
Consumer.addKeywordResult(kw);
}
if (CCC.WantCXXNamedCasts && SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("const_cast");
Consumer.addKeywordResult("dynamic_cast");
Consumer.addKeywordResult("reinterpret_cast");
Consumer.addKeywordResult("static_cast");
}
if (CCC.WantExpressionKeywords) {
Consumer.addKeywordResult("sizeof");
if (SemaRef.getLangOpts().Bool || SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("false");
Consumer.addKeywordResult("true");
}
if (SemaRef.getLangOpts().CPlusPlus) {
static const char *const CXXExprs[] = {
"delete", "new", "operator", "throw", "typeid"
};
const unsigned NumCXXExprs = llvm::array_lengthof(CXXExprs);
for (unsigned I = 0; I != NumCXXExprs; ++I)
Consumer.addKeywordResult(CXXExprs[I]);
if (isa<CXXMethodDecl>(SemaRef.CurContext) &&
cast<CXXMethodDecl>(SemaRef.CurContext)->isInstance())
Consumer.addKeywordResult("this");
if (SemaRef.getLangOpts().CPlusPlus11) {
Consumer.addKeywordResult("alignof");
Consumer.addKeywordResult("nullptr");
}
}
if (SemaRef.getLangOpts().C11) {
// FIXME: We should not suggest _Alignof if the alignof macro
// is present.
Consumer.addKeywordResult("_Alignof");
}
}
if (CCC.WantRemainingKeywords) {
if (SemaRef.getCurFunctionOrMethodDecl() || SemaRef.getCurBlock()) {
// Statements.
static const char *const CStmts[] = {
"do", "else", "for", "goto", "if", "return", "switch", "while" };
const unsigned NumCStmts = llvm::array_lengthof(CStmts);
for (unsigned I = 0; I != NumCStmts; ++I)
Consumer.addKeywordResult(CStmts[I]);
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("catch");
Consumer.addKeywordResult("try");
}
if (S && S->getBreakParent())
Consumer.addKeywordResult("break");
if (S && S->getContinueParent())
Consumer.addKeywordResult("continue");
if (SemaRef.getCurFunction() &&
!SemaRef.getCurFunction()->SwitchStack.empty()) {
Consumer.addKeywordResult("case");
Consumer.addKeywordResult("default");
}
} else {
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("namespace");
Consumer.addKeywordResult("template");
}
if (S && S->isClassScope()) {
Consumer.addKeywordResult("explicit");
Consumer.addKeywordResult("friend");
Consumer.addKeywordResult("mutable");
Consumer.addKeywordResult("private");
Consumer.addKeywordResult("protected");
Consumer.addKeywordResult("public");
Consumer.addKeywordResult("virtual");
}
}
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("using");
if (SemaRef.getLangOpts().CPlusPlus11)
Consumer.addKeywordResult("static_assert");
}
}
}
std::unique_ptr<TypoCorrectionConsumer> Sema::makeTypoCorrectionConsumer(
const DeclarationNameInfo &TypoName, Sema::LookupNameKind LookupKind,
Scope *S, CXXScopeSpec *SS, CorrectionCandidateCallback &CCC,
DeclContext *MemberContext, bool EnteringContext,
const ObjCObjectPointerType *OPT, bool ErrorRecovery) {
if (Diags.hasFatalErrorOccurred() || !getLangOpts().SpellChecking ||
DisableTypoCorrection)
return nullptr;
// In Microsoft mode, don't perform typo correction in a template member
// function dependent context because it interferes with the "lookup into
// dependent bases of class templates" feature.
if (getLangOpts().MSVCCompat && CurContext->isDependentContext() &&
isa<CXXMethodDecl>(CurContext))
return nullptr;
// We only attempt to correct typos for identifiers.
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
if (!Typo)
return nullptr;
// If the scope specifier itself was invalid, don't try to correct
// typos.
if (SS && SS->isInvalid())
return nullptr;
// Never try to correct typos during any kind of code synthesis.
if (!CodeSynthesisContexts.empty())
return nullptr;
// Don't try to correct 'super'.
if (S && S->isInObjcMethodScope() && Typo == getSuperIdentifier())
return nullptr;
// Abort if typo correction already failed for this specific typo.
IdentifierSourceLocations::iterator locs = TypoCorrectionFailures.find(Typo);
if (locs != TypoCorrectionFailures.end() &&
locs->second.count(TypoName.getLoc()))
return nullptr;
// Don't try to correct the identifier "vector" when in AltiVec mode.
// TODO: Figure out why typo correction misbehaves in this case, fix it, and
// remove this workaround.
if ((getLangOpts().AltiVec || getLangOpts().ZVector) && Typo->isStr("vector"))
return nullptr;
// Provide a stopgap for files that are seriously broken: trying to correct
// every typo can impose a huge performance penalty, causing some files to
// take minutes to be rejected by the parser.
unsigned Limit = getDiagnostics().getDiagnosticOptions().SpellCheckingLimit;
if (Limit && TyposCorrected >= Limit)
return nullptr;
++TyposCorrected;
// If we're handling a missing symbol error, using modules, and the
// special search all modules option is used, look for a missing import.
if (ErrorRecovery && getLangOpts().Modules &&
getLangOpts().ModulesSearchAll) {
// The following has the side effect of loading the missing module.
getModuleLoader().lookupMissingImports(Typo->getName(),
TypoName.getBeginLoc());
}
// Extend the lifetime of the callback. We delayed this until here
// to avoid allocations in the hot path (which is where no typo correction
// occurs). Note that CorrectionCandidateCallback is polymorphic and
// initially stack-allocated.
std::unique_ptr<CorrectionCandidateCallback> ClonedCCC = CCC.clone();
auto Consumer = std::make_unique<TypoCorrectionConsumer>(
*this, TypoName, LookupKind, S, SS, std::move(ClonedCCC), MemberContext,
EnteringContext);
// Perform name lookup to find visible, similarly-named entities.
bool IsUnqualifiedLookup = false;
DeclContext *QualifiedDC = MemberContext;
if (MemberContext) {
LookupVisibleDecls(MemberContext, LookupKind, *Consumer);
// Look in qualified interfaces.
if (OPT) {
for (auto *I : OPT->quals())
LookupVisibleDecls(I, LookupKind, *Consumer);
}
} else if (SS && SS->isSet()) {
QualifiedDC = computeDeclContext(*SS, EnteringContext);
if (!QualifiedDC)
return nullptr;
LookupVisibleDecls(QualifiedDC, LookupKind, *Consumer);
} else {
IsUnqualifiedLookup = true;
}
// Determine whether we are going to search in the various namespaces for
// corrections.
bool SearchNamespaces
= getLangOpts().CPlusPlus &&
(IsUnqualifiedLookup || (SS && SS->isSet()));
if (IsUnqualifiedLookup || SearchNamespaces) {
// For unqualified lookup, look through all of the names that we have
// seen in this translation unit.
// FIXME: Re-add the ability to skip very unlikely potential corrections.
for (const auto &I : Context.Idents)
Consumer->FoundName(I.getKey());
// Walk through identifiers in external identifier sources.
// FIXME: Re-add the ability to skip very unlikely potential corrections.
if (IdentifierInfoLookup *External
= Context.Idents.getExternalIdentifierLookup()) {
std::unique_ptr<IdentifierIterator> Iter(External->getIdentifiers());
do {
StringRef Name = Iter->Next();
if (Name.empty())
break;
Consumer->FoundName(Name);
} while (true);
}
}
AddKeywordsToConsumer(*this, *Consumer, S,
*Consumer->getCorrectionValidator(),
SS && SS->isNotEmpty());
// Build the NestedNameSpecifiers for the KnownNamespaces, if we're going
// to search those namespaces.
if (SearchNamespaces) {
// Load any externally-known namespaces.
if (ExternalSource && !LoadedExternalKnownNamespaces) {
SmallVector<NamespaceDecl *, 4> ExternalKnownNamespaces;
LoadedExternalKnownNamespaces = true;
ExternalSource->ReadKnownNamespaces(ExternalKnownNamespaces);
for (auto *N : ExternalKnownNamespaces)
KnownNamespaces[N] = true;
}
Consumer->addNamespaces(KnownNamespaces);
}
return Consumer;
}
/// Try to "correct" a typo in the source code by finding
/// visible declarations whose names are similar to the name that was
/// present in the source code.
///
/// \param TypoName the \c DeclarationNameInfo structure that contains
/// the name that was present in the source code along with its location.
///
/// \param LookupKind the name-lookup criteria used to search for the name.
///
/// \param S the scope in which name lookup occurs.
///
/// \param SS the nested-name-specifier that precedes the name we're
/// looking for, if present.
///
/// \param CCC A CorrectionCandidateCallback object that provides further
/// validation of typo correction candidates. It also provides flags for
/// determining the set of keywords permitted.
///
/// \param MemberContext if non-NULL, the context in which to look for
/// a member access expression.
///
/// \param EnteringContext whether we're entering the context described by
/// the nested-name-specifier SS.
///
/// \param OPT when non-NULL, the search for visible declarations will
/// also walk the protocols in the qualified interfaces of \p OPT.
///
/// \returns a \c TypoCorrection containing the corrected name for the typo,
/// along with information such as the \c NamedDecl where the corrected name
/// was declared, and any additional \c NestedNameSpecifier needed to access
/// it (C++ only). The \c TypoCorrection is empty if there is no correction.
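///
/// For instance (purely illustrative), a use of the undeclared name
/// 'pritnf' may be corrected to a visible 'printf' declaration with an
/// edit distance of 1, provided \p CCC accepts function candidates.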
TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
Sema::LookupNameKind LookupKind,
Scope *S, CXXScopeSpec *SS,
CorrectionCandidateCallback &CCC,
CorrectTypoKind Mode,
DeclContext *MemberContext,
bool EnteringContext,
const ObjCObjectPointerType *OPT,
bool RecordFailure) {
// Always let the ExternalSource have the first chance at correction, even
// if we would otherwise have given up.
if (ExternalSource) {
if (TypoCorrection Correction =
ExternalSource->CorrectTypo(TypoName, LookupKind, S, SS, CCC,
MemberContext, EnteringContext, OPT))
return Correction;
}
// Ugly hack equivalent to CTC == CTC_ObjCMessageReceiver;
// WantObjCSuper is only true for CTC_ObjCMessageReceiver and for
// some instances of CTC_Unknown, while WantRemainingKeywords is true
// for CTC_Unknown but not for CTC_ObjCMessageReceiver.
bool ObjCMessageReceiver = CCC.WantObjCSuper && !CCC.WantRemainingKeywords;
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
auto Consumer = makeTypoCorrectionConsumer(TypoName, LookupKind, S, SS, CCC,
MemberContext, EnteringContext,
OPT, Mode == CTK_ErrorRecovery);
if (!Consumer)
return TypoCorrection();
// If we haven't found anything, we're done.
if (Consumer->empty())
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
// Make sure the best edit distance (prior to adding any namespace qualifiers)
// is not more than about a third of the length of the typo's identifier.
unsigned ED = Consumer->getBestEditDistance(true);
unsigned TypoLen = Typo->getName().size();
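// Worked numbers: for a 6-character typo, a best edit distance of 3 is
// rejected below (6 / 3 == 2 < 3), while a distance of 2 survives
// (6 / 2 == 3).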
if (ED > 0 && TypoLen / ED < 3)
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
TypoCorrection BestTC = Consumer->getNextCorrection();
TypoCorrection SecondBestTC = Consumer->getNextCorrection();
if (!BestTC)
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
ED = BestTC.getEditDistance();
if (TypoLen >= 3 && ED > 0 && TypoLen / ED < 3) {
// If this was an unqualified lookup and we believe the callback
// object wouldn't have filtered out possible corrections, note
// that no correction was found.
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
}
// If only a single name remains, return that result.
if (!SecondBestTC ||
SecondBestTC.getEditDistance(false) > BestTC.getEditDistance(false)) {
const TypoCorrection &Result = BestTC;
// Don't correct to a keyword that's the same as the typo; the keyword
// wasn't actually in scope.
if (ED == 0 && Result.isKeyword())
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
TypoCorrection TC = Result;
TC.setCorrectionRange(SS, TypoName);
checkCorrectionVisibility(*this, TC);
return TC;
} else if (SecondBestTC && ObjCMessageReceiver) {
// Prefer 'super' when we're completing in a message-receiver
// context.
if (BestTC.getCorrection().getAsString() != "super") {
if (SecondBestTC.getCorrection().getAsString() == "super")
BestTC = SecondBestTC;
else if ((*Consumer)["super"].front().isKeyword())
BestTC = (*Consumer)["super"].front();
}
// Don't correct to a keyword that's the same as the typo; the keyword
// wasn't actually in scope.
if (BestTC.getEditDistance() == 0 ||
BestTC.getCorrection().getAsString() != "super")
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
BestTC.setCorrectionRange(SS, TypoName);
return BestTC;
}
// Record the failure's location if needed and return an empty correction. If
// this was an unqualified lookup and we believe the callback object did not
// filter out possible corrections, also cache the failure for the typo.
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure && !SecondBestTC);
}
/// Try to "correct" a typo in the source code by finding
/// visible declarations whose names are similar to the name that was
/// present in the source code.
///
/// \param TypoName the \c DeclarationNameInfo structure that contains
/// the name that was present in the source code along with its location.
///
/// \param LookupKind the name-lookup criteria used to search for the name.
///
/// \param S the scope in which name lookup occurs.
///
/// \param SS the nested-name-specifier that precedes the name we're
/// looking for, if present.
///
/// \param CCC A CorrectionCandidateCallback object that provides further
/// validation of typo correction candidates. It also provides flags for
/// determining the set of keywords permitted.
///
/// \param TDG A TypoDiagnosticGenerator functor that will be used to print
/// diagnostics when the actual typo correction is attempted.
///
/// \param TRC A TypoRecoveryCallback functor that will be used to build an
/// Expr from a typo correction candidate.
///
/// \param MemberContext if non-NULL, the context in which to look for
/// a member access expression.
///
/// \param EnteringContext whether we're entering the context described by
/// the nested-name-specifier SS.
///
/// \param OPT when non-NULL, the search for visible declarations will
/// also walk the protocols in the qualified interfaces of \p OPT.
///
/// \returns a new \c TypoExpr that will later be replaced in the AST with an
/// Expr representing the result of performing typo correction, or nullptr if
/// typo correction is not possible. If nullptr is returned, no diagnostics will
/// be emitted and it is the responsibility of the caller to emit any that are
/// needed.
TypoExpr *Sema::CorrectTypoDelayed(
const DeclarationNameInfo &TypoName, Sema::LookupNameKind LookupKind,
Scope *S, CXXScopeSpec *SS, CorrectionCandidateCallback &CCC,
TypoDiagnosticGenerator TDG, TypoRecoveryCallback TRC, CorrectTypoKind Mode,
DeclContext *MemberContext, bool EnteringContext,
const ObjCObjectPointerType *OPT) {
auto Consumer = makeTypoCorrectionConsumer(TypoName, LookupKind, S, SS, CCC,
MemberContext, EnteringContext,
OPT, Mode == CTK_ErrorRecovery);
// Give the external sema source a chance to correct the typo.
TypoCorrection ExternalTypo;
if (ExternalSource && Consumer) {
ExternalTypo = ExternalSource->CorrectTypo(
TypoName, LookupKind, S, SS, *Consumer->getCorrectionValidator(),
MemberContext, EnteringContext, OPT);
if (ExternalTypo)
Consumer->addCorrection(ExternalTypo);
}
if (!Consumer || Consumer->empty())
return nullptr;
// Make sure the best edit distance (prior to adding any namespace qualifiers)
// is not more than about a third of the length of the typo's identifier.
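// (Illustrative arithmetic: with the integer division below, a 9-character
// identifier tolerates an edit distance of up to 3, while a 5-character
// identifier only tolerates 1.)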
unsigned ED = Consumer->getBestEditDistance(true);
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
if (!ExternalTypo && ED > 0 && Typo->getName().size() / ED < 3)
return nullptr;
ExprEvalContexts.back().NumTypos++;
return createDelayedTypo(std::move(Consumer), std::move(TDG), std::move(TRC),
TypoName.getLoc());
}
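// Illustrative sketch, not part of this change: a caller typically supplies
// the diagnostic and recovery callbacks as lambdas matching the
// TypoDiagnosticGenerator and TypoRecoveryCallback typedefs, relying on the
// defaulted trailing parameters. SemaRef, NameInfo, S, SS and CCC below are
// hypothetical placeholders.
//
//   TypoExpr *TE = SemaRef.CorrectTypoDelayed(
//       NameInfo, Sema::LookupOrdinaryName, S, &SS, CCC,
//       [](const TypoCorrection &TC) { /* emit the delayed diagnostic */ },
//       [](Sema &S, TypoExpr *TE, TypoCorrection TC) -> ExprResult {
//         /* rebuild the expression from the chosen correction */
//         return ExprError();
//       },
//       Sema::CTK_ErrorRecovery);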
void TypoCorrection::addCorrectionDecl(NamedDecl *CDecl) {
if (!CDecl) return;
if (isKeyword())
CorrectionDecls.clear();
CorrectionDecls.push_back(CDecl);
if (!CorrectionName)
CorrectionName = CDecl->getDeclName();
}
std::string TypoCorrection::getAsString(const LangOptions &LO) const {
if (CorrectionNameSpec) {
std::string tmpBuffer;
llvm::raw_string_ostream PrefixOStream(tmpBuffer);
CorrectionNameSpec->print(PrefixOStream, PrintingPolicy(LO));
PrefixOStream << CorrectionName;
return PrefixOStream.str();
}
return CorrectionName.getAsString();
}
bool CorrectionCandidateCallback::ValidateCandidate(
const TypoCorrection &candidate) {
if (!candidate.isResolved())
return true;
if (candidate.isKeyword())
return WantTypeSpecifiers || WantExpressionKeywords || WantCXXNamedCasts ||
WantRemainingKeywords || WantObjCSuper;
bool HasNonType = false;
bool HasStaticMethod = false;
bool HasNonStaticMethod = false;
for (Decl *D : candidate) {
if (FunctionTemplateDecl *FTD = dyn_cast<FunctionTemplateDecl>(D))
D = FTD->getTemplatedDecl();
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
if (Method->isStatic())
HasStaticMethod = true;
else
HasNonStaticMethod = true;
}
if (!isa<TypeDecl>(D))
HasNonType = true;
}
if (IsAddressOfOperand && HasNonStaticMethod && !HasStaticMethod &&
!candidate.getCorrectionSpecifier())
return false;
return WantTypeSpecifiers || HasNonType;
}
FunctionCallFilterCCC::FunctionCallFilterCCC(Sema &SemaRef, unsigned NumArgs,
bool HasExplicitTemplateArgs,
MemberExpr *ME)
: NumArgs(NumArgs), HasExplicitTemplateArgs(HasExplicitTemplateArgs),
CurContext(SemaRef.CurContext), MemberFn(ME) {
WantTypeSpecifiers = false;
WantFunctionLikeCasts = SemaRef.getLangOpts().CPlusPlus &&
!HasExplicitTemplateArgs && NumArgs == 1;
WantCXXNamedCasts = HasExplicitTemplateArgs && NumArgs == 1;
WantRemainingKeywords = false;
}
bool FunctionCallFilterCCC::ValidateCandidate(const TypoCorrection &candidate) {
if (!candidate.getCorrectionDecl())
return candidate.isKeyword();
for (auto *C : candidate) {
FunctionDecl *FD = nullptr;
NamedDecl *ND = C->getUnderlyingDecl();
if (FunctionTemplateDecl *FTD = dyn_cast<FunctionTemplateDecl>(ND))
FD = FTD->getTemplatedDecl();
if (!HasExplicitTemplateArgs && !FD) {
if (!(FD = dyn_cast<FunctionDecl>(ND)) && isa<ValueDecl>(ND)) {
// If the Decl is neither a function nor a template function,
// determine if it is a pointer or reference to a function. If so,
// check against the number of arguments expected for the pointee.
QualType ValType = cast<ValueDecl>(ND)->getType();
if (ValType.isNull())
continue;
if (ValType->isAnyPointerType() || ValType->isReferenceType())
ValType = ValType->getPointeeType();
if (const FunctionProtoType *FPT = ValType->getAs<FunctionProtoType>())
if (FPT->getNumParams() == NumArgs)
return true;
}
}
// A typo for a function-style cast can look like a function call in C++.
if ((HasExplicitTemplateArgs ? getAsTypeTemplateDecl(ND) != nullptr
: isa<TypeDecl>(ND)) &&
CurContext->getParentASTContext().getLangOpts().CPlusPlus)
// Only a class or class template can take two or more arguments.
return NumArgs <= 1 || HasExplicitTemplateArgs || isa<CXXRecordDecl>(ND);
// Skip the current candidate if it is not a FunctionDecl or does not accept
// the current number of arguments.
if (!FD || !(FD->getNumParams() >= NumArgs &&
FD->getMinRequiredArguments() <= NumArgs))
continue;
// If the current candidate is a non-static C++ method, skip the candidate
// unless the method being corrected--or the current DeclContext, if the
// function being corrected is not a method--is a method in the same class
// or a descendant class of the candidate's parent class.
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD)) {
if (MemberFn || !MD->isStatic()) {
CXXMethodDecl *CurMD =
MemberFn
? dyn_cast_or_null<CXXMethodDecl>(MemberFn->getMemberDecl())
: dyn_cast_or_null<CXXMethodDecl>(CurContext);
CXXRecordDecl *CurRD =
CurMD ? CurMD->getParent()->getCanonicalDecl() : nullptr;
CXXRecordDecl *RD = MD->getParent()->getCanonicalDecl();
if (!CurRD || (CurRD != RD && !CurRD->isDerivedFrom(RD)))
continue;
}
}
return true;
}
return false;
}
void Sema::diagnoseTypo(const TypoCorrection &Correction,
const PartialDiagnostic &TypoDiag,
bool ErrorRecovery) {
diagnoseTypo(Correction, TypoDiag, PDiag(diag::note_previous_decl),
ErrorRecovery);
}
/// Find which declaration we should import to provide the definition of
/// the given declaration.
static NamedDecl *getDefinitionToImport(NamedDecl *D) {
if (VarDecl *VD = dyn_cast<VarDecl>(D))
return VD->getDefinition();
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
return FD->getDefinition();
if (TagDecl *TD = dyn_cast<TagDecl>(D))
return TD->getDefinition();
if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(D))
return ID->getDefinition();
if (ObjCProtocolDecl *PD = dyn_cast<ObjCProtocolDecl>(D))
return PD->getDefinition();
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
if (NamedDecl *TTD = TD->getTemplatedDecl())
return getDefinitionToImport(TTD);
return nullptr;
}
void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl,
MissingImportKind MIK, bool Recover) {
// Suggest importing a module providing the definition of this entity, if
// possible.
NamedDecl *Def = getDefinitionToImport(Decl);
if (!Def)
Def = Decl;
Module *Owner = getOwningModule(Def);
assert(Owner && "definition of hidden declaration is not in a module");
llvm::SmallVector<Module*, 8> OwningModules;
OwningModules.push_back(Owner);
auto Merged = Context.getModulesWithMergedDefinition(Def);
OwningModules.insert(OwningModules.end(), Merged.begin(), Merged.end());
diagnoseMissingImport(Loc, Def, Def->getLocation(), OwningModules, MIK,
Recover);
}
/// Get a "quoted.h" or <angled.h> include path to use in a diagnostic
/// suggesting the addition of a #include of the specified file.
static std::string getHeaderNameForHeader(Preprocessor &PP, const FileEntry *E,
llvm::StringRef IncludingFile) {
bool IsSystem = false;
auto Path = PP.getHeaderSearchInfo().suggestPathToFileForDiagnostics(
E, IncludingFile, &IsSystem);
return (IsSystem ? '<' : '"') + Path + (IsSystem ? '>' : '"');
}
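// Illustrative: a header reached through a system include path comes back in
// angle brackets (e.g. <vector>), while one reached relative to the including
// file comes back quoted (e.g. "foo.h").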
void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl,
SourceLocation DeclLoc,
ArrayRef<Module *> Modules,
MissingImportKind MIK, bool Recover) {
assert(!Modules.empty());
auto NotePrevious = [&] {
// FIXME: Suppress the note backtrace even under
// -fdiagnostics-show-note-include-stack. We don't care how this
// declaration was previously reached.
Diag(DeclLoc, diag::note_unreachable_entity) << (int)MIK;
};
// Weed out duplicates from module list.
llvm::SmallVector<Module*, 8> UniqueModules;
llvm::SmallDenseSet<Module*, 8> UniqueModuleSet;
for (auto *M : Modules) {
if (M->Kind == Module::GlobalModuleFragment)
continue;
if (UniqueModuleSet.insert(M).second)
UniqueModules.push_back(M);
}
// Try to find a suitable header-name to #include.
std::string HeaderName;
if (const FileEntry *Header =
PP.getHeaderToIncludeForDiagnostics(UseLoc, DeclLoc)) {
if (const FileEntry *FE =
SourceMgr.getFileEntryForID(SourceMgr.getFileID(UseLoc)))
HeaderName = getHeaderNameForHeader(PP, Header, FE->tryGetRealPathName());
}
// If we have a #include we should suggest, or if all definition locations
// were in global module fragments, don't suggest an import.
if (!HeaderName.empty() || UniqueModules.empty()) {
// FIXME: Find a smart place to suggest inserting a #include, and add
// a FixItHint there.
Diag(UseLoc, diag::err_module_unimported_use_header)
<< (int)MIK << Decl << !HeaderName.empty() << HeaderName;
// Produce a note showing where the entity was declared.
NotePrevious();
if (Recover)
createImplicitModuleImportForErrorRecovery(UseLoc, Modules[0]);
return;
}
Modules = UniqueModules;
if (Modules.size() > 1) {
std::string ModuleList;
unsigned N = 0;
for (Module *M : Modules) {
ModuleList += "\n ";
if (++N == 5 && N != Modules.size()) {
ModuleList += "[...]";
break;
}
ModuleList += M->getFullModuleName();
}
Diag(UseLoc, diag::err_module_unimported_use_multiple)
<< (int)MIK << Decl << ModuleList;
} else {
// FIXME: Add a FixItHint that imports the corresponding module.
Diag(UseLoc, diag::err_module_unimported_use)
<< (int)MIK << Decl << Modules[0]->getFullModuleName();
}
NotePrevious();
// Try to recover by implicitly importing this module.
if (Recover)
createImplicitModuleImportForErrorRecovery(UseLoc, Modules[0]);
}
/// Diagnose a successfully-corrected typo. Separated from the correction
/// itself to allow external validation of the result, etc.
///
/// \param Correction The result of performing typo correction.
/// \param TypoDiag The diagnostic to produce. This will have the corrected
/// string added to it (and usually also a fixit).
/// \param PrevNote A note to use when indicating the location of the entity to
/// which we are correcting. Will have the correction string added to it.
/// \param ErrorRecovery If \c true (the default), the caller is going to
/// recover from the typo as if the corrected string had been typed.
/// In this case, \c PDiag must be an error, and we will attach a fixit
/// to it.
void Sema::diagnoseTypo(const TypoCorrection &Correction,
const PartialDiagnostic &TypoDiag,
const PartialDiagnostic &PrevNote,
bool ErrorRecovery) {
std::string CorrectedStr = Correction.getAsString(getLangOpts());
std::string CorrectedQuotedStr = Correction.getQuoted(getLangOpts());
FixItHint FixTypo = FixItHint::CreateReplacement(
Correction.getCorrectionRange(), CorrectedStr);
// Maybe we're just missing a module import.
if (Correction.requiresImport()) {
NamedDecl *Decl = Correction.getFoundDecl();
assert(Decl && "import required but no declaration to import");
diagnoseMissingImport(Correction.getCorrectionRange().getBegin(), Decl,
MissingImportKind::Declaration, ErrorRecovery);
return;
}
Diag(Correction.getCorrectionRange().getBegin(), TypoDiag)
<< CorrectedQuotedStr << (ErrorRecovery ? FixTypo : FixItHint());
NamedDecl *ChosenDecl =
Correction.isKeyword() ? nullptr : Correction.getFoundDecl();
if (PrevNote.getDiagID() && ChosenDecl)
Diag(ChosenDecl->getLocation(), PrevNote)
<< CorrectedQuotedStr << (ErrorRecovery ? FixItHint() : FixTypo);
// Add any extra diagnostics.
for (const PartialDiagnostic &PD : Correction.getExtraDiagnostics())
Diag(Correction.getCorrectionRange().getBegin(), PD);
}
TypoExpr *Sema::createDelayedTypo(std::unique_ptr<TypoCorrectionConsumer> TCC,
TypoDiagnosticGenerator TDG,
TypoRecoveryCallback TRC,
SourceLocation TypoLoc) {
assert(TCC && "createDelayedTypo requires a valid TypoCorrectionConsumer");
auto TE = new (Context) TypoExpr(Context.DependentTy, TypoLoc);
auto &State = DelayedTypos[TE];
State.Consumer = std::move(TCC);
State.DiagHandler = std::move(TDG);
State.RecoveryHandler = std::move(TRC);
if (TE)
TypoExprs.push_back(TE);
return TE;
}
const Sema::TypoExprState &Sema::getTypoExprState(TypoExpr *TE) const {
auto Entry = DelayedTypos.find(TE);
assert(Entry != DelayedTypos.end() &&
"Failed to get the state for a TypoExpr!");
return Entry->second;
}
void Sema::clearDelayedTypo(TypoExpr *TE) {
DelayedTypos.erase(TE);
}
void Sema::ActOnPragmaDump(Scope *S, SourceLocation IILoc, IdentifierInfo *II) {
DeclarationNameInfo Name(II, IILoc);
LookupResult R(*this, Name, LookupAnyName, Sema::NotForRedeclaration);
R.suppressDiagnostics();
R.setHideTags(false);
LookupName(R, S);
R.dump();
}
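// Illustrative note (the pragma spelling is an assumption on my part): this
// entry point services a debug dump pragma naming an identifier, e.g.
//
//   #pragma clang __debug dump SomeName
//
// which looks SomeName up with LookupAnyName and dumps the lookup result.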
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 3ab5d26a9a75..edcac4d2ee9a 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -1,9281 +1,9284 @@
//===--- SemaType.cpp - Semantic Analysis for Types -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements type-related semantic analysis.
//
//===----------------------------------------------------------------------===//
#include "TypeLocBuilder.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/ASTStructuralEquivalence.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeLocVisitor.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/DelayedDiagnostic.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "clang/Sema/TemplateInstCallback.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include <bitset>
using namespace clang;
enum TypeDiagSelector {
TDS_Function,
TDS_Pointer,
TDS_ObjCObjOrBlock
};
/// isOmittedBlockReturnType - Return true if this declarator is missing a
/// return type because this is an omitted return type on a block literal.
static bool isOmittedBlockReturnType(const Declarator &D) {
if (D.getContext() != DeclaratorContext::BlockLiteral ||
D.getDeclSpec().hasTypeSpecifier())
return false;
if (D.getNumTypeObjects() == 0)
return true; // ^{ ... }
if (D.getNumTypeObjects() == 1 &&
D.getTypeObject(0).Kind == DeclaratorChunk::Function)
return true; // ^(int X, float Y) { ... }
return false;
}
/// diagnoseBadTypeAttribute - Diagnoses a type attribute which
/// doesn't apply to the given type.
static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
QualType type) {
TypeDiagSelector WhichType;
bool useExpansionLoc = true;
switch (attr.getKind()) {
case ParsedAttr::AT_ObjCGC:
WhichType = TDS_Pointer;
break;
case ParsedAttr::AT_ObjCOwnership:
WhichType = TDS_ObjCObjOrBlock;
break;
default:
// Assume everything else was a function attribute.
WhichType = TDS_Function;
useExpansionLoc = false;
break;
}
SourceLocation loc = attr.getLoc();
StringRef name = attr.getAttrName()->getName();
// The GC attributes are usually written with macros; special-case them.
IdentifierInfo *II = attr.isArgIdent(0) ? attr.getArgAsIdent(0)->Ident
: nullptr;
if (useExpansionLoc && loc.isMacroID() && II) {
if (II->isStr("strong")) {
if (S.findMacroSpelling(loc, "__strong")) name = "__strong";
} else if (II->isStr("weak")) {
if (S.findMacroSpelling(loc, "__weak")) name = "__weak";
}
}
S.Diag(loc, diag::warn_type_attribute_wrong_type) << name << WhichType
<< type;
}
// objc_gc applies to Objective-C pointers or, otherwise, to the
// smallest available pointer type (i.e. 'void*' in 'void**').
#define OBJC_POINTER_TYPE_ATTRS_CASELIST \
case ParsedAttr::AT_ObjCGC: \
case ParsedAttr::AT_ObjCOwnership
// Calling convention attributes.
#define CALLING_CONV_ATTRS_CASELIST \
case ParsedAttr::AT_CDecl: \
case ParsedAttr::AT_FastCall: \
case ParsedAttr::AT_StdCall: \
case ParsedAttr::AT_ThisCall: \
case ParsedAttr::AT_RegCall: \
case ParsedAttr::AT_Pascal: \
case ParsedAttr::AT_SwiftCall: \
case ParsedAttr::AT_SwiftAsyncCall: \
case ParsedAttr::AT_VectorCall: \
case ParsedAttr::AT_AArch64VectorPcs: \
case ParsedAttr::AT_AArch64SVEPcs: \
case ParsedAttr::AT_AMDGPUKernelCall: \
case ParsedAttr::AT_MSABI: \
case ParsedAttr::AT_SysVABI: \
case ParsedAttr::AT_Pcs: \
case ParsedAttr::AT_IntelOclBicc: \
case ParsedAttr::AT_PreserveMost: \
case ParsedAttr::AT_PreserveAll
// Function type attributes.
#define FUNCTION_TYPE_ATTRS_CASELIST \
case ParsedAttr::AT_NSReturnsRetained: \
case ParsedAttr::AT_NoReturn: \
case ParsedAttr::AT_Regparm: \
case ParsedAttr::AT_CmseNSCall: \
case ParsedAttr::AT_AnyX86NoCallerSavedRegisters: \
case ParsedAttr::AT_AnyX86NoCfCheck: \
CALLING_CONV_ATTRS_CASELIST
// Microsoft-specific type qualifiers.
#define MS_TYPE_ATTRS_CASELIST \
case ParsedAttr::AT_Ptr32: \
case ParsedAttr::AT_Ptr64: \
case ParsedAttr::AT_SPtr: \
case ParsedAttr::AT_UPtr
// Nullability qualifiers.
#define NULLABILITY_TYPE_ATTRS_CASELIST \
case ParsedAttr::AT_TypeNonNull: \
case ParsedAttr::AT_TypeNullable: \
case ParsedAttr::AT_TypeNullableResult: \
case ParsedAttr::AT_TypeNullUnspecified
namespace {
/// An object which stores processing state for the entire
/// GetTypeForDeclarator process.
class TypeProcessingState {
Sema &sema;
/// The declarator being processed.
Declarator &declarator;
/// The index of the declarator chunk we're currently processing.
/// May be the total number of valid chunks, indicating the
/// DeclSpec.
unsigned chunkIndex;
/// The original set of attributes on the DeclSpec.
SmallVector<ParsedAttr *, 2> savedAttrs;
/// A list of attributes whose uselessness we will diagnose once
/// processing is complete.
SmallVector<ParsedAttr *, 2> ignoredTypeAttrs;
/// Attributes corresponding to AttributedTypeLocs that we have not yet
/// populated.
// FIXME: The two-phase mechanism by which we construct Types and fill
// their TypeLocs makes it hard to correctly assign these. We keep the
// attributes in creation order as an attempt to make them line up
// properly.
using TypeAttrPair = std::pair<const AttributedType*, const Attr*>;
SmallVector<TypeAttrPair, 8> AttrsForTypes;
bool AttrsForTypesSorted = true;
/// MacroQualifiedTypes mapping to macro expansion locations that will be
/// stored in a MacroQualifiedTypeLoc.
llvm::DenseMap<const MacroQualifiedType *, SourceLocation> LocsForMacros;
/// Flag to indicate we parsed a noderef attribute. This is used for
/// validating that noderef was used on a pointer or array.
bool parsedNoDeref;
public:
TypeProcessingState(Sema &sema, Declarator &declarator)
: sema(sema), declarator(declarator),
chunkIndex(declarator.getNumTypeObjects()), parsedNoDeref(false) {}
Sema &getSema() const {
return sema;
}
Declarator &getDeclarator() const {
return declarator;
}
bool isProcessingDeclSpec() const {
return chunkIndex == declarator.getNumTypeObjects();
}
unsigned getCurrentChunkIndex() const {
return chunkIndex;
}
void setCurrentChunkIndex(unsigned idx) {
assert(idx <= declarator.getNumTypeObjects());
chunkIndex = idx;
}
ParsedAttributesView &getCurrentAttributes() const {
if (isProcessingDeclSpec())
return getMutableDeclSpec().getAttributes();
return declarator.getTypeObject(chunkIndex).getAttrs();
}
/// Save the current set of attributes on the DeclSpec.
void saveDeclSpecAttrs() {
// Don't try to save them multiple times.
if (!savedAttrs.empty())
return;
DeclSpec &spec = getMutableDeclSpec();
llvm::append_range(savedAttrs,
llvm::make_pointer_range(spec.getAttributes()));
}
/// Record that we had nowhere to put the given type attribute.
/// We will diagnose such attributes later.
void addIgnoredTypeAttr(ParsedAttr &attr) {
ignoredTypeAttrs.push_back(&attr);
}
/// Diagnose all the ignored type attributes, given that the
/// declarator worked out to the given type.
void diagnoseIgnoredTypeAttrs(QualType type) const {
for (auto *Attr : ignoredTypeAttrs)
diagnoseBadTypeAttribute(getSema(), *Attr, type);
}
/// Get an attributed type for the given attribute, and remember the Attr
/// object so that we can attach it to the AttributedTypeLoc.
QualType getAttributedType(Attr *A, QualType ModifiedType,
QualType EquivType) {
QualType T =
sema.Context.getAttributedType(A->getKind(), ModifiedType, EquivType);
AttrsForTypes.push_back({cast<AttributedType>(T.getTypePtr()), A});
AttrsForTypesSorted = false;
return T;
}
/// Get a BTFTagAttributed type for the btf_type_tag attribute.
QualType getBTFTagAttributedType(const BTFTypeTagAttr *BTFAttr,
QualType WrappedType) {
return sema.Context.getBTFTagAttributedType(BTFAttr, WrappedType);
}
/// Completely replace the \c auto in \p TypeWithAuto by
/// \p Replacement. Also replace \p TypeWithAuto in \c TypeAttrPair if
/// necessary.
QualType ReplaceAutoType(QualType TypeWithAuto, QualType Replacement) {
QualType T = sema.ReplaceAutoType(TypeWithAuto, Replacement);
if (auto *AttrTy = TypeWithAuto->getAs<AttributedType>()) {
// An attributed type should still be an attributed type after replacement.
auto *NewAttrTy = cast<AttributedType>(T.getTypePtr());
for (TypeAttrPair &A : AttrsForTypes) {
if (A.first == AttrTy)
A.first = NewAttrTy;
}
AttrsForTypesSorted = false;
}
return T;
}
/// Extract and remove the Attr* for a given attributed type.
const Attr *takeAttrForAttributedType(const AttributedType *AT) {
if (!AttrsForTypesSorted) {
llvm::stable_sort(AttrsForTypes, llvm::less_first());
AttrsForTypesSorted = true;
}
// FIXME: This is quadratic if we have lots of reuses of the same
// attributed type.
for (auto It = std::partition_point(
AttrsForTypes.begin(), AttrsForTypes.end(),
[=](const TypeAttrPair &A) { return A.first < AT; });
It != AttrsForTypes.end() && It->first == AT; ++It) {
if (It->second) {
const Attr *Result = It->second;
It->second = nullptr;
return Result;
}
}
llvm_unreachable("no Attr* for AttributedType*");
}
SourceLocation
getExpansionLocForMacroQualifiedType(const MacroQualifiedType *MQT) const {
auto FoundLoc = LocsForMacros.find(MQT);
assert(FoundLoc != LocsForMacros.end() &&
"Unable to find macro expansion location for MacroQualifedType");
return FoundLoc->second;
}
void setExpansionLocForMacroQualifiedType(const MacroQualifiedType *MQT,
SourceLocation Loc) {
LocsForMacros[MQT] = Loc;
}
void setParsedNoDeref(bool parsed) { parsedNoDeref = parsed; }
bool didParseNoDeref() const { return parsedNoDeref; }
~TypeProcessingState() {
if (savedAttrs.empty())
return;
getMutableDeclSpec().getAttributes().clearListOnly();
for (ParsedAttr *AL : savedAttrs)
getMutableDeclSpec().getAttributes().addAtEnd(AL);
}
private:
DeclSpec &getMutableDeclSpec() const {
return const_cast<DeclSpec&>(declarator.getDeclSpec());
}
};
} // end anonymous namespace
static void moveAttrFromListToList(ParsedAttr &attr,
ParsedAttributesView &fromList,
ParsedAttributesView &toList) {
fromList.remove(&attr);
toList.addAtEnd(&attr);
}
/// The location of a type attribute.
enum TypeAttrLocation {
/// The attribute is in the decl-specifier-seq.
TAL_DeclSpec,
/// The attribute is part of a DeclaratorChunk.
TAL_DeclChunk,
/// The attribute is immediately after the declaration's name.
TAL_DeclName
};
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
TypeAttrLocation TAL,
const ParsedAttributesView &attrs);
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
QualType &type);
static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType &type);
static bool handleObjCGCTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
QualType &type);
static bool handleObjCOwnershipTypeAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType &type);
static bool handleObjCPointerTypeAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType &type) {
if (attr.getKind() == ParsedAttr::AT_ObjCGC)
return handleObjCGCTypeAttr(state, attr, type);
assert(attr.getKind() == ParsedAttr::AT_ObjCOwnership);
return handleObjCOwnershipTypeAttr(state, attr, type);
}
/// Given the index of a declarator chunk, check whether that chunk
/// directly specifies the return type of a function and, if so, find
/// an appropriate place for it.
///
/// \param i - a notional index which the search will start
/// immediately inside
///
/// \param onlyBlockPointers Whether we should only look into block
/// pointer types (vs. all pointer types).
static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator,
unsigned i,
bool onlyBlockPointers) {
assert(i <= declarator.getNumTypeObjects());
DeclaratorChunk *result = nullptr;
// First, look inwards past parens for a function declarator.
for (; i != 0; --i) {
DeclaratorChunk &fnChunk = declarator.getTypeObject(i-1);
switch (fnChunk.Kind) {
case DeclaratorChunk::Paren:
continue;
// If we find anything except a function, bail out.
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Array:
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
return result;
// If we do find a function declarator, scan inwards from that,
// looking for a (block-)pointer declarator.
case DeclaratorChunk::Function:
for (--i; i != 0; --i) {
DeclaratorChunk &ptrChunk = declarator.getTypeObject(i-1);
switch (ptrChunk.Kind) {
case DeclaratorChunk::Paren:
case DeclaratorChunk::Array:
case DeclaratorChunk::Function:
case DeclaratorChunk::Reference:
case DeclaratorChunk::Pipe:
continue;
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pointer:
if (onlyBlockPointers)
continue;
LLVM_FALLTHROUGH;
case DeclaratorChunk::BlockPointer:
result = &ptrChunk;
goto continue_outer;
}
llvm_unreachable("bad declarator chunk kind");
}
// If we run out of declarators doing that, we're done.
return result;
}
llvm_unreachable("bad declarator chunk kind");
// Okay, reconsider from our new point.
continue_outer: ;
}
// Ran out of chunks, bail out.
return result;
}
/// Given that an objc_gc attribute was written somewhere on a
/// declaration *other* than on the declarator itself (for which, use
/// distributeObjCPointerTypeAttrFromDeclarator), and given that it
/// didn't apply in whatever position it was written in, try to move
/// it to a more appropriate position.
static void distributeObjCPointerTypeAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType type) {
Declarator &declarator = state.getDeclarator();
// Move it to the outermost normal or block pointer declarator.
for (unsigned i = state.getCurrentChunkIndex(); i != 0; --i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i-1);
switch (chunk.Kind) {
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer: {
// But don't move an ARC ownership attribute to the return type
// of a block.
DeclaratorChunk *destChunk = nullptr;
if (state.isProcessingDeclSpec() &&
attr.getKind() == ParsedAttr::AT_ObjCOwnership)
destChunk = maybeMovePastReturnType(declarator, i - 1,
/*onlyBlockPointers=*/true);
if (!destChunk) destChunk = &chunk;
moveAttrFromListToList(attr, state.getCurrentAttributes(),
destChunk->getAttrs());
return;
}
case DeclaratorChunk::Paren:
case DeclaratorChunk::Array:
continue;
// We may be starting at the return type of a block.
case DeclaratorChunk::Function:
if (state.isProcessingDeclSpec() &&
attr.getKind() == ParsedAttr::AT_ObjCOwnership) {
if (DeclaratorChunk *dest = maybeMovePastReturnType(
declarator, i,
/*onlyBlockPointers=*/true)) {
moveAttrFromListToList(attr, state.getCurrentAttributes(),
dest->getAttrs());
return;
}
}
goto error;
// Don't walk through these.
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
goto error;
}
}
error:
diagnoseBadTypeAttribute(state.getSema(), attr, type);
}
/// Distribute an objc_gc type attribute that was written on the
/// declarator.
static void distributeObjCPointerTypeAttrFromDeclarator(
TypeProcessingState &state, ParsedAttr &attr, QualType &declSpecType) {
Declarator &declarator = state.getDeclarator();
// objc_gc goes on the innermost pointer to something that's not a
// pointer.
unsigned innermost = -1U;
bool considerDeclSpec = true;
for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i);
switch (chunk.Kind) {
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
innermost = i;
continue;
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Paren:
case DeclaratorChunk::Array:
case DeclaratorChunk::Pipe:
continue;
case DeclaratorChunk::Function:
considerDeclSpec = false;
goto done;
}
}
done:
// That might actually be the decl spec if we weren't blocked by
// anything in the declarator.
if (considerDeclSpec) {
if (handleObjCPointerTypeAttr(state, attr, declSpecType)) {
// Splice the attribute into the decl spec. Prevents the
// attribute from being applied multiple times and gives
// the source-location-filler something to work with.
state.saveDeclSpecAttrs();
declarator.getMutableDeclSpec().getAttributes().takeOneFrom(
declarator.getAttributes(), &attr);
return;
}
}
// Otherwise, if we found an appropriate chunk, splice the attribute
// into it.
if (innermost != -1U) {
moveAttrFromListToList(attr, declarator.getAttributes(),
declarator.getTypeObject(innermost).getAttrs());
return;
}
// Otherwise, diagnose when we're done building the type.
declarator.getAttributes().remove(&attr);
state.addIgnoredTypeAttr(attr);
}
/// A function type attribute was written somewhere in a declaration
/// *other* than on the declarator itself or in the decl spec. Given
/// that it didn't apply in whatever position it was written in, try
/// to move it to a more appropriate position.
static void distributeFunctionTypeAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType type) {
Declarator &declarator = state.getDeclarator();
// Try to push the attribute from the return type of a function to
// the function itself.
for (unsigned i = state.getCurrentChunkIndex(); i != 0; --i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i-1);
switch (chunk.Kind) {
case DeclaratorChunk::Function:
moveAttrFromListToList(attr, state.getCurrentAttributes(),
chunk.getAttrs());
return;
case DeclaratorChunk::Paren:
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Array:
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
continue;
}
}
diagnoseBadTypeAttribute(state.getSema(), attr, type);
}
/// Try to distribute a function type attribute to the innermost
/// function chunk or type. Returns true if the attribute was
/// distributed, false if no location was found.
static bool distributeFunctionTypeAttrToInnermost(
TypeProcessingState &state, ParsedAttr &attr,
ParsedAttributesView &attrList, QualType &declSpecType) {
Declarator &declarator = state.getDeclarator();
// Put it on the innermost function chunk, if there is one.
for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i);
if (chunk.Kind != DeclaratorChunk::Function) continue;
moveAttrFromListToList(attr, attrList, chunk.getAttrs());
return true;
}
return handleFunctionTypeAttr(state, attr, declSpecType);
}
/// A function type attribute was written in the decl spec. Try to
/// apply it somewhere.
static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
ParsedAttr &attr,
QualType &declSpecType) {
state.saveDeclSpecAttrs();
// Try to distribute to the innermost.
if (distributeFunctionTypeAttrToInnermost(
state, attr, state.getCurrentAttributes(), declSpecType))
return;
// If that failed, diagnose the bad attribute when the declarator is
// fully built.
state.addIgnoredTypeAttr(attr);
}
/// A function type attribute was written on the declarator or declaration.
/// Try to apply it somewhere.
/// `Attrs` is the attribute list containing the declaration (either of the
/// declarator or the declaration).
static void distributeFunctionTypeAttrFromDeclarator(TypeProcessingState &state,
ParsedAttr &attr,
QualType &declSpecType) {
Declarator &declarator = state.getDeclarator();
// Try to distribute to the innermost.
if (distributeFunctionTypeAttrToInnermost(
state, attr, declarator.getAttributes(), declSpecType))
return;
// If that failed, diagnose the bad attribute when the declarator is
// fully built.
declarator.getAttributes().remove(&attr);
state.addIgnoredTypeAttr(attr);
}
/// Given that there are attributes written on the declarator or declaration
/// itself, try to distribute any type attributes to the appropriate
/// declarator chunk.
///
/// These are attributes like the following:
/// int f ATTR;
/// int (f ATTR)();
/// but not necessarily this:
/// int f() ATTR;
///
/// `Attrs` is the attribute list containing the declaration (either of the
/// declarator or the declaration).
static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state,
QualType &declSpecType) {
// The called functions in this loop actually remove things from the current
// list, so iterating over the existing list isn't possible. Instead, make a
// non-owning copy and iterate over that.
ParsedAttributesView AttrsCopy{state.getDeclarator().getAttributes()};
for (ParsedAttr &attr : AttrsCopy) {
// Do not distribute [[]] attributes. They have strict rules for what
// they appertain to.
if (attr.isStandardAttributeSyntax())
continue;
switch (attr.getKind()) {
OBJC_POINTER_TYPE_ATTRS_CASELIST:
distributeObjCPointerTypeAttrFromDeclarator(state, attr, declSpecType);
break;
FUNCTION_TYPE_ATTRS_CASELIST:
distributeFunctionTypeAttrFromDeclarator(state, attr, declSpecType);
break;
MS_TYPE_ATTRS_CASELIST:
// Microsoft type attributes cannot go after the declarator-id.
continue;
NULLABILITY_TYPE_ATTRS_CASELIST:
// Nullability specifiers cannot go after the declarator-id.
// Objective-C __kindof does not get distributed.
case ParsedAttr::AT_ObjCKindOf:
continue;
default:
break;
}
}
}
/// Add a synthetic '()' to a block-literal declarator if it is
/// required, given the return type.
static void maybeSynthesizeBlockSignature(TypeProcessingState &state,
QualType declSpecType) {
Declarator &declarator = state.getDeclarator();
// First, check whether the declarator would produce a function,
// i.e. whether the innermost semantic chunk is a function.
if (declarator.isFunctionDeclarator()) {
// If so, make that declarator a prototyped declarator.
declarator.getFunctionTypeInfo().hasPrototype = true;
return;
}
// If there are any type objects, the type as written won't name a
// function, regardless of the decl spec type. This is because a
// block signature declarator is always an abstract-declarator, and
// abstract-declarators can't just be parentheses chunks. Therefore
// we need to build a function chunk unless there are no type
// objects and the decl spec type is a function.
if (!declarator.getNumTypeObjects() && declSpecType->isFunctionType())
return;
// Note that there *are* cases with invalid declarators where
// declarators consist solely of parentheses. In general, these
// occur only in failed efforts to make function declarators, so
// faking up the function chunk is still the right thing to do.
// Otherwise, we need to fake up a function declarator.
SourceLocation loc = declarator.getBeginLoc();
// ...and *prepend* it to the declarator.
SourceLocation NoLoc;
declarator.AddInnermostTypeInfo(DeclaratorChunk::getFunction(
/*HasProto=*/true,
/*IsAmbiguous=*/false,
/*LParenLoc=*/NoLoc,
/*ArgInfo=*/nullptr,
/*NumParams=*/0,
/*EllipsisLoc=*/NoLoc,
/*RParenLoc=*/NoLoc,
/*RefQualifierIsLvalueRef=*/true,
/*RefQualifierLoc=*/NoLoc,
/*MutableLoc=*/NoLoc, EST_None,
/*ESpecRange=*/SourceRange(),
/*Exceptions=*/nullptr,
/*ExceptionRanges=*/nullptr,
/*NumExceptions=*/0,
/*NoexceptExpr=*/nullptr,
/*ExceptionSpecTokens=*/nullptr,
/*DeclsInPrototype=*/None, loc, loc, declarator));
// For consistency, make sure the state still has us as processing
// the decl spec.
assert(state.getCurrentChunkIndex() == declarator.getNumTypeObjects() - 1);
state.setCurrentChunkIndex(declarator.getNumTypeObjects());
}
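// Illustrative: a block literal written as ^{ return 42; } has no function
// chunk of its own, so the synthetic '()' prototype above is prepended; a
// literal written as ^(int x) { ... } already has one and is only marked as
// prototyped.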
static void diagnoseAndRemoveTypeQualifiers(Sema &S, const DeclSpec &DS,
unsigned &TypeQuals,
QualType TypeSoFar,
unsigned RemoveTQs,
unsigned DiagID) {
// If this occurs outside a template instantiation, warn the user about
// it; they probably didn't mean to specify a redundant qualifier.
typedef std::pair<DeclSpec::TQ, SourceLocation> QualLoc;
for (QualLoc Qual : {QualLoc(DeclSpec::TQ_const, DS.getConstSpecLoc()),
QualLoc(DeclSpec::TQ_restrict, DS.getRestrictSpecLoc()),
QualLoc(DeclSpec::TQ_volatile, DS.getVolatileSpecLoc()),
QualLoc(DeclSpec::TQ_atomic, DS.getAtomicSpecLoc())}) {
if (!(RemoveTQs & Qual.first))
continue;
if (!S.inTemplateInstantiation()) {
if (TypeQuals & Qual.first)
S.Diag(Qual.second, DiagID)
<< DeclSpec::getSpecifierName(Qual.first) << TypeSoFar
<< FixItHint::CreateRemoval(Qual.second);
}
TypeQuals &= ~Qual.first;
}
}
/// Return true if this is an omitted block return type. Also check type
/// attributes and type qualifiers when returning true.
static bool checkOmittedBlockReturnType(Sema &S, Declarator &declarator,
QualType Result) {
if (!isOmittedBlockReturnType(declarator))
return false;
// Warn if we see type attributes for omitted return type on a block literal.
SmallVector<ParsedAttr *, 2> ToBeRemoved;
for (ParsedAttr &AL : declarator.getMutableDeclSpec().getAttributes()) {
if (AL.isInvalid() || !AL.isTypeAttr())
continue;
S.Diag(AL.getLoc(),
diag::warn_block_literal_attributes_on_omitted_return_type)
<< AL;
ToBeRemoved.push_back(&AL);
}
// Remove bad attributes from the list.
for (ParsedAttr *AL : ToBeRemoved)
declarator.getMutableDeclSpec().getAttributes().remove(AL);
// Warn if we see type qualifiers for omitted return type on a block literal.
const DeclSpec &DS = declarator.getDeclSpec();
unsigned TypeQuals = DS.getTypeQualifiers();
diagnoseAndRemoveTypeQualifiers(S, DS, TypeQuals, Result, (unsigned)-1,
diag::warn_block_literal_qualifiers_on_omitted_return_type);
declarator.getMutableDeclSpec().ClearTypeQualifiers();
return true;
}
/// Apply Objective-C type arguments to the given type.
static QualType applyObjCTypeArgs(Sema &S, SourceLocation loc, QualType type,
ArrayRef<TypeSourceInfo *> typeArgs,
SourceRange typeArgsRange,
bool failOnError = false) {
// We can only apply type arguments to an Objective-C class type.
const auto *objcObjectType = type->getAs<ObjCObjectType>();
if (!objcObjectType || !objcObjectType->getInterface()) {
S.Diag(loc, diag::err_objc_type_args_non_class)
<< type
<< typeArgsRange;
if (failOnError)
return QualType();
return type;
}
// The class type must be parameterized.
ObjCInterfaceDecl *objcClass = objcObjectType->getInterface();
ObjCTypeParamList *typeParams = objcClass->getTypeParamList();
if (!typeParams) {
S.Diag(loc, diag::err_objc_type_args_non_parameterized_class)
<< objcClass->getDeclName()
<< FixItHint::CreateRemoval(typeArgsRange);
if (failOnError)
return QualType();
return type;
}
// The type must not already be specialized.
if (objcObjectType->isSpecialized()) {
S.Diag(loc, diag::err_objc_type_args_specialized_class)
<< type
<< FixItHint::CreateRemoval(typeArgsRange);
if (failOnError)
return QualType();
return type;
}
// Check the type arguments.
SmallVector<QualType, 4> finalTypeArgs;
unsigned numTypeParams = typeParams->size();
bool anyPackExpansions = false;
for (unsigned i = 0, n = typeArgs.size(); i != n; ++i) {
TypeSourceInfo *typeArgInfo = typeArgs[i];
QualType typeArg = typeArgInfo->getType();
// Type arguments cannot have explicit qualifiers or nullability.
// We ignore indirect sources of these, e.g. behind typedefs or
// template arguments.
if (TypeLoc qual = typeArgInfo->getTypeLoc().findExplicitQualifierLoc()) {
bool diagnosed = false;
SourceRange rangeToRemove;
if (auto attr = qual.getAs<AttributedTypeLoc>()) {
rangeToRemove = attr.getLocalSourceRange();
if (attr.getTypePtr()->getImmediateNullability()) {
typeArg = attr.getTypePtr()->getModifiedType();
S.Diag(attr.getBeginLoc(),
diag::err_objc_type_arg_explicit_nullability)
<< typeArg << FixItHint::CreateRemoval(rangeToRemove);
diagnosed = true;
}
}
if (!diagnosed) {
S.Diag(qual.getBeginLoc(), diag::err_objc_type_arg_qualified)
<< typeArg << typeArg.getQualifiers().getAsString()
<< FixItHint::CreateRemoval(rangeToRemove);
}
}
// Remove qualifiers even if they're non-local.
typeArg = typeArg.getUnqualifiedType();
finalTypeArgs.push_back(typeArg);
if (typeArg->getAs<PackExpansionType>())
anyPackExpansions = true;
// Find the corresponding type parameter, if there is one.
ObjCTypeParamDecl *typeParam = nullptr;
if (!anyPackExpansions) {
if (i < numTypeParams) {
typeParam = typeParams->begin()[i];
} else {
// Too many arguments.
S.Diag(loc, diag::err_objc_type_args_wrong_arity)
<< false
<< objcClass->getDeclName()
<< (unsigned)typeArgs.size()
<< numTypeParams;
S.Diag(objcClass->getLocation(), diag::note_previous_decl)
<< objcClass;
if (failOnError)
return QualType();
return type;
}
}
// Objective-C object pointer types must be substitutable for the bounds.
if (const auto *typeArgObjC = typeArg->getAs<ObjCObjectPointerType>()) {
// If we don't have a type parameter to match against, assume
// everything is fine. There was a prior pack expansion that
// means we won't be able to match anything.
if (!typeParam) {
assert(anyPackExpansions && "Too many arguments?");
continue;
}
// Retrieve the bound.
QualType bound = typeParam->getUnderlyingType();
const auto *boundObjC = bound->getAs<ObjCObjectPointerType>();
// Determine whether the type argument is substitutable for the bound.
if (typeArgObjC->isObjCIdType()) {
// When the type argument is 'id', the only acceptable type
// parameter bound is 'id'.
if (boundObjC->isObjCIdType())
continue;
} else if (S.Context.canAssignObjCInterfaces(boundObjC, typeArgObjC)) {
// Otherwise, we follow the assignability rules.
continue;
}
// Diagnose the mismatch.
S.Diag(typeArgInfo->getTypeLoc().getBeginLoc(),
diag::err_objc_type_arg_does_not_match_bound)
<< typeArg << bound << typeParam->getDeclName();
S.Diag(typeParam->getLocation(), diag::note_objc_type_param_here)
<< typeParam->getDeclName();
if (failOnError)
return QualType();
return type;
}
// Block pointer types are permitted for unqualified 'id' bounds.
if (typeArg->isBlockPointerType()) {
// If we don't have a type parameter to match against, assume
// everything is fine. There was a prior pack expansion that
// means we won't be able to match anything.
if (!typeParam) {
assert(anyPackExpansions && "Too many arguments?");
continue;
}
// Retrieve the bound.
QualType bound = typeParam->getUnderlyingType();
if (bound->isBlockCompatibleObjCPointerType(S.Context))
continue;
// Diagnose the mismatch.
S.Diag(typeArgInfo->getTypeLoc().getBeginLoc(),
diag::err_objc_type_arg_does_not_match_bound)
<< typeArg << bound << typeParam->getDeclName();
S.Diag(typeParam->getLocation(), diag::note_objc_type_param_here)
<< typeParam->getDeclName();
if (failOnError)
return QualType();
return type;
}
// Dependent types will be checked at instantiation time.
if (typeArg->isDependentType()) {
continue;
}
// Diagnose non-id-compatible type arguments.
S.Diag(typeArgInfo->getTypeLoc().getBeginLoc(),
diag::err_objc_type_arg_not_id_compatible)
<< typeArg << typeArgInfo->getTypeLoc().getSourceRange();
if (failOnError)
return QualType();
return type;
}
// Make sure we didn't have the wrong number of arguments.
if (!anyPackExpansions && finalTypeArgs.size() != numTypeParams) {
S.Diag(loc, diag::err_objc_type_args_wrong_arity)
<< (typeArgs.size() < typeParams->size())
<< objcClass->getDeclName()
<< (unsigned)finalTypeArgs.size()
<< (unsigned)numTypeParams;
S.Diag(objcClass->getLocation(), diag::note_previous_decl)
<< objcClass;
if (failOnError)
return QualType();
return type;
}
// Success. Form the specialized type.
return S.Context.getObjCObjectType(type, finalTypeArgs, { }, false);
}
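// Illustrative: for a specialization such as NSArray<NSString *> *, the single
// type argument NSString * is checked for substitutability against the bound
// of NSArray's type parameter (an 'id'-compatible Objective-C pointer by
// default), and the specialized ObjCObjectType is formed on success.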
QualType Sema::BuildObjCTypeParamType(const ObjCTypeParamDecl *Decl,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc,
bool FailOnError) {
QualType Result = QualType(Decl->getTypeForDecl(), 0);
if (!Protocols.empty()) {
bool HasError;
Result = Context.applyObjCProtocolQualifiers(Result, Protocols,
HasError);
if (HasError) {
Diag(SourceLocation(), diag::err_invalid_protocol_qualifiers)
<< SourceRange(ProtocolLAngleLoc, ProtocolRAngleLoc);
if (FailOnError) Result = QualType();
}
if (FailOnError && Result.isNull())
return QualType();
}
return Result;
}
QualType Sema::BuildObjCObjectType(QualType BaseType,
SourceLocation Loc,
SourceLocation TypeArgsLAngleLoc,
ArrayRef<TypeSourceInfo *> TypeArgs,
SourceLocation TypeArgsRAngleLoc,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc,
bool FailOnError) {
QualType Result = BaseType;
if (!TypeArgs.empty()) {
Result = applyObjCTypeArgs(*this, Loc, Result, TypeArgs,
SourceRange(TypeArgsLAngleLoc,
TypeArgsRAngleLoc),
FailOnError);
if (FailOnError && Result.isNull())
return QualType();
}
if (!Protocols.empty()) {
bool HasError;
Result = Context.applyObjCProtocolQualifiers(Result, Protocols,
HasError);
if (HasError) {
Diag(Loc, diag::err_invalid_protocol_qualifiers)
<< SourceRange(ProtocolLAngleLoc, ProtocolRAngleLoc);
if (FailOnError) Result = QualType();
}
if (FailOnError && Result.isNull())
return QualType();
}
return Result;
}
TypeResult Sema::actOnObjCProtocolQualifierType(
SourceLocation lAngleLoc,
ArrayRef<Decl *> protocols,
ArrayRef<SourceLocation> protocolLocs,
SourceLocation rAngleLoc) {
// Form id<protocol-list>.
QualType Result = Context.getObjCObjectType(
Context.ObjCBuiltinIdTy, { },
llvm::makeArrayRef(
(ObjCProtocolDecl * const *)protocols.data(),
protocols.size()),
false);
Result = Context.getObjCObjectPointerType(Result);
TypeSourceInfo *ResultTInfo = Context.CreateTypeSourceInfo(Result);
TypeLoc ResultTL = ResultTInfo->getTypeLoc();
auto ObjCObjectPointerTL = ResultTL.castAs<ObjCObjectPointerTypeLoc>();
ObjCObjectPointerTL.setStarLoc(SourceLocation()); // implicit
auto ObjCObjectTL = ObjCObjectPointerTL.getPointeeLoc()
.castAs<ObjCObjectTypeLoc>();
ObjCObjectTL.setHasBaseTypeAsWritten(false);
ObjCObjectTL.getBaseLoc().initialize(Context, SourceLocation());
// No type arguments.
ObjCObjectTL.setTypeArgsLAngleLoc(SourceLocation());
ObjCObjectTL.setTypeArgsRAngleLoc(SourceLocation());
// Fill in protocol qualifiers.
ObjCObjectTL.setProtocolLAngleLoc(lAngleLoc);
ObjCObjectTL.setProtocolRAngleLoc(rAngleLoc);
for (unsigned i = 0, n = protocols.size(); i != n; ++i)
ObjCObjectTL.setProtocolLoc(i, protocolLocs[i]);
// We're done. Return the completed type to the parser.
return CreateParsedType(Result, ResultTInfo);
}
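// Illustrative: a bare protocol-qualifier list such as <NSCopying, NSCoding>
// comes through here and is built as id<NSCopying, NSCoding>: an Objective-C
// object pointer whose base is the builtin 'id' with the listed protocol
// qualifiers and no type arguments.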
TypeResult Sema::actOnObjCTypeArgsAndProtocolQualifiers(
Scope *S,
SourceLocation Loc,
ParsedType BaseType,
SourceLocation TypeArgsLAngleLoc,
ArrayRef<ParsedType> TypeArgs,
SourceLocation TypeArgsRAngleLoc,
SourceLocation ProtocolLAngleLoc,
ArrayRef<Decl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc) {
TypeSourceInfo *BaseTypeInfo = nullptr;
QualType T = GetTypeFromParser(BaseType, &BaseTypeInfo);
if (T.isNull())
return true;
// Handle missing type-source info.
if (!BaseTypeInfo)
BaseTypeInfo = Context.getTrivialTypeSourceInfo(T, Loc);
// Extract type arguments.
SmallVector<TypeSourceInfo *, 4> ActualTypeArgInfos;
for (unsigned i = 0, n = TypeArgs.size(); i != n; ++i) {
TypeSourceInfo *TypeArgInfo = nullptr;
QualType TypeArg = GetTypeFromParser(TypeArgs[i], &TypeArgInfo);
if (TypeArg.isNull()) {
ActualTypeArgInfos.clear();
break;
}
assert(TypeArgInfo && "No type source info?");
ActualTypeArgInfos.push_back(TypeArgInfo);
}
// Build the object type.
QualType Result = BuildObjCObjectType(
T, BaseTypeInfo->getTypeLoc().getSourceRange().getBegin(),
TypeArgsLAngleLoc, ActualTypeArgInfos, TypeArgsRAngleLoc,
ProtocolLAngleLoc,
llvm::makeArrayRef((ObjCProtocolDecl * const *)Protocols.data(),
Protocols.size()),
ProtocolLocs, ProtocolRAngleLoc,
/*FailOnError=*/false);
if (Result == T)
return BaseType;
// Create source information for this type.
TypeSourceInfo *ResultTInfo = Context.CreateTypeSourceInfo(Result);
TypeLoc ResultTL = ResultTInfo->getTypeLoc();
// For id<Proto1, Proto2> or Class<Proto1, Proto2>, we'll have an
// object pointer type. Fill in source information for it.
if (auto ObjCObjectPointerTL = ResultTL.getAs<ObjCObjectPointerTypeLoc>()) {
// The '*' is implicit.
ObjCObjectPointerTL.setStarLoc(SourceLocation());
ResultTL = ObjCObjectPointerTL.getPointeeLoc();
}
if (auto OTPTL = ResultTL.getAs<ObjCTypeParamTypeLoc>()) {
// Protocol qualifier information.
if (OTPTL.getNumProtocols() > 0) {
assert(OTPTL.getNumProtocols() == Protocols.size());
OTPTL.setProtocolLAngleLoc(ProtocolLAngleLoc);
OTPTL.setProtocolRAngleLoc(ProtocolRAngleLoc);
for (unsigned i = 0, n = Protocols.size(); i != n; ++i)
OTPTL.setProtocolLoc(i, ProtocolLocs[i]);
}
// We're done. Return the completed type to the parser.
return CreateParsedType(Result, ResultTInfo);
}
auto ObjCObjectTL = ResultTL.castAs<ObjCObjectTypeLoc>();
// Type argument information.
if (ObjCObjectTL.getNumTypeArgs() > 0) {
assert(ObjCObjectTL.getNumTypeArgs() == ActualTypeArgInfos.size());
ObjCObjectTL.setTypeArgsLAngleLoc(TypeArgsLAngleLoc);
ObjCObjectTL.setTypeArgsRAngleLoc(TypeArgsRAngleLoc);
for (unsigned i = 0, n = ActualTypeArgInfos.size(); i != n; ++i)
ObjCObjectTL.setTypeArgTInfo(i, ActualTypeArgInfos[i]);
} else {
ObjCObjectTL.setTypeArgsLAngleLoc(SourceLocation());
ObjCObjectTL.setTypeArgsRAngleLoc(SourceLocation());
}
// Protocol qualifier information.
if (ObjCObjectTL.getNumProtocols() > 0) {
assert(ObjCObjectTL.getNumProtocols() == Protocols.size());
ObjCObjectTL.setProtocolLAngleLoc(ProtocolLAngleLoc);
ObjCObjectTL.setProtocolRAngleLoc(ProtocolRAngleLoc);
for (unsigned i = 0, n = Protocols.size(); i != n; ++i)
ObjCObjectTL.setProtocolLoc(i, ProtocolLocs[i]);
} else {
ObjCObjectTL.setProtocolLAngleLoc(SourceLocation());
ObjCObjectTL.setProtocolRAngleLoc(SourceLocation());
}
// Base type.
ObjCObjectTL.setHasBaseTypeAsWritten(true);
if (ObjCObjectTL.getType() == T)
ObjCObjectTL.getBaseLoc().initializeFullCopy(BaseTypeInfo->getTypeLoc());
else
ObjCObjectTL.getBaseLoc().initialize(Context, Loc);
// We're done. Return the completed type to the parser.
return CreateParsedType(Result, ResultTInfo);
}
static OpenCLAccessAttr::Spelling
getImageAccess(const ParsedAttributesView &Attrs) {
for (const ParsedAttr &AL : Attrs)
if (AL.getKind() == ParsedAttr::AT_OpenCLAccess)
return static_cast<OpenCLAccessAttr::Spelling>(AL.getSemanticSpelling());
return OpenCLAccessAttr::Keyword_read_only;
}
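// Illustrative: for an OpenCL image parameter such as
// '__read_only image2d_t img', the access qualifier is taken from the
// attribute list; when no access qualifier is written, read_only is assumed,
// matching the fallback above.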
/// Convert the specified declspec to the appropriate type
/// object.
/// \param state Specifies the declarator containing the declaration specifier
/// to be converted, along with other associated processing state.
/// \returns The type described by the declaration specifiers. This function
/// never returns null.
static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
// FIXME: Should move the logic from DeclSpec::Finish to here for validity
// checking.
Sema &S = state.getSema();
Declarator &declarator = state.getDeclarator();
DeclSpec &DS = declarator.getMutableDeclSpec();
SourceLocation DeclLoc = declarator.getIdentifierLoc();
if (DeclLoc.isInvalid())
DeclLoc = DS.getBeginLoc();
ASTContext &Context = S.Context;
QualType Result;
switch (DS.getTypeSpecType()) {
case DeclSpec::TST_void:
Result = Context.VoidTy;
break;
case DeclSpec::TST_char:
if (DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified)
Result = Context.CharTy;
else if (DS.getTypeSpecSign() == TypeSpecifierSign::Signed)
Result = Context.SignedCharTy;
else {
assert(DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned &&
"Unknown TSS value");
Result = Context.UnsignedCharTy;
}
break;
case DeclSpec::TST_wchar:
if (DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified)
Result = Context.WCharTy;
else if (DS.getTypeSpecSign() == TypeSpecifierSign::Signed) {
S.Diag(DS.getTypeSpecSignLoc(), diag::ext_wchar_t_sign_spec)
<< DS.getSpecifierName(DS.getTypeSpecType(),
Context.getPrintingPolicy());
Result = Context.getSignedWCharType();
} else {
assert(DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned &&
"Unknown TSS value");
S.Diag(DS.getTypeSpecSignLoc(), diag::ext_wchar_t_sign_spec)
<< DS.getSpecifierName(DS.getTypeSpecType(),
Context.getPrintingPolicy());
Result = Context.getUnsignedWCharType();
}
break;
case DeclSpec::TST_char8:
assert(DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified &&
"Unknown TSS value");
Result = Context.Char8Ty;
break;
case DeclSpec::TST_char16:
assert(DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified &&
"Unknown TSS value");
Result = Context.Char16Ty;
break;
case DeclSpec::TST_char32:
assert(DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified &&
"Unknown TSS value");
Result = Context.Char32Ty;
break;
case DeclSpec::TST_unspecified:
// If this is a missing declspec in a block literal return context, then it
// is inferred from the return statements inside the block.
// The declspec is always missing in a lambda expr context; it is either
// specified with a trailing return type or inferred.
if (S.getLangOpts().CPlusPlus14 &&
declarator.getContext() == DeclaratorContext::LambdaExpr) {
// In C++1y, a lambda's implicit return type is 'auto'.
Result = Context.getAutoDeductType();
break;
} else if (declarator.getContext() == DeclaratorContext::LambdaExpr ||
checkOmittedBlockReturnType(S, declarator,
Context.DependentTy)) {
Result = Context.DependentTy;
break;
}
// Unspecified typespec defaults to int in C90. However, the C90 grammar
// [C90 6.5] only allows a decl-spec if there was *some* type-specifier,
// type-qualifier, or storage-class-specifier. If not, emit an extwarn.
// Note that the one exception to this is function definitions, which are
// allowed to be completely missing a declspec. The parser already handles
// this, though, by pretending to have seen an 'int' in this case.
if (S.getLangOpts().isImplicitIntRequired()) {
S.Diag(DeclLoc, diag::warn_missing_type_specifier)
<< DS.getSourceRange()
<< FixItHint::CreateInsertion(DS.getBeginLoc(), "int");
} else if (!DS.hasTypeSpecifier()) {
// C99 and C++ require a type specifier. For example, C99 6.7.2p2 says:
// "At least one type specifier shall be given in the declaration
// specifiers in each declaration, and in the specifier-qualifier list in
// each struct declaration and type name."
if (!S.getLangOpts().isImplicitIntAllowed() && !DS.isTypeSpecPipe()) {
S.Diag(DeclLoc, diag::err_missing_type_specifier)
<< DS.getSourceRange();
// When this occurs, often something is very broken with the value
// being declared, poison it as invalid so we don't get chains of
// errors.
declarator.setInvalidType(true);
} else if (S.getLangOpts().getOpenCLCompatibleVersion() >= 200 &&
DS.isTypeSpecPipe()) {
S.Diag(DeclLoc, diag::err_missing_actual_pipe_type)
<< DS.getSourceRange();
declarator.setInvalidType(true);
} else {
assert(S.getLangOpts().isImplicitIntAllowed() &&
"implicit int is disabled?");
S.Diag(DeclLoc, diag::ext_missing_type_specifier)
<< DS.getSourceRange()
<< FixItHint::CreateInsertion(DS.getBeginLoc(), "int");
}
}
LLVM_FALLTHROUGH;
case DeclSpec::TST_int: {
if (DS.getTypeSpecSign() != TypeSpecifierSign::Unsigned) {
switch (DS.getTypeSpecWidth()) {
case TypeSpecifierWidth::Unspecified:
Result = Context.IntTy;
break;
case TypeSpecifierWidth::Short:
Result = Context.ShortTy;
break;
case TypeSpecifierWidth::Long:
Result = Context.LongTy;
break;
case TypeSpecifierWidth::LongLong:
Result = Context.LongLongTy;
// 'long long' is a C99 or C++11 feature.
if (!S.getLangOpts().C99) {
if (S.getLangOpts().CPlusPlus)
S.Diag(DS.getTypeSpecWidthLoc(),
S.getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
else
S.Diag(DS.getTypeSpecWidthLoc(), diag::ext_c99_longlong);
}
break;
}
} else {
switch (DS.getTypeSpecWidth()) {
case TypeSpecifierWidth::Unspecified:
Result = Context.UnsignedIntTy;
break;
case TypeSpecifierWidth::Short:
Result = Context.UnsignedShortTy;
break;
case TypeSpecifierWidth::Long:
Result = Context.UnsignedLongTy;
break;
case TypeSpecifierWidth::LongLong:
Result = Context.UnsignedLongLongTy;
// 'long long' is a C99 or C++11 feature.
if (!S.getLangOpts().C99) {
if (S.getLangOpts().CPlusPlus)
S.Diag(DS.getTypeSpecWidthLoc(),
S.getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
else
S.Diag(DS.getTypeSpecWidthLoc(), diag::ext_c99_longlong);
}
break;
}
}
break;
}
case DeclSpec::TST_bitint: {
if (!S.Context.getTargetInfo().hasBitIntType())
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "_BitInt";
Result =
S.BuildBitIntType(DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned,
DS.getRepAsExpr(), DS.getBeginLoc());
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
}
break;
}
case DeclSpec::TST_accum: {
switch (DS.getTypeSpecWidth()) {
case TypeSpecifierWidth::Short:
Result = Context.ShortAccumTy;
break;
case TypeSpecifierWidth::Unspecified:
Result = Context.AccumTy;
break;
case TypeSpecifierWidth::Long:
Result = Context.LongAccumTy;
break;
case TypeSpecifierWidth::LongLong:
llvm_unreachable("Unable to specify long long as _Accum width");
}
if (DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned)
Result = Context.getCorrespondingUnsignedType(Result);
if (DS.isTypeSpecSat())
Result = Context.getCorrespondingSaturatedType(Result);
break;
}
case DeclSpec::TST_fract: {
switch (DS.getTypeSpecWidth()) {
case TypeSpecifierWidth::Short:
Result = Context.ShortFractTy;
break;
case TypeSpecifierWidth::Unspecified:
Result = Context.FractTy;
break;
case TypeSpecifierWidth::Long:
Result = Context.LongFractTy;
break;
case TypeSpecifierWidth::LongLong:
llvm_unreachable("Unable to specify long long as _Fract width");
}
if (DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned)
Result = Context.getCorrespondingUnsignedType(Result);
if (DS.isTypeSpecSat())
Result = Context.getCorrespondingSaturatedType(Result);
break;
}
case DeclSpec::TST_int128:
if (!S.Context.getTargetInfo().hasInt128Type() &&
!(S.getLangOpts().SYCLIsDevice || S.getLangOpts().CUDAIsDevice ||
(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice)))
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
<< "__int128";
if (DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned)
Result = Context.UnsignedInt128Ty;
else
Result = Context.Int128Ty;
break;
case DeclSpec::TST_float16:
// CUDA host and device may have different _Float16 support, so do not
// diagnose _Float16 usage here to avoid false alarms.
// TODO: more precise diagnostics for CUDA.
if (!S.Context.getTargetInfo().hasFloat16Type() && !S.getLangOpts().CUDA &&
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
<< "_Float16";
Result = Context.Float16Ty;
break;
case DeclSpec::TST_half: Result = Context.HalfTy; break;
case DeclSpec::TST_BFloat16:
if (!S.Context.getTargetInfo().hasBFloat16Type())
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
<< "__bf16";
Result = Context.BFloat16Ty;
break;
case DeclSpec::TST_float: Result = Context.FloatTy; break;
case DeclSpec::TST_double:
if (DS.getTypeSpecWidth() == TypeSpecifierWidth::Long)
Result = Context.LongDoubleTy;
else
Result = Context.DoubleTy;
if (S.getLangOpts().OpenCL) {
if (!S.getOpenCLOptions().isSupported("cl_khr_fp64", S.getLangOpts()))
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension)
<< 0 << Result
<< (S.getLangOpts().getOpenCLCompatibleVersion() == 300
? "cl_khr_fp64 and __opencl_c_fp64"
: "cl_khr_fp64");
else if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp64", S.getLangOpts()))
S.Diag(DS.getTypeSpecTypeLoc(), diag::ext_opencl_double_without_pragma);
}
break;
case DeclSpec::TST_float128:
if (!S.Context.getTargetInfo().hasFloat128Type() &&
!S.getLangOpts().SYCLIsDevice &&
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
<< "__float128";
Result = Context.Float128Ty;
break;
case DeclSpec::TST_ibm128:
if (!S.Context.getTargetInfo().hasIbm128Type() &&
!S.getLangOpts().SYCLIsDevice &&
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "__ibm128";
Result = Context.Ibm128Ty;
break;
case DeclSpec::TST_bool:
Result = Context.BoolTy; // _Bool or bool
break;
case DeclSpec::TST_decimal32: // _Decimal32
case DeclSpec::TST_decimal64: // _Decimal64
case DeclSpec::TST_decimal128: // _Decimal128
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_decimal_unsupported);
Result = Context.IntTy;
declarator.setInvalidType(true);
break;
case DeclSpec::TST_class:
case DeclSpec::TST_enum:
case DeclSpec::TST_union:
case DeclSpec::TST_struct:
case DeclSpec::TST_interface: {
TagDecl *D = dyn_cast_or_null<TagDecl>(DS.getRepAsDecl());
if (!D) {
// This can happen in C++ with ambiguous lookups.
Result = Context.IntTy;
declarator.setInvalidType(true);
break;
}
// If the type is deprecated or unavailable, diagnose it.
S.DiagnoseUseOfDecl(D, DS.getTypeSpecTypeNameLoc());
assert(DS.getTypeSpecWidth() == TypeSpecifierWidth::Unspecified &&
DS.getTypeSpecComplex() == 0 &&
DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified &&
"No qualifiers on tag names!");
// TypeQuals handled by caller.
Result = Context.getTypeDeclType(D);
// In both C and C++, make an ElaboratedType.
ElaboratedTypeKeyword Keyword
= ElaboratedType::getKeywordForTypeSpec(DS.getTypeSpecType());
Result = S.getElaboratedType(Keyword, DS.getTypeSpecScope(), Result,
DS.isTypeSpecOwned() ? D : nullptr);
break;
}
case DeclSpec::TST_typename: {
assert(DS.getTypeSpecWidth() == TypeSpecifierWidth::Unspecified &&
DS.getTypeSpecComplex() == 0 &&
DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified &&
"Can't handle qualifiers on typedef names yet!");
Result = S.GetTypeFromParser(DS.getRepAsType());
if (Result.isNull()) {
declarator.setInvalidType(true);
}
// TypeQuals handled by caller.
break;
}
case DeclSpec::TST_typeofType:
// FIXME: Preserve type source info.
Result = S.GetTypeFromParser(DS.getRepAsType());
assert(!Result.isNull() && "Didn't get a type for typeof?");
if (!Result->isDependentType())
if (const TagType *TT = Result->getAs<TagType>())
S.DiagnoseUseOfDecl(TT->getDecl(), DS.getTypeSpecTypeLoc());
// TypeQuals handled by caller.
Result = Context.getTypeOfType(Result);
break;
case DeclSpec::TST_typeofExpr: {
Expr *E = DS.getRepAsExpr();
assert(E && "Didn't get an expression for typeof?");
// TypeQuals handled by caller.
Result = S.BuildTypeofExprType(E);
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
}
break;
}
case DeclSpec::TST_decltype: {
Expr *E = DS.getRepAsExpr();
assert(E && "Didn't get an expression for decltype?");
// TypeQuals handled by caller.
Result = S.BuildDecltypeType(E);
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
}
break;
}
case DeclSpec::TST_underlyingType:
Result = S.GetTypeFromParser(DS.getRepAsType());
assert(!Result.isNull() && "Didn't get a type for __underlying_type?");
Result = S.BuildUnaryTransformType(Result,
UnaryTransformType::EnumUnderlyingType,
DS.getTypeSpecTypeLoc());
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
}
break;
case DeclSpec::TST_auto:
case DeclSpec::TST_decltype_auto: {
auto AutoKW = DS.getTypeSpecType() == DeclSpec::TST_decltype_auto
? AutoTypeKeyword::DecltypeAuto
: AutoTypeKeyword::Auto;
ConceptDecl *TypeConstraintConcept = nullptr;
llvm::SmallVector<TemplateArgument, 8> TemplateArgs;
if (DS.isConstrainedAuto()) {
if (TemplateIdAnnotation *TemplateId = DS.getRepAsTemplateId()) {
TypeConstraintConcept =
cast<ConceptDecl>(TemplateId->Template.get().getAsTemplateDecl());
TemplateArgumentListInfo TemplateArgsInfo;
TemplateArgsInfo.setLAngleLoc(TemplateId->LAngleLoc);
TemplateArgsInfo.setRAngleLoc(TemplateId->RAngleLoc);
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
S.translateTemplateArguments(TemplateArgsPtr, TemplateArgsInfo);
for (const auto &ArgLoc : TemplateArgsInfo.arguments())
TemplateArgs.push_back(ArgLoc.getArgument());
} else {
declarator.setInvalidType(true);
}
}
Result = S.Context.getAutoType(QualType(), AutoKW,
/*IsDependent*/ false, /*IsPack=*/false,
TypeConstraintConcept, TemplateArgs);
break;
}
case DeclSpec::TST_auto_type:
Result = Context.getAutoType(QualType(), AutoTypeKeyword::GNUAutoType, false);
break;
case DeclSpec::TST_unknown_anytype:
Result = Context.UnknownAnyTy;
break;
case DeclSpec::TST_atomic:
Result = S.GetTypeFromParser(DS.getRepAsType());
assert(!Result.isNull() && "Didn't get a type for _Atomic?");
Result = S.BuildAtomicType(Result, DS.getTypeSpecTypeLoc());
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
}
break;
#define GENERIC_IMAGE_TYPE(ImgType, Id) \
case DeclSpec::TST_##ImgType##_t: \
switch (getImageAccess(DS.getAttributes())) { \
case OpenCLAccessAttr::Keyword_write_only: \
Result = Context.Id##WOTy; \
break; \
case OpenCLAccessAttr::Keyword_read_write: \
Result = Context.Id##RWTy; \
break; \
case OpenCLAccessAttr::Keyword_read_only: \
Result = Context.Id##ROTy; \
break; \
case OpenCLAccessAttr::SpellingNotCalculated: \
llvm_unreachable("Spelling not yet calculated"); \
} \
break;
#include "clang/Basic/OpenCLImageTypes.def"
case DeclSpec::TST_error:
Result = Context.IntTy;
declarator.setInvalidType(true);
break;
}
// FIXME: we want resulting declarations to be marked invalid, but claiming
// the type is invalid is too strong - e.g. it causes ActOnTypeName to return
// a null type.
if (Result->containsErrors())
declarator.setInvalidType();
if (S.getLangOpts().OpenCL) {
const auto &OpenCLOptions = S.getOpenCLOptions();
bool IsOpenCLC30Compatible =
S.getLangOpts().getOpenCLCompatibleVersion() == 300;
// OpenCL C v3.0 s6.3.3 - OpenCL image types require __opencl_c_images
// support.
// OpenCL C v3.0 s6.2.1 - OpenCL 3d image write types require support
// for OpenCL C 2.0, or OpenCL C 3.0 or newer and the
// __opencl_c_3d_image_writes feature. OpenCL C v3.0 API s4.2 - For devices
// that support OpenCL 3.0, cl_khr_3d_image_writes must be returned when and
// only when the optional feature is supported.
if ((Result->isImageType() || Result->isSamplerT()) &&
(IsOpenCLC30Compatible &&
!OpenCLOptions.isSupported("__opencl_c_images", S.getLangOpts()))) {
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension)
<< 0 << Result << "__opencl_c_images";
declarator.setInvalidType();
} else if (Result->isOCLImage3dWOType() &&
!OpenCLOptions.isSupported("cl_khr_3d_image_writes",
S.getLangOpts())) {
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension)
<< 0 << Result
<< (IsOpenCLC30Compatible
? "cl_khr_3d_image_writes and __opencl_c_3d_image_writes"
: "cl_khr_3d_image_writes");
declarator.setInvalidType();
}
}
bool IsFixedPointType = DS.getTypeSpecType() == DeclSpec::TST_accum ||
DS.getTypeSpecType() == DeclSpec::TST_fract;
// Only fixed-point types can be saturated.
if (DS.isTypeSpecSat() && !IsFixedPointType)
S.Diag(DS.getTypeSpecSatLoc(), diag::err_invalid_saturation_spec)
<< DS.getSpecifierName(DS.getTypeSpecType(),
Context.getPrintingPolicy());
// Handle complex types.
if (DS.getTypeSpecComplex() == DeclSpec::TSC_complex) {
if (S.getLangOpts().Freestanding)
S.Diag(DS.getTypeSpecComplexLoc(), diag::ext_freestanding_complex);
Result = Context.getComplexType(Result);
} else if (DS.isTypeAltiVecVector()) {
unsigned typeSize = static_cast<unsigned>(Context.getTypeSize(Result));
assert(typeSize > 0 && "type size for vector must be greater than 0 bits");
VectorType::VectorKind VecKind = VectorType::AltiVecVector;
if (DS.isTypeAltiVecPixel())
VecKind = VectorType::AltiVecPixel;
else if (DS.isTypeAltiVecBool())
VecKind = VectorType::AltiVecBool;
Result = Context.getVectorType(Result, 128/typeSize, VecKind);
}
// FIXME: Imaginary.
if (DS.getTypeSpecComplex() == DeclSpec::TSC_imaginary)
S.Diag(DS.getTypeSpecComplexLoc(), diag::err_imaginary_not_supported);
// Before we process any type attributes, synthesize a block literal
// function declarator if necessary.
if (declarator.getContext() == DeclaratorContext::BlockLiteral)
maybeSynthesizeBlockSignature(state, Result);
// Apply any type attributes from the decl spec. This may cause the
// list of type attributes to be temporarily saved while the type
// attributes are pushed around.
// Pipe attributes will be handled later (in GetFullTypeForDeclarator).
if (!DS.isTypeSpecPipe()) {
// We also apply declaration attributes that "slide" to the decl spec.
// Ordering can be important for attributes. The declaration attributes
// come syntactically before the decl spec attributes, so we process them
// in that order.
ParsedAttributesView SlidingAttrs;
for (ParsedAttr &AL : declarator.getDeclarationAttributes()) {
if (AL.slidesFromDeclToDeclSpecLegacyBehavior()) {
SlidingAttrs.addAtEnd(&AL);
// For standard syntax attributes, which would normally appertain to the
// declaration here, suggest moving them to the type instead. But only
// do this for our own vendor attributes; moving other vendors'
// attributes might hurt portability.
// There's one special case that we need to deal with here: The
// `MatrixType` attribute may only be used in a typedef declaration. If
// it's being used anywhere else, don't output the warning as
// ProcessDeclAttributes() will output an error anyway.
if (AL.isStandardAttributeSyntax() && AL.isClangScope() &&
!(AL.getKind() == ParsedAttr::AT_MatrixType &&
DS.getStorageClassSpec() != DeclSpec::SCS_typedef)) {
S.Diag(AL.getLoc(), diag::warn_type_attribute_deprecated_on_decl)
<< AL;
}
}
}
// During this call to processTypeAttrs(),
// TypeProcessingState::getCurrentAttributes() will erroneously return a
// reference to the DeclSpec attributes, rather than the declaration
// attributes. However, this doesn't matter, as getCurrentAttributes()
// is only called when distributing attributes from one attribute list
// to another. Declaration attributes are always C++11 attributes, and these
// are never distributed.
processTypeAttrs(state, Result, TAL_DeclSpec, SlidingAttrs);
processTypeAttrs(state, Result, TAL_DeclSpec, DS.getAttributes());
}
// Apply const/volatile/restrict qualifiers to T.
if (unsigned TypeQuals = DS.getTypeQualifiers()) {
// Warn about CV qualifiers on function types.
// C99 6.7.3p8:
// If the specification of a function type includes any type qualifiers,
// the behavior is undefined.
// C++11 [dcl.fct]p7:
// The effect of a cv-qualifier-seq in a function declarator is not the
// same as adding cv-qualification on top of the function type. In the
// latter case, the cv-qualifiers are ignored.
if (Result->isFunctionType()) {
diagnoseAndRemoveTypeQualifiers(
S, DS, TypeQuals, Result, DeclSpec::TQ_const | DeclSpec::TQ_volatile,
S.getLangOpts().CPlusPlus
? diag::warn_typecheck_function_qualifiers_ignored
: diag::warn_typecheck_function_qualifiers_unspecified);
// No diagnostic for 'restrict' or '_Atomic' applied to a
// function type; we'll diagnose those later, in BuildQualifiedType.
}
// C++11 [dcl.ref]p1:
// Cv-qualified references are ill-formed except when the
// cv-qualifiers are introduced through the use of a typedef-name
// or decltype-specifier, in which case the cv-qualifiers are ignored.
//
// There don't appear to be any other contexts in which a cv-qualified
// reference type could be formed, so the 'ill-formed' clause here appears
// never to apply in practice.
if (TypeQuals && Result->isReferenceType()) {
diagnoseAndRemoveTypeQualifiers(
S, DS, TypeQuals, Result,
DeclSpec::TQ_const | DeclSpec::TQ_volatile | DeclSpec::TQ_atomic,
diag::warn_typecheck_reference_qualifiers);
}
// C90 6.5.3 constraints: "The same type qualifier shall not appear more
// than once in the same specifier-list or qualifier-list, either directly
// or via one or more typedefs."
if (!S.getLangOpts().C99 && !S.getLangOpts().CPlusPlus
&& TypeQuals & Result.getCVRQualifiers()) {
if (TypeQuals & DeclSpec::TQ_const && Result.isConstQualified()) {
S.Diag(DS.getConstSpecLoc(), diag::ext_duplicate_declspec)
<< "const";
}
if (TypeQuals & DeclSpec::TQ_volatile && Result.isVolatileQualified()) {
S.Diag(DS.getVolatileSpecLoc(), diag::ext_duplicate_declspec)
<< "volatile";
}
// C90 doesn't have restrict or _Atomic, so it doesn't force us to
// produce a warning in this case.
}
QualType Qualified = S.BuildQualifiedType(Result, DeclLoc, TypeQuals, &DS);
// If adding qualifiers fails, just use the unqualified type.
if (Qualified.isNull())
declarator.setInvalidType(true);
else
Result = Qualified;
}
assert(!Result.isNull() && "This function should not return a null type");
return Result;
}
static std::string getPrintableNameForEntity(DeclarationName Entity) {
if (Entity)
return Entity.getAsString();
return "type name";
}
static bool isDependentOrGNUAutoType(QualType T) {
if (T->isDependentType())
return true;
const auto *AT = dyn_cast<AutoType>(T);
return AT && AT->isGNUAutoType();
}
QualType Sema::BuildQualifiedType(QualType T, SourceLocation Loc,
Qualifiers Qs, const DeclSpec *DS) {
if (T.isNull())
return QualType();
// Ignore any attempt to form a cv-qualified reference.
if (T->isReferenceType()) {
Qs.removeConst();
Qs.removeVolatile();
}
// Enforce C99 6.7.3p2: "Types other than pointer types derived from
// object or incomplete types shall not be restrict-qualified."
if (Qs.hasRestrict()) {
unsigned DiagID = 0;
QualType ProblemTy;
if (T->isAnyPointerType() || T->isReferenceType() ||
T->isMemberPointerType()) {
QualType EltTy;
if (T->isObjCObjectPointerType())
EltTy = T;
else if (const MemberPointerType *PTy = T->getAs<MemberPointerType>())
EltTy = PTy->getPointeeType();
else
EltTy = T->getPointeeType();
// If we have a pointer or reference, the pointee must have an object
// incomplete type.
if (!EltTy->isIncompleteOrObjectType()) {
DiagID = diag::err_typecheck_invalid_restrict_invalid_pointee;
ProblemTy = EltTy;
}
} else if (!isDependentOrGNUAutoType(T)) {
// For an __auto_type variable, we may not have seen the initializer yet
// and so have no idea whether the underlying type is a pointer type or
// not.
DiagID = diag::err_typecheck_invalid_restrict_not_pointer;
ProblemTy = T;
}
if (DiagID) {
Diag(DS ? DS->getRestrictSpecLoc() : Loc, DiagID) << ProblemTy;
Qs.removeRestrict();
}
}
return Context.getQualifiedType(T, Qs);
}
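// Illustrative restrict declarations (not part of the original source),
// following the C99 6.7.3p2 rule enforced above:
//
//   int *restrict p;            // OK: pointer to an object type
//   void (*restrict fp)(void);  // error: the pointee is a function type
//                               // (err_typecheck_invalid_restrict_invalid_pointee)
//   int restrict i;             // error: err_typecheck_invalid_restrict_not_pointer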
QualType Sema::BuildQualifiedType(QualType T, SourceLocation Loc,
unsigned CVRAU, const DeclSpec *DS) {
if (T.isNull())
return QualType();
// Ignore any attempt to form a cv-qualified reference.
if (T->isReferenceType())
CVRAU &=
~(DeclSpec::TQ_const | DeclSpec::TQ_volatile | DeclSpec::TQ_atomic);
// Convert from DeclSpec::TQ to Qualifiers::TQ by just dropping TQ_atomic and
// TQ_unaligned.
unsigned CVR = CVRAU & ~(DeclSpec::TQ_atomic | DeclSpec::TQ_unaligned);
// C11 6.7.3/5:
// If the same qualifier appears more than once in the same
// specifier-qualifier-list, either directly or via one or more typedefs,
// the behavior is the same as if it appeared only once.
//
// It's not specified what happens when the _Atomic qualifier is applied to
// a type specified with the _Atomic specifier, but we assume that this
// should be treated as if the _Atomic qualifier appeared multiple times.
if (CVRAU & DeclSpec::TQ_atomic && !T->isAtomicType()) {
// C11 6.7.3/5:
// If other qualifiers appear along with the _Atomic qualifier in a
// specifier-qualifier-list, the resulting type is the so-qualified
// atomic type.
//
// Don't need to worry about array types here, since _Atomic can't be
// applied to such types.
SplitQualType Split = T.getSplitUnqualifiedType();
T = BuildAtomicType(QualType(Split.Ty, 0),
DS ? DS->getAtomicSpecLoc() : Loc);
if (T.isNull())
return T;
Split.Quals.addCVRQualifiers(CVR);
return BuildQualifiedType(T, Loc, Split.Quals);
}
Qualifiers Q = Qualifiers::fromCVRMask(CVR);
Q.setUnaligned(CVRAU & DeclSpec::TQ_unaligned);
return BuildQualifiedType(T, Loc, Q, DS);
}
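// For illustration (not in the original source), how the _Atomic handling
// above composes qualifiers in C11:
//
//   const _Atomic int x;     // the const applies to the atomic type,
//                            // yielding 'const _Atomic(int)'
//   _Atomic _Atomic(int) y;  // qualifier on an already-atomic type is
//                            // treated as if _Atomic appeared only once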
/// Build a paren type including \p T.
QualType Sema::BuildParenType(QualType T) {
return Context.getParenType(T);
}
/// Given that we're building a pointer or reference to the given type,
/// infer the ObjC lifetime qualifier for the pointee under ARC.
static QualType inferARCLifetimeForPointee(Sema &S, QualType type,
SourceLocation loc,
bool isReference) {
// Bail out if retention is unrequired or already specified.
if (!type->isObjCLifetimeType() ||
type.getObjCLifetime() != Qualifiers::OCL_None)
return type;
Qualifiers::ObjCLifetime implicitLifetime = Qualifiers::OCL_None;
// If the object type is const-qualified, we can safely use
// __unsafe_unretained. This is safe (because there are no read
// barriers), and it'll be safe to coerce anything but __weak* to
// the resulting type.
if (type.isConstQualified()) {
implicitLifetime = Qualifiers::OCL_ExplicitNone;
// Otherwise, check whether the static type does not require
// retaining. This currently only triggers for Class (possibly
// protocol-qualified, and arrays thereof).
} else if (type->isObjCARCImplicitlyUnretainedType()) {
implicitLifetime = Qualifiers::OCL_ExplicitNone;
// If we are in an unevaluated context, like sizeof, skip adding a
// qualification.
} else if (S.isUnevaluatedContext()) {
return type;
// If that failed, give an error and recover using __strong. __strong
// is the option most likely to prevent spurious second-order diagnostics,
// like when binding a reference to a field.
} else {
// These types can show up in private ivars in system headers, so
// we need this to not be an error in those cases. Instead we
// want to delay.
if (S.DelayedDiagnostics.shouldDelayDiagnostics()) {
S.DelayedDiagnostics.add(
sema::DelayedDiagnostic::makeForbiddenType(loc,
diag::err_arc_indirect_no_ownership, type, isReference));
} else {
S.Diag(loc, diag::err_arc_indirect_no_ownership) << type << isReference;
}
implicitLifetime = Qualifiers::OCL_Strong;
}
assert(implicitLifetime && "didn't infer any lifetime!");
Qualifiers qs;
qs.addObjCLifetime(implicitLifetime);
return S.Context.getQualifiedType(type, qs);
}
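// Illustrative inferences (not part of the original source) for a pointee
// type under ARC, matching the branches above:
//
//   id const *p;  // const pointee: __unsafe_unretained is inferred
//   Class *c;     // implicitly unretained static type: __unsafe_unretained
//   id *q;        // otherwise: err_arc_indirect_no_ownership is emitted
//                 // and the code recovers with an inferred __strong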
static std::string getFunctionQualifiersAsString(const FunctionProtoType *FnTy){
std::string Quals = FnTy->getMethodQuals().getAsString();
switch (FnTy->getRefQualifier()) {
case RQ_None:
break;
case RQ_LValue:
if (!Quals.empty())
Quals += ' ';
Quals += '&';
break;
case RQ_RValue:
if (!Quals.empty())
Quals += ' ';
Quals += "&&";
break;
}
return Quals;
}
namespace {
/// Kinds of declarator that cannot contain a qualified function type.
///
/// C++98 [dcl.fct]p4 / C++11 [dcl.fct]p6:
/// a function type with a cv-qualifier or a ref-qualifier can only appear
/// at the topmost level of a type.
///
/// Parens and member pointers are permitted. We don't diagnose array and
/// function declarators, because they don't allow function types at all.
///
/// The values of this enum are used in diagnostics.
enum QualifiedFunctionKind { QFK_BlockPointer, QFK_Pointer, QFK_Reference };
} // end anonymous namespace
/// Check whether the type T is a qualified function type, and if it is,
/// diagnose that it cannot be contained within the given kind of declarator.
static bool checkQualifiedFunction(Sema &S, QualType T, SourceLocation Loc,
QualifiedFunctionKind QFK) {
// Does T refer to a function type with a cv-qualifier or a ref-qualifier?
const FunctionProtoType *FPT = T->getAs<FunctionProtoType>();
if (!FPT ||
(FPT->getMethodQuals().empty() && FPT->getRefQualifier() == RQ_None))
return false;
S.Diag(Loc, diag::err_compound_qualified_function_type)
<< QFK << isa<FunctionType>(T.IgnoreParens()) << T
<< getFunctionQualifiersAsString(FPT);
return true;
}
bool Sema::CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc) {
const FunctionProtoType *FPT = T->getAs<FunctionProtoType>();
if (!FPT ||
(FPT->getMethodQuals().empty() && FPT->getRefQualifier() == RQ_None))
return false;
Diag(Loc, diag::err_qualified_function_typeid)
<< T << getFunctionQualifiersAsString(FPT);
return true;
}
// Helper to deduce addr space of a pointee type in OpenCL mode.
static QualType deduceOpenCLPointeeAddrSpace(Sema &S, QualType PointeeType) {
if (!PointeeType->isUndeducedAutoType() && !PointeeType->isDependentType() &&
!PointeeType->isSamplerT() &&
!PointeeType.hasAddressSpace())
PointeeType = S.getASTContext().getAddrSpaceQualType(
PointeeType, S.getASTContext().getDefaultOpenCLPointeeAddrSpace());
return PointeeType;
}
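// For illustration (not from the original source): in OpenCL mode, a pointee
// type with no explicit address space is placed in the target's default
// OpenCL pointee address space, so a declaration like
//
//   int *p;
//
// behaves roughly as if it were written
//
//   __generic int *p;
//
// (assuming the default pointee address space is generic, as in OpenCL 2.0).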
/// Build a pointer type.
///
/// \param T The type to which we'll be building a pointer.
///
/// \param Loc The location of the entity whose type involves this
/// pointer type or, if there is no such entity, the location of the
/// type that will have pointer type.
///
/// \param Entity The name of the entity that involves the pointer
/// type, if known.
///
/// \returns A suitable pointer type, if there are no
/// errors. Otherwise, returns a NULL type.
QualType Sema::BuildPointerType(QualType T,
SourceLocation Loc, DeclarationName Entity) {
if (T->isReferenceType()) {
// C++ 8.3.2p4: There shall be no ... pointers to references ...
Diag(Loc, diag::err_illegal_decl_pointer_to_reference)
<< getPrintableNameForEntity(Entity) << T;
return QualType();
}
if (T->isFunctionType() && getLangOpts().OpenCL &&
!getOpenCLOptions().isAvailableOption("__cl_clang_function_pointers",
getLangOpts())) {
Diag(Loc, diag::err_opencl_function_pointer) << /*pointer*/ 0;
return QualType();
}
if (getLangOpts().HLSL) {
Diag(Loc, diag::err_hlsl_pointers_unsupported) << 0;
return QualType();
}
if (checkQualifiedFunction(*this, T, Loc, QFK_Pointer))
return QualType();
assert(!T->isObjCObjectType() && "Should build ObjCObjectPointerType");
// In ARC, it is forbidden to build pointers to unqualified pointers.
if (getLangOpts().ObjCAutoRefCount)
T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ false);
if (getLangOpts().OpenCL)
T = deduceOpenCLPointeeAddrSpace(*this, T);
// Build the pointer type.
return Context.getPointerType(T);
}
/// Build a reference type.
///
/// \param T The type to which we'll be building a reference.
///
/// \param Loc The location of the entity whose type involves this
/// reference type or, if there is no such entity, the location of the
/// type that will have reference type.
///
/// \param Entity The name of the entity that involves the reference
/// type, if known.
///
/// \returns A suitable reference type, if there are no
/// errors. Otherwise, returns a NULL type.
QualType Sema::BuildReferenceType(QualType T, bool SpelledAsLValue,
SourceLocation Loc,
DeclarationName Entity) {
assert(Context.getCanonicalType(T) != Context.OverloadTy &&
"Unresolved overloaded function type");
// C++0x [dcl.ref]p6:
// If a typedef (7.1.3), a type template-parameter (14.3.1), or a
// decltype-specifier (7.1.6.2) denotes a type TR that is a reference to a
// type T, an attempt to create the type "lvalue reference to cv TR" creates
// the type "lvalue reference to T", while an attempt to create the type
// "rvalue reference to cv TR" creates the type TR.
bool LValueRef = SpelledAsLValue || T->getAs<LValueReferenceType>();
// C++ [dcl.ref]p4: There shall be no references to references.
//
// According to C++ DR 106, references to references are only
// diagnosed when they are written directly (e.g., "int & &"),
// but not when they happen via a typedef:
//
// typedef int& intref;
// typedef intref& intref2;
//
// Parser::ParseDeclaratorInternal diagnoses the case where
// references are written directly; here, we handle the
// collapsing of references-to-references as described in C++0x.
// DR 106 and 540 introduce reference-collapsing into C++98/03.
// C++ [dcl.ref]p1:
// A declarator that specifies the type "reference to cv void"
// is ill-formed.
if (T->isVoidType()) {
Diag(Loc, diag::err_reference_to_void);
return QualType();
}
if (getLangOpts().HLSL) {
Diag(Loc, diag::err_hlsl_pointers_unsupported) << 1;
return QualType();
}
if (checkQualifiedFunction(*this, T, Loc, QFK_Reference))
return QualType();
if (T->isFunctionType() && getLangOpts().OpenCL &&
!getOpenCLOptions().isAvailableOption("__cl_clang_function_pointers",
getLangOpts())) {
Diag(Loc, diag::err_opencl_function_pointer) << /*reference*/ 1;
return QualType();
}
// In ARC, it is forbidden to build references to unqualified pointers.
if (getLangOpts().ObjCAutoRefCount)
T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ true);
if (getLangOpts().OpenCL)
T = deduceOpenCLPointeeAddrSpace(*this, T);
// Handle restrict on references.
if (LValueRef)
return Context.getLValueReferenceType(T, SpelledAsLValue);
return Context.getRValueReferenceType(T);
}
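// Reference collapsing, illustrated (not from the original source) per the
// C++11 [dcl.ref]p6 rule cited above:
//
//   typedef int&  LRef;
//   typedef int&& RRef;
//   int n = 0;
//   LRef&  a = n;  // int&  (lvalue reference to lvalue reference)
//   LRef&& b = n;  // int&  (rvalue reference to lvalue reference collapses
//                  //        to an lvalue reference)
//   RRef&& c = 0;  // int&& (rvalue reference to rvalue reference)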
/// Build a Read-only Pipe type.
///
/// \param T The type to which we'll be building a Pipe.
///
/// \param Loc Currently unused.
///
/// \returns A suitable pipe type, if there are no errors. Otherwise, returns a
/// NULL type.
QualType Sema::BuildReadPipeType(QualType T, SourceLocation Loc) {
return Context.getReadPipeType(T);
}
/// Build a Write-only Pipe type.
///
/// \param T The type to which we'll be building a Pipe.
///
/// \param Loc Currently unused.
///
/// \returns A suitable pipe type, if there are no errors. Otherwise, returns a
/// NULL type.
QualType Sema::BuildWritePipeType(QualType T, SourceLocation Loc) {
return Context.getWritePipeType(T);
}
/// Build a bit-precise integer type.
///
/// \param IsUnsigned Boolean representing the signedness of the type.
///
/// \param BitWidth Size of this int type in bits, or an expression representing
/// that.
///
/// \param Loc Location of the keyword.
QualType Sema::BuildBitIntType(bool IsUnsigned, Expr *BitWidth,
SourceLocation Loc) {
if (BitWidth->isInstantiationDependent())
return Context.getDependentBitIntType(IsUnsigned, BitWidth);
llvm::APSInt Bits(32);
ExprResult ICE =
VerifyIntegerConstantExpression(BitWidth, &Bits, /*FIXME*/ AllowFold);
if (ICE.isInvalid())
return QualType();
size_t NumBits = Bits.getZExtValue();
if (!IsUnsigned && NumBits < 2) {
Diag(Loc, diag::err_bit_int_bad_size) << 0;
return QualType();
}
if (IsUnsigned && NumBits < 1) {
Diag(Loc, diag::err_bit_int_bad_size) << 1;
return QualType();
}
const TargetInfo &TI = getASTContext().getTargetInfo();
if (NumBits > TI.getMaxBitIntWidth()) {
Diag(Loc, diag::err_bit_int_max_size)
<< IsUnsigned << static_cast<uint64_t>(TI.getMaxBitIntWidth());
return QualType();
}
return Context.getBitIntType(IsUnsigned, NumBits);
}
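// Illustrative _BitInt declarations (not part of the original source)
// exercising the size checks above:
//
//   _BitInt(2) a;           // OK: the minimum signed width is 2 bits
//   unsigned _BitInt(1) b;  // OK: the minimum unsigned width is 1 bit
//   _BitInt(1) c;           // error: err_bit_int_bad_size (signed _BitInt
//                           // needs at least 2 bits)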
/// Check whether the specified array bound can be evaluated using the relevant
/// language rules. If so, returns the possibly-converted expression and sets
/// SizeVal to the size. If not, but the expression might be a VLA bound,
/// returns ExprResult(). Otherwise, produces a diagnostic and returns
/// ExprError().
static ExprResult checkArraySize(Sema &S, Expr *&ArraySize,
llvm::APSInt &SizeVal, unsigned VLADiag,
bool VLAIsError) {
if (S.getLangOpts().CPlusPlus14 &&
(VLAIsError ||
!ArraySize->getType()->isIntegralOrUnscopedEnumerationType())) {
// C++14 [dcl.array]p1:
// The constant-expression shall be a converted constant expression of
// type std::size_t.
//
// Don't apply this rule if we might be forming a VLA: in that case, we
// allow non-constant expressions and constant-folding. We only need to use
// the converted constant expression rules (to properly convert the source)
// when the source expression is of class type.
return S.CheckConvertedConstantExpression(
ArraySize, S.Context.getSizeType(), SizeVal, Sema::CCEK_ArrayBound);
}
// If the size is an ICE, it certainly isn't a VLA. If we're in a GNU mode
// (like gnu99, but not c99), accept any evaluatable value as an extension.
class VLADiagnoser : public Sema::VerifyICEDiagnoser {
public:
unsigned VLADiag;
bool VLAIsError;
bool IsVLA = false;
VLADiagnoser(unsigned VLADiag, bool VLAIsError)
: VLADiag(VLADiag), VLAIsError(VLAIsError) {}
Sema::SemaDiagnosticBuilder diagnoseNotICEType(Sema &S, SourceLocation Loc,
QualType T) override {
return S.Diag(Loc, diag::err_array_size_non_int) << T;
}
Sema::SemaDiagnosticBuilder diagnoseNotICE(Sema &S,
SourceLocation Loc) override {
IsVLA = !VLAIsError;
return S.Diag(Loc, VLADiag);
}
Sema::SemaDiagnosticBuilder diagnoseFold(Sema &S,
SourceLocation Loc) override {
return S.Diag(Loc, diag::ext_vla_folded_to_constant);
}
} Diagnoser(VLADiag, VLAIsError);
ExprResult R =
S.VerifyIntegerConstantExpression(ArraySize, &SizeVal, Diagnoser);
if (Diagnoser.IsVLA)
return ExprResult();
return R;
}
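// A sketch (not from the original source) of how checkArraySize classifies
// common bounds, assuming a C99 translation unit and a non-constant 'int n':
//
//   int a[10];        // ICE: returns the expression, SizeVal becomes 10
//   int b[n];         // not an ICE: diagnoseNotICE emits VLADiag, and an
//                     // empty ExprResult() signals a VLA bound
//   const int k = 8;
//   int c[k];         // not an ICE in C, but foldable: GNU modes accept it
//                     // with ext_vla_folded_to_constant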
/// Build an array type.
///
/// \param T The type of each element in the array.
///
/// \param ASM C99 array size modifier (e.g., '*', 'static').
///
/// \param ArraySize Expression describing the size of the array.
///
/// \param Brackets The range from the opening '[' to the closing ']'.
///
/// \param Entity The name of the entity that involves the array
/// type, if known.
///
/// \returns A suitable array type, if there are no errors. Otherwise,
/// returns a NULL type.
QualType Sema::BuildArrayType(QualType T, ArrayType::ArraySizeModifier ASM,
Expr *ArraySize, unsigned Quals,
SourceRange Brackets, DeclarationName Entity) {
SourceLocation Loc = Brackets.getBegin();
if (getLangOpts().CPlusPlus) {
// C++ [dcl.array]p1:
// T is called the array element type; this type shall not be a reference
// type, the (possibly cv-qualified) type void, a function type or an
// abstract class type.
//
// C++ [dcl.array]p3:
// When several "array of" specifications are adjacent, [...] only the
// first of the constant expressions that specify the bounds of the arrays
// may be omitted.
//
// Note: function types are handled in the common path with C.
if (T->isReferenceType()) {
Diag(Loc, diag::err_illegal_decl_array_of_references)
<< getPrintableNameForEntity(Entity) << T;
return QualType();
}
if (T->isVoidType() || T->isIncompleteArrayType()) {
Diag(Loc, diag::err_array_incomplete_or_sizeless_type) << 0 << T;
return QualType();
}
if (RequireNonAbstractType(Brackets.getBegin(), T,
diag::err_array_of_abstract_type))
return QualType();
// Mentioning a member pointer type for an array type causes us to lock in
// an inheritance model, even if it's inside an unused typedef.
if (Context.getTargetInfo().getCXXABI().isMicrosoft())
if (const MemberPointerType *MPTy = T->getAs<MemberPointerType>())
if (!MPTy->getClass()->isDependentType())
(void)isCompleteType(Loc, T);
} else {
// C99 6.7.5.2p1: If the element type is an incomplete or function type,
// reject it (e.g. void ary[7], struct foo ary[7], void ary[7]())
if (RequireCompleteSizedType(Loc, T,
diag::err_array_incomplete_or_sizeless_type))
return QualType();
}
if (T->isSizelessType()) {
Diag(Loc, diag::err_array_incomplete_or_sizeless_type) << 1 << T;
return QualType();
}
if (T->isFunctionType()) {
Diag(Loc, diag::err_illegal_decl_array_of_functions)
<< getPrintableNameForEntity(Entity) << T;
return QualType();
}
if (const RecordType *EltTy = T->getAs<RecordType>()) {
// If the element type is a struct or union that contains a flexible array
// member, accept it as a GNU extension: C99 6.7.2.1p2.
if (EltTy->getDecl()->hasFlexibleArrayMember())
Diag(Loc, diag::ext_flexible_array_in_array) << T;
} else if (T->isObjCObjectType()) {
Diag(Loc, diag::err_objc_array_of_interfaces) << T;
return QualType();
}
// Do placeholder conversions on the array size expression.
if (ArraySize && ArraySize->hasPlaceholderType()) {
ExprResult Result = CheckPlaceholderExpr(ArraySize);
if (Result.isInvalid()) return QualType();
ArraySize = Result.get();
}
// Do lvalue-to-rvalue conversions on the array size expression.
if (ArraySize && !ArraySize->isPRValue()) {
ExprResult Result = DefaultLvalueConversion(ArraySize);
if (Result.isInvalid())
return QualType();
ArraySize = Result.get();
}
// C99 6.7.5.2p1: The size expression shall have integer type.
// C++11 allows contextual conversions to such types.
if (!getLangOpts().CPlusPlus11 &&
ArraySize && !ArraySize->isTypeDependent() &&
!ArraySize->getType()->isIntegralOrUnscopedEnumerationType()) {
Diag(ArraySize->getBeginLoc(), diag::err_array_size_non_int)
<< ArraySize->getType() << ArraySize->getSourceRange();
return QualType();
}
// VLAs always produce at least a -Wvla diagnostic, sometimes an error.
unsigned VLADiag;
bool VLAIsError;
if (getLangOpts().OpenCL) {
// OpenCL v1.2 s6.9.d: variable length arrays are not supported.
VLADiag = diag::err_opencl_vla;
VLAIsError = true;
} else if (getLangOpts().C99) {
VLADiag = diag::warn_vla_used;
VLAIsError = false;
} else if (isSFINAEContext()) {
VLADiag = diag::err_vla_in_sfinae;
VLAIsError = true;
} else if (getLangOpts().OpenMP && isInOpenMPTaskUntiedContext()) {
VLADiag = diag::err_openmp_vla_in_task_untied;
VLAIsError = true;
} else {
VLADiag = diag::ext_vla;
VLAIsError = false;
}
llvm::APSInt ConstVal(Context.getTypeSize(Context.getSizeType()));
if (!ArraySize) {
if (ASM == ArrayType::Star) {
Diag(Loc, VLADiag);
if (VLAIsError)
return QualType();
T = Context.getVariableArrayType(T, nullptr, ASM, Quals, Brackets);
} else {
T = Context.getIncompleteArrayType(T, ASM, Quals);
}
} else if (ArraySize->isTypeDependent() || ArraySize->isValueDependent()) {
T = Context.getDependentSizedArrayType(T, ArraySize, ASM, Quals, Brackets);
} else {
ExprResult R =
checkArraySize(*this, ArraySize, ConstVal, VLADiag, VLAIsError);
if (R.isInvalid())
return QualType();
if (!R.isUsable()) {
// C99: an array with a non-ICE size is a VLA. We accept any expression
// that we can fold to a non-zero positive value as a non-VLA as an
// extension.
T = Context.getVariableArrayType(T, ArraySize, ASM, Quals, Brackets);
} else if (!T->isDependentType() && !T->isIncompleteType() &&
!T->isConstantSizeType()) {
// C99: an array with an element type that has a non-constant-size is a
// VLA.
// FIXME: Add a note to explain why this isn't a VLA.
Diag(Loc, VLADiag);
if (VLAIsError)
return QualType();
T = Context.getVariableArrayType(T, ArraySize, ASM, Quals, Brackets);
} else {
// C99 6.7.5.2p1: If the expression is a constant expression, it shall
// have a value greater than zero.
// In C++, this follows from narrowing conversions being disallowed.
if (ConstVal.isSigned() && ConstVal.isNegative()) {
if (Entity)
Diag(ArraySize->getBeginLoc(), diag::err_decl_negative_array_size)
<< getPrintableNameForEntity(Entity)
<< ArraySize->getSourceRange();
else
Diag(ArraySize->getBeginLoc(),
diag::err_typecheck_negative_array_size)
<< ArraySize->getSourceRange();
return QualType();
}
if (ConstVal == 0) {
// GCC accepts zero-sized static arrays. We allow them when
// we're not in a SFINAE context.
Diag(ArraySize->getBeginLoc(),
isSFINAEContext() ? diag::err_typecheck_zero_array_size
: diag::ext_typecheck_zero_array_size)
<< 0 << ArraySize->getSourceRange();
}
// Is the array too large?
unsigned ActiveSizeBits =
(!T->isDependentType() && !T->isVariablyModifiedType() &&
!T->isIncompleteType() && !T->isUndeducedType())
? ConstantArrayType::getNumAddressingBits(Context, T, ConstVal)
: ConstVal.getActiveBits();
if (ActiveSizeBits > ConstantArrayType::getMaxSizeBits(Context)) {
Diag(ArraySize->getBeginLoc(), diag::err_array_too_large)
<< toString(ConstVal, 10) << ArraySize->getSourceRange();
return QualType();
}
T = Context.getConstantArrayType(T, ConstVal, ArraySize, ASM, Quals);
}
}
if (T->isVariableArrayType() && !Context.getTargetInfo().isVLASupported()) {
// CUDA device code and some other targets don't support VLAs.
targetDiag(Loc, (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
? diag::err_cuda_vla
: diag::err_vla_unsupported)
<< ((getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
? CurrentCUDATarget()
: CFT_InvalidTarget);
}
// If this is not C99, diagnose array size modifiers on non-VLAs.
if (!getLangOpts().C99 && !T->isVariableArrayType() &&
(ASM != ArrayType::Normal || Quals != 0)) {
Diag(Loc, getLangOpts().CPlusPlus ? diag::err_c99_array_usage_cxx
: diag::ext_c99_array_usage)
<< ASM;
}
// OpenCL v2.0 s6.12.5 - Arrays of blocks are not supported.
// OpenCL v2.0 s6.16.13.1 - Arrays of pipe type are not supported.
// OpenCL v2.0 s6.9.b - Arrays of image/sampler type are not supported.
if (getLangOpts().OpenCL) {
const QualType ArrType = Context.getBaseElementType(T);
if (ArrType->isBlockPointerType() || ArrType->isPipeType() ||
ArrType->isSamplerT() || ArrType->isImageType()) {
Diag(Loc, diag::err_opencl_invalid_type_array) << ArrType;
return QualType();
}
}
return T;
}
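// Illustrative declarations (not part of the original source) exercising
// the checks in BuildArrayType:
//
//   int &r[4];    // error (C++): err_illegal_decl_array_of_references
//   void v[4];    // error: array of an incomplete type
//   int f[4]();   // error: err_illegal_decl_array_of_functions
//   int z[0];     // GNU extension: ext_typecheck_zero_array_size
//   int neg[-1];  // error: negative array size
//   int vla[n];   // VLA (n non-constant): warn_vla_used in C99, ext_vla
//                 // in C++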
QualType Sema::BuildVectorType(QualType CurType, Expr *SizeExpr,
SourceLocation AttrLoc) {
// The base type must be integer (not Boolean or enumeration) or float, and
// can't already be a vector.
if ((!CurType->isDependentType() &&
(!CurType->isBuiltinType() || CurType->isBooleanType() ||
(!CurType->isIntegerType() && !CurType->isRealFloatingType()))) ||
CurType->isArrayType()) {
Diag(AttrLoc, diag::err_attribute_invalid_vector_type) << CurType;
return QualType();
}
if (SizeExpr->isTypeDependent() || SizeExpr->isValueDependent())
return Context.getDependentVectorType(CurType, SizeExpr, AttrLoc,
VectorType::GenericVector);
Optional<llvm::APSInt> VecSize = SizeExpr->getIntegerConstantExpr(Context);
if (!VecSize) {
Diag(AttrLoc, diag::err_attribute_argument_type)
<< "vector_size" << AANT_ArgumentIntegerConstant
<< SizeExpr->getSourceRange();
return QualType();
}
if (CurType->isDependentType())
return Context.getDependentVectorType(CurType, SizeExpr, AttrLoc,
VectorType::GenericVector);
// VecSize is specified in bytes; convert to bits.
if (!VecSize->isIntN(61)) {
// The bit size would overflow a uint64_t.
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< SizeExpr->getSourceRange() << "vector";
return QualType();
}
uint64_t VectorSizeBits = VecSize->getZExtValue() * 8;
unsigned TypeSize = static_cast<unsigned>(Context.getTypeSize(CurType));
if (VectorSizeBits == 0) {
Diag(AttrLoc, diag::err_attribute_zero_size)
<< SizeExpr->getSourceRange() << "vector";
return QualType();
}
if (!TypeSize || VectorSizeBits % TypeSize) {
Diag(AttrLoc, diag::err_attribute_invalid_size)
<< SizeExpr->getSourceRange();
return QualType();
}
if (VectorSizeBits / TypeSize > std::numeric_limits<uint32_t>::max()) {
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< SizeExpr->getSourceRange() << "vector";
return QualType();
}
return Context.getVectorType(CurType, VectorSizeBits / TypeSize,
VectorType::GenericVector);
}
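// An illustrative use (not from the original source) of the GCC-style
// vector_size attribute handled above; its argument is a size in bytes.
// Assuming a target with a 4-byte int:
//
//   typedef int v4si __attribute__((vector_size(16)));  // 16 bytes of
//                                        // 4-byte ints -> 4 elements
//   typedef int bad __attribute__((vector_size(6)));    // error: 48 bits
//                                        // is not a multiple of the
//                                        // 32-bit element size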
/// Build an ext-vector type.
///
/// Run the required checks for the extended vector type.
QualType Sema::BuildExtVectorType(QualType T, Expr *ArraySize,
SourceLocation AttrLoc) {
// Unlike gcc's vector_size attribute, we do not allow vectors to be defined
// in conjunction with complex types (pointers, arrays, functions, etc.).
//
// Additionally, OpenCL prohibits vectors of booleans (they're considered a
// reserved data type under OpenCL v2.0 s6.1.4); we don't support selects
// on bitvectors, and we have no well-defined ABI for bitvectors, so vectors
// of bool aren't allowed.
//
// We explicitly allow bool elements in ext_vector_type for C/C++.
bool IsNoBoolVecLang = getLangOpts().OpenCL || getLangOpts().OpenCLCPlusPlus;
if ((!T->isDependentType() && !T->isIntegerType() &&
!T->isRealFloatingType()) ||
(IsNoBoolVecLang && T->isBooleanType())) {
Diag(AttrLoc, diag::err_attribute_invalid_vector_type) << T;
return QualType();
}
if (!ArraySize->isTypeDependent() && !ArraySize->isValueDependent()) {
Optional<llvm::APSInt> vecSize = ArraySize->getIntegerConstantExpr(Context);
if (!vecSize) {
Diag(AttrLoc, diag::err_attribute_argument_type)
<< "ext_vector_type" << AANT_ArgumentIntegerConstant
<< ArraySize->getSourceRange();
return QualType();
}
if (!vecSize->isIntN(32)) {
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< ArraySize->getSourceRange() << "vector";
return QualType();
}
// Unlike gcc's vector_size attribute, the size is specified as the
// number of elements, not the number of bytes.
unsigned vectorSize = static_cast<unsigned>(vecSize->getZExtValue());
if (vectorSize == 0) {
Diag(AttrLoc, diag::err_attribute_zero_size)
<< ArraySize->getSourceRange() << "vector";
return QualType();
}
return Context.getExtVectorType(T, vectorSize);
}
return Context.getDependentSizedExtVectorType(T, ArraySize, AttrLoc);
}
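// Illustrative ext_vector_type usage (not part of the original source); the
// attribute argument is an element count, not a byte size:
//
//   typedef float float4 __attribute__((ext_vector_type(4)));  // 4 floats
//   typedef bool  bool8  __attribute__((ext_vector_type(8)));  // OK in C
//                                     // and C++, rejected in OpenCL modes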
QualType Sema::BuildMatrixType(QualType ElementTy, Expr *NumRows, Expr *NumCols,
SourceLocation AttrLoc) {
assert(Context.getLangOpts().MatrixTypes &&
"Should never build a matrix type when it is disabled");
// Check element type, if it is not dependent.
if (!ElementTy->isDependentType() &&
!MatrixType::isValidElementType(ElementTy)) {
Diag(AttrLoc, diag::err_attribute_invalid_matrix_type) << ElementTy;
return QualType();
}
if (NumRows->isTypeDependent() || NumCols->isTypeDependent() ||
NumRows->isValueDependent() || NumCols->isValueDependent())
return Context.getDependentSizedMatrixType(ElementTy, NumRows, NumCols,
AttrLoc);
Optional<llvm::APSInt> ValueRows = NumRows->getIntegerConstantExpr(Context);
Optional<llvm::APSInt> ValueColumns =
NumCols->getIntegerConstantExpr(Context);
auto const RowRange = NumRows->getSourceRange();
auto const ColRange = NumCols->getSourceRange();
// Both the row and column expressions are invalid.
if (!ValueRows && !ValueColumns) {
Diag(AttrLoc, diag::err_attribute_argument_type)
<< "matrix_type" << AANT_ArgumentIntegerConstant << RowRange
<< ColRange;
return QualType();
}
// Only the row expression is invalid.
if (!ValueRows) {
Diag(AttrLoc, diag::err_attribute_argument_type)
<< "matrix_type" << AANT_ArgumentIntegerConstant << RowRange;
return QualType();
}
// Only the column expression is invalid.
if (!ValueColumns) {
Diag(AttrLoc, diag::err_attribute_argument_type)
<< "matrix_type" << AANT_ArgumentIntegerConstant << ColRange;
return QualType();
}
// Check the matrix dimensions.
unsigned MatrixRows = static_cast<unsigned>(ValueRows->getZExtValue());
unsigned MatrixColumns = static_cast<unsigned>(ValueColumns->getZExtValue());
if (MatrixRows == 0 && MatrixColumns == 0) {
Diag(AttrLoc, diag::err_attribute_zero_size)
<< "matrix" << RowRange << ColRange;
return QualType();
}
if (MatrixRows == 0) {
Diag(AttrLoc, diag::err_attribute_zero_size) << "matrix" << RowRange;
return QualType();
}
if (MatrixColumns == 0) {
Diag(AttrLoc, diag::err_attribute_zero_size) << "matrix" << ColRange;
return QualType();
}
if (!ConstantMatrixType::isDimensionValid(MatrixRows)) {
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< RowRange << "matrix row";
return QualType();
}
if (!ConstantMatrixType::isDimensionValid(MatrixColumns)) {
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< ColRange << "matrix column";
return QualType();
}
return Context.getConstantMatrixType(ElementTy, MatrixRows, MatrixColumns);
}
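// Illustrative matrix_type usage (not from the original source), assuming
// matrix types are enabled (e.g. with -fenable-matrix):
//
//   typedef float m4x4_t __attribute__((matrix_type(4, 4)));  // 4x4 floats
//   typedef float bad_t __attribute__((matrix_type(0, 4)));   // error:
//                                            // err_attribute_zero_size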
bool Sema::CheckFunctionReturnType(QualType T, SourceLocation Loc) {
if (T->isArrayType() || T->isFunctionType()) {
Diag(Loc, diag::err_func_returning_array_function)
<< T->isFunctionType() << T;
return true;
}
// Functions cannot return half FP.
if (T->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 1 <<
FixItHint::CreateInsertion(Loc, "*");
return true;
}
// Methods cannot return interface types. All ObjC objects are
// passed by reference.
if (T->isObjCObjectType()) {
Diag(Loc, diag::err_object_cannot_be_passed_returned_by_value)
<< 0 << T << FixItHint::CreateInsertion(Loc, "*");
return true;
}
if (T.hasNonTrivialToPrimitiveDestructCUnion() ||
T.hasNonTrivialToPrimitiveCopyCUnion())
checkNonTrivialCUnion(T, Loc, NTCUC_FunctionReturn,
NTCUK_Destruct|NTCUK_Copy);
// C++2a [dcl.fct]p12:
// A volatile-qualified return type is deprecated
if (T.isVolatileQualified() && getLangOpts().CPlusPlus20)
Diag(Loc, diag::warn_deprecated_volatile_return) << T;
return false;
}
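// Examples (illustrative, not in the original source) of return types this
// check rejects or warns about:
//
//   int f()[4];        // error: function returning an array
//   int g()();         // error: function returning a function
//   volatile int h();  // C++20: warn_deprecated_volatile_return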
/// Check the extended parameter information. Most of the necessary
/// checking should occur when applying the parameter attribute; the
/// only other checks required are positional restrictions.
static void checkExtParameterInfos(Sema &S, ArrayRef<QualType> paramTypes,
const FunctionProtoType::ExtProtoInfo &EPI,
llvm::function_ref<SourceLocation(unsigned)> getParamLoc) {
assert(EPI.ExtParameterInfos && "shouldn't get here without param infos");
bool emittedError = false;
auto actualCC = EPI.ExtInfo.getCC();
enum class RequiredCC { OnlySwift, SwiftOrSwiftAsync };
auto checkCompatible = [&](unsigned paramIndex, RequiredCC required) {
bool isCompatible =
(required == RequiredCC::OnlySwift)
? (actualCC == CC_Swift)
: (actualCC == CC_Swift || actualCC == CC_SwiftAsync);
if (isCompatible || emittedError)
return;
S.Diag(getParamLoc(paramIndex), diag::err_swift_param_attr_not_swiftcall)
<< getParameterABISpelling(EPI.ExtParameterInfos[paramIndex].getABI())
<< (required == RequiredCC::OnlySwift);
emittedError = true;
};
for (size_t paramIndex = 0, numParams = paramTypes.size();
paramIndex != numParams; ++paramIndex) {
switch (EPI.ExtParameterInfos[paramIndex].getABI()) {
// Nothing interesting to check for ordinary-ABI parameters.
case ParameterABI::Ordinary:
continue;
// swift_indirect_result parameters must be a prefix of the function
// arguments.
case ParameterABI::SwiftIndirectResult:
checkCompatible(paramIndex, RequiredCC::SwiftOrSwiftAsync);
if (paramIndex != 0 &&
EPI.ExtParameterInfos[paramIndex - 1].getABI()
!= ParameterABI::SwiftIndirectResult) {
S.Diag(getParamLoc(paramIndex),
diag::err_swift_indirect_result_not_first);
}
continue;
case ParameterABI::SwiftContext:
checkCompatible(paramIndex, RequiredCC::SwiftOrSwiftAsync);
continue;
// SwiftAsyncContext is not limited to swiftasynccall functions.
case ParameterABI::SwiftAsyncContext:
continue;
// swift_error parameters must be preceded by a swift_context parameter.
case ParameterABI::SwiftErrorResult:
checkCompatible(paramIndex, RequiredCC::OnlySwift);
if (paramIndex == 0 ||
EPI.ExtParameterInfos[paramIndex - 1].getABI() !=
ParameterABI::SwiftContext) {
S.Diag(getParamLoc(paramIndex),
diag::err_swift_error_result_not_after_swift_context);
}
continue;
}
llvm_unreachable("bad ABI kind");
}
}
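// A sketch (not from the original source) of the positional rules enforced
// above, written with clang's Swift parameter-ABI attributes:
//
//   __attribute__((swiftcall)) void ok(
//       __attribute__((swift_context)) void *ctx,
//       __attribute__((swift_error_result)) void **err);
//   // OK: the error result immediately follows a swift_context parameter.
//
//   __attribute__((swiftcall)) void bad(
//       int x,
//       __attribute__((swift_error_result)) void **err);
//   // error: err_swift_error_result_not_after_swift_context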
QualType Sema::BuildFunctionType(QualType T,
MutableArrayRef<QualType> ParamTypes,
SourceLocation Loc, DeclarationName Entity,
const FunctionProtoType::ExtProtoInfo &EPI) {
bool Invalid = false;
Invalid |= CheckFunctionReturnType(T, Loc);
for (unsigned Idx = 0, Cnt = ParamTypes.size(); Idx < Cnt; ++Idx) {
// FIXME: Loc is too imprecise here; we should use proper locations for args.
QualType ParamType = Context.getAdjustedParameterType(ParamTypes[Idx]);
if (ParamType->isVoidType()) {
Diag(Loc, diag::err_param_with_void_type);
Invalid = true;
} else if (ParamType->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
// Disallow half FP arguments.
Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 0 <<
FixItHint::CreateInsertion(Loc, "*");
Invalid = true;
}
// C++2a [dcl.fct]p4:
// A parameter with volatile-qualified type is deprecated
if (ParamType.isVolatileQualified() && getLangOpts().CPlusPlus20)
Diag(Loc, diag::warn_deprecated_volatile_param) << ParamType;
ParamTypes[Idx] = ParamType;
}
if (EPI.ExtParameterInfos) {
checkExtParameterInfos(*this, ParamTypes, EPI,
[=](unsigned i) { return Loc; });
}
if (EPI.ExtInfo.getProducesResult()) {
// This is just a warning, so we can't fail to build if we see it.
checkNSReturnsRetainedReturnType(Loc, T);
}
if (Invalid)
return QualType();
return Context.getFunctionType(T, ParamTypes, EPI);
}
/// Build a member pointer type \c T Class::*.
///
/// \param T the type to which the member pointer refers.
/// \param Class the class type into which the member pointer points.
/// \param Loc the location where this type begins
/// \param Entity the name of the entity that will have this member pointer type
///
/// \returns a member pointer type, if successful, or a NULL type if there was
/// an error.
QualType Sema::BuildMemberPointerType(QualType T, QualType Class,
SourceLocation Loc,
DeclarationName Entity) {
// Verify that we're not building a pointer to pointer to function with
// exception specification.
if (CheckDistantExceptionSpec(T)) {
Diag(Loc, diag::err_distant_exception_spec);
return QualType();
}
// C++ 8.3.3p3: A pointer to member shall not point to ... a member
// with reference type, or "cv void."
if (T->isReferenceType()) {
Diag(Loc, diag::err_illegal_decl_mempointer_to_reference)
<< getPrintableNameForEntity(Entity) << T;
return QualType();
}
if (T->isVoidType()) {
Diag(Loc, diag::err_illegal_decl_mempointer_to_void)
<< getPrintableNameForEntity(Entity);
return QualType();
}
if (!Class->isDependentType() && !Class->isRecordType()) {
Diag(Loc, diag::err_mempointer_in_nonclass_type) << Class;
return QualType();
}
if (T->isFunctionType() && getLangOpts().OpenCL &&
!getOpenCLOptions().isAvailableOption("__cl_clang_function_pointers",
getLangOpts())) {
Diag(Loc, diag::err_opencl_function_pointer) << /*pointer*/ 0;
return QualType();
}
if (getLangOpts().HLSL) {
Diag(Loc, diag::err_hlsl_pointers_unsupported) << 0;
return QualType();
}
// Adjust the default free function calling convention to the default method
// calling convention.
bool IsCtorOrDtor =
(Entity.getNameKind() == DeclarationName::CXXConstructorName) ||
(Entity.getNameKind() == DeclarationName::CXXDestructorName);
if (T->isFunctionType())
adjustMemberFunctionCC(T, /*IsStatic=*/false, IsCtorOrDtor, Loc);
return Context.getMemberPointerType(T, Class.getTypePtr());
}
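// Illustrative member pointer declarations (not part of the original source):
//
//   struct S { int m; void f(); };
//   int S::*pm = &S::m;       // OK: pointer to data member
//   void (S::*pf)() = &S::f;  // OK: pointer to member function
//   int &S::*pr;              // error: member pointer to a reference
//   void S::*pv;              // error: member pointer to cv void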
/// Build a block pointer type.
///
/// \param T The type to which we'll be building a block pointer.
///
/// \param Loc The source location, used for diagnostics.
///
/// \param Entity The name of the entity that involves the block pointer
/// type, if known.
///
/// \returns A suitable block pointer type, if there are no
/// errors. Otherwise, returns a NULL type.
QualType Sema::BuildBlockPointerType(QualType T,
SourceLocation Loc,
DeclarationName Entity) {
if (!T->isFunctionType()) {
Diag(Loc, diag::err_nonfunction_block_type);
return QualType();
}
if (checkQualifiedFunction(*this, T, Loc, QFK_BlockPointer))
return QualType();
if (getLangOpts().OpenCL)
T = deduceOpenCLPointeeAddrSpace(*this, T);
return Context.getBlockPointerType(T);
}
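// Illustrative block pointer declarations (not part of the original source,
// assuming blocks are enabled with -fblocks):
//
//   void (^done)(int status);  // OK: block pointer to a function type
//   int ^bad;                  // error: err_nonfunction_block_type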
QualType Sema::GetTypeFromParser(ParsedType Ty, TypeSourceInfo **TInfo) {
QualType QT = Ty.get();
if (QT.isNull()) {
if (TInfo) *TInfo = nullptr;
return QualType();
}
TypeSourceInfo *DI = nullptr;
if (const LocInfoType *LIT = dyn_cast<LocInfoType>(QT)) {
QT = LIT->getType();
DI = LIT->getTypeSourceInfo();
}
if (TInfo) *TInfo = DI;
return QT;
}
static void transferARCOwnershipToDeclaratorChunk(TypeProcessingState &state,
Qualifiers::ObjCLifetime ownership,
unsigned chunkIndex);
/// Given that this is the declaration of a parameter under ARC,
/// attempt to infer attributes and such for pointer-to-whatever
/// types.
static void inferARCWriteback(TypeProcessingState &state,
QualType &declSpecType) {
Sema &S = state.getSema();
Declarator &declarator = state.getDeclarator();
// TODO: should we care about decl qualifiers?
// Check whether the declarator has the expected form. We walk
// from the inside out in order to make the block logic work.
unsigned outermostPointerIndex = 0;
bool isBlockPointer = false;
unsigned numPointers = 0;
for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) {
unsigned chunkIndex = i;
DeclaratorChunk &chunk = declarator.getTypeObject(chunkIndex);
switch (chunk.Kind) {
case DeclaratorChunk::Paren:
// Ignore parens.
break;
case DeclaratorChunk::Reference:
case DeclaratorChunk::Pointer:
// Count the number of pointers. Treat references
// interchangeably as pointers; if they're mis-ordered, normal
// type building will discover that.
outermostPointerIndex = chunkIndex;
numPointers++;
break;
case DeclaratorChunk::BlockPointer:
// If we have a pointer to block pointer, that's an acceptable
// indirect reference; anything else is not an application of
// the rules.
if (numPointers != 1) return;
numPointers++;
outermostPointerIndex = chunkIndex;
isBlockPointer = true;
// We don't care about pointer structure in return values here.
goto done;
case DeclaratorChunk::Array: // suppress if written (id[])?
case DeclaratorChunk::Function:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
return;
}
}
done:
// If we have *one* pointer, then we want to throw the qualifier on
// the declaration-specifiers, which means that it needs to be a
// retainable object type.
if (numPointers == 1) {
// If it's not a retainable object type, the rule doesn't apply.
if (!declSpecType->isObjCRetainableType()) return;
// If it already has lifetime, don't do anything.
if (declSpecType.getObjCLifetime()) return;
// Otherwise, modify the type in-place.
Qualifiers qs;
if (declSpecType->isObjCARCImplicitlyUnretainedType())
qs.addObjCLifetime(Qualifiers::OCL_ExplicitNone);
else
qs.addObjCLifetime(Qualifiers::OCL_Autoreleasing);
declSpecType = S.Context.getQualifiedType(declSpecType, qs);
// If we have *two* pointers, then we want to throw the qualifier on
// the outermost pointer.
} else if (numPointers == 2) {
// If we don't have a block pointer, we need to check whether the
// declaration-specifiers gave us something that will turn into a
// retainable object pointer after we slap the first pointer on it.
if (!isBlockPointer && !declSpecType->isObjCObjectType())
return;
// Look for an explicit lifetime attribute there.
DeclaratorChunk &chunk = declarator.getTypeObject(outermostPointerIndex);
if (chunk.Kind != DeclaratorChunk::Pointer &&
chunk.Kind != DeclaratorChunk::BlockPointer)
return;
for (const ParsedAttr &AL : chunk.getAttrs())
if (AL.getKind() == ParsedAttr::AT_ObjCOwnership)
return;
transferARCOwnershipToDeclaratorChunk(state, Qualifiers::OCL_Autoreleasing,
outermostPointerIndex);
// Any other number of pointers/references does not trigger the rule.
} else return;
// TODO: mark whether we did this inference?
}
void Sema::diagnoseIgnoredQualifiers(unsigned DiagID, unsigned Quals,
SourceLocation FallbackLoc,
SourceLocation ConstQualLoc,
SourceLocation VolatileQualLoc,
SourceLocation RestrictQualLoc,
SourceLocation AtomicQualLoc,
SourceLocation UnalignedQualLoc) {
if (!Quals)
return;
struct Qual {
const char *Name;
unsigned Mask;
SourceLocation Loc;
} const QualKinds[5] = {
{ "const", DeclSpec::TQ_const, ConstQualLoc },
{ "volatile", DeclSpec::TQ_volatile, VolatileQualLoc },
{ "restrict", DeclSpec::TQ_restrict, RestrictQualLoc },
{ "__unaligned", DeclSpec::TQ_unaligned, UnalignedQualLoc },
{ "_Atomic", DeclSpec::TQ_atomic, AtomicQualLoc }
};
SmallString<32> QualStr;
unsigned NumQuals = 0;
SourceLocation Loc;
FixItHint FixIts[5];
// Build a string naming the redundant qualifiers.
for (auto &E : QualKinds) {
if (Quals & E.Mask) {
if (!QualStr.empty()) QualStr += ' ';
QualStr += E.Name;
// If we have a location for the qualifier, offer a fixit.
SourceLocation QualLoc = E.Loc;
if (QualLoc.isValid()) {
FixIts[NumQuals] = FixItHint::CreateRemoval(QualLoc);
if (Loc.isInvalid() ||
getSourceManager().isBeforeInTranslationUnit(QualLoc, Loc))
Loc = QualLoc;
}
++NumQuals;
}
}
Diag(Loc.isInvalid() ? FallbackLoc : Loc, DiagID)
<< QualStr << NumQuals << FixIts[0] << FixIts[1] << FixIts[2] << FixIts[3];
}
// Diagnose pointless type qualifiers on the return type of a function.
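// Illustrative sketch (hypothetical declaration): in 'const int f();' the
// 'const' on the return type has no effect, so it is reported via
// warn_qual_return_type, with a removal fix-it when a source location for the
// qualifier is available.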
static void diagnoseRedundantReturnTypeQualifiers(Sema &S, QualType RetTy,
Declarator &D,
unsigned FunctionChunkIndex) {
const DeclaratorChunk::FunctionTypeInfo &FTI =
D.getTypeObject(FunctionChunkIndex).Fun;
if (FTI.hasTrailingReturnType()) {
S.diagnoseIgnoredQualifiers(diag::warn_qual_return_type,
RetTy.getLocalCVRQualifiers(),
FTI.getTrailingReturnTypeLoc());
return;
}
for (unsigned OuterChunkIndex = FunctionChunkIndex + 1,
End = D.getNumTypeObjects();
OuterChunkIndex != End; ++OuterChunkIndex) {
DeclaratorChunk &OuterChunk = D.getTypeObject(OuterChunkIndex);
switch (OuterChunk.Kind) {
case DeclaratorChunk::Paren:
continue;
case DeclaratorChunk::Pointer: {
DeclaratorChunk::PointerTypeInfo &PTI = OuterChunk.Ptr;
S.diagnoseIgnoredQualifiers(
diag::warn_qual_return_type,
PTI.TypeQuals,
SourceLocation(),
PTI.ConstQualLoc,
PTI.VolatileQualLoc,
PTI.RestrictQualLoc,
PTI.AtomicQualLoc,
PTI.UnalignedQualLoc);
return;
}
case DeclaratorChunk::Function:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Reference:
case DeclaratorChunk::Array:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
// FIXME: We can't currently provide an accurate source location and a
// fix-it hint for these.
unsigned AtomicQual = RetTy->isAtomicType() ? DeclSpec::TQ_atomic : 0;
S.diagnoseIgnoredQualifiers(diag::warn_qual_return_type,
RetTy.getCVRQualifiers() | AtomicQual,
D.getIdentifierLoc());
return;
}
llvm_unreachable("unknown declarator chunk kind");
}
// If the qualifiers come from a conversion function type, don't diagnose
// them -- they're not necessarily redundant, since such a conversion
// operator can be explicitly called as "x.operator const int()".
if (D.getName().getKind() == UnqualifiedIdKind::IK_ConversionFunctionId)
return;
// Just parens all the way out to the decl specifiers. Diagnose any qualifiers
// which are present there.
S.diagnoseIgnoredQualifiers(diag::warn_qual_return_type,
D.getDeclSpec().getTypeQualifiers(),
D.getIdentifierLoc(),
D.getDeclSpec().getConstSpecLoc(),
D.getDeclSpec().getVolatileSpecLoc(),
D.getDeclSpec().getRestrictSpecLoc(),
D.getDeclSpec().getAtomicSpecLoc(),
D.getDeclSpec().getUnalignedSpecLoc());
}
static std::pair<QualType, TypeSourceInfo *>
InventTemplateParameter(TypeProcessingState &state, QualType T,
TypeSourceInfo *TrailingTSI, AutoType *Auto,
InventedTemplateParameterInfo &Info) {
Sema &S = state.getSema();
Declarator &D = state.getDeclarator();
const unsigned TemplateParameterDepth = Info.AutoTemplateParameterDepth;
const unsigned AutoParameterPosition = Info.TemplateParams.size();
const bool IsParameterPack = D.hasEllipsis();
// If auto is mentioned in a lambda parameter or abbreviated function
// template context, convert it to a template parameter type.
// Create the TemplateTypeParmDecl here to retrieve the corresponding
// template parameter type. Template parameters are temporarily added
// to the TU until the associated TemplateDecl is created.
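// Illustrative sketch (hypothetical declarations): in C++20, an abbreviated
// function template such as 'void f(auto x);' is handled here roughly as if
// it had been written 'template <typename T> void f(T x);'; a constrained
// form like 'void f(std::integral auto x);' additionally attaches the type
// constraint to the invented parameter below.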
TemplateTypeParmDecl *InventedTemplateParam =
TemplateTypeParmDecl::Create(
S.Context, S.Context.getTranslationUnitDecl(),
/*KeyLoc=*/D.getDeclSpec().getTypeSpecTypeLoc(),
/*NameLoc=*/D.getIdentifierLoc(),
TemplateParameterDepth, AutoParameterPosition,
S.InventAbbreviatedTemplateParameterTypeName(
D.getIdentifier(), AutoParameterPosition), false,
IsParameterPack, /*HasTypeConstraint=*/Auto->isConstrained());
InventedTemplateParam->setImplicit();
Info.TemplateParams.push_back(InventedTemplateParam);
// Attach type constraints to the new parameter.
if (Auto->isConstrained()) {
if (TrailingTSI) {
// The 'auto' appears in a trailing return type we've already built;
// extract its type constraints to attach to the template parameter.
AutoTypeLoc AutoLoc = TrailingTSI->getTypeLoc().getContainedAutoTypeLoc();
TemplateArgumentListInfo TAL(AutoLoc.getLAngleLoc(), AutoLoc.getRAngleLoc());
bool Invalid = false;
for (unsigned Idx = 0; Idx < AutoLoc.getNumArgs(); ++Idx) {
if (D.getEllipsisLoc().isInvalid() && !Invalid &&
S.DiagnoseUnexpandedParameterPack(AutoLoc.getArgLoc(Idx),
Sema::UPPC_TypeConstraint))
Invalid = true;
TAL.addArgument(AutoLoc.getArgLoc(Idx));
}
if (!Invalid) {
S.AttachTypeConstraint(
AutoLoc.getNestedNameSpecifierLoc(), AutoLoc.getConceptNameInfo(),
AutoLoc.getNamedConcept(),
AutoLoc.hasExplicitTemplateArgs() ? &TAL : nullptr,
InventedTemplateParam, D.getEllipsisLoc());
}
} else {
// The 'auto' appears in the decl-specifiers; we've not finished forming
// TypeSourceInfo for it yet.
TemplateIdAnnotation *TemplateId = D.getDeclSpec().getRepAsTemplateId();
TemplateArgumentListInfo TemplateArgsInfo;
bool Invalid = false;
if (TemplateId->LAngleLoc.isValid()) {
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
S.translateTemplateArguments(TemplateArgsPtr, TemplateArgsInfo);
if (D.getEllipsisLoc().isInvalid()) {
for (TemplateArgumentLoc Arg : TemplateArgsInfo.arguments()) {
if (S.DiagnoseUnexpandedParameterPack(Arg,
Sema::UPPC_TypeConstraint)) {
Invalid = true;
break;
}
}
}
}
if (!Invalid) {
S.AttachTypeConstraint(
D.getDeclSpec().getTypeSpecScope().getWithLocInContext(S.Context),
DeclarationNameInfo(DeclarationName(TemplateId->Name),
TemplateId->TemplateNameLoc),
cast<ConceptDecl>(TemplateId->Template.get().getAsTemplateDecl()),
TemplateId->LAngleLoc.isValid() ? &TemplateArgsInfo : nullptr,
InventedTemplateParam, D.getEllipsisLoc());
}
}
}
// Replace the 'auto' in the function parameter with this invented
// template type parameter.
// FIXME: Retain some type sugar to indicate that this was written
// as 'auto'?
QualType Replacement(InventedTemplateParam->getTypeForDecl(), 0);
QualType NewT = state.ReplaceAutoType(T, Replacement);
TypeSourceInfo *NewTSI =
TrailingTSI ? S.ReplaceAutoTypeSourceInfo(TrailingTSI, Replacement)
: nullptr;
return {NewT, NewTSI};
}
static TypeSourceInfo *
GetTypeSourceInfoForDeclarator(TypeProcessingState &State,
QualType T, TypeSourceInfo *ReturnTypeInfo);
static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
TypeSourceInfo *&ReturnTypeInfo) {
Sema &SemaRef = state.getSema();
Declarator &D = state.getDeclarator();
QualType T;
ReturnTypeInfo = nullptr;
// The TagDecl owned by the DeclSpec.
TagDecl *OwnedTagDecl = nullptr;
switch (D.getName().getKind()) {
case UnqualifiedIdKind::IK_ImplicitSelfParam:
case UnqualifiedIdKind::IK_OperatorFunctionId:
case UnqualifiedIdKind::IK_Identifier:
case UnqualifiedIdKind::IK_LiteralOperatorId:
case UnqualifiedIdKind::IK_TemplateId:
T = ConvertDeclSpecToType(state);
if (!D.isInvalidType() && D.getDeclSpec().isTypeSpecOwned()) {
OwnedTagDecl = cast<TagDecl>(D.getDeclSpec().getRepAsDecl());
// Owned declaration is embedded in declarator.
OwnedTagDecl->setEmbeddedInDeclarator(true);
}
break;
case UnqualifiedIdKind::IK_ConstructorName:
case UnqualifiedIdKind::IK_ConstructorTemplateId:
case UnqualifiedIdKind::IK_DestructorName:
// Constructors and destructors don't have return types. Use
// "void" instead.
T = SemaRef.Context.VoidTy;
processTypeAttrs(state, T, TAL_DeclSpec,
D.getMutableDeclSpec().getAttributes());
break;
case UnqualifiedIdKind::IK_DeductionGuideName:
// Deduction guides have a trailing return type and no type in their
// decl-specifier sequence. Use a placeholder return type for now.
T = SemaRef.Context.DependentTy;
break;
case UnqualifiedIdKind::IK_ConversionFunctionId:
// The result type of a conversion function is the type that it
// converts to.
T = SemaRef.GetTypeFromParser(D.getName().ConversionFunctionId,
&ReturnTypeInfo);
break;
}
// Note: We don't need to distribute declaration attributes (i.e.
// D.getDeclarationAttributes()) because those are always C++11 attributes,
// and those don't get distributed.
distributeTypeAttrsFromDeclarator(state, T);
// Find the deduced type in this type. Look in the trailing return type if we
// have one, otherwise in the DeclSpec type.
// FIXME: The standard wording doesn't currently describe this.
DeducedType *Deduced = T->getContainedDeducedType();
bool DeducedIsTrailingReturnType = false;
if (Deduced && isa<AutoType>(Deduced) && D.hasTrailingReturnType()) {
QualType T = SemaRef.GetTypeFromParser(D.getTrailingReturnType());
Deduced = T.isNull() ? nullptr : T->getContainedDeducedType();
DeducedIsTrailingReturnType = true;
}
// C++11 [dcl.spec.auto]p5: reject 'auto' if it is not in an allowed context.
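// Illustrative sketch (hypothetical declarations): 'typedef auto T;' or a
// template type argument such as 'std::vector<auto> v;' end up in the
// err_auto_not_allowed diagnostic emitted further below.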
if (Deduced) {
AutoType *Auto = dyn_cast<AutoType>(Deduced);
int Error = -1;
// Is this an 'auto' or 'decltype(auto)' type (as opposed to __auto_type or
// class template argument deduction)?
bool IsCXXAutoType =
(Auto && Auto->getKeyword() != AutoTypeKeyword::GNUAutoType);
bool IsDeducedReturnType = false;
switch (D.getContext()) {
case DeclaratorContext::LambdaExpr:
// Declared return type of a lambda-declarator is implicit and is always
// 'auto'.
break;
case DeclaratorContext::ObjCParameter:
case DeclaratorContext::ObjCResult:
Error = 0;
break;
case DeclaratorContext::RequiresExpr:
Error = 22;
break;
case DeclaratorContext::Prototype:
case DeclaratorContext::LambdaExprParameter: {
InventedTemplateParameterInfo *Info = nullptr;
if (D.getContext() == DeclaratorContext::Prototype) {
// With concepts we allow 'auto' in function parameters.
if (!SemaRef.getLangOpts().CPlusPlus20 || !Auto ||
Auto->getKeyword() != AutoTypeKeyword::Auto) {
Error = 0;
break;
} else if (!SemaRef.getCurScope()->isFunctionDeclarationScope()) {
Error = 21;
break;
}
Info = &SemaRef.InventedParameterInfos.back();
} else {
// In C++14, generic lambdas allow 'auto' in their parameters.
if (!SemaRef.getLangOpts().CPlusPlus14 || !Auto ||
Auto->getKeyword() != AutoTypeKeyword::Auto) {
Error = 16;
break;
}
Info = SemaRef.getCurLambda();
assert(Info && "No LambdaScopeInfo on the stack!");
}
// We'll deal with inventing template parameters for 'auto' in trailing
// return types when we pick up the trailing return type while processing
// the function chunk.
if (!DeducedIsTrailingReturnType)
T = InventTemplateParameter(state, T, nullptr, Auto, *Info).first;
break;
}
case DeclaratorContext::Member: {
if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static ||
D.isFunctionDeclarator())
break;
bool Cxx = SemaRef.getLangOpts().CPlusPlus;
if (isa<ObjCContainerDecl>(SemaRef.CurContext)) {
Error = 6; // Interface member.
} else {
switch (cast<TagDecl>(SemaRef.CurContext)->getTagKind()) {
case TTK_Enum: llvm_unreachable("unhandled tag kind");
case TTK_Struct: Error = Cxx ? 1 : 2; /* Struct member */ break;
case TTK_Union: Error = Cxx ? 3 : 4; /* Union member */ break;
case TTK_Class: Error = 5; /* Class member */ break;
case TTK_Interface: Error = 6; /* Interface member */ break;
}
}
if (D.getDeclSpec().isFriendSpecified())
Error = 20; // Friend type
break;
}
case DeclaratorContext::CXXCatch:
case DeclaratorContext::ObjCCatch:
Error = 7; // Exception declaration
break;
case DeclaratorContext::TemplateParam:
if (isa<DeducedTemplateSpecializationType>(Deduced) &&
!SemaRef.getLangOpts().CPlusPlus20)
Error = 19; // Template parameter (until C++20)
else if (!SemaRef.getLangOpts().CPlusPlus17)
Error = 8; // Template parameter (until C++17)
break;
case DeclaratorContext::BlockLiteral:
Error = 9; // Block literal
break;
case DeclaratorContext::TemplateArg:
// Within a template argument list, a deduced template specialization
// type will be reinterpreted as a template template argument.
if (isa<DeducedTemplateSpecializationType>(Deduced) &&
!D.getNumTypeObjects() &&
D.getDeclSpec().getParsedSpecifiers() == DeclSpec::PQ_TypeSpecifier)
break;
LLVM_FALLTHROUGH;
case DeclaratorContext::TemplateTypeArg:
Error = 10; // Template type argument
break;
case DeclaratorContext::AliasDecl:
case DeclaratorContext::AliasTemplate:
Error = 12; // Type alias
break;
case DeclaratorContext::TrailingReturn:
case DeclaratorContext::TrailingReturnVar:
if (!SemaRef.getLangOpts().CPlusPlus14 || !IsCXXAutoType)
Error = 13; // Function return type
IsDeducedReturnType = true;
break;
case DeclaratorContext::ConversionId:
if (!SemaRef.getLangOpts().CPlusPlus14 || !IsCXXAutoType)
Error = 14; // conversion-type-id
IsDeducedReturnType = true;
break;
case DeclaratorContext::FunctionalCast:
if (isa<DeducedTemplateSpecializationType>(Deduced))
break;
if (SemaRef.getLangOpts().CPlusPlus2b && IsCXXAutoType &&
!Auto->isDecltypeAuto())
break; // auto(x)
LLVM_FALLTHROUGH;
case DeclaratorContext::TypeName:
case DeclaratorContext::Association:
Error = 15; // Generic
break;
case DeclaratorContext::File:
case DeclaratorContext::Block:
case DeclaratorContext::ForInit:
case DeclaratorContext::SelectionInit:
case DeclaratorContext::Condition:
// FIXME: P0091R3 (erroneously) does not permit class template argument
// deduction in conditions, for-init-statements, and other declarations
// that are not simple-declarations.
break;
case DeclaratorContext::CXXNew:
// FIXME: P0091R3 does not permit class template argument deduction here,
// but we follow GCC and allow it anyway.
if (!IsCXXAutoType && !isa<DeducedTemplateSpecializationType>(Deduced))
Error = 17; // 'new' type
break;
case DeclaratorContext::KNRTypeList:
Error = 18; // K&R function parameter
break;
}
if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef)
Error = 11;
// In Objective-C it is an error to use 'auto' on a function declarator
// (and everywhere for '__auto_type').
if (D.isFunctionDeclarator() &&
(!SemaRef.getLangOpts().CPlusPlus11 || !IsCXXAutoType))
Error = 13;
SourceRange AutoRange = D.getDeclSpec().getTypeSpecTypeLoc();
if (D.getName().getKind() == UnqualifiedIdKind::IK_ConversionFunctionId)
AutoRange = D.getName().getSourceRange();
if (Error != -1) {
unsigned Kind;
if (Auto) {
switch (Auto->getKeyword()) {
case AutoTypeKeyword::Auto: Kind = 0; break;
case AutoTypeKeyword::DecltypeAuto: Kind = 1; break;
case AutoTypeKeyword::GNUAutoType: Kind = 2; break;
}
} else {
assert(isa<DeducedTemplateSpecializationType>(Deduced) &&
"unknown auto type");
Kind = 3;
}
auto *DTST = dyn_cast<DeducedTemplateSpecializationType>(Deduced);
TemplateName TN = DTST ? DTST->getTemplateName() : TemplateName();
SemaRef.Diag(AutoRange.getBegin(), diag::err_auto_not_allowed)
<< Kind << Error << (int)SemaRef.getTemplateNameKindForDiagnostics(TN)
<< QualType(Deduced, 0) << AutoRange;
if (auto *TD = TN.getAsTemplateDecl())
SemaRef.Diag(TD->getLocation(), diag::note_template_decl_here);
T = SemaRef.Context.IntTy;
D.setInvalidType(true);
} else if (Auto && D.getContext() != DeclaratorContext::LambdaExpr) {
// If there was a trailing return type, we already got
// warn_cxx98_compat_trailing_return_type in the parser.
SemaRef.Diag(AutoRange.getBegin(),
D.getContext() == DeclaratorContext::LambdaExprParameter
? diag::warn_cxx11_compat_generic_lambda
: IsDeducedReturnType
? diag::warn_cxx11_compat_deduced_return_type
: diag::warn_cxx98_compat_auto_type_specifier)
<< AutoRange;
}
}
if (SemaRef.getLangOpts().CPlusPlus &&
OwnedTagDecl && OwnedTagDecl->isCompleteDefinition()) {
// Check the contexts where C++ forbids the declaration of a new class
// or enumeration in a type-specifier-seq.
unsigned DiagID = 0;
switch (D.getContext()) {
case DeclaratorContext::TrailingReturn:
case DeclaratorContext::TrailingReturnVar:
// Class and enumeration definitions are syntactically not allowed in
// trailing return types.
llvm_unreachable("parser should not have allowed this");
break;
case DeclaratorContext::File:
case DeclaratorContext::Member:
case DeclaratorContext::Block:
case DeclaratorContext::ForInit:
case DeclaratorContext::SelectionInit:
case DeclaratorContext::BlockLiteral:
case DeclaratorContext::LambdaExpr:
// C++11 [dcl.type]p3:
// A type-specifier-seq shall not define a class or enumeration unless
// it appears in the type-id of an alias-declaration (7.1.3) that is not
// the declaration of a template-declaration.
case DeclaratorContext::AliasDecl:
break;
case DeclaratorContext::AliasTemplate:
DiagID = diag::err_type_defined_in_alias_template;
break;
case DeclaratorContext::TypeName:
case DeclaratorContext::FunctionalCast:
case DeclaratorContext::ConversionId:
case DeclaratorContext::TemplateParam:
case DeclaratorContext::CXXNew:
case DeclaratorContext::CXXCatch:
case DeclaratorContext::ObjCCatch:
case DeclaratorContext::TemplateArg:
case DeclaratorContext::TemplateTypeArg:
case DeclaratorContext::Association:
DiagID = diag::err_type_defined_in_type_specifier;
break;
case DeclaratorContext::Prototype:
case DeclaratorContext::LambdaExprParameter:
case DeclaratorContext::ObjCParameter:
case DeclaratorContext::ObjCResult:
case DeclaratorContext::KNRTypeList:
case DeclaratorContext::RequiresExpr:
// C++ [dcl.fct]p6:
// Types shall not be defined in return or parameter types.
DiagID = diag::err_type_defined_in_param_type;
break;
case DeclaratorContext::Condition:
// C++ 6.4p2:
// The type-specifier-seq shall not contain typedef and shall not declare
// a new class or enumeration.
DiagID = diag::err_type_defined_in_condition;
break;
}
if (DiagID != 0) {
SemaRef.Diag(OwnedTagDecl->getLocation(), DiagID)
<< SemaRef.Context.getTypeDeclType(OwnedTagDecl);
D.setInvalidType(true);
}
}
assert(!T.isNull() && "This function should not return a null type");
return T;
}
/// Produce an appropriate diagnostic for an ambiguity between a function
/// declarator and a C++ direct-initializer.
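/// Illustrative sketch (hypothetical declaration): at block scope,
///   T var(T());
/// declares a function taking an unnamed parameter rather than constructing a
/// variable (the classic "most vexing parse"), which is what this warning
/// points out.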
static void warnAboutAmbiguousFunction(Sema &S, Declarator &D,
DeclaratorChunk &DeclType, QualType RT) {
const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
assert(FTI.isAmbiguous && "no direct-initializer / function ambiguity");
// If the return type is void there is no ambiguity.
if (RT->isVoidType())
return;
// An initializer for a non-class type can have at most one argument.
if (!RT->isRecordType() && FTI.NumParams > 1)
return;
// An initializer for a reference must have exactly one argument.
if (RT->isReferenceType() && FTI.NumParams != 1)
return;
// Only warn if this declarator is declaring a function at block scope, and
// doesn't have a storage class (such as 'extern') specified.
if (!D.isFunctionDeclarator() ||
D.getFunctionDefinitionKind() != FunctionDefinitionKind::Declaration ||
!S.CurContext->isFunctionOrMethod() ||
D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_unspecified)
return;
// Inside a condition, a direct initializer is not permitted. We allow one to
// be parsed in order to give better diagnostics in condition parsing.
if (D.getContext() == DeclaratorContext::Condition)
return;
SourceRange ParenRange(DeclType.Loc, DeclType.EndLoc);
S.Diag(DeclType.Loc,
FTI.NumParams ? diag::warn_parens_disambiguated_as_function_declaration
: diag::warn_empty_parens_are_function_decl)
<< ParenRange;
// If the declaration looks like:
// T var1,
// f();
// and name lookup finds a function named 'f', then the ',' was
// probably intended to be a ';'.
if (!D.isFirstDeclarator() && D.getIdentifier()) {
FullSourceLoc Comma(D.getCommaLoc(), S.SourceMgr);
FullSourceLoc Name(D.getIdentifierLoc(), S.SourceMgr);
if (Comma.getFileID() != Name.getFileID() ||
Comma.getSpellingLineNumber() != Name.getSpellingLineNumber()) {
LookupResult Result(S, D.getIdentifier(), SourceLocation(),
Sema::LookupOrdinaryName);
if (S.LookupName(Result, S.getCurScope()))
S.Diag(D.getCommaLoc(), diag::note_empty_parens_function_call)
<< FixItHint::CreateReplacement(D.getCommaLoc(), ";")
<< D.getIdentifier();
Result.suppressDiagnostics();
}
}
if (FTI.NumParams > 0) {
// For a declaration with parameters, e.g. "T var(T());", suggest adding
// parens around the first parameter to turn the declaration into a
// variable declaration.
SourceRange Range = FTI.Params[0].Param->getSourceRange();
SourceLocation B = Range.getBegin();
SourceLocation E = S.getLocForEndOfToken(Range.getEnd());
// FIXME: Maybe we should suggest adding braces instead of parens
// in C++11 for classes that don't have an initializer_list constructor.
S.Diag(B, diag::note_additional_parens_for_variable_declaration)
<< FixItHint::CreateInsertion(B, "(")
<< FixItHint::CreateInsertion(E, ")");
} else {
// For a declaration without parameters, e.g. "T var();", suggest replacing
// the parens with an initializer to turn the declaration into a variable
// declaration.
const CXXRecordDecl *RD = RT->getAsCXXRecordDecl();
// Empty parens mean value-initialization, and no parens mean
// default initialization. These are equivalent if the default
// constructor is user-provided or if zero-initialization is a
// no-op.
if (RD && RD->hasDefinition() &&
(RD->isEmpty() || RD->hasUserProvidedDefaultConstructor()))
S.Diag(DeclType.Loc, diag::note_empty_parens_default_ctor)
<< FixItHint::CreateRemoval(ParenRange);
else {
std::string Init =
S.getFixItZeroInitializerForType(RT, ParenRange.getBegin());
if (Init.empty() && S.LangOpts.CPlusPlus11)
Init = "{}";
if (!Init.empty())
S.Diag(DeclType.Loc, diag::note_empty_parens_zero_initialize)
<< FixItHint::CreateReplacement(ParenRange, Init);
}
}
}
/// Produce an appropriate diagnostic for a declarator with top-level
/// parentheses.
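/// Illustrative sketch (hypothetical declarations): 'int (x);' gets the plain
/// redundant-parentheses warning, whereas 'T (name);' with a class type T and
/// a previously declared 'name' may instead be diagnosed as having been
/// disambiguated as a variable declaration rather than a function-style cast.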
static void warnAboutRedundantParens(Sema &S, Declarator &D, QualType T) {
DeclaratorChunk &Paren = D.getTypeObject(D.getNumTypeObjects() - 1);
assert(Paren.Kind == DeclaratorChunk::Paren &&
"do not have redundant top-level parentheses");
// This is a syntactic check; we're not interested in cases that arise
// during template instantiation.
if (S.inTemplateInstantiation())
return;
// Check whether this could be intended to be a construction of a temporary
// object in C++ via a function-style cast.
bool CouldBeTemporaryObject =
S.getLangOpts().CPlusPlus && D.isExpressionContext() &&
!D.isInvalidType() && D.getIdentifier() &&
D.getDeclSpec().getParsedSpecifiers() == DeclSpec::PQ_TypeSpecifier &&
(T->isRecordType() || T->isDependentType()) &&
D.getDeclSpec().getTypeQualifiers() == 0 && D.isFirstDeclarator();
bool StartsWithDeclaratorId = true;
for (auto &C : D.type_objects()) {
switch (C.Kind) {
case DeclaratorChunk::Paren:
if (&C == &Paren)
continue;
LLVM_FALLTHROUGH;
case DeclaratorChunk::Pointer:
StartsWithDeclaratorId = false;
continue;
case DeclaratorChunk::Array:
if (!C.Arr.NumElts)
CouldBeTemporaryObject = false;
continue;
case DeclaratorChunk::Reference:
// FIXME: Suppress the warning here if there is no initializer; we're
// going to give an error anyway.
// We assume that something like 'T (&x) = y;' is highly likely to not
// be intended to be a temporary object.
CouldBeTemporaryObject = false;
StartsWithDeclaratorId = false;
continue;
case DeclaratorChunk::Function:
// In a new-type-id, function chunks require parentheses.
if (D.getContext() == DeclaratorContext::CXXNew)
return;
// FIXME: "A(f())" deserves a vexing-parse warning, not just a
// redundant-parens warning, but we don't know whether the function
// chunk was syntactically valid as an expression here.
CouldBeTemporaryObject = false;
continue;
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
// These cannot appear in expressions.
CouldBeTemporaryObject = false;
StartsWithDeclaratorId = false;
continue;
}
}
// FIXME: If there is an initializer, assume that this is not intended to be
// a construction of a temporary object.
// Check whether the name has already been declared; if not, this is not a
// function-style cast.
if (CouldBeTemporaryObject) {
LookupResult Result(S, D.getIdentifier(), SourceLocation(),
Sema::LookupOrdinaryName);
if (!S.LookupName(Result, S.getCurScope()))
CouldBeTemporaryObject = false;
Result.suppressDiagnostics();
}
SourceRange ParenRange(Paren.Loc, Paren.EndLoc);
if (!CouldBeTemporaryObject) {
// If we have A (::B), the parentheses affect the meaning of the program.
// Suppress the warning in that case. Don't bother looking at the DeclSpec
// here: even (e.g.) "int ::x" is visually ambiguous even though it's
// formally unambiguous.
if (StartsWithDeclaratorId && D.getCXXScopeSpec().isValid()) {
for (NestedNameSpecifier *NNS = D.getCXXScopeSpec().getScopeRep(); NNS;
NNS = NNS->getPrefix()) {
if (NNS->getKind() == NestedNameSpecifier::Global)
return;
}
}
S.Diag(Paren.Loc, diag::warn_redundant_parens_around_declarator)
<< ParenRange << FixItHint::CreateRemoval(Paren.Loc)
<< FixItHint::CreateRemoval(Paren.EndLoc);
return;
}
S.Diag(Paren.Loc, diag::warn_parens_disambiguated_as_variable_declaration)
<< ParenRange << D.getIdentifier();
auto *RD = T->getAsCXXRecordDecl();
if (!RD || !RD->hasDefinition() || RD->hasNonTrivialDestructor())
S.Diag(Paren.Loc, diag::note_raii_guard_add_name)
<< FixItHint::CreateInsertion(Paren.Loc, " varname") << T
<< D.getIdentifier();
// FIXME: A cast to void is probably a better suggestion in cases where it's
// valid (when there is no initializer and we're not in a condition).
S.Diag(D.getBeginLoc(), diag::note_function_style_cast_add_parentheses)
<< FixItHint::CreateInsertion(D.getBeginLoc(), "(")
<< FixItHint::CreateInsertion(S.getLocForEndOfToken(D.getEndLoc()), ")");
S.Diag(Paren.Loc, diag::note_remove_parens_for_variable_declaration)
<< FixItHint::CreateRemoval(Paren.Loc)
<< FixItHint::CreateRemoval(Paren.EndLoc);
}
/// Helper for figuring out the default CC for a function declarator type. If
/// this is the outermost chunk, then we can determine the CC from the
/// declarator context. If not, then this could be either a member function
/// type or normal function type.
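/// Illustrative sketch (assumed target behavior): an explicit attribute such
/// as '__stdcall' on the chunk wins below; otherwise a non-static member
/// function may receive the target's default member calling convention (for
/// example __thiscall on 32-bit MSVC targets).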
static CallingConv getCCForDeclaratorChunk(
Sema &S, Declarator &D, const ParsedAttributesView &AttrList,
const DeclaratorChunk::FunctionTypeInfo &FTI, unsigned ChunkIndex) {
assert(D.getTypeObject(ChunkIndex).Kind == DeclaratorChunk::Function);
// Check for an explicit CC attribute.
for (const ParsedAttr &AL : AttrList) {
switch (AL.getKind()) {
CALLING_CONV_ATTRS_CASELIST : {
// Ignore attributes that don't validate or can't apply to the
// function type. We'll diagnose the failure to apply them in
// handleFunctionTypeAttr.
CallingConv CC;
if (!S.CheckCallingConvAttr(AL, CC) &&
(!FTI.isVariadic || supportsVariadicCall(CC))) {
return CC;
}
break;
}
default:
break;
}
}
bool IsCXXInstanceMethod = false;
if (S.getLangOpts().CPlusPlus) {
// Look inwards through parentheses to see if this chunk will form a
// member pointer type or if we're the declarator. Any type attributes
// between here and there will override the CC we choose here.
unsigned I = ChunkIndex;
bool FoundNonParen = false;
while (I && !FoundNonParen) {
--I;
if (D.getTypeObject(I).Kind != DeclaratorChunk::Paren)
FoundNonParen = true;
}
if (FoundNonParen) {
// If we're not the declarator, we're a regular function type unless we're
// in a member pointer.
IsCXXInstanceMethod =
D.getTypeObject(I).Kind == DeclaratorChunk::MemberPointer;
} else if (D.getContext() == DeclaratorContext::LambdaExpr) {
// This can only be a call operator for a lambda, which is an instance
// method.
IsCXXInstanceMethod = true;
} else {
// We're the innermost decl chunk, so must be a function declarator.
assert(D.isFunctionDeclarator());
// If we're inside a record, we're declaring a method, but it could be
// explicitly or implicitly static.
IsCXXInstanceMethod =
D.isFirstDeclarationOfMember() &&
D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef &&
!D.isStaticMember();
}
}
CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic,
IsCXXInstanceMethod);
// Attribute AT_OpenCLKernel affects the calling convention for SPIR
// and AMDGPU targets, hence it cannot be treated as a calling
// convention attribute. This is the simplest place to infer
// calling convention for OpenCL kernels.
if (S.getLangOpts().OpenCL) {
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
if (AL.getKind() == ParsedAttr::AT_OpenCLKernel) {
CC = CC_OpenCLKernel;
break;
}
}
} else if (S.getLangOpts().CUDA) {
// If we're compiling CUDA/HIP code and targeting SPIR-V we need to make
// sure the kernels will be marked with the right calling convention so that
// they will be visible to the APIs that ingest SPIR-V.
llvm::Triple Triple = S.Context.getTargetInfo().getTriple();
if (Triple.getArch() == llvm::Triple::spirv32 ||
Triple.getArch() == llvm::Triple::spirv64) {
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) {
CC = CC_OpenCLKernel;
break;
}
}
}
}
return CC;
}
namespace {
/// A simple notion of pointer kinds, which matches up with the various
/// pointer declarators.
enum class SimplePointerKind {
Pointer,
BlockPointer,
MemberPointer,
Array,
};
} // end anonymous namespace
IdentifierInfo *Sema::getNullabilityKeyword(NullabilityKind nullability) {
switch (nullability) {
case NullabilityKind::NonNull:
if (!Ident__Nonnull)
Ident__Nonnull = PP.getIdentifierInfo("_Nonnull");
return Ident__Nonnull;
case NullabilityKind::Nullable:
if (!Ident__Nullable)
Ident__Nullable = PP.getIdentifierInfo("_Nullable");
return Ident__Nullable;
case NullabilityKind::NullableResult:
if (!Ident__Nullable_result)
Ident__Nullable_result = PP.getIdentifierInfo("_Nullable_result");
return Ident__Nullable_result;
case NullabilityKind::Unspecified:
if (!Ident__Null_unspecified)
Ident__Null_unspecified = PP.getIdentifierInfo("_Null_unspecified");
return Ident__Null_unspecified;
}
llvm_unreachable("Unknown nullability kind.");
}
/// Retrieve the identifier "NSError".
IdentifierInfo *Sema::getNSErrorIdent() {
if (!Ident_NSError)
Ident_NSError = PP.getIdentifierInfo("NSError");
return Ident_NSError;
}
/// Check whether there is a nullability attribute of any kind in the given
/// attribute list.
static bool hasNullabilityAttr(const ParsedAttributesView &attrs) {
for (const ParsedAttr &AL : attrs) {
if (AL.getKind() == ParsedAttr::AT_TypeNonNull ||
AL.getKind() == ParsedAttr::AT_TypeNullable ||
AL.getKind() == ParsedAttr::AT_TypeNullableResult ||
AL.getKind() == ParsedAttr::AT_TypeNullUnspecified)
return true;
}
return false;
}
namespace {
/// Describes the kind of a pointer a declarator describes.
enum class PointerDeclaratorKind {
// Not a pointer.
NonPointer,
// Single-level pointer.
SingleLevelPointer,
// Multi-level pointer (of any pointer kind).
MultiLevelPointer,
// CFFooRef*
MaybePointerToCFRef,
// CFErrorRef*
CFErrorRefPointer,
// NSError**
NSErrorPointerPointer,
};
/// Describes a declarator chunk wrapping a pointer that marks inference as
/// unexpected.
// These values must be kept in sync with diagnostics.
enum class PointerWrappingDeclaratorKind {
/// Pointer is top-level.
None = -1,
/// Pointer is an array element.
Array = 0,
/// Pointer is the referent type of a C++ reference.
Reference = 1
};
} // end anonymous namespace
/// Classify the given declarator, whose type specifier is \c type, based on
/// what kind of pointer it refers to.
///
/// This is used to determine the default nullability.
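/// Illustrative sketch (hypothetical declarations): 'int *p' classifies as
/// SingleLevelPointer, 'NSError **err' as NSErrorPointerPointer, and
/// 'CFErrorRef *err' as CFErrorRefPointer; the latter two get special
/// nullability inference in function and method signatures.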
static PointerDeclaratorKind
classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator,
PointerWrappingDeclaratorKind &wrappingKind) {
unsigned numNormalPointers = 0;
// For any dependent type, we consider it a non-pointer.
if (type->isDependentType())
return PointerDeclaratorKind::NonPointer;
// Look through the declarator chunks to identify pointers.
for (unsigned i = 0, n = declarator.getNumTypeObjects(); i != n; ++i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i);
switch (chunk.Kind) {
case DeclaratorChunk::Array:
if (numNormalPointers == 0)
wrappingKind = PointerWrappingDeclaratorKind::Array;
break;
case DeclaratorChunk::Function:
case DeclaratorChunk::Pipe:
break;
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::MemberPointer:
return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
: PointerDeclaratorKind::SingleLevelPointer;
case DeclaratorChunk::Paren:
break;
case DeclaratorChunk::Reference:
if (numNormalPointers == 0)
wrappingKind = PointerWrappingDeclaratorKind::Reference;
break;
case DeclaratorChunk::Pointer:
++numNormalPointers;
if (numNormalPointers > 2)
return PointerDeclaratorKind::MultiLevelPointer;
break;
}
}
// Then, dig into the type specifier itself.
unsigned numTypeSpecifierPointers = 0;
do {
// Decompose normal pointers.
if (auto ptrType = type->getAs<PointerType>()) {
++numNormalPointers;
if (numNormalPointers > 2)
return PointerDeclaratorKind::MultiLevelPointer;
type = ptrType->getPointeeType();
++numTypeSpecifierPointers;
continue;
}
// Decompose block pointers.
if (type->getAs<BlockPointerType>()) {
return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
: PointerDeclaratorKind::SingleLevelPointer;
}
// Decompose member pointers.
if (type->getAs<MemberPointerType>()) {
return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
: PointerDeclaratorKind::SingleLevelPointer;
}
// Look at Objective-C object pointers.
if (auto objcObjectPtr = type->getAs<ObjCObjectPointerType>()) {
++numNormalPointers;
++numTypeSpecifierPointers;
// If this is NSError**, report that.
if (auto objcClassDecl = objcObjectPtr->getInterfaceDecl()) {
if (objcClassDecl->getIdentifier() == S.getNSErrorIdent() &&
numNormalPointers == 2 && numTypeSpecifierPointers < 2) {
return PointerDeclaratorKind::NSErrorPointerPointer;
}
}
break;
}
// Look at Objective-C class types.
if (auto objcClass = type->getAs<ObjCInterfaceType>()) {
if (objcClass->getInterface()->getIdentifier() == S.getNSErrorIdent()) {
if (numNormalPointers == 2 && numTypeSpecifierPointers < 2)
return PointerDeclaratorKind::NSErrorPointerPointer;
}
break;
}
// If at this point we haven't seen a pointer, we won't see one.
if (numNormalPointers == 0)
return PointerDeclaratorKind::NonPointer;
if (auto recordType = type->getAs<RecordType>()) {
RecordDecl *recordDecl = recordType->getDecl();
// If this is CFErrorRef*, report it as such.
if (numNormalPointers == 2 && numTypeSpecifierPointers < 2 &&
S.isCFError(recordDecl)) {
return PointerDeclaratorKind::CFErrorRefPointer;
}
break;
}
break;
} while (true);
switch (numNormalPointers) {
case 0:
return PointerDeclaratorKind::NonPointer;
case 1:
return PointerDeclaratorKind::SingleLevelPointer;
case 2:
return PointerDeclaratorKind::MaybePointerToCFRef;
default:
return PointerDeclaratorKind::MultiLevelPointer;
}
}
bool Sema::isCFError(RecordDecl *RD) {
// If we already know about CFError, test it directly.
if (CFError)
return CFError == RD;
// Check whether this is CFError, which we identify based on its bridge to
// NSError. CFErrorRef used to be declared with "objc_bridge" but is now
// declared with "objc_bridge_mutable", so look for either one of the two
// attributes.
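// Illustrative sketch (assumed SDK declaration shape), roughly:
//   typedef struct __attribute__((objc_bridge_mutable(NSError))) __CFError *CFErrorRef;
// which is the kind of bridge attribute this check looks for.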
if (RD->getTagKind() == TTK_Struct) {
IdentifierInfo *bridgedType = nullptr;
if (auto bridgeAttr = RD->getAttr<ObjCBridgeAttr>())
bridgedType = bridgeAttr->getBridgedType();
else if (auto bridgeAttr = RD->getAttr<ObjCBridgeMutableAttr>())
bridgedType = bridgeAttr->getBridgedType();
if (bridgedType == getNSErrorIdent()) {
CFError = RD;
return true;
}
}
return false;
}
static FileID getNullabilityCompletenessCheckFileID(Sema &S,
SourceLocation loc) {
// If we're anywhere in a function, method, or closure context, don't perform
// completeness checks.
for (DeclContext *ctx = S.CurContext; ctx; ctx = ctx->getParent()) {
if (ctx->isFunctionOrMethod())
return FileID();
if (ctx->isFileContext())
break;
}
// We only care about the expansion location.
loc = S.SourceMgr.getExpansionLoc(loc);
FileID file = S.SourceMgr.getFileID(loc);
if (file.isInvalid())
return FileID();
// Retrieve file information.
bool invalid = false;
const SrcMgr::SLocEntry &sloc = S.SourceMgr.getSLocEntry(file, &invalid);
if (invalid || !sloc.isFile())
return FileID();
// We don't want to perform completeness checks on the main file or in
// system headers.
const SrcMgr::FileInfo &fileInfo = sloc.getFile();
if (fileInfo.getIncludeLoc().isInvalid())
return FileID();
if (fileInfo.getFileCharacteristic() != SrcMgr::C_User &&
S.Diags.getSuppressSystemWarnings()) {
return FileID();
}
return file;
}
/// Creates a fix-it to insert a C-style nullability keyword at \p pointerLoc,
/// taking into account whitespace before and after.
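/// Illustrative sketch (hypothetical declaration): given 'int *ptr', a fix-it
/// produced here would insert text such as " _Nonnull " right after the '*',
/// dropping the surrounding spaces when the neighbouring characters already
/// provide separation.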
template <typename DiagBuilderT>
static void fixItNullability(Sema &S, DiagBuilderT &Diag,
SourceLocation PointerLoc,
NullabilityKind Nullability) {
assert(PointerLoc.isValid());
if (PointerLoc.isMacroID())
return;
SourceLocation FixItLoc = S.getLocForEndOfToken(PointerLoc);
if (!FixItLoc.isValid() || FixItLoc == PointerLoc)
return;
const char *NextChar = S.SourceMgr.getCharacterData(FixItLoc);
if (!NextChar)
return;
SmallString<32> InsertionTextBuf{" "};
InsertionTextBuf += getNullabilitySpelling(Nullability);
InsertionTextBuf += " ";
StringRef InsertionText = InsertionTextBuf.str();
if (isWhitespace(*NextChar)) {
InsertionText = InsertionText.drop_back();
} else if (NextChar[-1] == '[') {
if (NextChar[0] == ']')
InsertionText = InsertionText.drop_back().drop_front();
else
InsertionText = InsertionText.drop_front();
} else if (!isAsciiIdentifierContinue(NextChar[0], /*allow dollar*/ true) &&
!isAsciiIdentifierContinue(NextChar[-1], /*allow dollar*/ true)) {
InsertionText = InsertionText.drop_back().drop_front();
}
Diag << FixItHint::CreateInsertion(FixItLoc, InsertionText);
}
static void emitNullabilityConsistencyWarning(Sema &S,
SimplePointerKind PointerKind,
SourceLocation PointerLoc,
SourceLocation PointerEndLoc) {
assert(PointerLoc.isValid());
if (PointerKind == SimplePointerKind::Array) {
S.Diag(PointerLoc, diag::warn_nullability_missing_array);
} else {
S.Diag(PointerLoc, diag::warn_nullability_missing)
<< static_cast<unsigned>(PointerKind);
}
auto FixItLoc = PointerEndLoc.isValid() ? PointerEndLoc : PointerLoc;
if (FixItLoc.isMacroID())
return;
auto addFixIt = [&](NullabilityKind Nullability) {
auto Diag = S.Diag(FixItLoc, diag::note_nullability_fix_it);
Diag << static_cast<unsigned>(Nullability);
Diag << static_cast<unsigned>(PointerKind);
fixItNullability(S, Diag, FixItLoc, Nullability);
};
addFixIt(NullabilityKind::Nullable);
addFixIt(NullabilityKind::NonNull);
}
/// Complains about missing nullability if the file containing \p pointerLoc
/// has other uses of nullability (either the keywords or the \c assume_nonnull
/// pragma).
///
/// If the file has \e not seen other uses of nullability, this particular
/// pointer is saved for possible later diagnosis. See recordNullabilitySeen().
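/// Illustrative sketch (hypothetical header): once a header has used _Nonnull,
/// _Nullable, or '#pragma clang assume_nonnull' anywhere, a later bare pointer
/// such as 'int *p' in a declaration triggers the missing-nullability warning,
/// with fix-it notes suggesting both _Nullable and _Nonnull.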
static void
checkNullabilityConsistency(Sema &S, SimplePointerKind pointerKind,
SourceLocation pointerLoc,
SourceLocation pointerEndLoc = SourceLocation()) {
// Determine which file we're performing consistency checking for.
FileID file = getNullabilityCompletenessCheckFileID(S, pointerLoc);
if (file.isInvalid())
return;
// If we haven't seen any type nullability in this file, we won't warn now
// about anything.
FileNullability &fileNullability = S.NullabilityMap[file];
if (!fileNullability.SawTypeNullability) {
// If this is the first pointer declarator in the file, and the appropriate
// warning is on, record it in case we need to diagnose it retroactively.
diag::kind diagKind;
if (pointerKind == SimplePointerKind::Array)
diagKind = diag::warn_nullability_missing_array;
else
diagKind = diag::warn_nullability_missing;
if (fileNullability.PointerLoc.isInvalid() &&
!S.Context.getDiagnostics().isIgnored(diagKind, pointerLoc)) {
fileNullability.PointerLoc = pointerLoc;
fileNullability.PointerEndLoc = pointerEndLoc;
fileNullability.PointerKind = static_cast<unsigned>(pointerKind);
}
return;
}
// Complain about missing nullability.
emitNullabilityConsistencyWarning(S, pointerKind, pointerLoc, pointerEndLoc);
}
/// Marks that a nullability feature has been used in the file containing
/// \p loc.
///
/// If this file already had pointer types in it that were missing nullability,
/// the first such instance is retroactively diagnosed.
///
/// \sa checkNullabilityConsistency
static void recordNullabilitySeen(Sema &S, SourceLocation loc) {
FileID file = getNullabilityCompletenessCheckFileID(S, loc);
if (file.isInvalid())
return;
FileNullability &fileNullability = S.NullabilityMap[file];
if (fileNullability.SawTypeNullability)
return;
fileNullability.SawTypeNullability = true;
// If we haven't seen any type nullability before, now we have. Retroactively
// diagnose the first unannotated pointer, if there was one.
if (fileNullability.PointerLoc.isInvalid())
return;
auto kind = static_cast<SimplePointerKind>(fileNullability.PointerKind);
emitNullabilityConsistencyWarning(S, kind, fileNullability.PointerLoc,
fileNullability.PointerEndLoc);
}
/// Returns true if any of the declarator chunks before \p endIndex include a
/// level of indirection: array, pointer, reference, or pointer-to-member.
///
/// Because declarator chunks are stored in outer-to-inner order, testing
/// every chunk before \p endIndex is testing all chunks that embed the current
/// chunk as part of their type.
///
/// It is legal to pass the result of Declarator::getNumTypeObjects() as the
/// end index, in which case all chunks are tested.
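/// Illustrative sketch (hypothetical declarator): for 'int (*p)[10]' the array
/// chunk is wrapped by a pointer chunk, so the query returns true for the
/// array; for a plain parameter 'int p[10]' it returns false.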
static bool hasOuterPointerLikeChunk(const Declarator &D, unsigned endIndex) {
unsigned i = endIndex;
while (i != 0) {
// Walk outwards along the declarator chunks.
--i;
const DeclaratorChunk &DC = D.getTypeObject(i);
switch (DC.Kind) {
case DeclaratorChunk::Paren:
break;
case DeclaratorChunk::Array:
case DeclaratorChunk::Pointer:
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
return true;
case DeclaratorChunk::Function:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Pipe:
// These are invalid anyway, so just ignore.
break;
}
}
return false;
}
static bool IsNoDerefableChunk(DeclaratorChunk Chunk) {
return (Chunk.Kind == DeclaratorChunk::Pointer ||
Chunk.Kind == DeclaratorChunk::Array);
}
template<typename AttrT>
static AttrT *createSimpleAttr(ASTContext &Ctx, ParsedAttr &AL) {
AL.setUsedAsTypeAttr();
return ::new (Ctx) AttrT(Ctx, AL);
}
static Attr *createNullabilityAttr(ASTContext &Ctx, ParsedAttr &Attr,
NullabilityKind NK) {
switch (NK) {
case NullabilityKind::NonNull:
return createSimpleAttr<TypeNonNullAttr>(Ctx, Attr);
case NullabilityKind::Nullable:
return createSimpleAttr<TypeNullableAttr>(Ctx, Attr);
case NullabilityKind::NullableResult:
return createSimpleAttr<TypeNullableResultAttr>(Ctx, Attr);
case NullabilityKind::Unspecified:
return createSimpleAttr<TypeNullUnspecifiedAttr>(Ctx, Attr);
}
llvm_unreachable("unknown NullabilityKind");
}
// Diagnose the case where a type is qualified with multiple address spaces.
// Returns true if this is an invalid case.
// ISO/IEC TR 18037 S5.3 (amending C99 6.7.3): "No type shall be qualified
// by qualifiers for two or more different address spaces."
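// Illustrative sketch (hypothetical declaration): qualifying a type with two
// different address spaces, e.g.
//   __attribute__((address_space(1))) __attribute__((address_space(2))) int x;
// is rejected here, while repeating the same address space only warns.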
static bool DiagnoseMultipleAddrSpaceAttributes(Sema &S, LangAS ASOld,
LangAS ASNew,
SourceLocation AttrLoc) {
if (ASOld != LangAS::Default) {
if (ASOld != ASNew) {
S.Diag(AttrLoc, diag::err_attribute_address_multiple_qualifiers);
return true;
}
// Emit a warning if they are identical; it's likely unintended.
S.Diag(AttrLoc,
diag::warn_attribute_address_multiple_identical_qualifiers);
}
return false;
}
static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
QualType declSpecType,
TypeSourceInfo *TInfo) {
// The TypeSourceInfo that this function returns will not be a null type.
// If there is an error, this function will fill in a dummy type as fallback.
QualType T = declSpecType;
Declarator &D = state.getDeclarator();
Sema &S = state.getSema();
ASTContext &Context = S.Context;
const LangOptions &LangOpts = S.getLangOpts();
// The name we're declaring, if any.
DeclarationName Name;
if (D.getIdentifier())
Name = D.getIdentifier();
// Does this declaration declare a typedef-name?
bool IsTypedefName =
D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef ||
D.getContext() == DeclaratorContext::AliasDecl ||
D.getContext() == DeclaratorContext::AliasTemplate;
// Does T refer to a function type with a cv-qualifier or a ref-qualifier?
bool IsQualifiedFunction = T->isFunctionProtoType() &&
(!T->castAs<FunctionProtoType>()->getMethodQuals().empty() ||
T->castAs<FunctionProtoType>()->getRefQualifier() != RQ_None);
// If T is 'decltype(auto)', the only declarators we can have are parens
// and at most one function declarator if this is a function declaration.
// If T is a deduced class template specialization type, we can have no
// declarator chunks at all.
if (auto *DT = T->getAs<DeducedType>()) {
const AutoType *AT = T->getAs<AutoType>();
bool IsClassTemplateDeduction = isa<DeducedTemplateSpecializationType>(DT);
if ((AT && AT->isDecltypeAuto()) || IsClassTemplateDeduction) {
for (unsigned I = 0, E = D.getNumTypeObjects(); I != E; ++I) {
unsigned Index = E - I - 1;
DeclaratorChunk &DeclChunk = D.getTypeObject(Index);
unsigned DiagId = IsClassTemplateDeduction
? diag::err_deduced_class_template_compound_type
: diag::err_decltype_auto_compound_type;
unsigned DiagKind = 0;
switch (DeclChunk.Kind) {
case DeclaratorChunk::Paren:
// FIXME: Rejecting this is a little silly.
if (IsClassTemplateDeduction) {
DiagKind = 4;
break;
}
continue;
case DeclaratorChunk::Function: {
if (IsClassTemplateDeduction) {
DiagKind = 3;
break;
}
unsigned FnIndex;
if (D.isFunctionDeclarationContext() &&
D.isFunctionDeclarator(FnIndex) && FnIndex == Index)
continue;
DiagId = diag::err_decltype_auto_function_declarator_not_declaration;
break;
}
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::MemberPointer:
DiagKind = 0;
break;
case DeclaratorChunk::Reference:
DiagKind = 1;
break;
case DeclaratorChunk::Array:
DiagKind = 2;
break;
case DeclaratorChunk::Pipe:
break;
}
S.Diag(DeclChunk.Loc, DiagId) << DiagKind;
D.setInvalidType(true);
break;
}
}
}
// Determine whether we should infer _Nonnull on pointer types.
Optional<NullabilityKind> inferNullability;
bool inferNullabilityCS = false;
bool inferNullabilityInnerOnly = false;
bool inferNullabilityInnerOnlyComplete = false;
// Are we in an assume-nonnull region?
bool inAssumeNonNullRegion = false;
SourceLocation assumeNonNullLoc = S.PP.getPragmaAssumeNonNullLoc();
if (assumeNonNullLoc.isValid()) {
inAssumeNonNullRegion = true;
recordNullabilitySeen(S, assumeNonNullLoc);
}
// Whether to complain about missing nullability specifiers or not.
enum {
/// Never complain.
CAMN_No,
/// Complain on the inner pointers (but not the outermost
/// pointer).
CAMN_InnerPointers,
/// Complain about any pointers that don't have nullability
/// specified or inferred.
CAMN_Yes
} complainAboutMissingNullability = CAMN_No;
unsigned NumPointersRemaining = 0;
auto complainAboutInferringWithinChunk = PointerWrappingDeclaratorKind::None;
if (IsTypedefName) {
// For typedefs, we do not infer any nullability (the default),
// and we only complain about missing nullability specifiers on
// inner pointers.
complainAboutMissingNullability = CAMN_InnerPointers;
if (T->canHaveNullability(/*ResultIfUnknown*/false) &&
!T->getNullability(S.Context)) {
// Note that we allow but don't require nullability on dependent types.
++NumPointersRemaining;
}
for (unsigned i = 0, n = D.getNumTypeObjects(); i != n; ++i) {
DeclaratorChunk &chunk = D.getTypeObject(i);
switch (chunk.Kind) {
case DeclaratorChunk::Array:
case DeclaratorChunk::Function:
case DeclaratorChunk::Pipe:
break;
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::MemberPointer:
++NumPointersRemaining;
break;
case DeclaratorChunk::Paren:
case DeclaratorChunk::Reference:
continue;
case DeclaratorChunk::Pointer:
++NumPointersRemaining;
continue;
}
}
} else {
bool isFunctionOrMethod = false;
switch (auto context = state.getDeclarator().getContext()) {
case DeclaratorContext::ObjCParameter:
case DeclaratorContext::ObjCResult:
case DeclaratorContext::Prototype:
case DeclaratorContext::TrailingReturn:
case DeclaratorContext::TrailingReturnVar:
isFunctionOrMethod = true;
LLVM_FALLTHROUGH;
case DeclaratorContext::Member:
if (state.getDeclarator().isObjCIvar() && !isFunctionOrMethod) {
complainAboutMissingNullability = CAMN_No;
break;
}
// Weak properties are inferred to be nullable.
if (state.getDeclarator().isObjCWeakProperty() && inAssumeNonNullRegion) {
inferNullability = NullabilityKind::Nullable;
break;
}
LLVM_FALLTHROUGH;
case DeclaratorContext::File:
case DeclaratorContext::KNRTypeList: {
complainAboutMissingNullability = CAMN_Yes;
// Nullability inference depends on the type and declarator.
auto wrappingKind = PointerWrappingDeclaratorKind::None;
switch (classifyPointerDeclarator(S, T, D, wrappingKind)) {
case PointerDeclaratorKind::NonPointer:
case PointerDeclaratorKind::MultiLevelPointer:
// Cannot infer nullability.
break;
case PointerDeclaratorKind::SingleLevelPointer:
// Infer _Nonnull if we are in an assumes-nonnull region.
if (inAssumeNonNullRegion) {
complainAboutInferringWithinChunk = wrappingKind;
inferNullability = NullabilityKind::NonNull;
inferNullabilityCS = (context == DeclaratorContext::ObjCParameter ||
context == DeclaratorContext::ObjCResult);
}
break;
case PointerDeclaratorKind::CFErrorRefPointer:
case PointerDeclaratorKind::NSErrorPointerPointer:
// Within a function or method signature, infer _Nullable at both
// levels.
if (isFunctionOrMethod && inAssumeNonNullRegion)
inferNullability = NullabilityKind::Nullable;
break;
case PointerDeclaratorKind::MaybePointerToCFRef:
if (isFunctionOrMethod) {
// On pointer-to-pointer parameters marked cf_returns_retained or
// cf_returns_not_retained, if the outer pointer is explicit then
// infer the inner pointer as _Nullable.
auto hasCFReturnsAttr =
[](const ParsedAttributesView &AttrList) -> bool {
return AttrList.hasAttribute(ParsedAttr::AT_CFReturnsRetained) ||
AttrList.hasAttribute(ParsedAttr::AT_CFReturnsNotRetained);
};
if (const auto *InnermostChunk = D.getInnermostNonParenChunk()) {
if (hasCFReturnsAttr(D.getDeclarationAttributes()) ||
hasCFReturnsAttr(D.getAttributes()) ||
hasCFReturnsAttr(InnermostChunk->getAttrs()) ||
hasCFReturnsAttr(D.getDeclSpec().getAttributes())) {
inferNullability = NullabilityKind::Nullable;
inferNullabilityInnerOnly = true;
}
}
}
break;
}
break;
}
case DeclaratorContext::ConversionId:
complainAboutMissingNullability = CAMN_Yes;
break;
case DeclaratorContext::AliasDecl:
case DeclaratorContext::AliasTemplate:
case DeclaratorContext::Block:
case DeclaratorContext::BlockLiteral:
case DeclaratorContext::Condition:
case DeclaratorContext::CXXCatch:
case DeclaratorContext::CXXNew:
case DeclaratorContext::ForInit:
case DeclaratorContext::SelectionInit:
case DeclaratorContext::LambdaExpr:
case DeclaratorContext::LambdaExprParameter:
case DeclaratorContext::ObjCCatch:
case DeclaratorContext::TemplateParam:
case DeclaratorContext::TemplateArg:
case DeclaratorContext::TemplateTypeArg:
case DeclaratorContext::TypeName:
case DeclaratorContext::FunctionalCast:
case DeclaratorContext::RequiresExpr:
case DeclaratorContext::Association:
// Don't infer in these contexts.
break;
}
}
// Local function that returns true if its argument looks like a va_list.
auto isVaList = [&S](QualType T) -> bool {
auto *typedefTy = T->getAs<TypedefType>();
if (!typedefTy)
return false;
TypedefDecl *vaListTypedef = S.Context.getBuiltinVaListDecl();
do {
if (typedefTy->getDecl() == vaListTypedef)
return true;
if (auto *name = typedefTy->getDecl()->getIdentifier())
if (name->isStr("va_list"))
return true;
typedefTy = typedefTy->desugar()->getAs<TypedefType>();
} while (typedefTy);
return false;
};
// Local function that checks the nullability for a given pointer declarator.
// Returns true if _Nonnull was inferred.
auto inferPointerNullability =
[&](SimplePointerKind pointerKind, SourceLocation pointerLoc,
SourceLocation pointerEndLoc,
ParsedAttributesView &attrs, AttributePool &Pool) -> ParsedAttr * {
// We've seen a pointer.
if (NumPointersRemaining > 0)
--NumPointersRemaining;
// If a nullability attribute is present, there's nothing to do.
if (hasNullabilityAttr(attrs))
return nullptr;
// If we're supposed to infer nullability, do so now.
if (inferNullability && !inferNullabilityInnerOnlyComplete) {
ParsedAttr::Syntax syntax = inferNullabilityCS
? ParsedAttr::AS_ContextSensitiveKeyword
: ParsedAttr::AS_Keyword;
ParsedAttr *nullabilityAttr = Pool.create(
S.getNullabilityKeyword(*inferNullability), SourceRange(pointerLoc),
nullptr, SourceLocation(), nullptr, 0, syntax);
attrs.addAtEnd(nullabilityAttr);
if (inferNullabilityCS) {
state.getDeclarator().getMutableDeclSpec().getObjCQualifiers()
->setObjCDeclQualifier(ObjCDeclSpec::DQ_CSNullability);
}
if (pointerLoc.isValid() &&
complainAboutInferringWithinChunk !=
PointerWrappingDeclaratorKind::None) {
auto Diag =
S.Diag(pointerLoc, diag::warn_nullability_inferred_on_nested_type);
Diag << static_cast<int>(complainAboutInferringWithinChunk);
fixItNullability(S, Diag, pointerLoc, NullabilityKind::NonNull);
}
if (inferNullabilityInnerOnly)
inferNullabilityInnerOnlyComplete = true;
return nullabilityAttr;
}
// If we're supposed to complain about missing nullability, do so
// now if it's truly missing.
switch (complainAboutMissingNullability) {
case CAMN_No:
break;
case CAMN_InnerPointers:
if (NumPointersRemaining == 0)
break;
LLVM_FALLTHROUGH;
case CAMN_Yes:
checkNullabilityConsistency(S, pointerKind, pointerLoc, pointerEndLoc);
}
return nullptr;
};
// If the type itself could have nullability but does not, infer pointer
// nullability and perform consistency checking.
if (S.CodeSynthesisContexts.empty()) {
if (T->canHaveNullability(/*ResultIfUnknown*/false) &&
!T->getNullability(S.Context)) {
if (isVaList(T)) {
// Record that we've seen a pointer, but do nothing else.
if (NumPointersRemaining > 0)
--NumPointersRemaining;
} else {
SimplePointerKind pointerKind = SimplePointerKind::Pointer;
if (T->isBlockPointerType())
pointerKind = SimplePointerKind::BlockPointer;
else if (T->isMemberPointerType())
pointerKind = SimplePointerKind::MemberPointer;
if (auto *attr = inferPointerNullability(
pointerKind, D.getDeclSpec().getTypeSpecTypeLoc(),
D.getDeclSpec().getEndLoc(),
D.getMutableDeclSpec().getAttributes(),
D.getMutableDeclSpec().getAttributePool())) {
T = state.getAttributedType(
createNullabilityAttr(Context, *attr, *inferNullability), T, T);
}
}
}
if (complainAboutMissingNullability == CAMN_Yes &&
T->isArrayType() && !T->getNullability(S.Context) && !isVaList(T) &&
D.isPrototypeContext() &&
!hasOuterPointerLikeChunk(D, D.getNumTypeObjects())) {
checkNullabilityConsistency(S, SimplePointerKind::Array,
D.getDeclSpec().getTypeSpecTypeLoc());
}
}
bool ExpectNoDerefChunk =
state.getCurrentAttributes().hasAttribute(ParsedAttr::AT_NoDeref);
// Walk the DeclTypeInfo, building the recursive type as we go.
// DeclTypeInfos are ordered from the identifier out, which is
// opposite of what we want :).
for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
unsigned chunkIndex = e - i - 1;
state.setCurrentChunkIndex(chunkIndex);
DeclaratorChunk &DeclType = D.getTypeObject(chunkIndex);
IsQualifiedFunction &= DeclType.Kind == DeclaratorChunk::Paren;
switch (DeclType.Kind) {
case DeclaratorChunk::Paren:
if (i == 0)
warnAboutRedundantParens(S, D, T);
T = S.BuildParenType(T);
break;
case DeclaratorChunk::BlockPointer:
// If blocks are disabled, emit an error.
if (!LangOpts.Blocks)
S.Diag(DeclType.Loc, diag::err_blocks_disable) << LangOpts.OpenCL;
// Handle pointer nullability.
inferPointerNullability(SimplePointerKind::BlockPointer, DeclType.Loc,
DeclType.EndLoc, DeclType.getAttrs(),
state.getDeclarator().getAttributePool());
T = S.BuildBlockPointerType(T, D.getIdentifierLoc(), Name);
if (DeclType.Cls.TypeQuals || LangOpts.OpenCL) {
// OpenCL v2.0, s6.12.5 - Block variable declarations are implicitly
// qualified with const.
if (LangOpts.OpenCL)
DeclType.Cls.TypeQuals |= DeclSpec::TQ_const;
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Cls.TypeQuals);
}
break;
case DeclaratorChunk::Pointer:
// Verify that we're not building a pointer to pointer to function with
// exception specification.
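// For illustration: in C++14, 'void (**pp)() throw(int);' is ill-formed
// because the exception specification sits beyond one level of indirection.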
if (LangOpts.CPlusPlus && S.CheckDistantExceptionSpec(T)) {
S.Diag(D.getIdentifierLoc(), diag::err_distant_exception_spec);
D.setInvalidType(true);
// Build the type anyway.
}
// Handle pointer nullability.
inferPointerNullability(SimplePointerKind::Pointer, DeclType.Loc,
DeclType.EndLoc, DeclType.getAttrs(),
state.getDeclarator().getAttributePool());
if (LangOpts.ObjC && T->getAs<ObjCObjectType>()) {
T = Context.getObjCObjectPointerType(T);
if (DeclType.Ptr.TypeQuals)
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Ptr.TypeQuals);
break;
}
// OpenCL v2.0 s6.9b - Pointer to image/sampler cannot be used.
// OpenCL v2.0 s6.13.16.1 - Pointer to pipe cannot be used.
// OpenCL v2.0 s6.12.5 - Pointers to Blocks are not allowed.
if (LangOpts.OpenCL) {
if (T->isImageType() || T->isSamplerT() || T->isPipeType() ||
T->isBlockPointerType()) {
S.Diag(D.getIdentifierLoc(), diag::err_opencl_pointer_to_type) << T;
D.setInvalidType(true);
}
}
T = S.BuildPointerType(T, DeclType.Loc, Name);
if (DeclType.Ptr.TypeQuals)
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Ptr.TypeQuals);
break;
case DeclaratorChunk::Reference: {
// Verify that we're not building a reference to pointer to function with
// exception specification.
if (LangOpts.CPlusPlus && S.CheckDistantExceptionSpec(T)) {
S.Diag(D.getIdentifierLoc(), diag::err_distant_exception_spec);
D.setInvalidType(true);
// Build the type anyway.
}
T = S.BuildReferenceType(T, DeclType.Ref.LValueRef, DeclType.Loc, Name);
if (DeclType.Ref.HasRestrict)
T = S.BuildQualifiedType(T, DeclType.Loc, Qualifiers::Restrict);
break;
}
case DeclaratorChunk::Array: {
// Verify that we're not building an array of pointers to function with
// exception specification.
if (LangOpts.CPlusPlus && S.CheckDistantExceptionSpec(T)) {
S.Diag(D.getIdentifierLoc(), diag::err_distant_exception_spec);
D.setInvalidType(true);
// Build the type anyway.
}
DeclaratorChunk::ArrayTypeInfo &ATI = DeclType.Arr;
Expr *ArraySize = static_cast<Expr*>(ATI.NumElts);
ArrayType::ArraySizeModifier ASM;
if (ATI.isStar)
ASM = ArrayType::Star;
else if (ATI.hasStatic)
ASM = ArrayType::Static;
else
ASM = ArrayType::Normal;
if (ASM == ArrayType::Star && !D.isPrototypeContext()) {
// FIXME: This check isn't quite right: it allows star in prototypes
// for function definitions, and disallows some edge cases detailed
// in http://gcc.gnu.org/ml/gcc-patches/2009-02/msg00133.html
S.Diag(DeclType.Loc, diag::err_array_star_outside_prototype);
ASM = ArrayType::Normal;
D.setInvalidType(true);
}
// C99 6.7.5.2p1: The optional type qualifiers and the keyword static
// shall appear only in a declaration of a function parameter with an
// array type, ...
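// e.g. 'void f(int a[static 10]);' is valid, while a declaration such as
// 'int b[static 10];' outside a function prototype is diagnosed below.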
if (ASM == ArrayType::Static || ATI.TypeQuals) {
if (!(D.isPrototypeContext() ||
D.getContext() == DeclaratorContext::KNRTypeList)) {
S.Diag(DeclType.Loc, diag::err_array_static_outside_prototype) <<
(ASM == ArrayType::Static ? "'static'" : "type qualifier");
// Remove the 'static' and the type qualifiers.
if (ASM == ArrayType::Static)
ASM = ArrayType::Normal;
ATI.TypeQuals = 0;
D.setInvalidType(true);
}
// C99 6.7.5.2p1: ... and then only in the outermost array type
// derivation.
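// e.g. 'void f(int a[static 10][20]);' is fine, but
// 'void g(int a[10][static 20]);' applies 'static' to a non-outermost
// array derivation and is diagnosed below.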
if (hasOuterPointerLikeChunk(D, chunkIndex)) {
S.Diag(DeclType.Loc, diag::err_array_static_not_outermost) <<
(ASM == ArrayType::Static ? "'static'" : "type qualifier");
if (ASM == ArrayType::Static)
ASM = ArrayType::Normal;
ATI.TypeQuals = 0;
D.setInvalidType(true);
}
}
const AutoType *AT = T->getContainedAutoType();
// Allow arrays of auto if we are a generic lambda parameter.
// i.e. [](auto (&array)[5]) { return array[0]; }; OK
if (AT && D.getContext() != DeclaratorContext::LambdaExprParameter) {
// We've already diagnosed this for decltype(auto).
if (!AT->isDecltypeAuto())
S.Diag(DeclType.Loc, diag::err_illegal_decl_array_of_auto)
<< getPrintableNameForEntity(Name) << T;
T = QualType();
break;
}
// Array parameters can be marked nullable as well, although it's not
// necessary if they're marked 'static'.
if (complainAboutMissingNullability == CAMN_Yes &&
!hasNullabilityAttr(DeclType.getAttrs()) &&
ASM != ArrayType::Static &&
D.isPrototypeContext() &&
!hasOuterPointerLikeChunk(D, chunkIndex)) {
checkNullabilityConsistency(S, SimplePointerKind::Array, DeclType.Loc);
}
T = S.BuildArrayType(T, ASM, ArraySize, ATI.TypeQuals,
SourceRange(DeclType.Loc, DeclType.EndLoc), Name);
break;
}
case DeclaratorChunk::Function: {
// If the function declarator has a prototype (i.e. it is not () and
// does not have a K&R-style identifier list), then the arguments are part
// of the type, otherwise the argument list is ().
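// e.g. 'int f(int);' carries a prototype; in C, 'int f();' leaves the
// parameters unspecified, and 'int f(a) int a; { ... }' is a K&R-style
// definition with an identifier list.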
DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
IsQualifiedFunction =
FTI.hasMethodTypeQualifiers() || FTI.hasRefQualifier();
// Check for auto functions and trailing return type and adjust the
// return type accordingly.
if (!D.isInvalidType()) {
// trailing-return-type is only required if we're declaring a function,
// and not, for instance, a pointer to a function.
if (D.getDeclSpec().hasAutoTypeSpec() &&
!FTI.hasTrailingReturnType() && chunkIndex == 0) {
if (!S.getLangOpts().CPlusPlus14) {
S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto
? diag::err_auto_missing_trailing_return
: diag::err_deduced_return_type);
T = Context.IntTy;
D.setInvalidType(true);
} else {
S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
diag::warn_cxx11_compat_deduced_return_type);
}
} else if (FTI.hasTrailingReturnType()) {
// T must be exactly 'auto' at this point. See CWG issue 681.
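// e.g. 'auto f() -> int;' is valid, while 'int f() -> int;' and
// 'auto *f() -> int;' are rejected below.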
if (isa<ParenType>(T)) {
S.Diag(D.getBeginLoc(), diag::err_trailing_return_in_parens)
<< T << D.getSourceRange();
D.setInvalidType(true);
} else if (D.getName().getKind() ==
UnqualifiedIdKind::IK_DeductionGuideName) {
if (T != Context.DependentTy) {
S.Diag(D.getDeclSpec().getBeginLoc(),
diag::err_deduction_guide_with_complex_decl)
<< D.getSourceRange();
D.setInvalidType(true);
}
} else if (D.getContext() != DeclaratorContext::LambdaExpr &&
(T.hasQualifiers() || !isa<AutoType>(T) ||
cast<AutoType>(T)->getKeyword() !=
AutoTypeKeyword::Auto ||
cast<AutoType>(T)->isConstrained())) {
S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
diag::err_trailing_return_without_auto)
<< T << D.getDeclSpec().getSourceRange();
D.setInvalidType(true);
}
T = S.GetTypeFromParser(FTI.getTrailingReturnType(), &TInfo);
if (T.isNull()) {
// An error occurred parsing the trailing return type.
T = Context.IntTy;
D.setInvalidType(true);
} else if (AutoType *Auto = T->getContainedAutoType()) {
// If the trailing return type contains an `auto`, we may need to
// invent a template parameter for it, for cases like
// `auto f() -> C auto` or `[](auto (*p) -> auto) {}`.
InventedTemplateParameterInfo *InventedParamInfo = nullptr;
if (D.getContext() == DeclaratorContext::Prototype)
InventedParamInfo = &S.InventedParameterInfos.back();
else if (D.getContext() == DeclaratorContext::LambdaExprParameter)
InventedParamInfo = S.getCurLambda();
if (InventedParamInfo) {
std::tie(T, TInfo) = InventTemplateParameter(
state, T, TInfo, Auto, *InventedParamInfo);
}
}
} else {
// This function type is not the type of the entity being declared,
// so checking the 'auto' is not the responsibility of this chunk.
}
}
// C99 6.7.5.3p1: The return type may not be a function or array type.
// For conversion functions, we'll diagnose this particular error later.
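// e.g. 'int f()[10];' (returning an array) and 'int g()();' (returning a
// function) are both diagnosed here.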
if (!D.isInvalidType() && (T->isArrayType() || T->isFunctionType()) &&
(D.getName().getKind() !=
UnqualifiedIdKind::IK_ConversionFunctionId)) {
unsigned diagID = diag::err_func_returning_array_function;
// Processing the last chunk (chunkIndex 0) in a block-literal context
// means this function chunk represents the block itself.
if (chunkIndex == 0 &&
D.getContext() == DeclaratorContext::BlockLiteral)
diagID = diag::err_block_returning_array_function;
S.Diag(DeclType.Loc, diagID) << T->isFunctionType() << T;
T = Context.IntTy;
D.setInvalidType(true);
}
// Do not allow returning half FP value.
// FIXME: This really should be in BuildFunctionType.
if (T->isHalfType()) {
if (S.getLangOpts().OpenCL) {
if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp16",
S.getLangOpts())) {
S.Diag(D.getIdentifierLoc(), diag::err_opencl_invalid_return)
<< T << 0 /*pointer hint*/;
D.setInvalidType(true);
}
} else if (!S.getLangOpts().HalfArgsAndReturns) {
S.Diag(D.getIdentifierLoc(),
diag::err_parameters_retval_cannot_have_fp16_type) << 1;
D.setInvalidType(true);
}
}
if (LangOpts.OpenCL) {
// OpenCL v2.0 s6.12.5 - A block cannot be the return value of a
// function.
if (T->isBlockPointerType() || T->isImageType() || T->isSamplerT() ||
T->isPipeType()) {
S.Diag(D.getIdentifierLoc(), diag::err_opencl_invalid_return)
<< T << 1 /*hint off*/;
D.setInvalidType(true);
}
// OpenCL doesn't support variadic functions and blocks
// (s6.9.e and s6.12.5 OpenCL v2.0) except for printf.
// We also allow any toolchain-reserved identifiers here.
if (FTI.isVariadic &&
!S.getOpenCLOptions().isAvailableOption(
"__cl_clang_variadic_functions", S.getLangOpts()) &&
!(D.getIdentifier() &&
((D.getIdentifier()->getName() == "printf" &&
LangOpts.getOpenCLCompatibleVersion() >= 120) ||
D.getIdentifier()->getName().startswith("__")))) {
S.Diag(D.getIdentifierLoc(), diag::err_opencl_variadic_function);
D.setInvalidType(true);
}
}
// Methods cannot return interface types. All ObjC objects are
// passed by reference.
if (T->isObjCObjectType()) {
SourceLocation DiagLoc, FixitLoc;
if (TInfo) {
DiagLoc = TInfo->getTypeLoc().getBeginLoc();
FixitLoc = S.getLocForEndOfToken(TInfo->getTypeLoc().getEndLoc());
} else {
DiagLoc = D.getDeclSpec().getTypeSpecTypeLoc();
FixitLoc = S.getLocForEndOfToken(D.getDeclSpec().getEndLoc());
}
S.Diag(DiagLoc, diag::err_object_cannot_be_passed_returned_by_value)
<< 0 << T
<< FixItHint::CreateInsertion(FixitLoc, "*");
T = Context.getObjCObjectPointerType(T);
if (TInfo) {
TypeLocBuilder TLB;
TLB.pushFullCopy(TInfo->getTypeLoc());
ObjCObjectPointerTypeLoc TLoc = TLB.push<ObjCObjectPointerTypeLoc>(T);
TLoc.setStarLoc(FixitLoc);
TInfo = TLB.getTypeSourceInfo(Context, T);
}
D.setInvalidType(true);
}
// cv-qualifiers on return types are pointless except when the type is a
// class type in C++.
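// e.g. the 'const' in 'const int f();' is meaningless for a non-class
// return type and is diagnosed as redundant below.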
if ((T.getCVRQualifiers() || T->isAtomicType()) &&
!(S.getLangOpts().CPlusPlus &&
(T->isDependentType() || T->isRecordType()))) {
if (T->isVoidType() && !S.getLangOpts().CPlusPlus &&
D.getFunctionDefinitionKind() ==
FunctionDefinitionKind::Definition) {
// [6.9.1/3] qualified void return is invalid on a C
// function definition. Apparently ok on declarations and
// in C++ though (!)
S.Diag(DeclType.Loc, diag::err_func_returning_qualified_void) << T;
} else
diagnoseRedundantReturnTypeQualifiers(S, T, D, chunkIndex);
// C++2a [dcl.fct]p12:
// A volatile-qualified return type is deprecated
if (T.isVolatileQualified() && S.getLangOpts().CPlusPlus20)
S.Diag(DeclType.Loc, diag::warn_deprecated_volatile_return) << T;
}
// Objective-C ARC ownership qualifiers are ignored on the function
// return type (by type canonicalization). Complain if this attribute
// was written here.
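// e.g. '__strong id f();' warns that the __strong qualifier on the return
// type is ignored.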
if (T.getQualifiers().hasObjCLifetime()) {
SourceLocation AttrLoc;
if (chunkIndex + 1 < D.getNumTypeObjects()) {
DeclaratorChunk ReturnTypeChunk = D.getTypeObject(chunkIndex + 1);
for (const ParsedAttr &AL : ReturnTypeChunk.getAttrs()) {
if (AL.getKind() == ParsedAttr::AT_ObjCOwnership) {
AttrLoc = AL.getLoc();
break;
}
}
}
if (AttrLoc.isInvalid()) {
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
if (AL.getKind() == ParsedAttr::AT_ObjCOwnership) {
AttrLoc = AL.getLoc();
break;
}
}
}
if (AttrLoc.isValid()) {
// The ownership attributes are almost always written via
// the predefined
// __strong/__weak/__autoreleasing/__unsafe_unretained.
if (AttrLoc.isMacroID())
AttrLoc =
S.SourceMgr.getImmediateExpansionRange(AttrLoc).getBegin();
S.Diag(AttrLoc, diag::warn_arc_lifetime_result_type)
<< T.getQualifiers().getObjCLifetime();
}
}
if (LangOpts.CPlusPlus && D.getDeclSpec().hasTagDefinition()) {
// C++ [dcl.fct]p6:
// Types shall not be defined in return or parameter types.
TagDecl *Tag = cast<TagDecl>(D.getDeclSpec().getRepAsDecl());
S.Diag(Tag->getLocation(), diag::err_type_defined_in_result_type)
<< Context.getTypeDeclType(Tag);
}
// Exception specs are not allowed in typedefs. Complain, but add it
// anyway.
if (IsTypedefName && FTI.getExceptionSpecType() && !LangOpts.CPlusPlus17)
S.Diag(FTI.getExceptionSpecLocBeg(),
diag::err_exception_spec_in_typedef)
<< (D.getContext() == DeclaratorContext::AliasDecl ||
D.getContext() == DeclaratorContext::AliasTemplate);
// If we see "T var();" or "T var(T());" at block scope, it is probably
// an attempt to initialize a variable, not a function declaration.
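// e.g. at block scope, 'std::string s();' declares a function taking no
// arguments rather than a default-initialized variable (the so-called
// "most vexing parse").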
if (FTI.isAmbiguous)
warnAboutAmbiguousFunction(S, D, DeclType, T);
FunctionType::ExtInfo EI(
getCCForDeclaratorChunk(S, D, DeclType.getAttrs(), FTI, chunkIndex));
// OpenCL disallows functions without a prototype, but it doesn't enforce
// strict prototypes as in C2x because it allows a function definition to
// have an identifier list. See OpenCL 3.0 6.11/g for more details.
if (!FTI.NumParams && !FTI.isVariadic &&
!LangOpts.requiresStrictPrototypes() && !LangOpts.OpenCL) {
// Simple void foo(), where the incoming T is the result type.
T = Context.getFunctionNoProtoType(T, EI);
} else {
// We allow a zero-parameter variadic function in C if the
// function is marked with the "overloadable" attribute. Scan
// for this attribute now.
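// e.g. 'void f(...) __attribute__((overloadable));' is accepted in C,
// whereas plain 'void f(...);' requires a named parameter before the
// ellipsis.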
if (!FTI.NumParams && FTI.isVariadic && !LangOpts.CPlusPlus)
if (!D.getDeclarationAttributes().hasAttribute(
ParsedAttr::AT_Overloadable) &&
!D.getAttributes().hasAttribute(ParsedAttr::AT_Overloadable) &&
!D.getDeclSpec().getAttributes().hasAttribute(
ParsedAttr::AT_Overloadable))
S.Diag(FTI.getEllipsisLoc(), diag::err_ellipsis_first_param);
if (FTI.NumParams && FTI.Params[0].Param == nullptr) {
// C99 6.7.5.3p3: Reject int(x,y,z) when it's not a function
// definition.
S.Diag(FTI.Params[0].IdentLoc,
diag::err_ident_list_in_fn_declaration);
D.setInvalidType(true);
// Recover by creating a K&R-style function type, if possible.
T = (!LangOpts.requiresStrictPrototypes() && !LangOpts.OpenCL)
? Context.getFunctionNoProtoType(T, EI)
: Context.IntTy;
break;
}
FunctionProtoType::ExtProtoInfo EPI;
EPI.ExtInfo = EI;
EPI.Variadic = FTI.isVariadic;
EPI.EllipsisLoc = FTI.getEllipsisLoc();
EPI.HasTrailingReturn = FTI.hasTrailingReturnType();
EPI.TypeQuals.addCVRUQualifiers(
FTI.MethodQualifiers ? FTI.MethodQualifiers->getTypeQualifiers()
: 0);
EPI.RefQualifier = !FTI.hasRefQualifier()? RQ_None
: FTI.RefQualifierIsLValueRef? RQ_LValue
: RQ_RValue;
// Otherwise, we have a function with a parameter list that is
// potentially variadic.
SmallVector<QualType, 16> ParamTys;
ParamTys.reserve(FTI.NumParams);
SmallVector<FunctionProtoType::ExtParameterInfo, 16>
ExtParameterInfos(FTI.NumParams);
bool HasAnyInterestingExtParameterInfos = false;
for (unsigned i = 0, e = FTI.NumParams; i != e; ++i) {
ParmVarDecl *Param = cast<ParmVarDecl>(FTI.Params[i].Param);
QualType ParamTy = Param->getType();
assert(!ParamTy.isNull() && "Couldn't parse type?");
// Look for 'void'. void is allowed only as a single parameter to a
// function with no other parameters (C99 6.7.5.3p10). We record
// int(void) as a FunctionProtoType with an empty parameter list.
if (ParamTy->isVoidType()) {
// If this is something like 'float(int, void)', reject it. 'void'
// is an incomplete type (C99 6.2.5p19) and function decls cannot
// have parameters of incomplete type.
if (FTI.NumParams != 1 || FTI.isVariadic) {
S.Diag(FTI.Params[i].IdentLoc, diag::err_void_only_param);
ParamTy = Context.IntTy;
Param->setType(ParamTy);
} else if (FTI.Params[i].Ident) {
// Reject, but continue to parse 'int(void abc)'.
S.Diag(FTI.Params[i].IdentLoc, diag::err_param_with_void_type);
ParamTy = Context.IntTy;
Param->setType(ParamTy);
} else {
// Reject, but continue to parse 'float(const void)'.
if (ParamTy.hasQualifiers())
S.Diag(DeclType.Loc, diag::err_void_param_qualified);
// Do not add 'void' to the list.
break;
}
} else if (ParamTy->isHalfType()) {
// Disallow half FP parameters.
// FIXME: This really should be in BuildFunctionType.
if (S.getLangOpts().OpenCL) {
if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp16",
S.getLangOpts())) {
S.Diag(Param->getLocation(), diag::err_opencl_invalid_param)
<< ParamTy << 0;
D.setInvalidType();
Param->setInvalidDecl();
}
} else if (!S.getLangOpts().HalfArgsAndReturns) {
S.Diag(Param->getLocation(),
diag::err_parameters_retval_cannot_have_fp16_type) << 0;
D.setInvalidType();
}
} else if (!FTI.hasPrototype) {
if (ParamTy->isPromotableIntegerType()) {
ParamTy = Context.getPromotedIntegerType(ParamTy);
Param->setKNRPromoted(true);
} else if (const BuiltinType* BTy = ParamTy->getAs<BuiltinType>()) {
if (BTy->getKind() == BuiltinType::Float) {
ParamTy = Context.DoubleTy;
Param->setKNRPromoted(true);
}
}
} else if (S.getLangOpts().OpenCL && ParamTy->isBlockPointerType()) {
// OpenCL 2.0 s6.12.5: A block cannot be a parameter of a function.
S.Diag(Param->getLocation(), diag::err_opencl_invalid_param)
<< ParamTy << 1 /*hint off*/;
D.setInvalidType();
}
if (LangOpts.ObjCAutoRefCount && Param->hasAttr<NSConsumedAttr>()) {
ExtParameterInfos[i] = ExtParameterInfos[i].withIsConsumed(true);
HasAnyInterestingExtParameterInfos = true;
}
if (auto attr = Param->getAttr<ParameterABIAttr>()) {
ExtParameterInfos[i] =
ExtParameterInfos[i].withABI(attr->getABI());
HasAnyInterestingExtParameterInfos = true;
}
if (Param->hasAttr<PassObjectSizeAttr>()) {
ExtParameterInfos[i] = ExtParameterInfos[i].withHasPassObjectSize();
HasAnyInterestingExtParameterInfos = true;
}
if (Param->hasAttr<NoEscapeAttr>()) {
ExtParameterInfos[i] = ExtParameterInfos[i].withIsNoEscape(true);
HasAnyInterestingExtParameterInfos = true;
}
ParamTys.push_back(ParamTy);
}
if (HasAnyInterestingExtParameterInfos) {
EPI.ExtParameterInfos = ExtParameterInfos.data();
checkExtParameterInfos(S, ParamTys, EPI,
[&](unsigned i) { return FTI.Params[i].Param->getLocation(); });
}
SmallVector<QualType, 4> Exceptions;
SmallVector<ParsedType, 2> DynamicExceptions;
SmallVector<SourceRange, 2> DynamicExceptionRanges;
Expr *NoexceptExpr = nullptr;
if (FTI.getExceptionSpecType() == EST_Dynamic) {
// FIXME: It's rather inefficient to have to split into two vectors
// here.
unsigned N = FTI.getNumExceptions();
DynamicExceptions.reserve(N);
DynamicExceptionRanges.reserve(N);
for (unsigned I = 0; I != N; ++I) {
DynamicExceptions.push_back(FTI.Exceptions[I].Ty);
DynamicExceptionRanges.push_back(FTI.Exceptions[I].Range);
}
} else if (isComputedNoexcept(FTI.getExceptionSpecType())) {
NoexceptExpr = FTI.NoexceptExpr;
}
S.checkExceptionSpecification(D.isFunctionDeclarationContext(),
FTI.getExceptionSpecType(),
DynamicExceptions,
DynamicExceptionRanges,
NoexceptExpr,
Exceptions,
EPI.ExceptionSpec);
// FIXME: Set address space from attrs for C++ mode here.
// OpenCLCPlusPlus: A class member function has an address space.
auto IsClassMember = [&]() {
return (!state.getDeclarator().getCXXScopeSpec().isEmpty() &&
state.getDeclarator()
.getCXXScopeSpec()
.getScopeRep()
->getKind() == NestedNameSpecifier::TypeSpec) ||
state.getDeclarator().getContext() ==
DeclaratorContext::Member ||
state.getDeclarator().getContext() ==
DeclaratorContext::LambdaExpr;
};
if (state.getSema().getLangOpts().OpenCLCPlusPlus && IsClassMember()) {
LangAS ASIdx = LangAS::Default;
// Take the address space attribute, if any, and mark it as invalid to
// avoid adding it again later while creating the QualType.
if (FTI.MethodQualifiers)
for (ParsedAttr &attr : FTI.MethodQualifiers->getAttributes()) {
LangAS ASIdxNew = attr.asOpenCLLangAS();
if (DiagnoseMultipleAddrSpaceAttributes(S, ASIdx, ASIdxNew,
attr.getLoc()))
D.setInvalidType(true);
else
ASIdx = ASIdxNew;
}
// If a class member function's address space is not set, set it to
// __generic.
LangAS AS =
(ASIdx == LangAS::Default ? S.getDefaultCXXMethodAddrSpace()
: ASIdx);
EPI.TypeQuals.addAddressSpace(AS);
}
T = Context.getFunctionType(T, ParamTys, EPI);
}
break;
}
case DeclaratorChunk::MemberPointer: {
// The scope spec must refer to a class, or be dependent.
CXXScopeSpec &SS = DeclType.Mem.Scope();
QualType ClsType;
// Handle pointer nullability.
inferPointerNullability(SimplePointerKind::MemberPointer, DeclType.Loc,
DeclType.EndLoc, DeclType.getAttrs(),
state.getDeclarator().getAttributePool());
if (SS.isInvalid()) {
// Avoid emitting extra errors if we already errored on the scope.
D.setInvalidType(true);
} else if (S.isDependentScopeSpecifier(SS) ||
isa_and_nonnull<CXXRecordDecl>(S.computeDeclContext(SS))) {
NestedNameSpecifier *NNS = SS.getScopeRep();
NestedNameSpecifier *NNSPrefix = NNS->getPrefix();
switch (NNS->getKind()) {
case NestedNameSpecifier::Identifier:
ClsType = Context.getDependentNameType(ETK_None, NNSPrefix,
NNS->getAsIdentifier());
break;
case NestedNameSpecifier::Namespace:
case NestedNameSpecifier::NamespaceAlias:
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
llvm_unreachable("Nested-name-specifier must name a type");
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
ClsType = QualType(NNS->getAsType(), 0);
// Note: if the NNS has a prefix and ClsType is a nondependent
// TemplateSpecializationType, then the NNS prefix is NOT included
// in ClsType; hence we wrap ClsType into an ElaboratedType.
// NOTE: in particular, no wrap occurs if ClsType already is an
// Elaborated, DependentName, or DependentTemplateSpecialization.
if (NNSPrefix && isa<TemplateSpecializationType>(NNS->getAsType()))
ClsType = Context.getElaboratedType(ETK_None, NNSPrefix, ClsType);
break;
}
} else {
S.Diag(DeclType.Mem.Scope().getBeginLoc(),
diag::err_illegal_decl_mempointer_in_nonclass)
<< (D.getIdentifier() ? D.getIdentifier()->getName() : "type name")
<< DeclType.Mem.Scope().getRange();
D.setInvalidType(true);
}
if (!ClsType.isNull())
T = S.BuildMemberPointerType(T, ClsType, DeclType.Loc,
D.getIdentifier());
if (T.isNull()) {
T = Context.IntTy;
D.setInvalidType(true);
} else if (DeclType.Mem.TypeQuals) {
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Mem.TypeQuals);
}
break;
}
case DeclaratorChunk::Pipe: {
T = S.BuildReadPipeType(T, DeclType.Loc);
processTypeAttrs(state, T, TAL_DeclSpec,
D.getMutableDeclSpec().getAttributes());
break;
}
}
if (T.isNull()) {
D.setInvalidType(true);
T = Context.IntTy;
}
// See if there are any attributes on this declarator chunk.
processTypeAttrs(state, T, TAL_DeclChunk, DeclType.getAttrs());
if (DeclType.Kind != DeclaratorChunk::Paren) {
if (ExpectNoDerefChunk && !IsNoDerefableChunk(DeclType))
S.Diag(DeclType.Loc, diag::warn_noderef_on_non_pointer_or_array);
ExpectNoDerefChunk = state.didParseNoDeref();
}
}
if (ExpectNoDerefChunk)
S.Diag(state.getDeclarator().getBeginLoc(),
diag::warn_noderef_on_non_pointer_or_array);
// GNU warning -Wstrict-prototypes
// Warn if a function declaration or definition lacks a prototype.
// This warning is issued for all kinds of unprototyped function
// declarations (e.g. function-type typedefs, function pointers, etc.).
// C99 6.7.5.3p14:
// The empty list in a function declarator that is not part of a definition
// of that function specifies that no information about the number or types
// of the parameters is supplied.
// See ActOnFinishFunctionBody() and MergeFunctionDecl() for handling of
// function declarations whose behavior changes in C2x.
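// e.g. 'void f();' triggers -Wstrict-prototypes, and the fix-it inserts
// 'void' to produce the prototyped form 'void f(void);'.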
if (!LangOpts.requiresStrictPrototypes()) {
bool IsBlock = false;
for (const DeclaratorChunk &DeclType : D.type_objects()) {
switch (DeclType.Kind) {
case DeclaratorChunk::BlockPointer:
IsBlock = true;
break;
case DeclaratorChunk::Function: {
const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
// We suppress the warning when there's no LParen location, as this
// indicates the declaration was an implicit declaration, which gets
// warned about separately via -Wimplicit-function-declaration. We also
// suppress the warning when we know the function has a prototype.
if (!FTI.hasPrototype && FTI.NumParams == 0 && !FTI.isVariadic &&
FTI.getLParenLoc().isValid())
S.Diag(DeclType.Loc, diag::warn_strict_prototypes)
<< IsBlock
<< FixItHint::CreateInsertion(FTI.getRParenLoc(), "void");
IsBlock = false;
break;
}
default:
break;
}
}
}
assert(!T.isNull() && "T must not be null after this point");
if (LangOpts.CPlusPlus && T->isFunctionType()) {
const FunctionProtoType *FnTy = T->getAs<FunctionProtoType>();
assert(FnTy && "Why oh why is there not a FunctionProtoType here?");
// C++ 8.3.5p4:
// A cv-qualifier-seq shall only be part of the function type
// for a nonstatic member function, the function type to which a pointer
// to member refers, or the top-level function type of a function typedef
// declaration.
//
// Core issue 547 also allows cv-qualifiers on function types that are
// top-level template type arguments.
enum { NonMember, Member, DeductionGuide } Kind = NonMember;
if (D.getName().getKind() == UnqualifiedIdKind::IK_DeductionGuideName)
Kind = DeductionGuide;
else if (!D.getCXXScopeSpec().isSet()) {
if ((D.getContext() == DeclaratorContext::Member ||
D.getContext() == DeclaratorContext::LambdaExpr) &&
!D.getDeclSpec().isFriendSpecified())
Kind = Member;
} else {
DeclContext *DC = S.computeDeclContext(D.getCXXScopeSpec());
if (!DC || DC->isRecord())
Kind = Member;
}
// C++11 [dcl.fct]p6 (w/DR1417):
// An attempt to specify a function type with a cv-qualifier-seq or a
// ref-qualifier (including by typedef-name) is ill-formed unless it is:
// - the function type for a non-static member function,
// - the function type to which a pointer to member refers,
// - the top-level function type of a function typedef declaration or
// alias-declaration,
// - the type-id in the default argument of a type-parameter, or
// - the type-id of a template-argument for a type-parameter
//
// FIXME: Checking this here is insufficient. We accept-invalid on:
//
// template<typename T> struct S { void f(T); };
// S<int() const> s;
//
// ... for instance.
if (IsQualifiedFunction &&
!(Kind == Member &&
D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_static) &&
!IsTypedefName && D.getContext() != DeclaratorContext::TemplateArg &&
D.getContext() != DeclaratorContext::TemplateTypeArg) {
SourceLocation Loc = D.getBeginLoc();
SourceRange RemovalRange;
unsigned I;
if (D.isFunctionDeclarator(I)) {
SmallVector<SourceLocation, 4> RemovalLocs;
const DeclaratorChunk &Chunk = D.getTypeObject(I);
assert(Chunk.Kind == DeclaratorChunk::Function);
if (Chunk.Fun.hasRefQualifier())
RemovalLocs.push_back(Chunk.Fun.getRefQualifierLoc());
if (Chunk.Fun.hasMethodTypeQualifiers())
Chunk.Fun.MethodQualifiers->forEachQualifier(
[&](DeclSpec::TQ TypeQual, StringRef QualName,
SourceLocation SL) { RemovalLocs.push_back(SL); });
if (!RemovalLocs.empty()) {
llvm::sort(RemovalLocs,
BeforeThanCompare<SourceLocation>(S.getSourceManager()));
RemovalRange = SourceRange(RemovalLocs.front(), RemovalLocs.back());
Loc = RemovalLocs.front();
}
}
S.Diag(Loc, diag::err_invalid_qualified_function_type)
<< Kind << D.isFunctionDeclarator() << T
<< getFunctionQualifiersAsString(FnTy)
<< FixItHint::CreateRemoval(RemovalRange);
// Strip the cv-qualifiers and ref-qualifiers from the type.
FunctionProtoType::ExtProtoInfo EPI = FnTy->getExtProtoInfo();
EPI.TypeQuals.removeCVRQualifiers();
EPI.RefQualifier = RQ_None;
T = Context.getFunctionType(FnTy->getReturnType(), FnTy->getParamTypes(),
EPI);
// Rebuild any parens around the identifier in the function type.
for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
if (D.getTypeObject(i).Kind != DeclaratorChunk::Paren)
break;
T = S.BuildParenType(T);
}
}
}
// Apply any undistributed attributes from the declaration or declarator.
ParsedAttributesView NonSlidingAttrs;
for (ParsedAttr &AL : D.getDeclarationAttributes()) {
if (!AL.slidesFromDeclToDeclSpecLegacyBehavior()) {
NonSlidingAttrs.addAtEnd(&AL);
}
}
processTypeAttrs(state, T, TAL_DeclName, NonSlidingAttrs);
processTypeAttrs(state, T, TAL_DeclName, D.getAttributes());
// Diagnose any ignored type attributes.
state.diagnoseIgnoredTypeAttrs(T);
// C++0x [dcl.constexpr]p9:
// A constexpr specifier used in an object declaration declares the object
// as const.
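// e.g. 'constexpr int x = 3;' declares 'x' with type 'const int'.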
if (D.getDeclSpec().getConstexprSpecifier() == ConstexprSpecKind::Constexpr &&
T->isObjectType())
T.addConst();
// C++2a [dcl.fct]p4:
// A parameter with volatile-qualified type is deprecated
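// e.g. 'void f(volatile int v);' warns in C++20 that the volatile
// parameter type is deprecated.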
if (T.isVolatileQualified() && S.getLangOpts().CPlusPlus20 &&
(D.getContext() == DeclaratorContext::Prototype ||
D.getContext() == DeclaratorContext::LambdaExprParameter))
S.Diag(D.getIdentifierLoc(), diag::warn_deprecated_volatile_param) << T;
// If there was an ellipsis in the declarator, the declaration declares a
// parameter pack whose type may be a pack expansion type.
if (D.hasEllipsis()) {
// C++0x [dcl.fct]p13:
// A declarator-id or abstract-declarator containing an ellipsis shall
// only be used in a parameter-declaration. Such a parameter-declaration
// is a parameter pack (14.5.3). [...]
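// e.g. 'template<typename ...Ts> void f(Ts ...args);' declares 'args' as a
// function parameter pack whose type 'Ts...' is a pack expansion.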
switch (D.getContext()) {
case DeclaratorContext::Prototype:
case DeclaratorContext::LambdaExprParameter:
case DeclaratorContext::RequiresExpr:
// C++0x [dcl.fct]p13:
// [...] When it is part of a parameter-declaration-clause, the
// parameter pack is a function parameter pack (14.5.3). The type T
// of the declarator-id of the function parameter pack shall contain
// a template parameter pack; each template parameter pack in T is
// expanded by the function parameter pack.
//
// We represent function parameter packs as function parameters whose
// type is a pack expansion.
if (!T->containsUnexpandedParameterPack() &&
(!LangOpts.CPlusPlus20 || !T->getContainedAutoType())) {
S.Diag(D.getEllipsisLoc(),
diag::err_function_parameter_pack_without_parameter_packs)
<< T << D.getSourceRange();
D.setEllipsisLoc(SourceLocation());
} else {
T = Context.getPackExpansionType(T, None, /*ExpectPackInType=*/false);
}
break;
case DeclaratorContext::TemplateParam:
// C++0x [temp.param]p15:
// If a template-parameter is a [...] is a parameter-declaration that
// declares a parameter pack (8.3.5), then the template-parameter is a
// template parameter pack (14.5.3).
//
// Note: core issue 778 clarifies that, if there are any unexpanded
// parameter packs in the type of the non-type template parameter, then
// it expands those parameter packs.
if (T->containsUnexpandedParameterPack())
T = Context.getPackExpansionType(T, None);
else
S.Diag(D.getEllipsisLoc(),
LangOpts.CPlusPlus11
? diag::warn_cxx98_compat_variadic_templates
: diag::ext_variadic_templates);
break;
case DeclaratorContext::File:
case DeclaratorContext::KNRTypeList:
case DeclaratorContext::ObjCParameter: // FIXME: special diagnostic here?
case DeclaratorContext::ObjCResult: // FIXME: special diagnostic here?
case DeclaratorContext::TypeName:
case DeclaratorContext::FunctionalCast:
case DeclaratorContext::CXXNew:
case DeclaratorContext::AliasDecl:
case DeclaratorContext::AliasTemplate:
case DeclaratorContext::Member:
case DeclaratorContext::Block:
case DeclaratorContext::ForInit:
case DeclaratorContext::SelectionInit:
case DeclaratorContext::Condition:
case DeclaratorContext::CXXCatch:
case DeclaratorContext::ObjCCatch:
case DeclaratorContext::BlockLiteral:
case DeclaratorContext::LambdaExpr:
case DeclaratorContext::ConversionId:
case DeclaratorContext::TrailingReturn:
case DeclaratorContext::TrailingReturnVar:
case DeclaratorContext::TemplateArg:
case DeclaratorContext::TemplateTypeArg:
case DeclaratorContext::Association:
// FIXME: We may want to allow parameter packs in block-literal contexts
// in the future.
S.Diag(D.getEllipsisLoc(),
diag::err_ellipsis_in_declarator_not_parameter);
D.setEllipsisLoc(SourceLocation());
break;
}
}
assert(!T.isNull() && "T must not be null at the end of this function");
if (D.isInvalidType())
return Context.getTrivialTypeSourceInfo(T);
return GetTypeSourceInfoForDeclarator(state, T, TInfo);
}
/// GetTypeForDeclarator - Convert the type for the specified
/// declarator to Type instances.
///
/// The result of this call will never be null, but the associated
/// type may be a null type if there's an unrecoverable error.
TypeSourceInfo *Sema::GetTypeForDeclarator(Declarator &D, Scope *S) {
// Determine the type of the declarator. Not all forms of declarator
// have a type.
TypeProcessingState state(*this, D);
TypeSourceInfo *ReturnTypeInfo = nullptr;
QualType T = GetDeclSpecTypeForDeclarator(state, ReturnTypeInfo);
if (D.isPrototypeContext() && getLangOpts().ObjCAutoRefCount)
inferARCWriteback(state, T);
return GetFullTypeForDeclarator(state, T, ReturnTypeInfo);
}
static void transferARCOwnershipToDeclSpec(Sema &S,
QualType &declSpecTy,
Qualifiers::ObjCLifetime ownership) {
if (declSpecTy->isObjCRetainableType() &&
declSpecTy.getObjCLifetime() == Qualifiers::OCL_None) {
Qualifiers qs;
qs.addObjCLifetime(ownership);
declSpecTy = S.Context.getQualifiedType(declSpecTy, qs);
}
}
static void transferARCOwnershipToDeclaratorChunk(TypeProcessingState &state,
Qualifiers::ObjCLifetime ownership,
unsigned chunkIndex) {
Sema &S = state.getSema();
Declarator &D = state.getDeclarator();
// Look for an explicit lifetime attribute.
DeclaratorChunk &chunk = D.getTypeObject(chunkIndex);
if (chunk.getAttrs().hasAttribute(ParsedAttr::AT_ObjCOwnership))
return;
const char *attrStr = nullptr;
switch (ownership) {
case Qualifiers::OCL_None: llvm_unreachable("no ownership!");
case Qualifiers::OCL_ExplicitNone: attrStr = "none"; break;
case Qualifiers::OCL_Strong: attrStr = "strong"; break;
case Qualifiers::OCL_Weak: attrStr = "weak"; break;
case Qualifiers::OCL_Autoreleasing: attrStr = "autoreleasing"; break;
}
IdentifierLoc *Arg = new (S.Context) IdentifierLoc;
Arg->Ident = &S.Context.Idents.get(attrStr);
Arg->Loc = SourceLocation();
ArgsUnion Args(Arg);
// If there wasn't one, add one (with an invalid source location
// so that we don't make an AttributedType for it).
ParsedAttr *attr = D.getAttributePool().create(
&S.Context.Idents.get("objc_ownership"), SourceLocation(),
/*scope*/ nullptr, SourceLocation(),
/*args*/ &Args, 1, ParsedAttr::AS_GNU);
chunk.getAttrs().addAtEnd(attr);
// TODO: mark whether we did this inference?
}
/// Used for transferring ownership in casts resulting in l-values.
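/// For instance (under ARC), casting '&strongVar' to '(NSString **)' should
/// transfer the operand's __strong lifetime onto the cast type's inner
/// pointer; this routine decides where that qualifier lands.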
static void transferARCOwnership(TypeProcessingState &state,
QualType &declSpecTy,
Qualifiers::ObjCLifetime ownership) {
Sema &S = state.getSema();
Declarator &D = state.getDeclarator();
int inner = -1;
bool hasIndirection = false;
for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
DeclaratorChunk &chunk = D.getTypeObject(i);
switch (chunk.Kind) {
case DeclaratorChunk::Paren:
// Ignore parens.
break;
case DeclaratorChunk::Array:
case DeclaratorChunk::Reference:
case DeclaratorChunk::Pointer:
if (inner != -1)
hasIndirection = true;
inner = i;
break;
case DeclaratorChunk::BlockPointer:
if (inner != -1)
transferARCOwnershipToDeclaratorChunk(state, ownership, i);
return;
case DeclaratorChunk::Function:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
return;
}
}
if (inner == -1)
return;
DeclaratorChunk &chunk = D.getTypeObject(inner);
if (chunk.Kind == DeclaratorChunk::Pointer) {
if (declSpecTy->isObjCRetainableType())
return transferARCOwnershipToDeclSpec(S, declSpecTy, ownership);
if (declSpecTy->isObjCObjectType() && hasIndirection)
return transferARCOwnershipToDeclaratorChunk(state, ownership, inner);
} else {
assert(chunk.Kind == DeclaratorChunk::Array ||
chunk.Kind == DeclaratorChunk::Reference);
return transferARCOwnershipToDeclSpec(S, declSpecTy, ownership);
}
}
TypeSourceInfo *Sema::GetTypeForDeclaratorCast(Declarator &D, QualType FromTy) {
TypeProcessingState state(*this, D);
TypeSourceInfo *ReturnTypeInfo = nullptr;
QualType declSpecTy = GetDeclSpecTypeForDeclarator(state, ReturnTypeInfo);
if (getLangOpts().ObjC) {
Qualifiers::ObjCLifetime ownership = Context.getInnerObjCOwnership(FromTy);
if (ownership != Qualifiers::OCL_None)
transferARCOwnership(state, declSpecTy, ownership);
}
return GetFullTypeForDeclarator(state, declSpecTy, ReturnTypeInfo);
}
static void fillAttributedTypeLoc(AttributedTypeLoc TL,
TypeProcessingState &State) {
TL.setAttr(State.takeAttrForAttributedType(TL.getTypePtr()));
}
namespace {
class TypeSpecLocFiller : public TypeLocVisitor<TypeSpecLocFiller> {
Sema &SemaRef;
ASTContext &Context;
TypeProcessingState &State;
const DeclSpec &DS;
public:
TypeSpecLocFiller(Sema &S, ASTContext &Context, TypeProcessingState &State,
const DeclSpec &DS)
: SemaRef(S), Context(Context), State(State), DS(DS) {}
void VisitAttributedTypeLoc(AttributedTypeLoc TL) {
Visit(TL.getModifiedLoc());
fillAttributedTypeLoc(TL, State);
}
void VisitBTFTagAttributedTypeLoc(BTFTagAttributedTypeLoc TL) {
Visit(TL.getWrappedLoc());
}
void VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) {
Visit(TL.getInnerLoc());
TL.setExpansionLoc(
State.getExpansionLocForMacroQualifiedType(TL.getTypePtr()));
}
void VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
Visit(TL.getUnqualifiedLoc());
}
// Allows filling in the pointee's type locations, e.g.,
// int __attr * __attr * __attr *p;
void VisitPointerTypeLoc(PointerTypeLoc TL) { Visit(TL.getNextTypeLoc()); }
void VisitTypedefTypeLoc(TypedefTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeLoc());
}
void VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeLoc());
// FIXME: We should have DS.getTypeSpecTypeEndLoc(), but that requires an
// additional field. What we have is good enough for displaying the
// location of the fix-it on the interface name.
TL.setNameEndLoc(DS.getEndLoc());
}
void VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) {
TypeSourceInfo *RepTInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &RepTInfo);
TL.copy(RepTInfo->getTypeLoc());
}
void VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) {
TypeSourceInfo *RepTInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &RepTInfo);
TL.copy(RepTInfo->getTypeLoc());
}
void VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc TL) {
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
// If we got no declarator info from previous Sema routines,
// just fill with the typespec loc.
if (!TInfo) {
TL.initialize(Context, DS.getTypeSpecTypeNameLoc());
return;
}
TypeLoc OldTL = TInfo->getTypeLoc();
if (TInfo->getType()->getAs<ElaboratedType>()) {
ElaboratedTypeLoc ElabTL = OldTL.castAs<ElaboratedTypeLoc>();
TemplateSpecializationTypeLoc NamedTL = ElabTL.getNamedTypeLoc()
.castAs<TemplateSpecializationTypeLoc>();
TL.copy(NamedTL);
} else {
TL.copy(OldTL.castAs<TemplateSpecializationTypeLoc>());
assert(TL.getRAngleLoc() == OldTL.castAs<TemplateSpecializationTypeLoc>().getRAngleLoc());
}
}
void VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) {
assert(DS.getTypeSpecType() == DeclSpec::TST_typeofExpr);
TL.setTypeofLoc(DS.getTypeSpecTypeLoc());
TL.setParensRange(DS.getTypeofParensRange());
}
void VisitTypeOfTypeLoc(TypeOfTypeLoc TL) {
assert(DS.getTypeSpecType() == DeclSpec::TST_typeofType);
TL.setTypeofLoc(DS.getTypeSpecTypeLoc());
TL.setParensRange(DS.getTypeofParensRange());
assert(DS.getRepAsType());
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
TL.setUnderlyingTInfo(TInfo);
}
void VisitDecltypeTypeLoc(DecltypeTypeLoc TL) {
assert(DS.getTypeSpecType() == DeclSpec::TST_decltype);
TL.setDecltypeLoc(DS.getTypeSpecTypeLoc());
TL.setRParenLoc(DS.getTypeofParensRange().getEnd());
}
void VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) {
// FIXME: This holds only because we only have one unary transform.
assert(DS.getTypeSpecType() == DeclSpec::TST_underlyingType);
TL.setKWLoc(DS.getTypeSpecTypeLoc());
TL.setParensRange(DS.getTypeofParensRange());
assert(DS.getRepAsType());
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
TL.setUnderlyingTInfo(TInfo);
}
void VisitBuiltinTypeLoc(BuiltinTypeLoc TL) {
// By default, use the source location of the type specifier.
TL.setBuiltinLoc(DS.getTypeSpecTypeLoc());
if (TL.needsExtraLocalData()) {
// Set info for the written builtin specifiers.
TL.getWrittenBuiltinSpecs() = DS.getWrittenBuiltinSpecs();
// Try to have a meaningful source location.
if (TL.getWrittenSignSpec() != TypeSpecifierSign::Unspecified)
TL.expandBuiltinRange(DS.getTypeSpecSignLoc());
if (TL.getWrittenWidthSpec() != TypeSpecifierWidth::Unspecified)
TL.expandBuiltinRange(DS.getTypeSpecWidthRange());
}
}
void VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) {
ElaboratedTypeKeyword Keyword
= TypeWithKeyword::getKeywordForTypeSpec(DS.getTypeSpecType());
if (DS.getTypeSpecType() == TST_typename) {
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
if (TInfo) {
TL.copy(TInfo->getTypeLoc().castAs<ElaboratedTypeLoc>());
return;
}
}
TL.setElaboratedKeywordLoc(Keyword != ETK_None
? DS.getTypeSpecTypeLoc()
: SourceLocation());
const CXXScopeSpec& SS = DS.getTypeSpecScope();
TL.setQualifierLoc(SS.getWithLocInContext(Context));
Visit(TL.getNextTypeLoc().getUnqualifiedLoc());
}
void VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
assert(DS.getTypeSpecType() == TST_typename);
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
assert(TInfo);
TL.copy(TInfo->getTypeLoc().castAs<DependentNameTypeLoc>());
}
void VisitDependentTemplateSpecializationTypeLoc(
DependentTemplateSpecializationTypeLoc TL) {
assert(DS.getTypeSpecType() == TST_typename);
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
assert(TInfo);
TL.copy(
TInfo->getTypeLoc().castAs<DependentTemplateSpecializationTypeLoc>());
}
void VisitAutoTypeLoc(AutoTypeLoc TL) {
assert(DS.getTypeSpecType() == TST_auto ||
DS.getTypeSpecType() == TST_decltype_auto ||
DS.getTypeSpecType() == TST_auto_type ||
DS.getTypeSpecType() == TST_unspecified);
TL.setNameLoc(DS.getTypeSpecTypeLoc());
if (DS.getTypeSpecType() == TST_decltype_auto)
TL.setRParenLoc(DS.getTypeofParensRange().getEnd());
if (!DS.isConstrainedAuto())
return;
TemplateIdAnnotation *TemplateId = DS.getRepAsTemplateId();
if (!TemplateId)
return;
if (DS.getTypeSpecScope().isNotEmpty())
TL.setNestedNameSpecifierLoc(
DS.getTypeSpecScope().getWithLocInContext(Context));
else
TL.setNestedNameSpecifierLoc(NestedNameSpecifierLoc());
TL.setTemplateKWLoc(TemplateId->TemplateKWLoc);
TL.setConceptNameLoc(TemplateId->TemplateNameLoc);
TL.setFoundDecl(nullptr);
TL.setLAngleLoc(TemplateId->LAngleLoc);
TL.setRAngleLoc(TemplateId->RAngleLoc);
if (TemplateId->NumArgs == 0)
return;
TemplateArgumentListInfo TemplateArgsInfo;
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
SemaRef.translateTemplateArguments(TemplateArgsPtr, TemplateArgsInfo);
for (unsigned I = 0; I < TemplateId->NumArgs; ++I)
TL.setArgLocInfo(I, TemplateArgsInfo.arguments()[I].getLocInfo());
}
void VisitTagTypeLoc(TagTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeNameLoc());
}
void VisitAtomicTypeLoc(AtomicTypeLoc TL) {
// An AtomicTypeLoc can come from either an _Atomic(...) type specifier
// or an _Atomic qualifier.
if (DS.getTypeSpecType() == DeclSpec::TST_atomic) {
TL.setKWLoc(DS.getTypeSpecTypeLoc());
TL.setParensRange(DS.getTypeofParensRange());
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
assert(TInfo);
TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc());
} else {
TL.setKWLoc(DS.getAtomicSpecLoc());
// No parens, to indicate this was spelled as an _Atomic qualifier.
TL.setParensRange(SourceRange());
Visit(TL.getValueLoc());
}
}
void VisitPipeTypeLoc(PipeTypeLoc TL) {
TL.setKWLoc(DS.getTypeSpecTypeLoc());
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc());
}
void VisitExtIntTypeLoc(BitIntTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeLoc());
}
void VisitDependentExtIntTypeLoc(DependentBitIntTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeLoc());
}
void VisitTypeLoc(TypeLoc TL) {
// FIXME: add other typespec types and change this to an assert.
TL.initialize(Context, DS.getTypeSpecTypeLoc());
}
};
class DeclaratorLocFiller : public TypeLocVisitor<DeclaratorLocFiller> {
ASTContext &Context;
TypeProcessingState &State;
const DeclaratorChunk &Chunk;
public:
DeclaratorLocFiller(ASTContext &Context, TypeProcessingState &State,
const DeclaratorChunk &Chunk)
: Context(Context), State(State), Chunk(Chunk) {}
void VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
llvm_unreachable("qualified type locs not expected here!");
}
void VisitDecayedTypeLoc(DecayedTypeLoc TL) {
llvm_unreachable("decayed type locs not expected here!");
}
void VisitAttributedTypeLoc(AttributedTypeLoc TL) {
fillAttributedTypeLoc(TL, State);
}
void VisitBTFTagAttributedTypeLoc(BTFTagAttributedTypeLoc TL) {
// nothing
}
void VisitAdjustedTypeLoc(AdjustedTypeLoc TL) {
// nothing
}
void VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::BlockPointer);
TL.setCaretLoc(Chunk.Loc);
}
void VisitPointerTypeLoc(PointerTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Pointer);
TL.setStarLoc(Chunk.Loc);
}
void VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Pointer);
TL.setStarLoc(Chunk.Loc);
}
void VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::MemberPointer);
const CXXScopeSpec& SS = Chunk.Mem.Scope();
NestedNameSpecifierLoc NNSLoc = SS.getWithLocInContext(Context);
const Type* ClsTy = TL.getClass();
QualType ClsQT = QualType(ClsTy, 0);
TypeSourceInfo *ClsTInfo = Context.CreateTypeSourceInfo(ClsQT, 0);
// Now copy source location info into the type loc component.
TypeLoc ClsTL = ClsTInfo->getTypeLoc();
switch (NNSLoc.getNestedNameSpecifier()->getKind()) {
case NestedNameSpecifier::Identifier:
assert(isa<DependentNameType>(ClsTy) && "Unexpected TypeLoc");
{
DependentNameTypeLoc DNTLoc = ClsTL.castAs<DependentNameTypeLoc>();
DNTLoc.setElaboratedKeywordLoc(SourceLocation());
DNTLoc.setQualifierLoc(NNSLoc.getPrefix());
DNTLoc.setNameLoc(NNSLoc.getLocalBeginLoc());
}
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
if (isa<ElaboratedType>(ClsTy)) {
ElaboratedTypeLoc ETLoc = ClsTL.castAs<ElaboratedTypeLoc>();
ETLoc.setElaboratedKeywordLoc(SourceLocation());
ETLoc.setQualifierLoc(NNSLoc.getPrefix());
TypeLoc NamedTL = ETLoc.getNamedTypeLoc();
NamedTL.initializeFullCopy(NNSLoc.getTypeLoc());
} else {
ClsTL.initializeFullCopy(NNSLoc.getTypeLoc());
}
break;
case NestedNameSpecifier::Namespace:
case NestedNameSpecifier::NamespaceAlias:
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
llvm_unreachable("Nested-name-specifier must name a type");
}
// Finally fill in MemberPointerLocInfo fields.
TL.setStarLoc(Chunk.Mem.StarLoc);
TL.setClassTInfo(ClsTInfo);
}
void VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Reference);
// 'Amp' is misleading: this might originally have been
// spelled with AmpAmp.
TL.setAmpLoc(Chunk.Loc);
}
void VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Reference);
assert(!Chunk.Ref.LValueRef);
TL.setAmpAmpLoc(Chunk.Loc);
}
void VisitArrayTypeLoc(ArrayTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Array);
TL.setLBracketLoc(Chunk.Loc);
TL.setRBracketLoc(Chunk.EndLoc);
TL.setSizeExpr(static_cast<Expr*>(Chunk.Arr.NumElts));
}
void VisitFunctionTypeLoc(FunctionTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Function);
TL.setLocalRangeBegin(Chunk.Loc);
TL.setLocalRangeEnd(Chunk.EndLoc);
const DeclaratorChunk::FunctionTypeInfo &FTI = Chunk.Fun;
TL.setLParenLoc(FTI.getLParenLoc());
TL.setRParenLoc(FTI.getRParenLoc());
for (unsigned i = 0, e = TL.getNumParams(), tpi = 0; i != e; ++i) {
ParmVarDecl *Param = cast<ParmVarDecl>(FTI.Params[i].Param);
TL.setParam(tpi++, Param);
}
TL.setExceptionSpecRange(FTI.getExceptionSpecRange());
}
void VisitParenTypeLoc(ParenTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Paren);
TL.setLParenLoc(Chunk.Loc);
TL.setRParenLoc(Chunk.EndLoc);
}
void VisitPipeTypeLoc(PipeTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Pipe);
TL.setKWLoc(Chunk.Loc);
}
void VisitBitIntTypeLoc(BitIntTypeLoc TL) {
TL.setNameLoc(Chunk.Loc);
}
void VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) {
TL.setExpansionLoc(Chunk.Loc);
}
void VisitVectorTypeLoc(VectorTypeLoc TL) { TL.setNameLoc(Chunk.Loc); }
void VisitDependentVectorTypeLoc(DependentVectorTypeLoc TL) {
TL.setNameLoc(Chunk.Loc);
}
void VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) {
TL.setNameLoc(Chunk.Loc);
}
void
VisitDependentSizedExtVectorTypeLoc(DependentSizedExtVectorTypeLoc TL) {
TL.setNameLoc(Chunk.Loc);
}
void VisitTypeLoc(TypeLoc TL) {
llvm_unreachable("unsupported TypeLoc kind in declarator!");
}
};
} // end anonymous namespace
static void fillAtomicQualLoc(AtomicTypeLoc ATL, const DeclaratorChunk &Chunk) {
SourceLocation Loc;
switch (Chunk.Kind) {
case DeclaratorChunk::Function:
case DeclaratorChunk::Array:
case DeclaratorChunk::Paren:
case DeclaratorChunk::Pipe:
llvm_unreachable("cannot be _Atomic qualified");
case DeclaratorChunk::Pointer:
Loc = Chunk.Ptr.AtomicQualLoc;
break;
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
// FIXME: Provide a source location for the _Atomic keyword.
break;
}
ATL.setKWLoc(Loc);
ATL.setParensRange(SourceRange());
}
static void
fillDependentAddressSpaceTypeLoc(DependentAddressSpaceTypeLoc DASTL,
const ParsedAttributesView &Attrs) {
for (const ParsedAttr &AL : Attrs) {
if (AL.getKind() == ParsedAttr::AT_AddressSpace) {
DASTL.setAttrNameLoc(AL.getLoc());
DASTL.setAttrExprOperand(AL.getArgAsExpr(0));
DASTL.setAttrOperandParensRange(SourceRange());
return;
}
}
llvm_unreachable(
"no address_space attribute found at the expected location!");
}
static void fillMatrixTypeLoc(MatrixTypeLoc MTL,
const ParsedAttributesView &Attrs) {
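// e.g. 'typedef float m4x4_t __attribute__((matrix_type(4, 4)));' supplies
// the row and column operands recorded below.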
for (const ParsedAttr &AL : Attrs) {
if (AL.getKind() == ParsedAttr::AT_MatrixType) {
MTL.setAttrNameLoc(AL.getLoc());
MTL.setAttrRowOperand(AL.getArgAsExpr(0));
MTL.setAttrColumnOperand(AL.getArgAsExpr(1));
MTL.setAttrOperandParensRange(SourceRange());
return;
}
}
llvm_unreachable("no matrix_type attribute found at the expected location!");
}
/// Create and instantiate a TypeSourceInfo with type source information.
///
/// \param T QualType referring to the type as written in source code.
///
/// \param ReturnTypeInfo For declarators whose return type does not show
/// up in the normal place in the declaration specifiers (such as a C++
/// conversion function), this pointer will refer to a type source information
/// for that return type.
static TypeSourceInfo *
GetTypeSourceInfoForDeclarator(TypeProcessingState &State,
QualType T, TypeSourceInfo *ReturnTypeInfo) {
Sema &S = State.getSema();
Declarator &D = State.getDeclarator();
TypeSourceInfo *TInfo = S.Context.CreateTypeSourceInfo(T);
UnqualTypeLoc CurrTL = TInfo->getTypeLoc().getUnqualifiedLoc();
// Handle parameter packs whose type is a pack expansion.
if (isa<PackExpansionType>(T)) {
CurrTL.castAs<PackExpansionTypeLoc>().setEllipsisLoc(D.getEllipsisLoc());
CurrTL = CurrTL.getNextTypeLoc().getUnqualifiedLoc();
}
for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
// An AtomicTypeLoc might be produced by an atomic qualifier in this
// declarator chunk.
if (AtomicTypeLoc ATL = CurrTL.getAs<AtomicTypeLoc>()) {
fillAtomicQualLoc(ATL, D.getTypeObject(i));
CurrTL = ATL.getValueLoc().getUnqualifiedLoc();
}
while (MacroQualifiedTypeLoc TL = CurrTL.getAs<MacroQualifiedTypeLoc>()) {
TL.setExpansionLoc(
State.getExpansionLocForMacroQualifiedType(TL.getTypePtr()));
CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
}
while (AttributedTypeLoc TL = CurrTL.getAs<AttributedTypeLoc>()) {
fillAttributedTypeLoc(TL, State);
CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
}
+ while (BTFTagAttributedTypeLoc TL = CurrTL.getAs<BTFTagAttributedTypeLoc>())
+ CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
+
while (DependentAddressSpaceTypeLoc TL =
CurrTL.getAs<DependentAddressSpaceTypeLoc>()) {
fillDependentAddressSpaceTypeLoc(TL, D.getTypeObject(i).getAttrs());
CurrTL = TL.getPointeeTypeLoc().getUnqualifiedLoc();
}
if (MatrixTypeLoc TL = CurrTL.getAs<MatrixTypeLoc>())
fillMatrixTypeLoc(TL, D.getTypeObject(i).getAttrs());
// FIXME: Ordering here?
while (AdjustedTypeLoc TL = CurrTL.getAs<AdjustedTypeLoc>())
CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
DeclaratorLocFiller(S.Context, State, D.getTypeObject(i)).Visit(CurrTL);
CurrTL = CurrTL.getNextTypeLoc().getUnqualifiedLoc();
}
// If we have different source information for the return type, use
// that. This really only applies to C++ conversion functions.
if (ReturnTypeInfo) {
TypeLoc TL = ReturnTypeInfo->getTypeLoc();
assert(TL.getFullDataSize() == CurrTL.getFullDataSize());
memcpy(CurrTL.getOpaqueData(), TL.getOpaqueData(), TL.getFullDataSize());
} else {
TypeSpecLocFiller(S, S.Context, State, D.getDeclSpec()).Visit(CurrTL);
}
return TInfo;
}
/// Create a LocInfoType to hold the given QualType and TypeSourceInfo.
ParsedType Sema::CreateParsedType(QualType T, TypeSourceInfo *TInfo) {
// FIXME: LocInfoTypes are "transient", only needed for passing to/from Parser
// and Sema during declaration parsing. Try deallocating/caching them when
// it's appropriate, instead of allocating them and keeping them around.
LocInfoType *LocT = (LocInfoType*)BumpAlloc.Allocate(sizeof(LocInfoType),
TypeAlignment);
new (LocT) LocInfoType(T, TInfo);
assert(LocT->getTypeClass() != T->getTypeClass() &&
"LocInfoType's TypeClass conflicts with an existing Type class");
return ParsedType::make(QualType(LocT, 0));
}
void LocInfoType::getAsStringInternal(std::string &Str,
const PrintingPolicy &Policy) const {
llvm_unreachable("LocInfoType leaked into the type system; an opaque TypeTy*"
" was used directly instead of getting the QualType through"
" GetTypeFromParser");
}
TypeResult Sema::ActOnTypeName(Scope *S, Declarator &D) {
// C99 6.7.6: Type names have no identifier. This is already validated by
// the parser.
assert(D.getIdentifier() == nullptr &&
"Type name should have no identifier!");
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
QualType T = TInfo->getType();
if (D.isInvalidType())
return true;
// Make sure there are no unused decl attributes on the declarator.
// We don't want to do this for ObjC parameters because we're going
// to apply them to the actual parameter declaration.
// Likewise, we don't want to do this for alias declarations, because
// we are actually going to build a declaration from this eventually.
if (D.getContext() != DeclaratorContext::ObjCParameter &&
D.getContext() != DeclaratorContext::AliasDecl &&
D.getContext() != DeclaratorContext::AliasTemplate)
checkUnusedDeclAttributes(D);
if (getLangOpts().CPlusPlus) {
// Check that there are no default arguments (C++ only).
CheckExtraCXXDefaultArguments(D);
}
return CreateParsedType(T, TInfo);
}
ParsedType Sema::ActOnObjCInstanceType(SourceLocation Loc) {
QualType T = Context.getObjCInstanceType();
TypeSourceInfo *TInfo = Context.getTrivialTypeSourceInfo(T, Loc);
return CreateParsedType(T, TInfo);
}
//===----------------------------------------------------------------------===//
// Type Attribute Processing
//===----------------------------------------------------------------------===//
/// Build an AddressSpace index from a constant expression and diagnose any
/// errors related to invalid address_space arguments. Returns true if the
/// AddressSpace index was successfully built.
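/// e.g. 'int __attribute__((address_space(1))) *p;' yields the LangAS value
/// corresponding to target address space 1.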
static bool BuildAddressSpaceIndex(Sema &S, LangAS &ASIdx,
const Expr *AddrSpace,
SourceLocation AttrLoc) {
if (!AddrSpace->isValueDependent()) {
Optional<llvm::APSInt> OptAddrSpace =
AddrSpace->getIntegerConstantExpr(S.Context);
if (!OptAddrSpace) {
S.Diag(AttrLoc, diag::err_attribute_argument_type)
<< "'address_space'" << AANT_ArgumentIntegerConstant
<< AddrSpace->getSourceRange();
return false;
}
llvm::APSInt &addrSpace = *OptAddrSpace;
// Bounds checking.
if (addrSpace.isSigned()) {
if (addrSpace.isNegative()) {
S.Diag(AttrLoc, diag::err_attribute_address_space_negative)
<< AddrSpace->getSourceRange();
return false;
}
addrSpace.setIsSigned(false);
}
llvm::APSInt max(addrSpace.getBitWidth());
max =
Qualifiers::MaxAddressSpace - (unsigned)LangAS::FirstTargetAddressSpace;
if (addrSpace > max) {
S.Diag(AttrLoc, diag::err_attribute_address_space_too_high)
<< (unsigned)max.getZExtValue() << AddrSpace->getSourceRange();
return false;
}
ASIdx =
getLangASFromTargetAS(static_cast<unsigned>(addrSpace.getZExtValue()));
return true;
}
// Default value for DependentAddressSpaceTypes
ASIdx = LangAS::Default;
return true;
}
/// BuildAddressSpaceAttr - Builds a DependentAddressSpaceType if the
/// expression is still value-dependent; otherwise it applies the appropriate
/// address space to the type. This function allows dependent template
/// parameters to be used in conjunction with the address_space attribute.
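///
/// For illustration (hypothetical user code, not from this file):
///   template <int AS>
///   void f(int __attribute__((address_space(AS))) *ptr);
/// Until 'AS' is known, the pointee is wrapped in a DependentAddressSpaceType.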
QualType Sema::BuildAddressSpaceAttr(QualType &T, LangAS ASIdx, Expr *AddrSpace,
SourceLocation AttrLoc) {
if (!AddrSpace->isValueDependent()) {
if (DiagnoseMultipleAddrSpaceAttributes(*this, T.getAddressSpace(), ASIdx,
AttrLoc))
return QualType();
return Context.getAddrSpaceQualType(T, ASIdx);
}
// A check with similar intent to verifying that a type does not already have
// an address space, but for dependent types: if the current type is already
// a DependentAddressSpaceType, it is already lined up to receive another
// address space, and we cannot apply multiple address spaces to a single
// pointer indirection.
if (T->getAs<DependentAddressSpaceType>()) {
Diag(AttrLoc, diag::err_attribute_address_multiple_qualifiers);
return QualType();
}
return Context.getDependentAddressSpaceType(T, AddrSpace, AttrLoc);
}
QualType Sema::BuildAddressSpaceAttr(QualType &T, Expr *AddrSpace,
SourceLocation AttrLoc) {
LangAS ASIdx;
if (!BuildAddressSpaceIndex(*this, ASIdx, AddrSpace, AttrLoc))
return QualType();
return BuildAddressSpaceAttr(T, ASIdx, AddrSpace, AttrLoc);
}
static void HandleBTFTypeTagAttribute(QualType &Type, const ParsedAttr &Attr,
TypeProcessingState &State) {
Sema &S = State.getSema();
// Check the number of attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
<< Attr << 1;
Attr.setInvalid();
return;
}
// Ensure the argument is a string.
auto *StrLiteral = dyn_cast<StringLiteral>(Attr.getArgAsExpr(0));
if (!StrLiteral) {
S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
<< Attr << AANT_ArgumentString;
Attr.setInvalid();
return;
}
ASTContext &Ctx = S.Context;
StringRef BTFTypeTag = StrLiteral->getString();
Type = State.getBTFTagAttributedType(
::new (Ctx) BTFTypeTagAttr(Ctx, Attr, BTFTypeTag), Type);
}
/// HandleAddressSpaceTypeAttribute - Process an address_space attribute on the
/// specified type. The attribute contains 1 argument, the id of the address
/// space for the type.
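///
/// For illustration (hypothetical user code, not from this file):
///   typedef int __attribute__((address_space(1))) as1_int; // numeric form
///   __global int *p; // OpenCL keyword form, handled by the else branch below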
static void HandleAddressSpaceTypeAttribute(QualType &Type,
const ParsedAttr &Attr,
TypeProcessingState &State) {
Sema &S = State.getSema();
// ISO/IEC TR 18037 S5.3 (amending C99 6.7.3): "A function type shall not be
// qualified by an address-space qualifier."
if (Type->isFunctionType()) {
S.Diag(Attr.getLoc(), diag::err_attribute_address_function_type);
Attr.setInvalid();
return;
}
LangAS ASIdx;
if (Attr.getKind() == ParsedAttr::AT_AddressSpace) {
// Check the attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << Attr
<< 1;
Attr.setInvalid();
return;
}
Expr *ASArgExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
LangAS ASIdx;
if (!BuildAddressSpaceIndex(S, ASIdx, ASArgExpr, Attr.getLoc())) {
Attr.setInvalid();
return;
}
ASTContext &Ctx = S.Context;
auto *ASAttr =
::new (Ctx) AddressSpaceAttr(Ctx, Attr, static_cast<unsigned>(ASIdx));
// If the expression is not value-dependent (not templated), then we can
// apply the address space qualifiers just to the equivalent type.
// Otherwise, we make an AttributedType whose modified and equivalent
// types are the same, and wrap it in a DependentAddressSpaceType. When this
// dependent type is resolved, the qualifier is added to the equivalent type
// later.
QualType T;
if (!ASArgExpr->isValueDependent()) {
QualType EquivType =
S.BuildAddressSpaceAttr(Type, ASIdx, ASArgExpr, Attr.getLoc());
if (EquivType.isNull()) {
Attr.setInvalid();
return;
}
T = State.getAttributedType(ASAttr, Type, EquivType);
} else {
T = State.getAttributedType(ASAttr, Type, Type);
T = S.BuildAddressSpaceAttr(T, ASIdx, ASArgExpr, Attr.getLoc());
}
if (!T.isNull())
Type = T;
else
Attr.setInvalid();
} else {
// The keyword-based type attributes imply which address space to use.
ASIdx = S.getLangOpts().SYCLIsDevice ? Attr.asSYCLLangAS()
: Attr.asOpenCLLangAS();
if (ASIdx == LangAS::Default)
llvm_unreachable("Invalid address space");
if (DiagnoseMultipleAddrSpaceAttributes(S, Type.getAddressSpace(), ASIdx,
Attr.getLoc())) {
Attr.setInvalid();
return;
}
Type = S.Context.getAddrSpaceQualType(Type, ASIdx);
}
}
/// handleObjCOwnershipTypeAttr - Process an objc_ownership
/// attribute on the specified type.
///
/// Returns 'true' if the attribute was handled.
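///
/// For illustration (hypothetical user code, not from this file):
///   id __attribute__((objc_ownership(weak))) obj; // i.e. '__weak id obj;'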
static bool handleObjCOwnershipTypeAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType &type) {
bool NonObjCPointer = false;
if (!type->isDependentType() && !type->isUndeducedType()) {
if (const PointerType *ptr = type->getAs<PointerType>()) {
QualType pointee = ptr->getPointeeType();
if (pointee->isObjCRetainableType() || pointee->isPointerType())
return false;
// It is important not to lose the source info that there was an attribute
// applied to a non-ObjC pointer. We will create an attributed type, but
// its type will be the same as the original type.
NonObjCPointer = true;
} else if (!type->isObjCRetainableType()) {
return false;
}
// Don't accept an ownership attribute in the declspec if it would
// just be the return type of a block pointer.
if (state.isProcessingDeclSpec()) {
Declarator &D = state.getDeclarator();
if (maybeMovePastReturnType(D, D.getNumTypeObjects(),
/*onlyBlockPointers=*/true))
return false;
}
}
Sema &S = state.getSema();
SourceLocation AttrLoc = attr.getLoc();
if (AttrLoc.isMacroID())
AttrLoc =
S.getSourceManager().getImmediateExpansionRange(AttrLoc).getBegin();
if (!attr.isArgIdent(0)) {
S.Diag(AttrLoc, diag::err_attribute_argument_type) << attr
<< AANT_ArgumentString;
attr.setInvalid();
return true;
}
IdentifierInfo *II = attr.getArgAsIdent(0)->Ident;
Qualifiers::ObjCLifetime lifetime;
if (II->isStr("none"))
lifetime = Qualifiers::OCL_ExplicitNone;
else if (II->isStr("strong"))
lifetime = Qualifiers::OCL_Strong;
else if (II->isStr("weak"))
lifetime = Qualifiers::OCL_Weak;
else if (II->isStr("autoreleasing"))
lifetime = Qualifiers::OCL_Autoreleasing;
else {
S.Diag(AttrLoc, diag::warn_attribute_type_not_supported) << attr << II;
attr.setInvalid();
return true;
}
// Just ignore lifetime attributes other than __weak and __unsafe_unretained
// outside of ARC mode.
if (!S.getLangOpts().ObjCAutoRefCount &&
lifetime != Qualifiers::OCL_Weak &&
lifetime != Qualifiers::OCL_ExplicitNone) {
return true;
}
SplitQualType underlyingType = type.split();
// Check for redundant/conflicting ownership qualifiers.
if (Qualifiers::ObjCLifetime previousLifetime
= type.getQualifiers().getObjCLifetime()) {
// If it's written directly, that's an error.
if (S.Context.hasDirectOwnershipQualifier(type)) {
S.Diag(AttrLoc, diag::err_attr_objc_ownership_redundant)
<< type;
return true;
}
// Otherwise, if the qualifiers actually conflict, pull sugar off
// and remove the ObjCLifetime qualifiers.
if (previousLifetime != lifetime) {
// It's possible to have multiple local ObjCLifetime qualifiers. We
// can't stop after we reach a type that is directly qualified.
const Type *prevTy = nullptr;
while (!prevTy || prevTy != underlyingType.Ty) {
prevTy = underlyingType.Ty;
underlyingType = underlyingType.getSingleStepDesugaredType();
}
underlyingType.Quals.removeObjCLifetime();
}
}
underlyingType.Quals.addObjCLifetime(lifetime);
if (NonObjCPointer) {
StringRef name = attr.getAttrName()->getName();
switch (lifetime) {
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
break;
case Qualifiers::OCL_Strong: name = "__strong"; break;
case Qualifiers::OCL_Weak: name = "__weak"; break;
case Qualifiers::OCL_Autoreleasing: name = "__autoreleasing"; break;
}
S.Diag(AttrLoc, diag::warn_type_attribute_wrong_type) << name
<< TDS_ObjCObjOrBlock << type;
}
// Don't actually add the __unsafe_unretained qualifier in non-ARC files,
// because having both 'T' and '__unsafe_unretained T' exist in the type
// system causes unfortunate widespread consistency problems. (For example,
// they're not considered compatible types, and we mangle them identically
// as template arguments.) These problems are all individually fixable,
// but it's easier to just not add the qualifier and instead sniff it out
// in specific places using isObjCInertUnsafeUnretainedType().
//
// Doing this does mean we miss some trivial consistency checks that
// would've triggered in ARC, but that's better than trying to solve all
// the coexistence problems with __unsafe_unretained.
if (!S.getLangOpts().ObjCAutoRefCount &&
lifetime == Qualifiers::OCL_ExplicitNone) {
type = state.getAttributedType(
createSimpleAttr<ObjCInertUnsafeUnretainedAttr>(S.Context, attr),
type, type);
return true;
}
QualType origType = type;
if (!NonObjCPointer)
type = S.Context.getQualifiedType(underlyingType);
// If we have a valid source location for the attribute, use an
// AttributedType instead.
if (AttrLoc.isValid()) {
type = state.getAttributedType(::new (S.Context)
ObjCOwnershipAttr(S.Context, attr, II),
origType, type);
}
auto diagnoseOrDelay = [](Sema &S, SourceLocation loc,
unsigned diagnostic, QualType type) {
if (S.DelayedDiagnostics.shouldDelayDiagnostics()) {
S.DelayedDiagnostics.add(
sema::DelayedDiagnostic::makeForbiddenType(
S.getSourceManager().getExpansionLoc(loc),
diagnostic, type, /*ignored*/ 0));
} else {
S.Diag(loc, diagnostic);
}
};
// Sometimes, __weak isn't allowed.
if (lifetime == Qualifiers::OCL_Weak &&
!S.getLangOpts().ObjCWeak && !NonObjCPointer) {
// Use a specialized diagnostic if the runtime just doesn't support them.
unsigned diagnostic =
(S.getLangOpts().ObjCWeakRuntime ? diag::err_arc_weak_disabled
: diag::err_arc_weak_no_runtime);
// In any case, delay the diagnostic until we know what we're parsing.
diagnoseOrDelay(S, AttrLoc, diagnostic, type);
attr.setInvalid();
return true;
}
// Forbid __weak for class objects marked as
// objc_arc_weak_reference_unavailable
if (lifetime == Qualifiers::OCL_Weak) {
if (const ObjCObjectPointerType *ObjT =
type->getAs<ObjCObjectPointerType>()) {
if (ObjCInterfaceDecl *Class = ObjT->getInterfaceDecl()) {
if (Class->isArcWeakrefUnavailable()) {
S.Diag(AttrLoc, diag::err_arc_unsupported_weak_class);
S.Diag(ObjT->getInterfaceDecl()->getLocation(),
diag::note_class_declared);
}
}
}
}
return true;
}
/// handleObjCGCTypeAttr - Process the __attribute__((objc_gc)) type
/// attribute on the specified type. Returns true to indicate that
/// the attribute was handled, false to indicate that the type does
/// not permit the attribute.
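///
/// For illustration (hypothetical user code, not from this file):
///   id __attribute__((objc_gc(weak))) obj; // GC-mode weak reference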
static bool handleObjCGCTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
QualType &type) {
Sema &S = state.getSema();
// Delay if this isn't some kind of pointer.
if (!type->isPointerType() &&
!type->isObjCObjectPointerType() &&
!type->isBlockPointerType())
return false;
if (type.getObjCGCAttr() != Qualifiers::GCNone) {
S.Diag(attr.getLoc(), diag::err_attribute_multiple_objc_gc);
attr.setInvalid();
return true;
}
// Check the attribute arguments.
if (!attr.isArgIdent(0)) {
S.Diag(attr.getLoc(), diag::err_attribute_argument_type)
<< attr << AANT_ArgumentString;
attr.setInvalid();
return true;
}
Qualifiers::GC GCAttr;
if (attr.getNumArgs() > 1) {
S.Diag(attr.getLoc(), diag::err_attribute_wrong_number_arguments) << attr
<< 1;
attr.setInvalid();
return true;
}
IdentifierInfo *II = attr.getArgAsIdent(0)->Ident;
if (II->isStr("weak"))
GCAttr = Qualifiers::Weak;
else if (II->isStr("strong"))
GCAttr = Qualifiers::Strong;
else {
S.Diag(attr.getLoc(), diag::warn_attribute_type_not_supported)
<< attr << II;
attr.setInvalid();
return true;
}
QualType origType = type;
type = S.Context.getObjCGCQualType(origType, GCAttr);
// Make an attributed type to preserve the source information.
if (attr.getLoc().isValid())
type = state.getAttributedType(
::new (S.Context) ObjCGCAttr(S.Context, attr, II), origType, type);
return true;
}
namespace {
/// A helper class to unwrap a type down to a function for the
/// purposes of applying attributes there.
///
/// Use:
/// FunctionTypeUnwrapper unwrapped(SemaRef, T);
/// if (unwrapped.isFunctionType()) {
/// const FunctionType *fn = unwrapped.get();
/// // change fn somehow
/// T = unwrapped.wrap(fn);
/// }
struct FunctionTypeUnwrapper {
enum WrapKind {
Desugar,
Attributed,
Parens,
Array,
Pointer,
BlockPointer,
Reference,
MemberPointer,
MacroQualified,
};
QualType Original;
const FunctionType *Fn;
SmallVector<unsigned char /*WrapKind*/, 8> Stack;
FunctionTypeUnwrapper(Sema &S, QualType T) : Original(T) {
while (true) {
const Type *Ty = T.getTypePtr();
if (isa<FunctionType>(Ty)) {
Fn = cast<FunctionType>(Ty);
return;
} else if (isa<ParenType>(Ty)) {
T = cast<ParenType>(Ty)->getInnerType();
Stack.push_back(Parens);
} else if (isa<ConstantArrayType>(Ty) || isa<VariableArrayType>(Ty) ||
isa<IncompleteArrayType>(Ty)) {
T = cast<ArrayType>(Ty)->getElementType();
Stack.push_back(Array);
} else if (isa<PointerType>(Ty)) {
T = cast<PointerType>(Ty)->getPointeeType();
Stack.push_back(Pointer);
} else if (isa<BlockPointerType>(Ty)) {
T = cast<BlockPointerType>(Ty)->getPointeeType();
Stack.push_back(BlockPointer);
} else if (isa<MemberPointerType>(Ty)) {
T = cast<MemberPointerType>(Ty)->getPointeeType();
Stack.push_back(MemberPointer);
} else if (isa<ReferenceType>(Ty)) {
T = cast<ReferenceType>(Ty)->getPointeeType();
Stack.push_back(Reference);
} else if (isa<AttributedType>(Ty)) {
T = cast<AttributedType>(Ty)->getEquivalentType();
Stack.push_back(Attributed);
} else if (isa<MacroQualifiedType>(Ty)) {
T = cast<MacroQualifiedType>(Ty)->getUnderlyingType();
Stack.push_back(MacroQualified);
} else {
const Type *DTy = Ty->getUnqualifiedDesugaredType();
if (Ty == DTy) {
Fn = nullptr;
return;
}
T = QualType(DTy, 0);
Stack.push_back(Desugar);
}
}
}
bool isFunctionType() const { return (Fn != nullptr); }
const FunctionType *get() const { return Fn; }
QualType wrap(Sema &S, const FunctionType *New) {
// If T wasn't modified from the unwrapped type, do nothing.
if (New == get()) return Original;
Fn = New;
return wrap(S.Context, Original, 0);
}
private:
QualType wrap(ASTContext &C, QualType Old, unsigned I) {
if (I == Stack.size())
return C.getQualifiedType(Fn, Old.getQualifiers());
// Build up the inner type, applying the qualifiers from the old
// type to the new type.
SplitQualType SplitOld = Old.split();
// As a special case, tail-recurse if there are no qualifiers.
if (SplitOld.Quals.empty())
return wrap(C, SplitOld.Ty, I);
return C.getQualifiedType(wrap(C, SplitOld.Ty, I), SplitOld.Quals);
}
QualType wrap(ASTContext &C, const Type *Old, unsigned I) {
if (I == Stack.size()) return QualType(Fn, 0);
switch (static_cast<WrapKind>(Stack[I++])) {
case Desugar:
// This is the point at which we potentially lose source
// information.
return wrap(C, Old->getUnqualifiedDesugaredType(), I);
case Attributed:
return wrap(C, cast<AttributedType>(Old)->getEquivalentType(), I);
case Parens: {
QualType New = wrap(C, cast<ParenType>(Old)->getInnerType(), I);
return C.getParenType(New);
}
case MacroQualified:
return wrap(C, cast<MacroQualifiedType>(Old)->getUnderlyingType(), I);
case Array: {
if (const auto *CAT = dyn_cast<ConstantArrayType>(Old)) {
QualType New = wrap(C, CAT->getElementType(), I);
return C.getConstantArrayType(New, CAT->getSize(), CAT->getSizeExpr(),
CAT->getSizeModifier(),
CAT->getIndexTypeCVRQualifiers());
}
if (const auto *VAT = dyn_cast<VariableArrayType>(Old)) {
QualType New = wrap(C, VAT->getElementType(), I);
return C.getVariableArrayType(
New, VAT->getSizeExpr(), VAT->getSizeModifier(),
VAT->getIndexTypeCVRQualifiers(), VAT->getBracketsRange());
}
const auto *IAT = cast<IncompleteArrayType>(Old);
QualType New = wrap(C, IAT->getElementType(), I);
return C.getIncompleteArrayType(New, IAT->getSizeModifier(),
IAT->getIndexTypeCVRQualifiers());
}
case Pointer: {
QualType New = wrap(C, cast<PointerType>(Old)->getPointeeType(), I);
return C.getPointerType(New);
}
case BlockPointer: {
QualType New = wrap(C, cast<BlockPointerType>(Old)->getPointeeType(),I);
return C.getBlockPointerType(New);
}
case MemberPointer: {
const MemberPointerType *OldMPT = cast<MemberPointerType>(Old);
QualType New = wrap(C, OldMPT->getPointeeType(), I);
return C.getMemberPointerType(New, OldMPT->getClass());
}
case Reference: {
const ReferenceType *OldRef = cast<ReferenceType>(Old);
QualType New = wrap(C, OldRef->getPointeeType(), I);
if (isa<LValueReferenceType>(OldRef))
return C.getLValueReferenceType(New, OldRef->isSpelledAsLValue());
else
return C.getRValueReferenceType(New);
}
}
llvm_unreachable("unknown wrapping kind");
}
};
} // end anonymous namespace
static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &State,
ParsedAttr &PAttr, QualType &Type) {
Sema &S = State.getSema();
Attr *A;
switch (PAttr.getKind()) {
default: llvm_unreachable("Unknown attribute kind");
case ParsedAttr::AT_Ptr32:
A = createSimpleAttr<Ptr32Attr>(S.Context, PAttr);
break;
case ParsedAttr::AT_Ptr64:
A = createSimpleAttr<Ptr64Attr>(S.Context, PAttr);
break;
case ParsedAttr::AT_SPtr:
A = createSimpleAttr<SPtrAttr>(S.Context, PAttr);
break;
case ParsedAttr::AT_UPtr:
A = createSimpleAttr<UPtrAttr>(S.Context, PAttr);
break;
}
std::bitset<attr::LastAttr> Attrs;
attr::Kind NewAttrKind = A->getKind();
QualType Desugared = Type;
const AttributedType *AT = dyn_cast<AttributedType>(Type);
while (AT) {
Attrs[AT->getAttrKind()] = true;
Desugared = AT->getModifiedType();
AT = dyn_cast<AttributedType>(Desugared);
}
// You cannot specify duplicate type attributes, so if the attribute has
// already been applied, flag it.
if (Attrs[NewAttrKind]) {
S.Diag(PAttr.getLoc(), diag::warn_duplicate_attribute_exact) << PAttr;
return true;
}
Attrs[NewAttrKind] = true;
// You cannot have both __sptr and __uptr on the same type, nor can you
// have __ptr32 and __ptr64.
if (Attrs[attr::Ptr32] && Attrs[attr::Ptr64]) {
S.Diag(PAttr.getLoc(), diag::err_attributes_are_not_compatible)
<< "'__ptr32'"
<< "'__ptr64'";
return true;
} else if (Attrs[attr::SPtr] && Attrs[attr::UPtr]) {
S.Diag(PAttr.getLoc(), diag::err_attributes_are_not_compatible)
<< "'__sptr'"
<< "'__uptr'";
return true;
}
// Pointer type qualifiers can only operate on pointer types, but not
// pointer-to-member types.
//
// FIXME: Should we really be disallowing this attribute if there is any
// type sugar between it and the pointer (other than attributes)? Eg, this
// disallows the attribute on a parenthesized pointer.
// And if so, should we really allow *any* type attribute?
if (!isa<PointerType>(Desugared)) {
if (Type->isMemberPointerType())
S.Diag(PAttr.getLoc(), diag::err_attribute_no_member_pointers) << PAttr;
else
S.Diag(PAttr.getLoc(), diag::err_attribute_pointers_only) << PAttr << 0;
return true;
}
// Add address space to type based on its attributes.
LangAS ASIdx = LangAS::Default;
uint64_t PtrWidth = S.Context.getTargetInfo().getPointerWidth(0);
if (PtrWidth == 32) {
if (Attrs[attr::Ptr64])
ASIdx = LangAS::ptr64;
else if (Attrs[attr::UPtr])
ASIdx = LangAS::ptr32_uptr;
} else if (PtrWidth == 64 && Attrs[attr::Ptr32]) {
if (Attrs[attr::UPtr])
ASIdx = LangAS::ptr32_uptr;
else
ASIdx = LangAS::ptr32_sptr;
}
QualType Pointee = Type->getPointeeType();
if (ASIdx != LangAS::Default)
Pointee = S.Context.getAddrSpaceQualType(
S.Context.removeAddrSpaceQualType(Pointee), ASIdx);
Type = State.getAttributedType(A, Type, S.Context.getPointerType(Pointee));
return false;
}
/// Map a nullability attribute kind to a nullability kind.
static NullabilityKind mapNullabilityAttrKind(ParsedAttr::Kind kind) {
switch (kind) {
case ParsedAttr::AT_TypeNonNull:
return NullabilityKind::NonNull;
case ParsedAttr::AT_TypeNullable:
return NullabilityKind::Nullable;
case ParsedAttr::AT_TypeNullableResult:
return NullabilityKind::NullableResult;
case ParsedAttr::AT_TypeNullUnspecified:
return NullabilityKind::Unspecified;
default:
llvm_unreachable("not a nullability attribute kind");
}
}
/// Applies a nullability type specifier to the given type, if possible.
///
/// \param state The type processing state.
///
/// \param type The type to which the nullability specifier will be
/// added. On success, this type will be updated appropriately.
///
/// \param attr The attribute as written on the type.
///
/// \param allowOnArrayType Whether to accept nullability specifiers on an
/// array type (e.g., because it will decay to a pointer).
///
/// \returns true if a problem has been diagnosed, false on success.
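///
/// For illustration (hypothetical user code, not from this file):
///   int * _Nonnull p;           // OK: qualifies the pointer type
///   int * _Nonnull _Nullable q; // error: conflicting nullability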
static bool checkNullabilityTypeSpecifier(TypeProcessingState &state,
QualType &type,
ParsedAttr &attr,
bool allowOnArrayType) {
Sema &S = state.getSema();
NullabilityKind nullability = mapNullabilityAttrKind(attr.getKind());
SourceLocation nullabilityLoc = attr.getLoc();
bool isContextSensitive = attr.isContextSensitiveKeywordAttribute();
recordNullabilitySeen(S, nullabilityLoc);
// Check for existing nullability attributes on the type.
QualType desugared = type;
while (auto attributed = dyn_cast<AttributedType>(desugared.getTypePtr())) {
// Check whether there is already a nullability attribute at this level.
if (auto existingNullability = attributed->getImmediateNullability()) {
// Duplicated nullability.
if (nullability == *existingNullability) {
S.Diag(nullabilityLoc, diag::warn_nullability_duplicate)
<< DiagNullabilityKind(nullability, isContextSensitive)
<< FixItHint::CreateRemoval(nullabilityLoc);
break;
}
// Conflicting nullability.
S.Diag(nullabilityLoc, diag::err_nullability_conflicting)
<< DiagNullabilityKind(nullability, isContextSensitive)
<< DiagNullabilityKind(*existingNullability, false);
return true;
}
desugared = attributed->getModifiedType();
}
// If there is already a different nullability specifier, complain.
// This (unlike the code above) looks through typedefs that might
// have nullability specifiers on them, which means we cannot
// provide a useful Fix-It.
if (auto existingNullability = desugared->getNullability(S.Context)) {
if (nullability != *existingNullability) {
S.Diag(nullabilityLoc, diag::err_nullability_conflicting)
<< DiagNullabilityKind(nullability, isContextSensitive)
<< DiagNullabilityKind(*existingNullability, false);
// Try to find the typedef with the existing nullability specifier.
if (auto typedefType = desugared->getAs<TypedefType>()) {
TypedefNameDecl *typedefDecl = typedefType->getDecl();
QualType underlyingType = typedefDecl->getUnderlyingType();
if (auto typedefNullability
= AttributedType::stripOuterNullability(underlyingType)) {
if (*typedefNullability == *existingNullability) {
S.Diag(typedefDecl->getLocation(), diag::note_nullability_here)
<< DiagNullabilityKind(*existingNullability, false);
}
}
}
return true;
}
}
// If this definitely isn't a pointer type, reject the specifier.
if (!desugared->canHaveNullability() &&
!(allowOnArrayType && desugared->isArrayType())) {
S.Diag(nullabilityLoc, diag::err_nullability_nonpointer)
<< DiagNullabilityKind(nullability, isContextSensitive) << type;
return true;
}
// For the context-sensitive keywords/Objective-C property
// attributes, require that the type be a single-level pointer.
if (isContextSensitive) {
// Make sure that the pointee isn't itself a pointer type.
const Type *pointeeType = nullptr;
if (desugared->isArrayType())
pointeeType = desugared->getArrayElementTypeNoTypeQual();
else if (desugared->isAnyPointerType())
pointeeType = desugared->getPointeeType().getTypePtr();
if (pointeeType && (pointeeType->isAnyPointerType() ||
pointeeType->isObjCObjectPointerType() ||
pointeeType->isMemberPointerType())) {
S.Diag(nullabilityLoc, diag::err_nullability_cs_multilevel)
<< DiagNullabilityKind(nullability, true)
<< type;
S.Diag(nullabilityLoc, diag::note_nullability_type_specifier)
<< DiagNullabilityKind(nullability, false)
<< type
<< FixItHint::CreateReplacement(nullabilityLoc,
getNullabilitySpelling(nullability));
return true;
}
}
// Form the attributed type.
type = state.getAttributedType(
createNullabilityAttr(S.Context, attr, nullability), type, type);
return false;
}
/// Check the application of the Objective-C '__kindof' qualifier to
/// the given type.
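///
/// For illustration (hypothetical user code, not from this file):
///   __kindof NSView *view; // NSView or any subclass thereof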
static bool checkObjCKindOfType(TypeProcessingState &state, QualType &type,
ParsedAttr &attr) {
Sema &S = state.getSema();
if (isa<ObjCTypeParamType>(type)) {
// Build the attributed type to record where __kindof occurred.
type = state.getAttributedType(
createSimpleAttr<ObjCKindOfAttr>(S.Context, attr), type, type);
return false;
}
// Find out if it's an Objective-C object or object pointer type.
const ObjCObjectPointerType *ptrType = type->getAs<ObjCObjectPointerType>();
const ObjCObjectType *objType = ptrType ? ptrType->getObjectType()
: type->getAs<ObjCObjectType>();
// If not, we can't apply __kindof.
if (!objType) {
// FIXME: Handle dependent types that aren't yet object types.
S.Diag(attr.getLoc(), diag::err_objc_kindof_nonobject)
<< type;
return true;
}
// Rebuild the "equivalent" type, which pushes __kindof down into
// the object type.
// There is no need to apply kindof on an unqualified id type.
QualType equivType = S.Context.getObjCObjectType(
objType->getBaseType(), objType->getTypeArgsAsWritten(),
objType->getProtocols(),
/*isKindOf=*/objType->isObjCUnqualifiedId() ? false : true);
// If we started with an object pointer type, rebuild it.
if (ptrType) {
equivType = S.Context.getObjCObjectPointerType(equivType);
if (auto nullability = type->getNullability(S.Context)) {
// We create a nullability attribute from the __kindof attribute.
// Make sure that will make sense.
assert(attr.getAttributeSpellingListIndex() == 0 &&
"multiple spellings for __kindof?");
Attr *A = createNullabilityAttr(S.Context, attr, *nullability);
A->setImplicit(true);
equivType = state.getAttributedType(A, equivType, equivType);
}
}
// Build the attributed type to record where __kindof occurred.
type = state.getAttributedType(
createSimpleAttr<ObjCKindOfAttr>(S.Context, attr), type, equivType);
return false;
}
/// Distribute a nullability type attribute that cannot be applied to
/// the type specifier to a pointer, block pointer, or member pointer
/// declarator, complaining if necessary.
///
/// \returns true if the nullability annotation was distributed, false
/// otherwise.
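///
/// For illustration (hypothetical user code, not from this file): given
///   _Nonnull int *ptr;
/// the specifier cannot apply to 'int', so it is moved onto the pointer
/// declarator, as if the user had written 'int * _Nonnull ptr;' (with a
/// warning and a fix-it).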
static bool distributeNullabilityTypeAttr(TypeProcessingState &state,
QualType type, ParsedAttr &attr) {
Declarator &declarator = state.getDeclarator();
/// Attempt to move the attribute to the specified chunk.
auto moveToChunk = [&](DeclaratorChunk &chunk, bool inFunction) -> bool {
// If there is already a nullability attribute there, don't add
// one.
if (hasNullabilityAttr(chunk.getAttrs()))
return false;
// Complain about the nullability qualifier being in the wrong
// place.
enum {
PK_Pointer,
PK_BlockPointer,
PK_MemberPointer,
PK_FunctionPointer,
PK_MemberFunctionPointer,
} pointerKind
= chunk.Kind == DeclaratorChunk::Pointer ? (inFunction ? PK_FunctionPointer
: PK_Pointer)
: chunk.Kind == DeclaratorChunk::BlockPointer ? PK_BlockPointer
: inFunction? PK_MemberFunctionPointer : PK_MemberPointer;
auto diag = state.getSema().Diag(attr.getLoc(),
diag::warn_nullability_declspec)
<< DiagNullabilityKind(mapNullabilityAttrKind(attr.getKind()),
attr.isContextSensitiveKeywordAttribute())
<< type
<< static_cast<unsigned>(pointerKind);
// FIXME: MemberPointer chunks don't carry the location of the *.
if (chunk.Kind != DeclaratorChunk::MemberPointer) {
diag << FixItHint::CreateRemoval(attr.getLoc())
<< FixItHint::CreateInsertion(
state.getSema().getPreprocessor().getLocForEndOfToken(
chunk.Loc),
" " + attr.getAttrName()->getName().str() + " ");
}
moveAttrFromListToList(attr, state.getCurrentAttributes(),
chunk.getAttrs());
return true;
};
// Move it to the outermost pointer, member pointer, or block
// pointer declarator.
for (unsigned i = state.getCurrentChunkIndex(); i != 0; --i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i-1);
switch (chunk.Kind) {
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::MemberPointer:
return moveToChunk(chunk, false);
case DeclaratorChunk::Paren:
case DeclaratorChunk::Array:
continue;
case DeclaratorChunk::Function:
// Try to move past the return type to a function/block/member
// function pointer.
if (DeclaratorChunk *dest = maybeMovePastReturnType(
declarator, i,
/*onlyBlockPointers=*/false)) {
return moveToChunk(*dest, true);
}
return false;
// Don't walk through these.
case DeclaratorChunk::Reference:
case DeclaratorChunk::Pipe:
return false;
}
}
return false;
}
static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
assert(!Attr.isInvalid());
switch (Attr.getKind()) {
default:
llvm_unreachable("not a calling convention attribute");
case ParsedAttr::AT_CDecl:
return createSimpleAttr<CDeclAttr>(Ctx, Attr);
case ParsedAttr::AT_FastCall:
return createSimpleAttr<FastCallAttr>(Ctx, Attr);
case ParsedAttr::AT_StdCall:
return createSimpleAttr<StdCallAttr>(Ctx, Attr);
case ParsedAttr::AT_ThisCall:
return createSimpleAttr<ThisCallAttr>(Ctx, Attr);
case ParsedAttr::AT_RegCall:
return createSimpleAttr<RegCallAttr>(Ctx, Attr);
case ParsedAttr::AT_Pascal:
return createSimpleAttr<PascalAttr>(Ctx, Attr);
case ParsedAttr::AT_SwiftCall:
return createSimpleAttr<SwiftCallAttr>(Ctx, Attr);
case ParsedAttr::AT_SwiftAsyncCall:
return createSimpleAttr<SwiftAsyncCallAttr>(Ctx, Attr);
case ParsedAttr::AT_VectorCall:
return createSimpleAttr<VectorCallAttr>(Ctx, Attr);
case ParsedAttr::AT_AArch64VectorPcs:
return createSimpleAttr<AArch64VectorPcsAttr>(Ctx, Attr);
case ParsedAttr::AT_AArch64SVEPcs:
return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
case ParsedAttr::AT_AMDGPUKernelCall:
return createSimpleAttr<AMDGPUKernelCallAttr>(Ctx, Attr);
case ParsedAttr::AT_Pcs: {
// The attribute may have had a fixit applied where we treated an
// identifier as a string literal. The contents of the string are valid,
// but the form may not be.
StringRef Str;
if (Attr.isArgExpr(0))
Str = cast<StringLiteral>(Attr.getArgAsExpr(0))->getString();
else
Str = Attr.getArgAsIdent(0)->Ident->getName();
PcsAttr::PCSType Type;
if (!PcsAttr::ConvertStrToPCSType(Str, Type))
llvm_unreachable("already validated the attribute");
return ::new (Ctx) PcsAttr(Ctx, Attr, Type);
}
case ParsedAttr::AT_IntelOclBicc:
return createSimpleAttr<IntelOclBiccAttr>(Ctx, Attr);
case ParsedAttr::AT_MSABI:
return createSimpleAttr<MSABIAttr>(Ctx, Attr);
case ParsedAttr::AT_SysVABI:
return createSimpleAttr<SysVABIAttr>(Ctx, Attr);
case ParsedAttr::AT_PreserveMost:
return createSimpleAttr<PreserveMostAttr>(Ctx, Attr);
case ParsedAttr::AT_PreserveAll:
return createSimpleAttr<PreserveAllAttr>(Ctx, Attr);
}
llvm_unreachable("unexpected attribute kind!");
}
/// Process an individual function attribute. Returns true to
/// indicate that the attribute was handled, false if it wasn't.
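///
/// For illustration (hypothetical user code, not from this file):
///   void (__attribute__((stdcall)) *fp)(void);        // calling convention
///   void (__attribute__((regparm(2))) *gp)(int, int); // ExtInfo regparm bit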
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
QualType &type) {
Sema &S = state.getSema();
FunctionTypeUnwrapper unwrapped(S, type);
if (attr.getKind() == ParsedAttr::AT_NoReturn) {
if (S.CheckAttrNoArgs(attr))
return true;
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
// Otherwise we can process right away.
FunctionType::ExtInfo EI = unwrapped.get()->getExtInfo().withNoReturn(true);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
return true;
}
if (attr.getKind() == ParsedAttr::AT_CmseNSCall) {
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
// Ignore if we don't have CMSE enabled.
if (!S.getLangOpts().Cmse) {
S.Diag(attr.getLoc(), diag::warn_attribute_ignored) << attr;
attr.setInvalid();
return true;
}
// Otherwise we can process right away.
FunctionType::ExtInfo EI =
unwrapped.get()->getExtInfo().withCmseNSCall(true);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
return true;
}
// ns_returns_retained is not always a type attribute, but if we got
// here, we're treating it as one right now.
if (attr.getKind() == ParsedAttr::AT_NSReturnsRetained) {
if (attr.getNumArgs()) return true;
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
// Check whether the return type is reasonable.
if (S.checkNSReturnsRetainedReturnType(attr.getLoc(),
unwrapped.get()->getReturnType()))
return true;
// Only actually change the underlying type in ARC builds.
QualType origType = type;
if (state.getSema().getLangOpts().ObjCAutoRefCount) {
FunctionType::ExtInfo EI
= unwrapped.get()->getExtInfo().withProducesResult(true);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
}
type = state.getAttributedType(
createSimpleAttr<NSReturnsRetainedAttr>(S.Context, attr),
origType, type);
return true;
}
if (attr.getKind() == ParsedAttr::AT_AnyX86NoCallerSavedRegisters) {
if (S.CheckAttrTarget(attr) || S.CheckAttrNoArgs(attr))
return true;
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
FunctionType::ExtInfo EI =
unwrapped.get()->getExtInfo().withNoCallerSavedRegs(true);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
return true;
}
if (attr.getKind() == ParsedAttr::AT_AnyX86NoCfCheck) {
if (!S.getLangOpts().CFProtectionBranch) {
S.Diag(attr.getLoc(), diag::warn_nocf_check_attribute_ignored);
attr.setInvalid();
return true;
}
if (S.CheckAttrTarget(attr) || S.CheckAttrNoArgs(attr))
return true;
// If this is not a function type, a warning will be emitted by the
// subject check.
if (!unwrapped.isFunctionType())
return true;
FunctionType::ExtInfo EI =
unwrapped.get()->getExtInfo().withNoCfCheck(true);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
return true;
}
if (attr.getKind() == ParsedAttr::AT_Regparm) {
unsigned value;
if (S.CheckRegparmAttr(attr, value))
return true;
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
// Diagnose regparm with fastcall.
const FunctionType *fn = unwrapped.get();
CallingConv CC = fn->getCallConv();
if (CC == CC_X86FastCall) {
S.Diag(attr.getLoc(), diag::err_attributes_are_not_compatible)
<< FunctionType::getNameForCallConv(CC)
<< "regparm";
attr.setInvalid();
return true;
}
FunctionType::ExtInfo EI =
unwrapped.get()->getExtInfo().withRegParm(value);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
return true;
}
if (attr.getKind() == ParsedAttr::AT_NoThrow) {
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
if (S.CheckAttrNoArgs(attr)) {
attr.setInvalid();
return true;
}
// Otherwise we can process right away.
auto *Proto = unwrapped.get()->castAs<FunctionProtoType>();
// MSVC ignores nothrow if it is in conflict with an explicit exception
// specification.
if (Proto->hasExceptionSpec()) {
switch (Proto->getExceptionSpecType()) {
case EST_None:
llvm_unreachable("This doesn't have an exception spec!");
case EST_DynamicNone:
case EST_BasicNoexcept:
case EST_NoexceptTrue:
case EST_NoThrow:
// Exception spec doesn't conflict with nothrow, so don't warn.
LLVM_FALLTHROUGH;
case EST_Unparsed:
case EST_Uninstantiated:
case EST_DependentNoexcept:
case EST_Unevaluated:
// We don't have enough information to properly determine if there is a
// conflict, so suppress the warning.
break;
case EST_Dynamic:
case EST_MSAny:
case EST_NoexceptFalse:
S.Diag(attr.getLoc(), diag::warn_nothrow_attribute_ignored);
break;
}
return true;
}
type = unwrapped.wrap(
S, S.Context
.getFunctionTypeWithExceptionSpec(
QualType{Proto, 0},
FunctionProtoType::ExceptionSpecInfo{EST_NoThrow})
->getAs<FunctionType>());
return true;
}
// Delay if the type didn't work out to a function.
if (!unwrapped.isFunctionType()) return false;
// Otherwise, a calling convention.
CallingConv CC;
if (S.CheckCallingConvAttr(attr, CC))
return true;
const FunctionType *fn = unwrapped.get();
CallingConv CCOld = fn->getCallConv();
Attr *CCAttr = getCCTypeAttr(S.Context, attr);
if (CCOld != CC) {
// Error out when there's already an attribute on the type
// and the CCs don't match.
if (S.getCallingConvAttributedType(type)) {
S.Diag(attr.getLoc(), diag::err_attributes_are_not_compatible)
<< FunctionType::getNameForCallConv(CC)
<< FunctionType::getNameForCallConv(CCOld);
attr.setInvalid();
return true;
}
}
// Diagnose use of variadic functions with calling conventions that
// don't support them (e.g. because they're callee-cleanup).
// We delay warning about this on unprototyped function declarations
// until after redeclaration checking, just in case we pick up a
// prototype that way. And apparently we also "delay" warning about
// unprototyped function types in general, despite not necessarily having
// much ability to diagnose it later.
if (!supportsVariadicCall(CC)) {
const FunctionProtoType *FnP = dyn_cast<FunctionProtoType>(fn);
if (FnP && FnP->isVariadic()) {
// stdcall and fastcall are ignored with a warning for GCC and MS
// compatibility.
if (CC == CC_X86StdCall || CC == CC_X86FastCall)
return S.Diag(attr.getLoc(), diag::warn_cconv_unsupported)
<< FunctionType::getNameForCallConv(CC)
<< (int)Sema::CallingConventionIgnoredReason::VariadicFunction;
attr.setInvalid();
return S.Diag(attr.getLoc(), diag::err_cconv_varargs)
<< FunctionType::getNameForCallConv(CC);
}
}
// Also diagnose fastcall with regparm.
if (CC == CC_X86FastCall && fn->getHasRegParm()) {
S.Diag(attr.getLoc(), diag::err_attributes_are_not_compatible)
<< "regparm" << FunctionType::getNameForCallConv(CC_X86FastCall);
attr.setInvalid();
return true;
}
// Modify the CC from the wrapped function type, wrap it all back, and then
// wrap the whole thing in an AttributedType as written. The modified type
// might have a different CC if we ignored the attribute.
QualType Equivalent;
if (CCOld == CC) {
Equivalent = type;
} else {
auto EI = unwrapped.get()->getExtInfo().withCallingConv(CC);
Equivalent =
unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
}
type = state.getAttributedType(CCAttr, type, Equivalent);
return true;
}
bool Sema::hasExplicitCallingConv(QualType T) {
const AttributedType *AT;
// Stop if we'd be stripping off a typedef sugar node to reach the
// AttributedType.
while ((AT = T->getAs<AttributedType>()) &&
AT->getAs<TypedefType>() == T->getAs<TypedefType>()) {
if (AT->isCallingConv())
return true;
T = AT->getModifiedType();
}
return false;
}
void Sema::adjustMemberFunctionCC(QualType &T, bool IsStatic, bool IsCtorOrDtor,
SourceLocation Loc) {
FunctionTypeUnwrapper Unwrapped(*this, T);
const FunctionType *FT = Unwrapped.get();
bool IsVariadic = (isa<FunctionProtoType>(FT) &&
cast<FunctionProtoType>(FT)->isVariadic());
CallingConv CurCC = FT->getCallConv();
CallingConv ToCC = Context.getDefaultCallingConvention(IsVariadic, !IsStatic);
if (CurCC == ToCC)
return;
// MS compiler ignores explicit calling convention attributes on structors. We
// should do the same.
if (Context.getTargetInfo().getCXXABI().isMicrosoft() && IsCtorOrDtor) {
// Issue a warning on ignored calling convention -- except for __stdcall.
// Again, this is what the MS compiler does.
if (CurCC != CC_X86StdCall)
Diag(Loc, diag::warn_cconv_unsupported)
<< FunctionType::getNameForCallConv(CurCC)
<< (int)Sema::CallingConventionIgnoredReason::ConstructorDestructor;
// Default adjustment.
} else {
// Only adjust types with the default convention. For example, on Windows
// we should adjust a __cdecl type to __thiscall for instance methods, and a
// __thiscall type to __cdecl for static methods.
CallingConv DefaultCC =
Context.getDefaultCallingConvention(IsVariadic, IsStatic);
if (CurCC != DefaultCC || DefaultCC == ToCC)
return;
if (hasExplicitCallingConv(T))
return;
}
FT = Context.adjustFunctionType(FT, FT->getExtInfo().withCallingConv(ToCC));
QualType Wrapped = Unwrapped.wrap(*this, FT);
T = Context.getAdjustedType(T, Wrapped);
}
/// HandleVectorSizeAttr - this attribute is only applicable to integral
/// and float scalars, although arrays, pointers, and function return values are
/// allowed in conjunction with this construct. Aggregates with this attribute
/// are invalid, even if they are of the same size as a corresponding scalar.
/// The raw attribute should contain precisely 1 argument, the vector size for
/// the variable, measured in bytes. If curType and rawAttr are well formed,
/// this routine will return a new vector type.
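///
/// For illustration (hypothetical user code, not from this file):
///   typedef int v4si __attribute__((vector_size(16))); // 16 bytes = 4 x i32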
static void HandleVectorSizeAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S) {
// Check the attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << Attr
<< 1;
Attr.setInvalid();
return;
}
Expr *SizeExpr = Attr.getArgAsExpr(0);
QualType T = S.BuildVectorType(CurType, SizeExpr, Attr.getLoc());
if (!T.isNull())
CurType = T;
else
Attr.setInvalid();
}
/// Process the OpenCL-like ext_vector_type attribute when it occurs on
/// a type.
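///
/// For illustration (hypothetical user code, not from this file); unlike
/// vector_size, the argument is an element count:
///   typedef float float4 __attribute__((ext_vector_type(4))); // 4 x float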
static void HandleExtVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S) {
// Check the attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << Attr
<< 1;
return;
}
Expr *SizeExpr = Attr.getArgAsExpr(0);
QualType T = S.BuildExtVectorType(CurType, SizeExpr, Attr.getLoc());
if (!T.isNull())
CurType = T;
}
static bool isPermittedNeonBaseType(QualType &Ty,
VectorType::VectorKind VecKind, Sema &S) {
const BuiltinType *BTy = Ty->getAs<BuiltinType>();
if (!BTy)
return false;
llvm::Triple Triple = S.Context.getTargetInfo().getTriple();
// Signed poly is mathematically wrong, but has been baked into some ABIs by
// now.
bool IsPolyUnsigned = Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32 ||
Triple.getArch() == llvm::Triple::aarch64_be;
if (VecKind == VectorType::NeonPolyVector) {
if (IsPolyUnsigned) {
// AArch64 polynomial vectors are unsigned.
return BTy->getKind() == BuiltinType::UChar ||
BTy->getKind() == BuiltinType::UShort ||
BTy->getKind() == BuiltinType::ULong ||
BTy->getKind() == BuiltinType::ULongLong;
} else {
// AArch32 polynomial vectors are signed.
return BTy->getKind() == BuiltinType::SChar ||
BTy->getKind() == BuiltinType::Short ||
BTy->getKind() == BuiltinType::LongLong;
}
}
// Non-polynomial vector types: the usual suspects are allowed, as well as
// float64_t on AArch64.
if ((Triple.isArch64Bit() || Triple.getArch() == llvm::Triple::aarch64_32) &&
BTy->getKind() == BuiltinType::Double)
return true;
return BTy->getKind() == BuiltinType::SChar ||
BTy->getKind() == BuiltinType::UChar ||
BTy->getKind() == BuiltinType::Short ||
BTy->getKind() == BuiltinType::UShort ||
BTy->getKind() == BuiltinType::Int ||
BTy->getKind() == BuiltinType::UInt ||
BTy->getKind() == BuiltinType::Long ||
BTy->getKind() == BuiltinType::ULong ||
BTy->getKind() == BuiltinType::LongLong ||
BTy->getKind() == BuiltinType::ULongLong ||
BTy->getKind() == BuiltinType::Float ||
BTy->getKind() == BuiltinType::Half ||
BTy->getKind() == BuiltinType::BFloat16;
}
static bool verifyValidIntegerConstantExpr(Sema &S, const ParsedAttr &Attr,
llvm::APSInt &Result) {
const auto *AttrExpr = Attr.getArgAsExpr(0);
if (!AttrExpr->isTypeDependent()) {
if (Optional<llvm::APSInt> Res =
AttrExpr->getIntegerConstantExpr(S.Context)) {
Result = *Res;
return true;
}
}
S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
<< Attr << AANT_ArgumentIntegerConstant << AttrExpr->getSourceRange();
Attr.setInvalid();
return false;
}
/// HandleNeonVectorTypeAttr - The "neon_vector_type" and
/// "neon_polyvector_type" attributes are used to create vector types that
/// are mangled according to ARM's ABI. Otherwise, these types are identical
/// to those created with the "vector_size" attribute. Unlike "vector_size"
/// the argument to these Neon attributes is the number of vector elements,
/// not the vector size in bytes. The vector width and element type must
/// match one of the standard Neon vector types.
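///
/// For illustration (hypothetical user code, not from this file):
///   typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; // 128-bit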
static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S, VectorType::VectorKind VecKind) {
// Target must have NEON (or MVE, whose vectors are similar enough
// not to need a separate attribute).
if (!S.Context.getTargetInfo().hasFeature("neon") &&
!S.Context.getTargetInfo().hasFeature("mve")) {
S.Diag(Attr.getLoc(), diag::err_attribute_unsupported)
<< Attr << "'neon' or 'mve'";
Attr.setInvalid();
return;
}
// Check the attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << Attr
<< 1;
Attr.setInvalid();
return;
}
// The number of elements must be an ICE.
llvm::APSInt numEltsInt(32);
if (!verifyValidIntegerConstantExpr(S, Attr, numEltsInt))
return;
// Only certain element types are supported for Neon vectors.
if (!isPermittedNeonBaseType(CurType, VecKind, S)) {
S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) << CurType;
Attr.setInvalid();
return;
}
// The total size of the vector must be 64 or 128 bits.
unsigned typeSize = static_cast<unsigned>(S.Context.getTypeSize(CurType));
unsigned numElts = static_cast<unsigned>(numEltsInt.getZExtValue());
unsigned vecSize = typeSize * numElts;
if (vecSize != 64 && vecSize != 128) {
S.Diag(Attr.getLoc(), diag::err_attribute_bad_neon_vector_size) << CurType;
Attr.setInvalid();
return;
}
CurType = S.Context.getVectorType(CurType, numElts, VecKind);
}
/// HandleArmSveVectorBitsTypeAttr - The "arm_sve_vector_bits" attribute is
/// used to create fixed-length versions of sizeless SVE types defined by
/// the ACLE, such as svint32_t and svbool_t.
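///
/// For illustration (hypothetical user code, not from this file), compiled
/// with -msve-vector-bits=512:
///   typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));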
static void HandleArmSveVectorBitsTypeAttr(QualType &CurType, ParsedAttr &Attr,
Sema &S) {
// Target must have SVE.
if (!S.Context.getTargetInfo().hasFeature("sve")) {
S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) << Attr << "'sve'";
Attr.setInvalid();
return;
}
// Attribute is unsupported if '-msve-vector-bits=<bits>' isn't specified, or
// if <bits>+ syntax is used.
if (!S.getLangOpts().VScaleMin ||
S.getLangOpts().VScaleMin != S.getLangOpts().VScaleMax) {
S.Diag(Attr.getLoc(), diag::err_attribute_arm_feature_sve_bits_unsupported)
<< Attr;
Attr.setInvalid();
return;
}
// Check the attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
<< Attr << 1;
Attr.setInvalid();
return;
}
// The vector size must be an integer constant expression.
llvm::APSInt SveVectorSizeInBits(32);
if (!verifyValidIntegerConstantExpr(S, Attr, SveVectorSizeInBits))
return;
unsigned VecSize = static_cast<unsigned>(SveVectorSizeInBits.getZExtValue());
// The attribute vector size must match -msve-vector-bits.
if (VecSize != S.getLangOpts().VScaleMin * 128) {
S.Diag(Attr.getLoc(), diag::err_attribute_bad_sve_vector_size)
<< VecSize << S.getLangOpts().VScaleMin * 128;
Attr.setInvalid();
return;
}
// Attribute can only be attached to a single SVE vector or predicate type.
if (!CurType->isVLSTBuiltinType()) {
S.Diag(Attr.getLoc(), diag::err_attribute_invalid_sve_type)
<< Attr << CurType;
Attr.setInvalid();
return;
}
const auto *BT = CurType->castAs<BuiltinType>();
QualType EltType = CurType->getSveEltType(S.Context);
unsigned TypeSize = S.Context.getTypeSize(EltType);
VectorType::VectorKind VecKind = VectorType::SveFixedLengthDataVector;
if (BT->getKind() == BuiltinType::SveBool) {
// Predicates are represented as i8.
VecSize /= S.Context.getCharWidth() * S.Context.getCharWidth();
VecKind = VectorType::SveFixedLengthPredicateVector;
} else
VecSize /= TypeSize;
CurType = S.Context.getVectorType(EltType, VecSize, VecKind);
}
static void HandleArmMveStrictPolymorphismAttr(TypeProcessingState &State,
QualType &CurType,
ParsedAttr &Attr) {
const VectorType *VT = dyn_cast<VectorType>(CurType);
if (!VT || VT->getVectorKind() != VectorType::NeonVector) {
State.getSema().Diag(Attr.getLoc(),
diag::err_attribute_arm_mve_polymorphism);
Attr.setInvalid();
return;
}
CurType =
State.getAttributedType(createSimpleAttr<ArmMveStrictPolymorphismAttr>(
State.getSema().Context, Attr),
CurType, CurType);
}
/// Handle OpenCL Access Qualifier Attribute.
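///
/// For illustration (hypothetical OpenCL user code, not from this file):
///   kernel void k(read_only image2d_t src, write_only image2d_t dst);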
static void HandleOpenCLAccessAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S) {
// OpenCL v2.0 s6.6 - Access qualifiers can be used only for image and pipe types.
if (!(CurType->isImageType() || CurType->isPipeType())) {
S.Diag(Attr.getLoc(), diag::err_opencl_invalid_access_qualifier);
Attr.setInvalid();
return;
}
if (const TypedefType* TypedefTy = CurType->getAs<TypedefType>()) {
QualType BaseTy = TypedefTy->desugar();
std::string PrevAccessQual;
if (BaseTy->isPipeType()) {
if (TypedefTy->getDecl()->hasAttr<OpenCLAccessAttr>()) {
OpenCLAccessAttr *Attr =
TypedefTy->getDecl()->getAttr<OpenCLAccessAttr>();
PrevAccessQual = Attr->getSpelling();
} else {
PrevAccessQual = "read_only";
}
} else if (const BuiltinType* ImgType = BaseTy->getAs<BuiltinType>()) {
switch (ImgType->getKind()) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id: \
PrevAccessQual = #Access; \
break;
#include "clang/Basic/OpenCLImageTypes.def"
default:
llvm_unreachable("Unable to find corresponding image type.");
}
} else {
llvm_unreachable("unexpected type");
}
StringRef AttrName = Attr.getAttrName()->getName();
if (PrevAccessQual == AttrName.ltrim("_")) {
// Duplicated qualifiers
S.Diag(Attr.getLoc(), diag::warn_duplicate_declspec)
<< AttrName << Attr.getRange();
} else {
// Contradicting qualifiers
S.Diag(Attr.getLoc(), diag::err_opencl_multiple_access_qualifiers);
}
S.Diag(TypedefTy->getDecl()->getBeginLoc(),
diag::note_opencl_typedef_access_qualifier) << PrevAccessQual;
} else if (CurType->isPipeType()) {
if (Attr.getSemanticSpelling() == OpenCLAccessAttr::Keyword_write_only) {
QualType ElemType = CurType->castAs<PipeType>()->getElementType();
CurType = S.Context.getWritePipeType(ElemType);
}
}
}
/// HandleMatrixTypeAttr - Process the "matrix_type" attribute, which, much
/// like ext_vector_type, builds a new type from an element type and a size
/// (here: row and column counts).
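///
/// For illustration (hypothetical user code, not from this file), compiled
/// with -fenable-matrix:
///   typedef float m4x4_t __attribute__((matrix_type(4, 4))); // 4x4 floats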
static void HandleMatrixTypeAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S) {
if (!S.getLangOpts().MatrixTypes) {
S.Diag(Attr.getLoc(), diag::err_builtin_matrix_disabled);
return;
}
if (Attr.getNumArgs() != 2) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
<< Attr << 2;
return;
}
Expr *RowsExpr = Attr.getArgAsExpr(0);
Expr *ColsExpr = Attr.getArgAsExpr(1);
QualType T = S.BuildMatrixType(CurType, RowsExpr, ColsExpr, Attr.getLoc());
if (!T.isNull())
CurType = T;
}
static void HandleAnnotateTypeAttr(TypeProcessingState &State,
QualType &CurType, const ParsedAttr &PA) {
Sema &S = State.getSema();
if (PA.getNumArgs() < 1) {
S.Diag(PA.getLoc(), diag::err_attribute_too_few_arguments) << PA << 1;
return;
}
// Make sure that there is a string literal as the annotation's first
// argument.
StringRef Str;
if (!S.checkStringLiteralArgumentAttr(PA, 0, Str))
return;
llvm::SmallVector<Expr *, 4> Args;
Args.reserve(PA.getNumArgs() - 1);
for (unsigned Idx = 1; Idx < PA.getNumArgs(); Idx++) {
assert(!PA.isArgIdent(Idx));
Args.push_back(PA.getArgAsExpr(Idx));
}
if (!S.ConstantFoldAttrArgs(PA, Args))
return;
auto *AnnotateTypeAttr =
AnnotateTypeAttr::Create(S.Context, Str, Args.data(), Args.size(), PA);
CurType = State.getAttributedType(AnnotateTypeAttr, CurType, CurType);
}
static void HandleLifetimeBoundAttr(TypeProcessingState &State,
QualType &CurType,
ParsedAttr &Attr) {
if (State.getDeclarator().isDeclarationOfFunction()) {
CurType = State.getAttributedType(
createSimpleAttr<LifetimeBoundAttr>(State.getSema().Context, Attr),
CurType, CurType);
}
}
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
TypeAttrLocation TAL,
const ParsedAttributesView &attrs) {
state.setParsedNoDeref(false);
if (attrs.empty())
return;
// Scan through and apply attributes to this type where it makes sense. Some
// attributes (such as __address_space__, __vector_size__, etc) apply to the
// type, but others can be present in the type specifiers even though they
// apply to the decl. Here we apply type attributes and ignore the rest.
// This loop modifies the list pretty frequently, but we still need to make
// sure we visit every element once. Copy the attributes list, and iterate
// over that.
ParsedAttributesView AttrsCopy{attrs};
for (ParsedAttr &attr : AttrsCopy) {
// Skip attributes that were marked to be invalid.
if (attr.isInvalid())
continue;
if (attr.isStandardAttributeSyntax()) {
// [[gnu::...]] attributes are treated as declaration attributes, so may
// not appertain to a DeclaratorChunk. If we handle them as type
// attributes, accept them in that position and diagnose the GCC
// incompatibility.
if (attr.isGNUScope()) {
bool IsTypeAttr = attr.isTypeAttr();
if (TAL == TAL_DeclChunk) {
state.getSema().Diag(attr.getLoc(),
IsTypeAttr
? diag::warn_gcc_ignores_type_attr
: diag::warn_cxx11_gnu_attribute_on_type)
<< attr;
if (!IsTypeAttr)
continue;
}
} else if (TAL != TAL_DeclSpec && TAL != TAL_DeclChunk &&
!attr.isTypeAttr()) {
// Otherwise, only consider type processing for a C++11 attribute if
// - it has actually been applied to a type (decl-specifier-seq or
// declarator chunk), or
// - it is a type attribute, irrespective of where it was applied (so
// that we can support the legacy behavior of some type attributes
// that can be applied to the declaration name).
continue;
}
}
// If this is an attribute we can handle, do so now;
// otherwise, add it to the FnAttrs list for rechaining.
switch (attr.getKind()) {
default:
// A [[]] attribute on a declarator chunk must appertain to a type.
if (attr.isStandardAttributeSyntax() && TAL == TAL_DeclChunk) {
state.getSema().Diag(attr.getLoc(), diag::err_attribute_not_type_attr)
<< attr;
attr.setUsedAsTypeAttr();
}
break;
case ParsedAttr::UnknownAttribute:
if (attr.isStandardAttributeSyntax()) {
state.getSema().Diag(attr.getLoc(),
diag::warn_unknown_attribute_ignored)
<< attr << attr.getRange();
// Mark the attribute as invalid so we don't emit the same diagnostic
// multiple times.
attr.setInvalid();
}
break;
case ParsedAttr::IgnoredAttribute:
break;
case ParsedAttr::AT_BTFTypeTag:
HandleBTFTypeTagAttribute(type, attr, state);
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_MayAlias:
// FIXME: This attribute needs to actually be handled, but if we ignore
// it, it breaks large amounts of Linux software.
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_OpenCLPrivateAddressSpace:
case ParsedAttr::AT_OpenCLGlobalAddressSpace:
case ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace:
case ParsedAttr::AT_OpenCLGlobalHostAddressSpace:
case ParsedAttr::AT_OpenCLLocalAddressSpace:
case ParsedAttr::AT_OpenCLConstantAddressSpace:
case ParsedAttr::AT_OpenCLGenericAddressSpace:
case ParsedAttr::AT_AddressSpace:
HandleAddressSpaceTypeAttribute(type, attr, state);
attr.setUsedAsTypeAttr();
break;
OBJC_POINTER_TYPE_ATTRS_CASELIST:
if (!handleObjCPointerTypeAttr(state, attr, type))
distributeObjCPointerTypeAttr(state, attr, type);
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_VectorSize:
HandleVectorSizeAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_ExtVectorType:
HandleExtVectorTypeAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_NeonVectorType:
HandleNeonVectorTypeAttr(type, attr, state.getSema(),
VectorType::NeonVector);
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_NeonPolyVectorType:
HandleNeonVectorTypeAttr(type, attr, state.getSema(),
VectorType::NeonPolyVector);
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_ArmSveVectorBits:
HandleArmSveVectorBitsTypeAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_ArmMveStrictPolymorphism: {
HandleArmMveStrictPolymorphismAttr(state, type, attr);
attr.setUsedAsTypeAttr();
break;
}
case ParsedAttr::AT_OpenCLAccess:
HandleOpenCLAccessAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_LifetimeBound:
if (TAL == TAL_DeclChunk)
HandleLifetimeBoundAttr(state, type, attr);
break;
case ParsedAttr::AT_NoDeref: {
// FIXME: `noderef` currently doesn't work correctly in [[]] syntax.
// See https://github.com/llvm/llvm-project/issues/55790 for details.
// For the time being, we simply emit a warning that the attribute is
// ignored.
if (attr.isStandardAttributeSyntax()) {
state.getSema().Diag(attr.getLoc(), diag::warn_attribute_ignored)
<< attr;
break;
}
ASTContext &Ctx = state.getSema().Context;
type = state.getAttributedType(createSimpleAttr<NoDerefAttr>(Ctx, attr),
type, type);
attr.setUsedAsTypeAttr();
state.setParsedNoDeref(true);
break;
}
case ParsedAttr::AT_MatrixType:
HandleMatrixTypeAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
MS_TYPE_ATTRS_CASELIST:
if (!handleMSPointerTypeQualifierAttr(state, attr, type))
attr.setUsedAsTypeAttr();
break;
NULLABILITY_TYPE_ATTRS_CASELIST:
// Either add nullability here or try to distribute it. We
// don't want to distribute the nullability specifier past any
// dependent type, because that complicates the user model.
if (type->canHaveNullability() || type->isDependentType() ||
type->isArrayType() ||
!distributeNullabilityTypeAttr(state, type, attr)) {
unsigned endIndex;
if (TAL == TAL_DeclChunk)
endIndex = state.getCurrentChunkIndex();
else
endIndex = state.getDeclarator().getNumTypeObjects();
bool allowOnArrayType =
state.getDeclarator().isPrototypeContext() &&
!hasOuterPointerLikeChunk(state.getDeclarator(), endIndex);
if (checkNullabilityTypeSpecifier(
state,
type,
attr,
allowOnArrayType)) {
attr.setInvalid();
}
attr.setUsedAsTypeAttr();
}
break;
case ParsedAttr::AT_ObjCKindOf:
// '__kindof' must be part of the decl-specifiers.
switch (TAL) {
case TAL_DeclSpec:
break;
case TAL_DeclChunk:
case TAL_DeclName:
state.getSema().Diag(attr.getLoc(),
diag::err_objc_kindof_wrong_position)
<< FixItHint::CreateRemoval(attr.getLoc())
<< FixItHint::CreateInsertion(
state.getDeclarator().getDeclSpec().getBeginLoc(),
"__kindof ");
break;
}
// Apply it regardless.
if (checkObjCKindOfType(state, type, attr))
attr.setInvalid();
break;
case ParsedAttr::AT_NoThrow:
// Exception Specifications aren't generally supported in C mode throughout
// clang, so revert to attribute-based handling for C.
if (!state.getSema().getLangOpts().CPlusPlus)
break;
LLVM_FALLTHROUGH;
FUNCTION_TYPE_ATTRS_CASELIST:
attr.setUsedAsTypeAttr();
// Attributes with standard syntax have strict rules for what they
// appertain to and hence should not use the "distribution" logic below.
if (attr.isStandardAttributeSyntax()) {
if (!handleFunctionTypeAttr(state, attr, type)) {
diagnoseBadTypeAttribute(state.getSema(), attr, type);
attr.setInvalid();
}
break;
}
// Never process function type attributes as part of the
// declaration-specifiers.
if (TAL == TAL_DeclSpec)
distributeFunctionTypeAttrFromDeclSpec(state, attr, type);
// Otherwise, handle the possible delays.
else if (!handleFunctionTypeAttr(state, attr, type))
distributeFunctionTypeAttr(state, attr, type);
break;
case ParsedAttr::AT_AcquireHandle: {
if (!type->isFunctionType())
return;
if (attr.getNumArgs() != 1) {
state.getSema().Diag(attr.getLoc(),
diag::err_attribute_wrong_number_arguments)
<< attr << 1;
attr.setInvalid();
return;
}
StringRef HandleType;
if (!state.getSema().checkStringLiteralArgumentAttr(attr, 0, HandleType))
return;
type = state.getAttributedType(
AcquireHandleAttr::Create(state.getSema().Context, HandleType, attr),
type, type);
attr.setUsedAsTypeAttr();
break;
}
case ParsedAttr::AT_AnnotateType: {
HandleAnnotateTypeAttr(state, type, attr);
attr.setUsedAsTypeAttr();
break;
}
}
// Handle attributes that are defined in a macro. We do not want this to be
// applied to ObjC builtin attributes.
if (isa<AttributedType>(type) && attr.hasMacroIdentifier() &&
!type.getQualifiers().hasObjCLifetime() &&
!type.getQualifiers().hasObjCGCAttr() &&
attr.getKind() != ParsedAttr::AT_ObjCGC &&
attr.getKind() != ParsedAttr::AT_ObjCOwnership) {
const IdentifierInfo *MacroII = attr.getMacroIdentifier();
type = state.getSema().Context.getMacroQualifiedType(type, MacroII);
state.setExpansionLocForMacroQualifiedType(
cast<MacroQualifiedType>(type.getTypePtr()),
attr.getMacroExpansionLoc());
}
}
}
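// For illustration, a few source spellings routed through the dispatch above
// (not exhaustive); each is a *type* attribute, so it is applied here rather
// than to the enclosing declaration:
//
//   typedef int v4si __attribute__((vector_size(16)));  // AT_VectorSize
//   __attribute__((address_space(3))) int *asp;         // AT_AddressSpace
//   int *_Nonnull p;                                     // nullability caselist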
void Sema::completeExprArrayBound(Expr *E) {
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParens())) {
if (VarDecl *Var = dyn_cast<VarDecl>(DRE->getDecl())) {
if (isTemplateInstantiation(Var->getTemplateSpecializationKind())) {
auto *Def = Var->getDefinition();
if (!Def) {
SourceLocation PointOfInstantiation = E->getExprLoc();
runWithSufficientStackSpace(PointOfInstantiation, [&] {
InstantiateVariableDefinition(PointOfInstantiation, Var);
});
Def = Var->getDefinition();
// If we don't already have a point of instantiation, and we managed
// to instantiate a definition, this is the point of instantiation.
// Otherwise, we don't request an end-of-TU instantiation, so this is
// not a point of instantiation.
// FIXME: Is this really the right behavior?
if (Var->getPointOfInstantiation().isInvalid() && Def) {
assert(Var->getTemplateSpecializationKind() ==
TSK_ImplicitInstantiation &&
"explicit instantiation with no point of instantiation");
Var->setTemplateSpecializationKind(
Var->getTemplateSpecializationKind(), PointOfInstantiation);
}
}
// Update the type to the definition's type both here and within the
// expression.
if (Def) {
DRE->setDecl(Def);
QualType T = Def->getType();
DRE->setType(T);
// FIXME: Update the type on all intervening expressions.
E->setType(T);
}
// We still go on to try to complete the type independently, as it
// may also require instantiations or diagnostics if it remains
// incomplete.
}
}
}
}
QualType Sema::getCompletedType(Expr *E) {
// Incomplete array types may be completed by the initializer attached to
// their definitions. For static data members of class templates and for
// variable templates, we need to instantiate the definition to get this
// initializer and complete the type.
if (E->getType()->isIncompleteArrayType())
completeExprArrayBound(E);
// FIXME: Are there other cases which require instantiating something other
// than the type to complete the type of an expression?
return E->getType();
}
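// For illustration, the template case the two functions above handle: the
// declared type of a static data member can be an incomplete array that is
// completed by instantiating its definition.
//
//   template <typename T> struct Table {
//     static const int values[];        // incomplete type: const int[]
//   };
//   template <typename T> const int Table<T>::values[] = {1, 2, 3};
//
//   // sizeof requires a complete type, so this instantiates the definition
//   // above and completes the bound to const int[3].
//   enum { N = sizeof(Table<int>::values) / sizeof(int) };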
/// Ensure that the type of the given expression is complete.
///
/// This routine checks whether the expression \p E has a complete type. If the
/// expression refers to an instantiable construct, that instantiation is
/// performed as needed to complete its type. Furthermore,
/// Sema::RequireCompleteType is called for the expression's type (or in the
/// case of a reference type, the referred-to type).
///
/// \param E The expression whose type is required to be complete.
/// \param Kind Selects which completeness rules should be applied.
/// \param Diagnoser The object that will emit a diagnostic if the type is
/// incomplete.
///
/// \returns \c true if the type of \p E is incomplete and diagnosed, \c false
/// otherwise.
bool Sema::RequireCompleteExprType(Expr *E, CompleteTypeKind Kind,
TypeDiagnoser &Diagnoser) {
return RequireCompleteType(E->getExprLoc(), getCompletedType(E), Kind,
Diagnoser);
}
bool Sema::RequireCompleteExprType(Expr *E, unsigned DiagID) {
BoundTypeDiagnoser<> Diagnoser(DiagID);
return RequireCompleteExprType(E, CompleteTypeKind::Default, Diagnoser);
}
/// Ensure that the type T is a complete type.
///
/// This routine checks whether the type @p T is complete in any
/// context where a complete type is required. If @p T is a complete
/// type, returns false. If @p T is a class template specialization,
/// this routine then attempts to perform class template
/// instantiation. If instantiation fails, or if @p T is incomplete
/// and cannot be completed, issues the diagnostic @p diag (giving it
/// the type @p T) and returns true.
///
/// @param Loc The location in the source that the incomplete type
/// diagnostic should refer to.
///
/// @param T The type that this routine is examining for completeness.
///
/// @param Kind Selects which completeness rules should be applied.
///
/// @returns @c true if @p T is incomplete and a diagnostic was emitted,
/// @c false otherwise.
bool Sema::RequireCompleteType(SourceLocation Loc, QualType T,
CompleteTypeKind Kind,
TypeDiagnoser &Diagnoser) {
if (RequireCompleteTypeImpl(Loc, T, Kind, &Diagnoser))
return true;
if (const TagType *Tag = T->getAs<TagType>()) {
if (!Tag->getDecl()->isCompleteDefinitionRequired()) {
Tag->getDecl()->setCompleteDefinitionRequired();
Consumer.HandleTagDeclRequiredDefinition(Tag->getDecl());
}
}
return false;
}
bool Sema::hasStructuralCompatLayout(Decl *D, Decl *Suggested) {
llvm::DenseSet<std::pair<Decl *, Decl *>> NonEquivalentDecls;
if (!Suggested)
return false;
// FIXME: Add a specific mode for C11 6.2.7/1 in StructuralEquivalenceContext
// and isolate from other C++ specific checks.
StructuralEquivalenceContext Ctx(
D->getASTContext(), Suggested->getASTContext(), NonEquivalentDecls,
StructuralEquivalenceKind::Default,
false /*StrictTypeSpelling*/, true /*Complain*/,
true /*ErrorOnTagTypeMismatch*/);
return Ctx.IsEquivalent(D, Suggested);
}
bool Sema::hasAcceptableDefinition(NamedDecl *D, NamedDecl **Suggested,
AcceptableKind Kind, bool OnlyNeedComplete) {
// Easy case: if we don't have modules, all declarations are visible.
if (!getLangOpts().Modules && !getLangOpts().ModulesLocalVisibility)
return true;
// If this definition was instantiated from a template, map back to the
// pattern from which it was instantiated.
if (isa<TagDecl>(D) && cast<TagDecl>(D)->isBeingDefined()) {
// We're in the middle of defining it; this definition should be treated
// as visible.
return true;
} else if (auto *RD = dyn_cast<CXXRecordDecl>(D)) {
if (auto *Pattern = RD->getTemplateInstantiationPattern())
RD = Pattern;
D = RD->getDefinition();
} else if (auto *ED = dyn_cast<EnumDecl>(D)) {
if (auto *Pattern = ED->getTemplateInstantiationPattern())
ED = Pattern;
if (OnlyNeedComplete && (ED->isFixed() || getLangOpts().MSVCCompat)) {
// If the enum has a fixed underlying type, it may have been forward
// declared. In -fms-compatibility, `enum Foo;` will also forward declare
// the enum and assign it the underlying type of `int`. Since we're only
// looking for a complete type (not a definition), any visible declaration
// of it will do.
*Suggested = nullptr;
for (auto *Redecl : ED->redecls()) {
if (isAcceptable(Redecl, Kind))
return true;
if (Redecl->isThisDeclarationADefinition() ||
(Redecl->isCanonicalDecl() && !*Suggested))
*Suggested = Redecl;
}
return false;
}
D = ED->getDefinition();
} else if (auto *FD = dyn_cast<FunctionDecl>(D)) {
if (auto *Pattern = FD->getTemplateInstantiationPattern())
FD = Pattern;
D = FD->getDefinition();
} else if (auto *VD = dyn_cast<VarDecl>(D)) {
if (auto *Pattern = VD->getTemplateInstantiationPattern())
VD = Pattern;
D = VD->getDefinition();
}
assert(D && "missing definition for pattern of instantiated definition");
*Suggested = D;
auto DefinitionIsAcceptable = [&] {
// The (primary) definition might be in a visible module.
if (isAcceptable(D, Kind))
return true;
// A visible module might have a merged definition instead.
if (D->isModulePrivate() ? hasMergedDefinitionInCurrentModule(D)
: hasVisibleMergedDefinition(D)) {
if (CodeSynthesisContexts.empty() &&
!getLangOpts().ModulesLocalVisibility) {
// Cache the fact that this definition is implicitly visible because
// there is a visible merged definition.
D->setVisibleDespiteOwningModule();
}
return true;
}
return false;
};
if (DefinitionIsAcceptable())
return true;
// The external source may have additional definitions of this entity that are
// visible, so complete the redeclaration chain now and ask again.
if (auto *Source = Context.getExternalSource()) {
Source->CompleteRedeclChain(D);
return DefinitionIsAcceptable();
}
return false;
}
/// Determine whether there is any declaration of \p D that was ever a
/// definition (perhaps before module merging) and is currently visible.
/// \param D The definition of the entity.
/// \param Suggested Filled in with the declaration that should be made visible
/// in order to provide a definition of this entity.
/// \param OnlyNeedComplete If \c true, we only need the type to be complete,
/// not defined. This only matters for enums with a fixed underlying
/// type, since in all other cases, a type is complete if and only if it
/// is defined.
bool Sema::hasVisibleDefinition(NamedDecl *D, NamedDecl **Suggested,
bool OnlyNeedComplete) {
return hasAcceptableDefinition(D, Suggested, Sema::AcceptableKind::Visible,
OnlyNeedComplete);
}
/// Determine whether there is any declaration of \p D that was ever a
/// definition (perhaps before module merging) and is currently
/// reachable.
/// \param D The definition of the entity.
/// \param Suggested Filled in with the declaration that should be made
/// reachable
/// in order to provide a definition of this entity.
/// \param OnlyNeedComplete If \c true, we only need the type to be complete,
/// not defined. This only matters for enums with a fixed underlying
/// type, since in all other cases, a type is complete if and only if it
/// is defined.
bool Sema::hasReachableDefinition(NamedDecl *D, NamedDecl **Suggested,
bool OnlyNeedComplete) {
return hasAcceptableDefinition(D, Suggested, Sema::AcceptableKind::Reachable,
OnlyNeedComplete);
}
/// Locks in the inheritance model for the given class and all of its bases.
static void assignInheritanceModel(Sema &S, CXXRecordDecl *RD) {
RD = RD->getMostRecentNonInjectedDecl();
if (!RD->hasAttr<MSInheritanceAttr>()) {
MSInheritanceModel IM;
bool BestCase = false;
switch (S.MSPointerToMemberRepresentationMethod) {
case LangOptions::PPTMK_BestCase:
BestCase = true;
IM = RD->calculateInheritanceModel();
break;
case LangOptions::PPTMK_FullGeneralitySingleInheritance:
IM = MSInheritanceModel::Single;
break;
case LangOptions::PPTMK_FullGeneralityMultipleInheritance:
IM = MSInheritanceModel::Multiple;
break;
case LangOptions::PPTMK_FullGeneralityVirtualInheritance:
IM = MSInheritanceModel::Unspecified;
break;
}
SourceRange Loc = S.ImplicitMSInheritanceAttrLoc.isValid()
? S.ImplicitMSInheritanceAttrLoc
: RD->getSourceRange();
RD->addAttr(MSInheritanceAttr::CreateImplicit(
S.getASTContext(), BestCase, Loc, AttributeCommonInfo::AS_Microsoft,
MSInheritanceAttr::Spelling(IM)));
S.Consumer.AssignInheritanceModel(RD);
}
}
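// For illustration (Microsoft C++ ABI only): the inheritance model fixes the
// size of a pointer-to-member, so it is locked in no later than the first
// point where such a type must be complete.
//
//   struct __single_inheritance S;  // model locked explicitly here
//   int S::*pm = nullptr;           // sizeof(pm) is now fixed for S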
/// The implementation of RequireCompleteType.
bool Sema::RequireCompleteTypeImpl(SourceLocation Loc, QualType T,
CompleteTypeKind Kind,
TypeDiagnoser *Diagnoser) {
// FIXME: Add this assertion to make sure we always get instantiation points.
// assert(!Loc.isInvalid() && "Invalid location in RequireCompleteType");
// FIXME: Add this assertion to help us flush out problems with
// checking for dependent types and type-dependent expressions.
//
// assert(!T->isDependentType() &&
// "Can't ask whether a dependent type is complete");
if (const MemberPointerType *MPTy = T->getAs<MemberPointerType>()) {
if (!MPTy->getClass()->isDependentType()) {
if (getLangOpts().CompleteMemberPointers &&
!MPTy->getClass()->getAsCXXRecordDecl()->isBeingDefined() &&
RequireCompleteType(Loc, QualType(MPTy->getClass(), 0), Kind,
diag::err_memptr_incomplete))
return true;
// We lock in the inheritance model once somebody has asked us to ensure
// that a pointer-to-member type is complete.
if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
(void)isCompleteType(Loc, QualType(MPTy->getClass(), 0));
assignInheritanceModel(*this, MPTy->getMostRecentCXXRecordDecl());
}
}
}
NamedDecl *Def = nullptr;
bool AcceptSizeless = (Kind == CompleteTypeKind::AcceptSizeless);
bool Incomplete = (T->isIncompleteType(&Def) ||
(!AcceptSizeless && T->isSizelessBuiltinType()));
// Check that any necessary explicit specializations are visible. For an
// enum, we just need the declaration, so don't check this.
if (Def && !isa<EnumDecl>(Def))
checkSpecializationReachability(Loc, Def);
// If we have a complete type, we're done.
if (!Incomplete) {
NamedDecl *Suggested = nullptr;
if (Def &&
!hasReachableDefinition(Def, &Suggested, /*OnlyNeedComplete=*/true)) {
// If the user is going to see an error here, recover by making the
// definition visible.
bool TreatAsComplete = Diagnoser && !isSFINAEContext();
if (Diagnoser && Suggested)
diagnoseMissingImport(Loc, Suggested, MissingImportKind::Definition,
/*Recover*/ TreatAsComplete);
return !TreatAsComplete;
} else if (Def && !TemplateInstCallbacks.empty()) {
CodeSynthesisContext TempInst;
TempInst.Kind = CodeSynthesisContext::Memoization;
TempInst.Template = Def;
TempInst.Entity = Def;
TempInst.PointOfInstantiation = Loc;
atTemplateBegin(TemplateInstCallbacks, *this, TempInst);
atTemplateEnd(TemplateInstCallbacks, *this, TempInst);
}
return false;
}
TagDecl *Tag = dyn_cast_or_null<TagDecl>(Def);
ObjCInterfaceDecl *IFace = dyn_cast_or_null<ObjCInterfaceDecl>(Def);
// Give the external source a chance to provide a definition of the type.
// This is kept separate from completing the redeclaration chain so that
// external sources such as LLDB can avoid synthesizing a type definition
// unless it's actually needed.
if (Tag || IFace) {
// Avoid diagnosing invalid decls as incomplete.
if (Def->isInvalidDecl())
return true;
// Give the external AST source a chance to complete the type.
if (auto *Source = Context.getExternalSource()) {
if (Tag && Tag->hasExternalLexicalStorage())
Source->CompleteType(Tag);
if (IFace && IFace->hasExternalLexicalStorage())
Source->CompleteType(IFace);
// If the external source completed the type, go through the motions
// again to ensure we're allowed to use the completed type.
if (!T->isIncompleteType())
return RequireCompleteTypeImpl(Loc, T, Kind, Diagnoser);
}
}
// If we have a class template specialization or a class member of a
// class template specialization, or an array with known size of such,
// try to instantiate it.
if (auto *RD = dyn_cast_or_null<CXXRecordDecl>(Tag)) {
bool Instantiated = false;
bool Diagnosed = false;
if (RD->isDependentContext()) {
// Don't try to instantiate a dependent class (e.g., a member template of
// an instantiated class template specialization).
// FIXME: Can this ever happen?
} else if (auto *ClassTemplateSpec =
dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
if (ClassTemplateSpec->getSpecializationKind() == TSK_Undeclared) {
runWithSufficientStackSpace(Loc, [&] {
Diagnosed = InstantiateClassTemplateSpecialization(
Loc, ClassTemplateSpec, TSK_ImplicitInstantiation,
/*Complain=*/Diagnoser);
});
Instantiated = true;
}
} else {
CXXRecordDecl *Pattern = RD->getInstantiatedFromMemberClass();
if (!RD->isBeingDefined() && Pattern) {
MemberSpecializationInfo *MSI = RD->getMemberSpecializationInfo();
assert(MSI && "Missing member specialization information?");
// This record was instantiated from a class within a template.
if (MSI->getTemplateSpecializationKind() !=
TSK_ExplicitSpecialization) {
runWithSufficientStackSpace(Loc, [&] {
Diagnosed = InstantiateClass(Loc, RD, Pattern,
getTemplateInstantiationArgs(RD),
TSK_ImplicitInstantiation,
/*Complain=*/Diagnoser);
});
Instantiated = true;
}
}
}
if (Instantiated) {
// Instantiate* might have already complained that the template is not
// defined, if we asked it to.
if (Diagnoser && Diagnosed)
return true;
// If we instantiated a definition, check that it's usable, even if
// instantiation produced an error, so that repeated calls to this
// function give consistent answers.
if (!T->isIncompleteType())
return RequireCompleteTypeImpl(Loc, T, Kind, Diagnoser);
}
}
// FIXME: If we didn't instantiate a definition because of an explicit
// specialization declaration, check that it's visible.
if (!Diagnoser)
return true;
Diagnoser->diagnose(*this, Loc, T);
// If the type was a forward declaration of a class/struct/union
// type, produce a note.
if (Tag && !Tag->isInvalidDecl() && !Tag->getLocation().isInvalid())
Diag(Tag->getLocation(),
Tag->isBeingDefined() ? diag::note_type_being_defined
: diag::note_forward_declaration)
<< Context.getTagDeclType(Tag);
// If the Objective-C class was a forward declaration, produce a note.
if (IFace && !IFace->isInvalidDecl() && !IFace->getLocation().isInvalid())
Diag(IFace->getLocation(), diag::note_forward_class);
// If we have external information that we can use to suggest a fix,
// produce a note.
if (ExternalSource)
ExternalSource->MaybeDiagnoseMissingCompleteType(Loc, T);
return true;
}
bool Sema::RequireCompleteType(SourceLocation Loc, QualType T,
CompleteTypeKind Kind, unsigned DiagID) {
BoundTypeDiagnoser<> Diagnoser(DiagID);
return RequireCompleteType(Loc, T, Kind, Diagnoser);
}
/// Get diagnostic %select index for tag kind for
/// literal type diagnostic message.
/// WARNING: Indexes apply to particular diagnostics only!
///
/// \returns diagnostic %select index.
static unsigned getLiteralDiagFromTagKind(TagTypeKind Tag) {
switch (Tag) {
case TTK_Struct: return 0;
case TTK_Interface: return 1;
case TTK_Class: return 2;
default: llvm_unreachable("Invalid tag kind for literal type diagnostic!");
}
}
/// Ensure that the type T is a literal type.
///
/// This routine checks whether the type @p T is a literal type. If @p T is an
/// incomplete type, an attempt is made to complete it. If @p T is a literal
/// type, or @p AllowIncompleteType is true and @p T is an incomplete type,
/// returns false. Otherwise, this routine issues the diagnostic @p PD (giving
/// it the type @p T), along with notes explaining why the type is not a
/// literal type, and returns true.
///
/// @param Loc The location in the source that the non-literal type
/// diagnostic should refer to.
///
/// @param T The type that this routine is examining for literalness.
///
/// @param Diagnoser Emits a diagnostic if T is not a literal type.
///
/// @returns @c true if @p T is not a literal type and a diagnostic was emitted,
/// @c false otherwise.
bool Sema::RequireLiteralType(SourceLocation Loc, QualType T,
TypeDiagnoser &Diagnoser) {
assert(!T->isDependentType() && "type should not be dependent");
QualType ElemType = Context.getBaseElementType(T);
if ((isCompleteType(Loc, ElemType) || ElemType->isVoidType()) &&
T->isLiteralType(Context))
return false;
Diagnoser.diagnose(*this, Loc, T);
if (T->isVariableArrayType())
return true;
const RecordType *RT = ElemType->getAs<RecordType>();
if (!RT)
return true;
const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
// A partially-defined class type can't be a literal type, because a literal
// class type must have a trivial destructor (which can't be checked until
// the class definition is complete).
if (RequireCompleteType(Loc, ElemType, diag::note_non_literal_incomplete, T))
return true;
// [expr.prim.lambda]p3:
// This class type is [not] a literal type.
if (RD->isLambda() && !getLangOpts().CPlusPlus17) {
Diag(RD->getLocation(), diag::note_non_literal_lambda);
return true;
}
// If the class has virtual base classes, then it's not an aggregate, and
// cannot have any constexpr constructors or a trivial default constructor,
// so is non-literal. This is better to diagnose than the resulting absence
// of constexpr constructors.
if (RD->getNumVBases()) {
Diag(RD->getLocation(), diag::note_non_literal_virtual_base)
<< getLiteralDiagFromTagKind(RD->getTagKind()) << RD->getNumVBases();
for (const auto &I : RD->vbases())
Diag(I.getBeginLoc(), diag::note_constexpr_virtual_base_here)
<< I.getSourceRange();
} else if (!RD->isAggregate() && !RD->hasConstexprNonCopyMoveConstructor() &&
!RD->hasTrivialDefaultConstructor()) {
Diag(RD->getLocation(), diag::note_non_literal_no_constexpr_ctors) << RD;
} else if (RD->hasNonLiteralTypeFieldsOrBases()) {
for (const auto &I : RD->bases()) {
if (!I.getType()->isLiteralType(Context)) {
Diag(I.getBeginLoc(), diag::note_non_literal_base_class)
<< RD << I.getType() << I.getSourceRange();
return true;
}
}
for (const auto *I : RD->fields()) {
if (!I->getType()->isLiteralType(Context) ||
I->getType().isVolatileQualified()) {
Diag(I->getLocation(), diag::note_non_literal_field)
<< RD << I << I->getType()
<< I->getType().isVolatileQualified();
return true;
}
}
} else if (getLangOpts().CPlusPlus20 ? !RD->hasConstexprDestructor()
: !RD->hasTrivialDestructor()) {
// All fields and bases are of literal types, so have trivial or constexpr
// destructors. If this class's destructor is non-trivial / non-constexpr,
// it must be user-declared.
CXXDestructorDecl *Dtor = RD->getDestructor();
assert(Dtor && "class has literal fields and bases but no dtor?");
if (!Dtor)
return true;
if (getLangOpts().CPlusPlus20) {
Diag(Dtor->getLocation(), diag::note_non_literal_non_constexpr_dtor)
<< RD;
} else {
Diag(Dtor->getLocation(), Dtor->isUserProvided()
? diag::note_non_literal_user_provided_dtor
: diag::note_non_literal_nontrivial_dtor)
<< RD;
if (!Dtor->isUserProvided())
SpecialMemberIsTrivial(Dtor, CXXDestructor, TAH_IgnoreTrivialABI,
/*Diagnose*/ true);
}
}
return true;
}
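// For illustration, a type rejected by the checks above under the pre-C++23
// rules for constexpr functions (the commented declaration is the error):
//
//   struct NonLiteral {
//     ~NonLiteral() {}              // user-provided, non-constexpr dtor
//   };
//   // constexpr NonLiteral make(); // error: non-literal return type; a note
//   //                              // points at the destructor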
bool Sema::RequireLiteralType(SourceLocation Loc, QualType T, unsigned DiagID) {
BoundTypeDiagnoser<> Diagnoser(DiagID);
return RequireLiteralType(Loc, T, Diagnoser);
}
/// Retrieve a version of the type 'T' that is elaborated by Keyword, qualified
/// by the nested-name-specifier contained in SS, and that is (re)declared by
/// OwnedTagDecl, which is nullptr if this is not a (re)declaration.
QualType Sema::getElaboratedType(ElaboratedTypeKeyword Keyword,
const CXXScopeSpec &SS, QualType T,
TagDecl *OwnedTagDecl) {
if (T.isNull())
return T;
NestedNameSpecifier *NNS;
if (SS.isValid())
NNS = SS.getScopeRep();
else {
if (Keyword == ETK_None)
return T;
NNS = nullptr;
}
return Context.getElaboratedType(Keyword, NNS, T, OwnedTagDecl);
}
QualType Sema::BuildTypeofExprType(Expr *E) {
assert(!E->hasPlaceholderType() && "unexpected placeholder");
if (!getLangOpts().CPlusPlus && E->refersToBitField())
Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 2;
if (!E->isTypeDependent()) {
QualType T = E->getType();
if (const TagType *TT = T->getAs<TagType>())
DiagnoseUseOfDecl(TT->getDecl(), E->getExprLoc());
}
return Context.getTypeOfExprType(E);
}
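// For illustration, GNU typeof (available in C++ as an extension): the
// operand is unevaluated, and in C a bit-field operand is diagnosed above.
//
//   int i = 0;
//   typeof(i + 1) j = 2;  // j has type int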
/// getDecltypeForExpr - Given an expr, will return the decltype for
/// that expression, according to the rules in C++11
/// [dcl.type.simple]p4 and C++11 [expr.lambda.prim]p18.
QualType Sema::getDecltypeForExpr(Expr *E) {
if (E->isTypeDependent())
return Context.DependentTy;
Expr *IDExpr = E;
if (auto *ImplCastExpr = dyn_cast<ImplicitCastExpr>(E))
IDExpr = ImplCastExpr->getSubExpr();
// C++11 [dcl.type.simple]p4:
// The type denoted by decltype(e) is defined as follows:
// C++20:
// - if E is an unparenthesized id-expression naming a non-type
// template-parameter (13.2), decltype(E) is the type of the
// template-parameter after performing any necessary type deduction
// Note that this does not pick up the implicit 'const' for a template
// parameter object. This rule makes no difference before C++20 so we apply
// it unconditionally.
if (const auto *SNTTPE = dyn_cast<SubstNonTypeTemplateParmExpr>(IDExpr))
return SNTTPE->getParameterType(Context);
// - if e is an unparenthesized id-expression or an unparenthesized class
// member access (5.2.5), decltype(e) is the type of the entity named
// by e. If there is no such entity, or if e names a set of overloaded
// functions, the program is ill-formed;
//
// We apply the same rules for Objective-C ivar and property references.
if (const auto *DRE = dyn_cast<DeclRefExpr>(IDExpr)) {
const ValueDecl *VD = DRE->getDecl();
QualType T = VD->getType();
return isa<TemplateParamObjectDecl>(VD) ? T.getUnqualifiedType() : T;
}
if (const auto *ME = dyn_cast<MemberExpr>(IDExpr)) {
if (const auto *VD = ME->getMemberDecl())
if (isa<FieldDecl>(VD) || isa<VarDecl>(VD))
return VD->getType();
} else if (const auto *IR = dyn_cast<ObjCIvarRefExpr>(IDExpr)) {
return IR->getDecl()->getType();
} else if (const auto *PR = dyn_cast<ObjCPropertyRefExpr>(IDExpr)) {
if (PR->isExplicitProperty())
return PR->getExplicitProperty()->getType();
} else if (const auto *PE = dyn_cast<PredefinedExpr>(IDExpr)) {
return PE->getType();
}
// C++11 [expr.lambda.prim]p18:
// Every occurrence of decltype((x)) where x is a possibly
// parenthesized id-expression that names an entity of automatic
// storage duration is treated as if x were transformed into an
// access to a corresponding data member of the closure type that
// would have been declared if x were an odr-use of the denoted
// entity.
if (getCurLambda() && isa<ParenExpr>(IDExpr)) {
if (auto *DRE = dyn_cast<DeclRefExpr>(IDExpr->IgnoreParens())) {
if (auto *Var = dyn_cast<VarDecl>(DRE->getDecl())) {
QualType T = getCapturedDeclRefType(Var, DRE->getLocation());
if (!T.isNull())
return Context.getLValueReferenceType(T);
}
}
}
return Context.getReferenceQualifiedType(E);
}
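// For illustration, the id-expression vs. parenthesized-expression split
// implemented above, per C++11 [dcl.type.simple]p4:
//
//   int x = 0;
//   using T1 = decltype(x);    // int:  unparenthesized id-expression yields
//                              //       the declared type of the entity
//   using T2 = decltype((x));  // int&: otherwise an lvalue operand yields an
//                              //       lvalue reference type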
QualType Sema::BuildDecltypeType(Expr *E, bool AsUnevaluated) {
assert(!E->hasPlaceholderType() && "unexpected placeholder");
if (AsUnevaluated && CodeSynthesisContexts.empty() &&
!E->isInstantiationDependent() && E->HasSideEffects(Context, false)) {
// The expression operand for decltype is in an unevaluated expression
// context, so side effects could result in unintended consequences.
// Exclude instantiation-dependent expressions, because 'decltype' is often
// used to build SFINAE gadgets.
Diag(E->getExprLoc(), diag::warn_side_effects_unevaluated_context);
}
return Context.getDecltypeType(E, getDecltypeForExpr(E));
}
QualType Sema::BuildUnaryTransformType(QualType BaseType,
UnaryTransformType::UTTKind UKind,
SourceLocation Loc) {
switch (UKind) {
case UnaryTransformType::EnumUnderlyingType:
if (!BaseType->isDependentType() && !BaseType->isEnumeralType()) {
Diag(Loc, diag::err_only_enums_have_underlying_types);
return QualType();
} else {
QualType Underlying = BaseType;
if (!BaseType->isDependentType()) {
// The enum could be incomplete if we're parsing its definition or
// recovering from an error.
NamedDecl *FwdDecl = nullptr;
if (BaseType->isIncompleteType(&FwdDecl)) {
Diag(Loc, diag::err_underlying_type_of_incomplete_enum) << BaseType;
Diag(FwdDecl->getLocation(), diag::note_forward_declaration) << FwdDecl;
return QualType();
}
EnumDecl *ED = BaseType->castAs<EnumType>()->getDecl();
assert(ED && "EnumType has no EnumDecl");
DiagnoseUseOfDecl(ED, Loc);
Underlying = ED->getIntegerType();
assert(!Underlying.isNull());
}
return Context.getUnaryTransformType(BaseType, Underlying,
UnaryTransformType::EnumUnderlyingType);
}
}
llvm_unreachable("unknown unary transform type");
}
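// For illustration, the builtin implemented above (used by implementations
// of std::underlying_type); an incomplete enum is rejected with
// err_underlying_type_of_incomplete_enum:
//
//   enum class E : unsigned char { A };
//   using U = __underlying_type(E);  // U is unsigned char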
QualType Sema::BuildAtomicType(QualType T, SourceLocation Loc) {
if (!isDependentOrGNUAutoType(T)) {
// FIXME: It isn't entirely clear whether incomplete atomic types
// are allowed or not; for simplicity, ban them for the moment.
if (RequireCompleteType(Loc, T, diag::err_atomic_specifier_bad_type, 0))
return QualType();
int DisallowedKind = -1;
if (T->isArrayType())
DisallowedKind = 1;
else if (T->isFunctionType())
DisallowedKind = 2;
else if (T->isReferenceType())
DisallowedKind = 3;
else if (T->isAtomicType())
DisallowedKind = 4;
else if (T.hasQualifiers())
DisallowedKind = 5;
else if (T->isSizelessType())
DisallowedKind = 6;
else if (!T.isTriviallyCopyableType(Context))
// Some other non-trivially-copyable type (probably a C++ class)
DisallowedKind = 7;
else if (T->isBitIntType())
DisallowedKind = 8;
if (DisallowedKind != -1) {
Diag(Loc, diag::err_atomic_specifier_bad_type) << DisallowedKind << T;
return QualType();
}
// FIXME: Do we need any handling for ARC here?
}
// Build the atomic type.
return Context.getAtomicType(T);
}
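// For illustration (C11 _Atomic, also accepted in C++ as a Clang extension);
// the commented lines map to DisallowedKind values checked above:
//
//   _Atomic int ai;            // OK
//   // _Atomic int bad[2];     // error: array type     (DisallowedKind 1)
//   // _Atomic(int &) badref;  // error: reference type (DisallowedKind 3)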
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 19149d079822..ab65612bce90 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1,3600 +1,3606 @@
//===- ExprEngine.cpp - Path-Sensitive Expression-Level Dataflow ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a meta-engine for path-sensitive dataflow analysis that
// is built on CoreEngine, but provides the boilerplate to execute transfer
// functions and build the ExplodedGraph at the expression level.
//
//===----------------------------------------------------------------------===//
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "PrettyStackTraceLocationContext.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ParentMap.h"
#include "clang/AST/PrettyPrinter.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtObjC.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/ConstructionContext.h"
#include "clang/Analysis/ProgramPoint.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/JsonSupport.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/LoopUnrolling.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/LoopWidening.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace clang;
using namespace ento;
#define DEBUG_TYPE "ExprEngine"
STATISTIC(NumRemoveDeadBindings,
"The # of times RemoveDeadBindings is called");
STATISTIC(NumMaxBlockCountReached,
"The # of aborted paths due to reaching the maximum block count in "
"a top level function");
STATISTIC(NumMaxBlockCountReachedInInlined,
"The # of aborted paths due to reaching the maximum block count in "
"an inlined function");
STATISTIC(NumTimesRetriedWithoutInlining,
"The # of times we re-evaluated a call without inlining");
//===----------------------------------------------------------------------===//
// Internal program state traits.
//===----------------------------------------------------------------------===//
namespace {
// When modeling a C++ constructor, for a variety of reasons we need to track
// the location of the object for the duration of its ConstructionContext.
// ObjectsUnderConstruction maps statements within the construction context
// to the object's location, so that on every such statement the location
// can be retrieved.
/// ConstructedObjectKey is used to find the path-sensitive
/// memory region of a freshly constructed object while modeling the AST node
/// that syntactically represents the object that is being constructed.
/// Semantics of such nodes may sometimes require access to the region that's
/// not otherwise present in the program state, or to the very fact that
/// the construction context was present and contained references to these
/// AST nodes.
class ConstructedObjectKey {
using ConstructedObjectKeyImpl =
std::pair<ConstructionContextItem, const LocationContext *>;
const ConstructedObjectKeyImpl Impl;
public:
explicit ConstructedObjectKey(const ConstructionContextItem &Item,
const LocationContext *LC)
: Impl(Item, LC) {}
const ConstructionContextItem &getItem() const { return Impl.first; }
const LocationContext *getLocationContext() const { return Impl.second; }
ASTContext &getASTContext() const {
return getLocationContext()->getDecl()->getASTContext();
}
void printJson(llvm::raw_ostream &Out, PrinterHelper *Helper,
PrintingPolicy &PP) const {
const Stmt *S = getItem().getStmtOrNull();
const CXXCtorInitializer *I = nullptr;
if (!S)
I = getItem().getCXXCtorInitializer();
if (S)
Out << "\"stmt_id\": " << S->getID(getASTContext());
else
Out << "\"init_id\": " << I->getID(getASTContext());
// Kind
Out << ", \"kind\": \"" << getItem().getKindAsString()
<< "\", \"argument_index\": ";
if (getItem().getKind() == ConstructionContextItem::ArgumentKind)
Out << getItem().getIndex();
else
Out << "null";
// Pretty-print
Out << ", \"pretty\": ";
if (S) {
S->printJson(Out, Helper, PP, /*AddQuotes=*/true);
} else {
Out << '\"' << I->getAnyMember()->getDeclName() << '\"';
}
}
void Profile(llvm::FoldingSetNodeID &ID) const {
ID.Add(Impl.first);
ID.AddPointer(Impl.second);
}
bool operator==(const ConstructedObjectKey &RHS) const {
return Impl == RHS.Impl;
}
bool operator<(const ConstructedObjectKey &RHS) const {
return Impl < RHS.Impl;
}
};
} // namespace
typedef llvm::ImmutableMap<ConstructedObjectKey, SVal>
ObjectsUnderConstructionMap;
REGISTER_TRAIT_WITH_PROGRAMSTATE(ObjectsUnderConstruction,
ObjectsUnderConstructionMap)
// This trait is responsible for storing the index of the element that is to be
// constructed in the next iteration. As a result a CXXConstructExpr is only
// stored if it is of array type. Also, the index is the index of the contiguous
// memory region, which is important for multi-dimensional arrays. E.g., int
// arr[2][2]; assume arr[1][1] will be the next element under construction, so
// the index is 3.
typedef llvm::ImmutableMap<
std::pair<const CXXConstructExpr *, const LocationContext *>, unsigned>
IndexOfElementToConstructMap;
REGISTER_TRAIT_WITH_PROGRAMSTATE(IndexOfElementToConstruct,
IndexOfElementToConstructMap)
// This trait is responsible for holding our pending ArrayInitLoopExprs.
// It pairs the LocationContext and the initializer CXXConstructExpr with
// the size of the array that's being copy-initialized.
typedef llvm::ImmutableMap<
std::pair<const CXXConstructExpr *, const LocationContext *>, unsigned>
PendingInitLoopMap;
REGISTER_TRAIT_WITH_PROGRAMSTATE(PendingInitLoop, PendingInitLoopMap)
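// For illustration: an ArrayInitLoopExpr arises, for example, when an array
// is captured by value, and the copy is modeled element by element using the
// flattened index convention described above (for int arr[2][2], arr[i][j]
// sits at contiguous index i * 2 + j).
//
//   int arr[2][2];
//   auto l = [arr] {};  // copy-initializes all four captured elements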
//===----------------------------------------------------------------------===//
// Engine construction and deletion.
//===----------------------------------------------------------------------===//
static const char* TagProviderName = "ExprEngine";
ExprEngine::ExprEngine(cross_tu::CrossTranslationUnitContext &CTU,
AnalysisManager &mgr, SetOfConstDecls *VisitedCalleesIn,
FunctionSummariesTy *FS, InliningModes HowToInlineIn)
: CTU(CTU), IsCTUEnabled(mgr.getAnalyzerOptions().IsNaiveCTUEnabled),
AMgr(mgr), AnalysisDeclContexts(mgr.getAnalysisDeclContextManager()),
Engine(*this, FS, mgr.getAnalyzerOptions()), G(Engine.getGraph()),
StateMgr(getContext(), mgr.getStoreManagerCreator(),
mgr.getConstraintManagerCreator(), G.getAllocator(), this),
SymMgr(StateMgr.getSymbolManager()), MRMgr(StateMgr.getRegionManager()),
svalBuilder(StateMgr.getSValBuilder()), ObjCNoRet(mgr.getASTContext()),
BR(mgr, *this), VisitedCallees(VisitedCalleesIn),
HowToInline(HowToInlineIn) {
unsigned TrimInterval = mgr.options.GraphTrimInterval;
if (TrimInterval != 0) {
// Enable eager node reclamation when constructing the ExplodedGraph.
G.enableNodeReclamation(TrimInterval);
}
}
//===----------------------------------------------------------------------===//
// Utility methods.
//===----------------------------------------------------------------------===//
ProgramStateRef ExprEngine::getInitialState(const LocationContext *InitLoc) {
ProgramStateRef state = StateMgr.getInitialState(InitLoc);
const Decl *D = InitLoc->getDecl();
// Preconditions.
// FIXME: It would be nice if we had a more general mechanism to add
// such preconditions. Some day.
do {
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// Precondition: the first argument of 'main' is an integer guaranteed
// to be > 0.
const IdentifierInfo *II = FD->getIdentifier();
if (!II || !(II->getName() == "main" && FD->getNumParams() > 0))
break;
const ParmVarDecl *PD = FD->getParamDecl(0);
QualType T = PD->getType();
const auto *BT = dyn_cast<BuiltinType>(T);
if (!BT || !BT->isInteger())
break;
const MemRegion *R = state->getRegion(PD, InitLoc);
if (!R)
break;
SVal V = state->getSVal(loc::MemRegionVal(R));
SVal Constraint_untested = evalBinOp(state, BO_GT, V,
svalBuilder.makeZeroVal(T),
svalBuilder.getConditionType());
Optional<DefinedOrUnknownSVal> Constraint =
Constraint_untested.getAs<DefinedOrUnknownSVal>();
if (!Constraint)
break;
if (ProgramStateRef newState = state->assume(*Constraint, true))
state = newState;
}
break;
} while (false);
if (const auto *MD = dyn_cast<ObjCMethodDecl>(D)) {
// Precondition: 'self' is always non-null upon entry to an Objective-C
// method.
const ImplicitParamDecl *SelfD = MD->getSelfDecl();
const MemRegion *R = state->getRegion(SelfD, InitLoc);
SVal V = state->getSVal(loc::MemRegionVal(R));
if (Optional<Loc> LV = V.getAs<Loc>()) {
// Assume that the pointer value in 'self' is non-null.
state = state->assume(*LV, true);
assert(state && "'self' cannot be null");
}
}
if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
if (!MD->isStatic()) {
// Precondition: 'this' is always non-null upon entry to the
// top-level function. This is our starting assumption for
// analyzing an "open" program.
const StackFrameContext *SFC = InitLoc->getStackFrame();
if (SFC->getParent() == nullptr) {
loc::MemRegionVal L = svalBuilder.getCXXThis(MD, SFC);
SVal V = state->getSVal(L);
if (Optional<Loc> LV = V.getAs<Loc>()) {
state = state->assume(*LV, true);
assert(state && "'this' cannot be null");
}
}
}
}
return state;
}
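// For illustration, the 'main' precondition established above: the initial
// state constrains argc > 0, so the analyzer treats the branch below as
// infeasible and prunes paths through it.
//
//   int main(int argc, char **argv) {
//     if (argc == 0)
//       return 1;  // not explored
//     return 0;
//   }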
ProgramStateRef ExprEngine::createTemporaryRegionIfNeeded(
ProgramStateRef State, const LocationContext *LC,
const Expr *InitWithAdjustments, const Expr *Result,
const SubRegion **OutRegionWithAdjustments) {
// FIXME: This function is a hack that works around the quirky AST
// we often get for C++ temporaries. If only we modelled
// the actual execution order of statements properly in the CFG,
// all the hassle with adjustments would not be necessary,
// and perhaps the whole function would be removed.
SVal InitValWithAdjustments = State->getSVal(InitWithAdjustments, LC);
if (!Result) {
// If we don't have an explicit result expression, we're in "if needed"
// mode. Only create a region if the current value is a NonLoc.
if (!isa<NonLoc>(InitValWithAdjustments)) {
if (OutRegionWithAdjustments)
*OutRegionWithAdjustments = nullptr;
return State;
}
Result = InitWithAdjustments;
} else {
// We need to create a region no matter what. Make sure we don't try to
// stuff a Loc into a non-pointer temporary region.
assert(!isa<Loc>(InitValWithAdjustments) ||
Loc::isLocType(Result->getType()) ||
Result->getType()->isMemberPointerType());
}
ProgramStateManager &StateMgr = State->getStateManager();
MemRegionManager &MRMgr = StateMgr.getRegionManager();
StoreManager &StoreMgr = StateMgr.getStoreManager();
// MaterializeTemporaryExpr may appear out of place, after a few field and
// base-class accesses have been made to the object, even though semantically
// it is the whole object that gets materialized and lifetime-extended.
//
// For example:
//
// `-MaterializeTemporaryExpr
// `-MemberExpr
// `-CXXTemporaryObjectExpr
//
// instead of the more natural
//
// `-MemberExpr
// `-MaterializeTemporaryExpr
// `-CXXTemporaryObjectExpr
//
// Use the usual methods for obtaining the expression of the base object,
// and record the adjustments that we need to make to obtain the sub-object
// that the whole expression 'Ex' refers to. This approach is standard,
// in the sense that CodeGen takes a similar route.
SmallVector<const Expr *, 2> CommaLHSs;
SmallVector<SubobjectAdjustment, 2> Adjustments;
const Expr *Init = InitWithAdjustments->skipRValueSubobjectAdjustments(
CommaLHSs, Adjustments);
// Take the region for Init, i.e. for the whole object. If we do not remember
// the region in which the object originally was constructed, come up with
// a new temporary region out of thin air and copy the contents of the object
// (which are currently present in the Environment, because Init is an rvalue)
// into that region. This is not correct, but it is better than nothing.
const TypedValueRegion *TR = nullptr;
if (const auto *MT = dyn_cast<MaterializeTemporaryExpr>(Result)) {
if (Optional<SVal> V = getObjectUnderConstruction(State, MT, LC)) {
State = finishObjectConstruction(State, MT, LC);
State = State->BindExpr(Result, LC, *V);
return State;
} else {
StorageDuration SD = MT->getStorageDuration();
// If this object is bound to a reference with static storage duration, we
// put it in a different region to prevent "address leakage" warnings.
if (SD == SD_Static || SD == SD_Thread) {
TR = MRMgr.getCXXStaticTempObjectRegion(Init);
} else {
TR = MRMgr.getCXXTempObjectRegion(Init, LC);
}
}
} else {
TR = MRMgr.getCXXTempObjectRegion(Init, LC);
}
SVal Reg = loc::MemRegionVal(TR);
SVal BaseReg = Reg;
// Make the necessary adjustments to obtain the sub-object.
for (const SubobjectAdjustment &Adj : llvm::reverse(Adjustments)) {
switch (Adj.Kind) {
case SubobjectAdjustment::DerivedToBaseAdjustment:
Reg = StoreMgr.evalDerivedToBase(Reg, Adj.DerivedToBase.BasePath);
break;
case SubobjectAdjustment::FieldAdjustment:
Reg = StoreMgr.getLValueField(Adj.Field, Reg);
break;
case SubobjectAdjustment::MemberPointerAdjustment:
// FIXME: Unimplemented.
State = State->invalidateRegions(Reg, InitWithAdjustments,
currBldrCtx->blockCount(), LC, true,
nullptr, nullptr, nullptr);
return State;
}
}
// What remains is to copy the value of the object to the new region.
// FIXME: In other words, what we should always do is copy value of the
// Init expression (which corresponds to the bigger object) to the whole
// temporary region TR. However, this value is often no longer present
// in the Environment. If it has disappeared, we instead invalidate TR.
// Still, what we can do is assign the value of expression Ex (which
// corresponds to the sub-object) to the TR's sub-region Reg. At least,
// values inside Reg would be correct.
SVal InitVal = State->getSVal(Init, LC);
if (InitVal.isUnknown()) {
InitVal = getSValBuilder().conjureSymbolVal(Result, LC, Init->getType(),
currBldrCtx->blockCount());
State = State->bindLoc(BaseReg.castAs<Loc>(), InitVal, LC, false);
// Then we'd need to take the value that certainly exists and bind it
// over.
if (InitValWithAdjustments.isUnknown()) {
// Try to recover some path sensitivity in case we couldn't
// compute the value.
InitValWithAdjustments = getSValBuilder().conjureSymbolVal(
Result, LC, InitWithAdjustments->getType(),
currBldrCtx->blockCount());
}
State =
State->bindLoc(Reg.castAs<Loc>(), InitValWithAdjustments, LC, false);
} else {
State = State->bindLoc(BaseReg.castAs<Loc>(), InitVal, LC, false);
}
// The result expression would now point to the correct sub-region of the
// newly created temporary region. Do this last in order to getSVal of Init
// correctly in case (Result == Init).
if (Result->isGLValue()) {
State = State->BindExpr(Result, LC, Reg);
} else {
State = State->BindExpr(Result, LC, InitValWithAdjustments);
}
// Notify checkers once for two bindLoc()s.
State = processRegionChange(State, TR, LC);
if (OutRegionWithAdjustments)
*OutRegionWithAdjustments = cast<SubRegion>(Reg.getAsRegion());
return State;
}
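// For illustration, the out-of-place MaterializeTemporaryExpr described
// above: the adjustment walk recovers the sub-object from the region of the
// whole lifetime-extended temporary.
//
//   struct Pair { int first, second; };
//   Pair make();
//   const int &r = make().second;  // extends the whole Pair temporary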
ProgramStateRef ExprEngine::setIndexOfElementToConstruct(
ProgramStateRef State, const CXXConstructExpr *E,
const LocationContext *LCtx, unsigned Idx) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(!State->contains<IndexOfElementToConstruct>(Key) || Idx > 0);
return State->set<IndexOfElementToConstruct>(Key, Idx);
}
Optional<unsigned> ExprEngine::getPendingInitLoop(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
return Optional<unsigned>::create(
State->get<PendingInitLoop>({E, LCtx->getStackFrame()}));
}
ProgramStateRef ExprEngine::removePendingInitLoop(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(E && State->contains<PendingInitLoop>(Key));
return State->remove<PendingInitLoop>(Key);
}
ProgramStateRef ExprEngine::setPendingInitLoop(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx,
unsigned Size) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(!State->contains<PendingInitLoop>(Key) && Size > 0);
return State->set<PendingInitLoop>(Key, Size);
}
Optional<unsigned>
ExprEngine::getIndexOfElementToConstruct(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
return Optional<unsigned>::create(
State->get<IndexOfElementToConstruct>({E, LCtx->getStackFrame()}));
}
ProgramStateRef
ExprEngine::removeIndexOfElementToConstruct(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(E && State->contains<IndexOfElementToConstruct>(Key));
return State->remove<IndexOfElementToConstruct>(Key);
}
ProgramStateRef
ExprEngine::addObjectUnderConstruction(ProgramStateRef State,
const ConstructionContextItem &Item,
const LocationContext *LC, SVal V) {
ConstructedObjectKey Key(Item, LC->getStackFrame());
const Expr *Init = nullptr;
if (auto DS = dyn_cast_or_null<DeclStmt>(Item.getStmtOrNull())) {
if (auto VD = dyn_cast_or_null<VarDecl>(DS->getSingleDecl()))
Init = VD->getInit();
}
if (auto LE = dyn_cast_or_null<LambdaExpr>(Item.getStmtOrNull()))
Init = *(LE->capture_init_begin() + Item.getIndex());
if (!Init && !Item.getStmtOrNull())
Init = Item.getCXXCtorInitializer()->getInit();
// In an ArrayInitLoopExpr the real initializer is returned by
// getSubExpr().
if (const auto *AILE = dyn_cast_or_null<ArrayInitLoopExpr>(Init))
Init = AILE->getSubExpr();
// FIXME: Currently the state might already contain the marker due to
// incorrect handling of temporaries bound to default parameters.
// The state will already contain the marker if we construct elements
// in an array, as we visit the same statement multiple times before
// the array declaration. The marker is removed when we exit the
// constructor call.
assert((!State->get<ObjectsUnderConstruction>(Key) ||
Key.getItem().getKind() ==
ConstructionContextItem::TemporaryDestructorKind ||
State->contains<IndexOfElementToConstruct>(
{dyn_cast_or_null<CXXConstructExpr>(Init), LC})) &&
"The object is already marked as `UnderConstruction`, when it's not "
"supposed to!");
return State->set<ObjectsUnderConstruction>(Key, V);
}
Optional<SVal>
ExprEngine::getObjectUnderConstruction(ProgramStateRef State,
const ConstructionContextItem &Item,
const LocationContext *LC) {
ConstructedObjectKey Key(Item, LC->getStackFrame());
return Optional<SVal>::create(State->get<ObjectsUnderConstruction>(Key));
}
ProgramStateRef
ExprEngine::finishObjectConstruction(ProgramStateRef State,
const ConstructionContextItem &Item,
const LocationContext *LC) {
ConstructedObjectKey Key(Item, LC->getStackFrame());
assert(State->contains<ObjectsUnderConstruction>(Key));
return State->remove<ObjectsUnderConstruction>(Key);
}
ProgramStateRef ExprEngine::elideDestructor(ProgramStateRef State,
const CXXBindTemporaryExpr *BTE,
const LocationContext *LC) {
ConstructedObjectKey Key({BTE, /*IsElided=*/true}, LC);
// FIXME: Currently the state might already contain the marker due to
// incorrect handling of temporaries bound to default parameters.
return State->set<ObjectsUnderConstruction>(Key, UnknownVal());
}
ProgramStateRef
ExprEngine::cleanupElidedDestructor(ProgramStateRef State,
const CXXBindTemporaryExpr *BTE,
const LocationContext *LC) {
ConstructedObjectKey Key({BTE, /*IsElided=*/true}, LC);
assert(State->contains<ObjectsUnderConstruction>(Key));
return State->remove<ObjectsUnderConstruction>(Key);
}
bool ExprEngine::isDestructorElided(ProgramStateRef State,
const CXXBindTemporaryExpr *BTE,
const LocationContext *LC) {
ConstructedObjectKey Key({BTE, /*IsElided=*/true}, LC);
return State->contains<ObjectsUnderConstruction>(Key);
}
bool ExprEngine::areAllObjectsFullyConstructed(ProgramStateRef State,
const LocationContext *FromLC,
const LocationContext *ToLC) {
const LocationContext *LC = FromLC;
while (LC != ToLC) {
assert(LC && "ToLC must be a parent of FromLC!");
for (auto I : State->get<ObjectsUnderConstruction>())
if (I.first.getLocationContext() == LC)
return false;
LC = LC->getParent();
}
return true;
}
//===----------------------------------------------------------------------===//
// Top-level transfer function logic (Dispatcher).
//===----------------------------------------------------------------------===//
/// evalAssume - Called by ConstraintManager. Used to call checker-specific
/// logic for handling assumptions on symbolic values.
ProgramStateRef ExprEngine::processAssume(ProgramStateRef state,
SVal cond, bool assumption) {
return getCheckerManager().runCheckersForEvalAssume(state, cond, assumption);
}
ProgramStateRef
ExprEngine::processRegionChanges(ProgramStateRef state,
const InvalidatedSymbols *invalidated,
ArrayRef<const MemRegion *> Explicits,
ArrayRef<const MemRegion *> Regions,
const LocationContext *LCtx,
const CallEvent *Call) {
return getCheckerManager().runCheckersForRegionChanges(state, invalidated,
Explicits, Regions,
LCtx, Call);
}
static void
printObjectsUnderConstructionJson(raw_ostream &Out, ProgramStateRef State,
const char *NL, const LocationContext *LCtx,
unsigned int Space = 0, bool IsDot = false) {
PrintingPolicy PP =
LCtx->getAnalysisDeclContext()->getASTContext().getPrintingPolicy();
++Space;
bool HasItem = false;
// Store the last key.
const ConstructedObjectKey *LastKey = nullptr;
for (const auto &I : State->get<ObjectsUnderConstruction>()) {
const ConstructedObjectKey &Key = I.first;
if (Key.getLocationContext() != LCtx)
continue;
if (!HasItem) {
Out << "[" << NL;
HasItem = true;
}
LastKey = &Key;
}
for (const auto &I : State->get<ObjectsUnderConstruction>()) {
const ConstructedObjectKey &Key = I.first;
SVal Value = I.second;
if (Key.getLocationContext() != LCtx)
continue;
Indent(Out, Space, IsDot) << "{ ";
Key.printJson(Out, nullptr, PP);
Out << ", \"value\": \"" << Value << "\" }";
if (&Key != LastKey)
Out << ',';
Out << NL;
}
if (HasItem)
Indent(Out, --Space, IsDot) << ']'; // End of "location_context".
else {
Out << "null ";
}
}
static void printIndicesOfElementsToConstructJson(
raw_ostream &Out, ProgramStateRef State, const char *NL,
const LocationContext *LCtx, const ASTContext &Context,
unsigned int Space = 0, bool IsDot = false) {
using KeyT = std::pair<const Expr *, const LocationContext *>;
PrintingPolicy PP =
LCtx->getAnalysisDeclContext()->getASTContext().getPrintingPolicy();
++Space;
bool HasItem = false;
// Store the last key.
KeyT LastKey;
for (const auto &I : State->get<IndexOfElementToConstruct>()) {
const KeyT &Key = I.first;
if (Key.second != LCtx)
continue;
if (!HasItem) {
Out << "[" << NL;
HasItem = true;
}
LastKey = Key;
}
for (const auto &I : State->get<IndexOfElementToConstruct>()) {
const KeyT &Key = I.first;
unsigned Value = I.second;
if (Key.second != LCtx)
continue;
Indent(Out, Space, IsDot) << "{ ";
// Expr
const Expr *E = Key.first;
Out << "\"stmt_id\": " << E->getID(Context);
// Kind - hack to display the current index
Out << ", \"kind\": \"Cur: " << Value - 1 << "\"";
// Pretty-print
Out << ", \"pretty\": ";
Out << "\"" << E->getStmtClassName() << " "
<< E->getSourceRange().printToString(Context.getSourceManager()) << " '"
<< QualType::getAsString(E->getType().split(), PP);
Out << "'\"";
Out << ", \"value\": \"Next: " << Value << "\" }";
if (Key != LastKey)
Out << ',';
Out << NL;
}
if (HasItem)
Indent(Out, --Space, IsDot) << ']'; // End of "location_context".
else {
Out << "null ";
}
}
void ExprEngine::printJson(raw_ostream &Out, ProgramStateRef State,
const LocationContext *LCtx, const char *NL,
unsigned int Space, bool IsDot) const {
Indent(Out, Space, IsDot) << "\"constructing_objects\": ";
if (LCtx && !State->get<ObjectsUnderConstruction>().isEmpty()) {
++Space;
Out << '[' << NL;
LCtx->printJson(Out, NL, Space, IsDot, [&](const LocationContext *LC) {
printObjectsUnderConstructionJson(Out, State, NL, LC, Space, IsDot);
});
--Space;
Indent(Out, Space, IsDot) << "]," << NL; // End of "constructing_objects".
} else {
Out << "null," << NL;
}
Indent(Out, Space, IsDot) << "\"index_of_element\": ";
if (LCtx && !State->get<IndexOfElementToConstruct>().isEmpty()) {
++Space;
auto &Context = getContext();
Out << '[' << NL;
LCtx->printJson(Out, NL, Space, IsDot, [&](const LocationContext *LC) {
printIndicesOfElementsToConstructJson(Out, State, NL, LC, Context, Space,
IsDot);
});
--Space;
Indent(Out, Space, IsDot) << "]," << NL; // End of "index_of_element".
} else {
Out << "null," << NL;
}
getCheckerManager().runCheckersForPrintStateJson(Out, State, NL, Space,
IsDot);
}
void ExprEngine::processEndWorklist() {
// This prints the name of the top-level function if we crash.
PrettyStackTraceLocationContext CrashInfo(getRootLocationContext());
getCheckerManager().runCheckersForEndAnalysis(G, BR, *this);
}
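/// Dispatch a single CFG element to the appropriate handler: statements and
/// constructors, constructor initializers, new-allocators, implicit
/// destructors, and loop exits each have their own Process* method; lifetime
/// and scope markers are currently ignored.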
void ExprEngine::processCFGElement(const CFGElement E, ExplodedNode *Pred,
unsigned StmtIdx, NodeBuilderContext *Ctx) {
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
currStmtIdx = StmtIdx;
currBldrCtx = Ctx;
switch (E.getKind()) {
case CFGElement::Statement:
case CFGElement::Constructor:
case CFGElement::CXXRecordTypedCall:
ProcessStmt(E.castAs<CFGStmt>().getStmt(), Pred);
return;
case CFGElement::Initializer:
ProcessInitializer(E.castAs<CFGInitializer>(), Pred);
return;
case CFGElement::NewAllocator:
ProcessNewAllocator(E.castAs<CFGNewAllocator>().getAllocatorExpr(),
Pred);
return;
case CFGElement::AutomaticObjectDtor:
case CFGElement::DeleteDtor:
case CFGElement::BaseDtor:
case CFGElement::MemberDtor:
case CFGElement::TemporaryDtor:
ProcessImplicitDtor(E.castAs<CFGImplicitDtor>(), Pred);
return;
case CFGElement::LoopExit:
ProcessLoopExit(E.castAs<CFGLoopExit>().getLoopStmt(), Pred);
return;
case CFGElement::LifetimeEnds:
case CFGElement::ScopeBegin:
case CFGElement::ScopeEnd:
return;
}
}
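/// Decide whether it is worth purging dead bindings before processing 'S'.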
static bool shouldRemoveDeadBindings(AnalysisManager &AMgr,
const Stmt *S,
const ExplodedNode *Pred,
const LocationContext *LC) {
// Are we never purging state values?
if (AMgr.options.AnalysisPurgeOpt == PurgeNone)
return false;
// Is this the beginning of a basic block?
if (Pred->getLocation().getAs<BlockEntrance>())
return true;
// Is this on a non-expression?
if (!isa<Expr>(S))
return true;
// Run before processing a call.
if (CallEvent::isCallStmt(S))
return true;
// Is this an expression that is consumed by another expression? If so,
// postpone cleaning out the state.
ParentMap &PM = LC->getAnalysisDeclContext()->getParentMap();
return !PM.isConsumedExpr(cast<Expr>(S));
}
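/// Remove dead bindings and symbols from the state. Objects that are still
/// under construction are explicitly kept alive, checkers get a chance to
/// react to dying symbols via checkDeadSymbols, and the resulting cleaned-up
/// states are emitted into 'Out'.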
void ExprEngine::removeDead(ExplodedNode *Pred, ExplodedNodeSet &Out,
const Stmt *ReferenceStmt,
const LocationContext *LC,
const Stmt *DiagnosticStmt,
ProgramPoint::Kind K) {
assert((K == ProgramPoint::PreStmtPurgeDeadSymbolsKind ||
ReferenceStmt == nullptr || isa<ReturnStmt>(ReferenceStmt))
&& "PostStmt is not generally supported by the SymbolReaper yet");
assert(LC && "Must pass the current (or expiring) LocationContext");
if (!DiagnosticStmt) {
DiagnosticStmt = ReferenceStmt;
assert(DiagnosticStmt && "Required for clearing a LocationContext");
}
NumRemoveDeadBindings++;
ProgramStateRef CleanedState = Pred->getState();
// LC is the location context being destroyed, but SymbolReaper wants a
// location context that is still live. (If this is the top-level stack
// frame, this will be null.)
if (!ReferenceStmt) {
assert(K == ProgramPoint::PostStmtPurgeDeadSymbolsKind &&
"Use PostStmtPurgeDeadSymbolsKind for clearing a LocationContext");
LC = LC->getParent();
}
const StackFrameContext *SFC = LC ? LC->getStackFrame() : nullptr;
SymbolReaper SymReaper(SFC, ReferenceStmt, SymMgr, getStoreManager());
for (auto I : CleanedState->get<ObjectsUnderConstruction>()) {
if (SymbolRef Sym = I.second.getAsSymbol())
SymReaper.markLive(Sym);
if (const MemRegion *MR = I.second.getAsRegion())
SymReaper.markLive(MR);
}
getCheckerManager().runCheckersForLiveSymbols(CleanedState, SymReaper);
// Create a state in which dead bindings are removed from the environment
// and the store. TODO: The function should just return new env and store,
// not a new state.
CleanedState = StateMgr.removeDeadBindingsFromEnvironmentAndStore(
CleanedState, SFC, SymReaper);
// Process any special transfer function for dead symbols.
// A tag to track convenience transitions, which can be removed at cleanup.
static SimpleProgramPointTag cleanupTag(TagProviderName, "Clean Node");
// Call checkers with the non-cleaned state so that they can query the
// values of the soon-to-be-dead symbols.
ExplodedNodeSet CheckedSet;
getCheckerManager().runCheckersForDeadSymbols(CheckedSet, Pred, SymReaper,
DiagnosticStmt, *this, K);
// For each node in CheckedSet, generate CleanedNodes that have the
// environment, the store, and the constraints cleaned up but have the
// user-supplied states as the predecessors.
StmtNodeBuilder Bldr(CheckedSet, Out, *currBldrCtx);
for (const auto I : CheckedSet) {
ProgramStateRef CheckerState = I->getState();
// The constraint manager has not been cleaned up yet, so clean up now.
CheckerState =
getConstraintManager().removeDeadBindings(CheckerState, SymReaper);
assert(StateMgr.haveEqualEnvironments(CheckerState, Pred->getState()) &&
"Checkers are not allowed to modify the Environment as a part of "
"checkDeadSymbols processing.");
assert(StateMgr.haveEqualStores(CheckerState, Pred->getState()) &&
"Checkers are not allowed to modify the Store as a part of "
"checkDeadSymbols processing.");
// Create a state based on CleanedState with CheckerState GDM and
// generate a transition to that state.
ProgramStateRef CleanedCheckerSt =
StateMgr.getPersistentStateWithGDM(CleanedState, CheckerState);
Bldr.generateNode(DiagnosticStmt, I, CleanedCheckerSt, &cleanupTag, K);
}
}
void ExprEngine::ProcessStmt(const Stmt *currStmt, ExplodedNode *Pred) {
// Reclaim any unnecessary nodes in the ExplodedGraph.
G.reclaimRecentlyAllocatedNodes();
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
currStmt->getBeginLoc(),
"Error evaluating statement");
// Remove dead bindings and symbols.
ExplodedNodeSet CleanedStates;
if (shouldRemoveDeadBindings(AMgr, currStmt, Pred,
Pred->getLocationContext())) {
removeDead(Pred, CleanedStates, currStmt,
Pred->getLocationContext());
} else
CleanedStates.Add(Pred);
// Visit the statement.
ExplodedNodeSet Dst;
for (const auto I : CleanedStates) {
ExplodedNodeSet DstI;
// Visit the statement.
Visit(currStmt, I, DstI);
Dst.insert(DstI);
}
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
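/// Process a LoopExit CFG element: if loop unrolling is enabled, let the
/// unrolling machinery clean up its per-loop state, then generate a LoopExit
/// program point.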
void ExprEngine::ProcessLoopExit(const Stmt* S, ExplodedNode *Pred) {
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
S->getBeginLoc(),
"Error evaluating end of the loop");
ExplodedNodeSet Dst;
Dst.Add(Pred);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
ProgramStateRef NewState = Pred->getState();
if(AMgr.options.ShouldUnrollLoops)
NewState = processLoopEnd(S, NewState);
LoopExit PP(S, Pred->getLocationContext());
Bldr.generateNode(PP, NewState, Pred);
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
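/// Process a constructor initializer. Member initializers either reuse the
/// object that was already constructed directly into the field or bind the
/// value of the init expression into the field's region; base and delegating
/// initializers were already handled when the CXXConstructExpr was visited.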
void ExprEngine::ProcessInitializer(const CFGInitializer CFGInit,
ExplodedNode *Pred) {
const CXXCtorInitializer *BMI = CFGInit.getInitializer();
const Expr *Init = BMI->getInit()->IgnoreImplicit();
const LocationContext *LC = Pred->getLocationContext();
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
BMI->getSourceLocation(),
"Error evaluating initializer");
// We don't clean up dead bindings here.
const auto *stackFrame = cast<StackFrameContext>(Pred->getLocationContext());
const auto *decl = cast<CXXConstructorDecl>(stackFrame->getDecl());
ProgramStateRef State = Pred->getState();
SVal thisVal = State->getSVal(svalBuilder.getCXXThis(decl, stackFrame));
ExplodedNodeSet Tmp;
SVal FieldLoc;
// Evaluate the initializer, if necessary
if (BMI->isAnyMemberInitializer()) {
// Constructors build the object directly in the field,
// but non-objects must be copied in from the initializer.
if (getObjectUnderConstruction(State, BMI, LC)) {
// The field was directly constructed, so there is no need to bind.
// But we still need to stop tracking the object under construction.
State = finishObjectConstruction(State, BMI, LC);
NodeBuilder Bldr(Pred, Tmp, *currBldrCtx);
PostStore PS(Init, LC, /*Loc*/ nullptr, /*tag*/ nullptr);
Bldr.generateNode(PS, State, Pred);
} else {
const ValueDecl *Field;
if (BMI->isIndirectMemberInitializer()) {
Field = BMI->getIndirectMember();
FieldLoc = State->getLValue(BMI->getIndirectMember(), thisVal);
} else {
Field = BMI->getMember();
FieldLoc = State->getLValue(BMI->getMember(), thisVal);
}
SVal InitVal;
if (Init->getType()->isArrayType()) {
// Handle arrays of trivial type. We can represent this with a
// primitive load/copy from the base array region.
const ArraySubscriptExpr *ASE;
while ((ASE = dyn_cast<ArraySubscriptExpr>(Init)))
Init = ASE->getBase()->IgnoreImplicit();
SVal LValue = State->getSVal(Init, stackFrame);
if (!Field->getType()->isReferenceType())
if (Optional<Loc> LValueLoc = LValue.getAs<Loc>())
InitVal = State->getSVal(*LValueLoc);
// If we fail to get the value for some reason, use a symbolic value.
if (InitVal.isUnknownOrUndef()) {
SValBuilder &SVB = getSValBuilder();
InitVal = SVB.conjureSymbolVal(BMI->getInit(), stackFrame,
Field->getType(),
currBldrCtx->blockCount());
}
} else {
InitVal = State->getSVal(BMI->getInit(), stackFrame);
}
PostInitializer PP(BMI, FieldLoc.getAsRegion(), stackFrame);
evalBind(Tmp, Init, Pred, FieldLoc, InitVal, /*isInit=*/true, &PP);
}
} else {
assert(BMI->isBaseInitializer() || BMI->isDelegatingInitializer());
Tmp.insert(Pred);
// We already did all the work when visiting the CXXConstructExpr.
}
// Construct PostInitializer nodes whether the state changed or not,
// so that the diagnostics don't get confused.
PostInitializer PP(BMI, FieldLoc.getAsRegion(), stackFrame);
ExplodedNodeSet Dst;
NodeBuilder Bldr(Tmp, Dst, *currBldrCtx);
for (const auto I : Tmp) {
ProgramStateRef State = I->getState();
Bldr.generateNode(PP, State, I);
}
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
void ExprEngine::ProcessImplicitDtor(const CFGImplicitDtor D,
ExplodedNode *Pred) {
ExplodedNodeSet Dst;
switch (D.getKind()) {
case CFGElement::AutomaticObjectDtor:
ProcessAutomaticObjDtor(D.castAs<CFGAutomaticObjDtor>(), Pred, Dst);
break;
case CFGElement::BaseDtor:
ProcessBaseDtor(D.castAs<CFGBaseDtor>(), Pred, Dst);
break;
case CFGElement::MemberDtor:
ProcessMemberDtor(D.castAs<CFGMemberDtor>(), Pred, Dst);
break;
case CFGElement::TemporaryDtor:
ProcessTemporaryDtor(D.castAs<CFGTemporaryDtor>(), Pred, Dst);
break;
case CFGElement::DeleteDtor:
ProcessDeleteDtor(D.castAs<CFGDeleteDtor>(), Pred, Dst);
break;
default:
llvm_unreachable("Unexpected dtor kind.");
}
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
void ExprEngine::ProcessNewAllocator(const CXXNewExpr *NE,
ExplodedNode *Pred) {
ExplodedNodeSet Dst;
AnalysisManager &AMgr = getAnalysisManager();
AnalyzerOptions &Opts = AMgr.options;
// TODO: We're not evaluating allocators for all cases just yet as
// we're not handling the return value correctly, which causes false
// positives when the alpha.cplusplus.NewDeleteLeaks check is on.
if (Opts.MayInlineCXXAllocator)
VisitCXXNewAllocatorCall(NE, Pred, Dst);
else {
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
const LocationContext *LCtx = Pred->getLocationContext();
PostImplicitCall PP(NE->getOperatorNew(), NE->getBeginLoc(), LCtx);
Bldr.generateNode(PP, Pred->getState(), Pred);
}
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
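/// Process the implicit destructor call for a local variable going out of
/// scope, e.g. the destructor of 's' at the closing brace of
///   { S s; /* ... */ }
/// For arrays, only the first element's destructor is currently modelled.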
void ExprEngine::ProcessAutomaticObjDtor(const CFGAutomaticObjDtor Dtor,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
const VarDecl *varDecl = Dtor.getVarDecl();
QualType varType = varDecl->getType();
ProgramStateRef state = Pred->getState();
SVal dest = state->getLValue(varDecl, Pred->getLocationContext());
const MemRegion *Region = dest.castAs<loc::MemRegionVal>().getRegion();
if (varType->isReferenceType()) {
const MemRegion *ValueRegion = state->getSVal(Region).getAsRegion();
if (!ValueRegion) {
// FIXME: This should not happen. The language guarantees the presence
// of a valid initializer here, so the reference cannot be undefined.
// It seems that we're calling destructors over variables that
// were not initialized yet.
return;
}
Region = ValueRegion->getBaseRegion();
varType = cast<TypedValueRegion>(Region)->getValueType();
}
// FIXME: We need to run the same destructor on every element of the array.
// This workaround will just run the first destructor (which will still
// invalidate the entire array).
EvalCallOptions CallOpts;
Region = makeElementRegion(state, loc::MemRegionVal(Region), varType,
CallOpts.IsArrayCtorOrDtor)
.getAsRegion();
VisitCXXDestructor(varType, Region, Dtor.getTriggerStmt(),
/*IsBase=*/false, Pred, Dst, CallOpts);
}
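/// Process the destructor call implied by a 'delete' or 'delete[]'
/// expression. If the operand is known to be null, the destructor is not
/// modelled at all.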
void ExprEngine::ProcessDeleteDtor(const CFGDeleteDtor Dtor,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
ProgramStateRef State = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
const CXXDeleteExpr *DE = Dtor.getDeleteExpr();
const Stmt *Arg = DE->getArgument();
QualType DTy = DE->getDestroyedType();
SVal ArgVal = State->getSVal(Arg, LCtx);
// If the argument to delete is known to be a null value,
// don't run the destructor.
if (State->isNull(ArgVal).isConstrainedTrue()) {
QualType BTy = getContext().getBaseElementType(DTy);
const CXXRecordDecl *RD = BTy->getAsCXXRecordDecl();
const CXXDestructorDecl *Dtor = RD->getDestructor();
PostImplicitCall PP(Dtor, DE->getBeginLoc(), LCtx);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
Bldr.generateNode(PP, Pred->getState(), Pred);
return;
}
EvalCallOptions CallOpts;
const MemRegion *ArgR = ArgVal.getAsRegion();
if (DE->isArrayForm()) {
// FIXME: We need to run the same destructor on every element of the array.
// This workaround will just run the first destructor (which will still
// invalidate the entire array).
CallOpts.IsArrayCtorOrDtor = true;
// Yes, it may even be a multi-dimensional array.
while (const auto *AT = getContext().getAsArrayType(DTy))
DTy = AT->getElementType();
if (ArgR)
ArgR = getStoreManager().GetElementZeroRegion(cast<SubRegion>(ArgR), DTy);
}
VisitCXXDestructor(DTy, ArgR, DE, /*IsBase=*/false, Pred, Dst, CallOpts);
}
void ExprEngine::ProcessBaseDtor(const CFGBaseDtor D,
ExplodedNode *Pred, ExplodedNodeSet &Dst) {
const LocationContext *LCtx = Pred->getLocationContext();
const auto *CurDtor = cast<CXXDestructorDecl>(LCtx->getDecl());
Loc ThisPtr = getSValBuilder().getCXXThis(CurDtor,
LCtx->getStackFrame());
SVal ThisVal = Pred->getState()->getSVal(ThisPtr);
// Create the base object region.
const CXXBaseSpecifier *Base = D.getBaseSpecifier();
QualType BaseTy = Base->getType();
SVal BaseVal = getStoreManager().evalDerivedToBase(ThisVal, BaseTy,
Base->isVirtual());
EvalCallOptions CallOpts;
VisitCXXDestructor(BaseTy, BaseVal.getAsRegion(), CurDtor->getBody(),
/*IsBase=*/true, Pred, Dst, CallOpts);
}
void ExprEngine::ProcessMemberDtor(const CFGMemberDtor D,
ExplodedNode *Pred, ExplodedNodeSet &Dst) {
const FieldDecl *Member = D.getFieldDecl();
QualType T = Member->getType();
ProgramStateRef State = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
const auto *CurDtor = cast<CXXDestructorDecl>(LCtx->getDecl());
Loc ThisStorageLoc =
getSValBuilder().getCXXThis(CurDtor, LCtx->getStackFrame());
Loc ThisLoc = State->getSVal(ThisStorageLoc).castAs<Loc>();
SVal FieldVal = State->getLValue(Member, ThisLoc);
// FIXME: We need to run the same destructor on every element of the array.
// This workaround will just run the first destructor (which will still
// invalidate the entire array).
EvalCallOptions CallOpts;
FieldVal = makeElementRegion(State, FieldVal, T, CallOpts.IsArrayCtorOrDtor);
VisitCXXDestructor(T, FieldVal.getAsRegion(), CurDtor->getBody(),
/*IsBase=*/false, Pred, Dst, CallOpts);
}
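/// Process the destructor of a temporary, i.e. a CXXBindTemporaryExpr whose
/// lifetime ends at the end of the full-expression. Destructors elided by
/// copy elision are skipped.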
void ExprEngine::ProcessTemporaryDtor(const CFGTemporaryDtor D,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
const CXXBindTemporaryExpr *BTE = D.getBindTemporaryExpr();
ProgramStateRef State = Pred->getState();
const LocationContext *LC = Pred->getLocationContext();
const MemRegion *MR = nullptr;
if (Optional<SVal> V =
getObjectUnderConstruction(State, D.getBindTemporaryExpr(),
Pred->getLocationContext())) {
// FIXME: Currently we insert temporary destructors for default parameters,
// but we don't insert the constructors, so the entry in
// ObjectsUnderConstruction may be missing.
State = finishObjectConstruction(State, D.getBindTemporaryExpr(),
Pred->getLocationContext());
MR = V->getAsRegion();
}
// If copy elision has occurred, and the constructor corresponding to the
// destructor was elided, we need to skip the destructor as well.
if (isDestructorElided(State, BTE, LC)) {
State = cleanupElidedDestructor(State, BTE, LC);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
PostImplicitCall PP(D.getDestructorDecl(getContext()),
D.getBindTemporaryExpr()->getBeginLoc(),
Pred->getLocationContext());
Bldr.generateNode(PP, State, Pred);
return;
}
ExplodedNodeSet CleanDtorState;
StmtNodeBuilder StmtBldr(Pred, CleanDtorState, *currBldrCtx);
StmtBldr.generateNode(D.getBindTemporaryExpr(), Pred, State);
QualType T = D.getBindTemporaryExpr()->getSubExpr()->getType();
// FIXME: Currently CleanDtorState can be empty here due to temporaries being
// bound to default parameters.
assert(CleanDtorState.size() <= 1);
ExplodedNode *CleanPred =
CleanDtorState.empty() ? Pred : *CleanDtorState.begin();
EvalCallOptions CallOpts;
CallOpts.IsTemporaryCtorOrDtor = true;
if (!MR) {
// If we have no MR, we still need to unwrap the array to avoid destroying
// the whole array at once. Regardless, we'd eventually need to model array
// destructors properly, element-by-element.
while (const ArrayType *AT = getContext().getAsArrayType(T)) {
T = AT->getElementType();
CallOpts.IsArrayCtorOrDtor = true;
}
} else {
// We'd eventually need the makeElementRegion() trick here,
// but for now we don't have the respective construction contexts,
// so MR would always be null in this case. Do nothing for now.
}
VisitCXXDestructor(T, MR, D.getBindTemporaryExpr(),
/*IsBase=*/false, CleanPred, Dst, CallOpts);
}
void ExprEngine::processCleanupTemporaryBranch(const CXXBindTemporaryExpr *BTE,
NodeBuilderContext &BldCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF) {
BranchNodeBuilder TempDtorBuilder(Pred, Dst, BldCtx, DstT, DstF);
ProgramStateRef State = Pred->getState();
const LocationContext *LC = Pred->getLocationContext();
if (getObjectUnderConstruction(State, BTE, LC)) {
TempDtorBuilder.markInfeasible(false);
TempDtorBuilder.generateNode(State, true, Pred);
} else {
TempDtorBuilder.markInfeasible(true);
TempDtorBuilder.generateNode(State, false, Pred);
}
}
void ExprEngine::VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *BTE,
ExplodedNodeSet &PreVisit,
ExplodedNodeSet &Dst) {
// This is a fallback solution in case we didn't have a construction
// context when we were constructing the temporary. Otherwise the map should
// have been populated there.
if (!getAnalysisManager().options.ShouldIncludeTemporaryDtorsInCFG) {
// In case we don't have temporary destructors in the CFG, do not mark
// the initialization - we would otherwise never clean it up.
Dst = PreVisit;
return;
}
StmtNodeBuilder StmtBldr(PreVisit, Dst, *currBldrCtx);
for (ExplodedNode *Node : PreVisit) {
ProgramStateRef State = Node->getState();
const LocationContext *LC = Node->getLocationContext();
if (!getObjectUnderConstruction(State, BTE, LC)) {
// FIXME: Currently the state might also already contain the marker due to
// incorrect handling of temporaries bound to default parameters; for
// those, we currently skip the CXXBindTemporaryExpr but rely on adding
// temporary destructor nodes.
State = addObjectUnderConstruction(State, BTE, LC, UnknownVal());
}
StmtBldr.generateNode(BTE, Node, State);
}
}
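/// Notify checkers that all symbols reachable from the given values may have
/// escaped, i.e. may now be accessed and mutated by unknown code.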
ProgramStateRef ExprEngine::escapeValues(ProgramStateRef State,
ArrayRef<SVal> Vs,
PointerEscapeKind K,
const CallEvent *Call) const {
class CollectReachableSymbolsCallback final : public SymbolVisitor {
InvalidatedSymbols &Symbols;
public:
explicit CollectReachableSymbolsCallback(InvalidatedSymbols &Symbols)
: Symbols(Symbols) {}
const InvalidatedSymbols &getSymbols() const { return Symbols; }
bool VisitSymbol(SymbolRef Sym) override {
Symbols.insert(Sym);
return true;
}
};
InvalidatedSymbols Symbols;
CollectReachableSymbolsCallback CallBack(Symbols);
for (SVal V : Vs)
State->scanReachableSymbols(V, CallBack);
return getCheckerManager().runCheckersForPointerEscape(
State, CallBack.getSymbols(), Call, K, nullptr);
}
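/// Visit - The main per-statement dispatcher; applies the transfer function
/// for each kind of statement or expression the analyzer models.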
void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
ExplodedNodeSet &DstTop) {
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
S->getBeginLoc(), "Error evaluating statement");
ExplodedNodeSet Dst;
StmtNodeBuilder Bldr(Pred, DstTop, *currBldrCtx);
assert(!isa<Expr>(S) || S == cast<Expr>(S)->IgnoreParens());
switch (S->getStmtClass()) {
// C++, OpenMP and ARC stuff we don't support yet.
case Stmt::CXXDependentScopeMemberExprClass:
case Stmt::CXXTryStmtClass:
case Stmt::CXXTypeidExprClass:
case Stmt::CXXUuidofExprClass:
case Stmt::CXXFoldExprClass:
case Stmt::MSPropertyRefExprClass:
case Stmt::MSPropertySubscriptExprClass:
case Stmt::CXXUnresolvedConstructExprClass:
case Stmt::DependentScopeDeclRefExprClass:
case Stmt::ArrayTypeTraitExprClass:
case Stmt::ExpressionTraitExprClass:
case Stmt::UnresolvedLookupExprClass:
case Stmt::UnresolvedMemberExprClass:
case Stmt::TypoExprClass:
case Stmt::RecoveryExprClass:
case Stmt::CXXNoexceptExprClass:
case Stmt::PackExpansionExprClass:
case Stmt::SubstNonTypeTemplateParmPackExprClass:
case Stmt::FunctionParmPackExprClass:
case Stmt::CoroutineBodyStmtClass:
case Stmt::CoawaitExprClass:
case Stmt::DependentCoawaitExprClass:
case Stmt::CoreturnStmtClass:
case Stmt::CoyieldExprClass:
case Stmt::SEHTryStmtClass:
case Stmt::SEHExceptStmtClass:
case Stmt::SEHLeaveStmtClass:
case Stmt::SEHFinallyStmtClass:
case Stmt::OMPCanonicalLoopClass:
case Stmt::OMPParallelDirectiveClass:
case Stmt::OMPSimdDirectiveClass:
case Stmt::OMPForDirectiveClass:
case Stmt::OMPForSimdDirectiveClass:
case Stmt::OMPSectionsDirectiveClass:
case Stmt::OMPSectionDirectiveClass:
case Stmt::OMPSingleDirectiveClass:
case Stmt::OMPMasterDirectiveClass:
case Stmt::OMPCriticalDirectiveClass:
case Stmt::OMPParallelForDirectiveClass:
case Stmt::OMPParallelForSimdDirectiveClass:
case Stmt::OMPParallelSectionsDirectiveClass:
case Stmt::OMPParallelMasterDirectiveClass:
case Stmt::OMPParallelMaskedDirectiveClass:
case Stmt::OMPTaskDirectiveClass:
case Stmt::OMPTaskyieldDirectiveClass:
case Stmt::OMPBarrierDirectiveClass:
case Stmt::OMPTaskwaitDirectiveClass:
case Stmt::OMPTaskgroupDirectiveClass:
case Stmt::OMPFlushDirectiveClass:
case Stmt::OMPDepobjDirectiveClass:
case Stmt::OMPScanDirectiveClass:
case Stmt::OMPOrderedDirectiveClass:
case Stmt::OMPAtomicDirectiveClass:
case Stmt::OMPTargetDirectiveClass:
case Stmt::OMPTargetDataDirectiveClass:
case Stmt::OMPTargetEnterDataDirectiveClass:
case Stmt::OMPTargetExitDataDirectiveClass:
case Stmt::OMPTargetParallelDirectiveClass:
case Stmt::OMPTargetParallelForDirectiveClass:
case Stmt::OMPTargetUpdateDirectiveClass:
case Stmt::OMPTeamsDirectiveClass:
case Stmt::OMPCancellationPointDirectiveClass:
case Stmt::OMPCancelDirectiveClass:
case Stmt::OMPTaskLoopDirectiveClass:
case Stmt::OMPTaskLoopSimdDirectiveClass:
case Stmt::OMPMasterTaskLoopDirectiveClass:
case Stmt::OMPMaskedTaskLoopDirectiveClass:
case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
case Stmt::OMPMaskedTaskLoopSimdDirectiveClass:
case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
case Stmt::OMPParallelMaskedTaskLoopSimdDirectiveClass:
case Stmt::OMPDistributeDirectiveClass:
case Stmt::OMPDistributeParallelForDirectiveClass:
case Stmt::OMPDistributeParallelForSimdDirectiveClass:
case Stmt::OMPDistributeSimdDirectiveClass:
case Stmt::OMPTargetParallelForSimdDirectiveClass:
case Stmt::OMPTargetSimdDirectiveClass:
case Stmt::OMPTeamsDistributeDirectiveClass:
case Stmt::OMPTeamsDistributeSimdDirectiveClass:
case Stmt::OMPTeamsDistributeParallelForSimdDirectiveClass:
case Stmt::OMPTeamsDistributeParallelForDirectiveClass:
case Stmt::OMPTargetTeamsDirectiveClass:
case Stmt::OMPTargetTeamsDistributeDirectiveClass:
case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass:
case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
case Stmt::OMPTileDirectiveClass:
case Stmt::OMPInteropDirectiveClass:
case Stmt::OMPDispatchDirectiveClass:
case Stmt::OMPMaskedDirectiveClass:
case Stmt::OMPGenericLoopDirectiveClass:
case Stmt::OMPTeamsGenericLoopDirectiveClass:
case Stmt::OMPTargetTeamsGenericLoopDirectiveClass:
case Stmt::OMPParallelGenericLoopDirectiveClass:
case Stmt::OMPTargetParallelGenericLoopDirectiveClass:
case Stmt::CapturedStmtClass:
case Stmt::OMPUnrollDirectiveClass:
case Stmt::OMPMetaDirectiveClass: {
const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
Engine.addAbortedBlock(node, currBldrCtx->getBlock());
break;
}
case Stmt::ParenExprClass:
llvm_unreachable("ParenExprs already handled.");
case Stmt::GenericSelectionExprClass:
llvm_unreachable("GenericSelectionExprs already handled.");
// Cases that should never be evaluated simply because they shouldn't
// appear in the CFG.
case Stmt::BreakStmtClass:
case Stmt::CaseStmtClass:
case Stmt::CompoundStmtClass:
case Stmt::ContinueStmtClass:
case Stmt::CXXForRangeStmtClass:
case Stmt::DefaultStmtClass:
case Stmt::DoStmtClass:
case Stmt::ForStmtClass:
case Stmt::GotoStmtClass:
case Stmt::IfStmtClass:
case Stmt::IndirectGotoStmtClass:
case Stmt::LabelStmtClass:
case Stmt::NoStmtClass:
case Stmt::NullStmtClass:
case Stmt::SwitchStmtClass:
case Stmt::WhileStmtClass:
case Expr::MSDependentExistsStmtClass:
llvm_unreachable("Stmt should not be in analyzer evaluation loop");
case Stmt::ImplicitValueInitExprClass:
// These nodes are shared in the CFG and would cause caching out.
// Moreover, no additional evaluation is required for them; the
// analyzer can reconstruct these values from the AST.
llvm_unreachable("Should be pruned from CFG");
case Stmt::ObjCSubscriptRefExprClass:
case Stmt::ObjCPropertyRefExprClass:
llvm_unreachable("These are handled by PseudoObjectExpr");
case Stmt::GNUNullExprClass: {
// GNU __null is a pointer-width integer, not an actual pointer.
ProgramStateRef state = Pred->getState();
state = state->BindExpr(
S, Pred->getLocationContext(),
svalBuilder.makeIntValWithWidth(getContext().VoidPtrTy, 0));
Bldr.generateNode(S, Pred, state);
break;
}
case Stmt::ObjCAtSynchronizedStmtClass:
Bldr.takeNodes(Pred);
VisitObjCAtSynchronizedStmt(cast<ObjCAtSynchronizedStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Expr::ConstantExprClass:
case Stmt::ExprWithCleanupsClass:
// Handled due to fully linearised CFG.
break;
case Stmt::CXXBindTemporaryExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet Next;
VisitCXXBindTemporaryExpr(cast<CXXBindTemporaryExpr>(S), PreVisit, Next);
getCheckerManager().runCheckersForPostStmt(Dst, Next, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::ArrayInitLoopExprClass:
Bldr.takeNodes(Pred);
VisitArrayInitLoopExpr(cast<ArrayInitLoopExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
// Cases not handled yet, but which we will handle some day.
case Stmt::DesignatedInitExprClass:
case Stmt::DesignatedInitUpdateExprClass:
case Stmt::ArrayInitIndexExprClass:
case Stmt::ExtVectorElementExprClass:
case Stmt::ImaginaryLiteralClass:
case Stmt::ObjCAtCatchStmtClass:
case Stmt::ObjCAtFinallyStmtClass:
case Stmt::ObjCAtTryStmtClass:
case Stmt::ObjCAutoreleasePoolStmtClass:
case Stmt::ObjCEncodeExprClass:
case Stmt::ObjCIsaExprClass:
case Stmt::ObjCProtocolExprClass:
case Stmt::ObjCSelectorExprClass:
case Stmt::ParenListExprClass:
case Stmt::ShuffleVectorExprClass:
case Stmt::ConvertVectorExprClass:
case Stmt::VAArgExprClass:
case Stmt::CUDAKernelCallExprClass:
case Stmt::OpaqueValueExprClass:
case Stmt::AsTypeExprClass:
case Stmt::ConceptSpecializationExprClass:
case Stmt::CXXRewrittenBinaryOperatorClass:
case Stmt::RequiresExprClass:
// Fall through.
// Cases we intentionally don't evaluate, since they don't need
// to be explicitly evaluated.
case Stmt::PredefinedExprClass:
case Stmt::AddrLabelExprClass:
case Stmt::AttributedStmtClass:
case Stmt::IntegerLiteralClass:
case Stmt::FixedPointLiteralClass:
case Stmt::CharacterLiteralClass:
case Stmt::CXXScalarValueInitExprClass:
case Stmt::CXXBoolLiteralExprClass:
case Stmt::ObjCBoolLiteralExprClass:
case Stmt::ObjCAvailabilityCheckExprClass:
case Stmt::FloatingLiteralClass:
case Stmt::NoInitExprClass:
case Stmt::SizeOfPackExprClass:
case Stmt::StringLiteralClass:
case Stmt::SourceLocExprClass:
case Stmt::ObjCStringLiteralClass:
case Stmt::CXXPseudoDestructorExprClass:
case Stmt::SubstNonTypeTemplateParmExprClass:
case Stmt::CXXNullPtrLiteralExprClass:
case Stmt::OMPArraySectionExprClass:
case Stmt::OMPArrayShapingExprClass:
case Stmt::OMPIteratorExprClass:
case Stmt::SYCLUniqueStableNameExprClass:
case Stmt::TypeTraitExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet preVisit;
getCheckerManager().runCheckersForPreStmt(preVisit, Pred, S, *this);
getCheckerManager().runCheckersForPostStmt(Dst, preVisit, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXDefaultArgExprClass:
case Stmt::CXXDefaultInitExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet Tmp;
StmtNodeBuilder Bldr2(PreVisit, Tmp, *currBldrCtx);
const Expr *ArgE;
if (const auto *DefE = dyn_cast<CXXDefaultArgExpr>(S))
ArgE = DefE->getExpr();
else if (const auto *DefE = dyn_cast<CXXDefaultInitExpr>(S))
ArgE = DefE->getExpr();
else
llvm_unreachable("unknown constant wrapper kind");
bool IsTemporary = false;
if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
ArgE = MTE->getSubExpr();
IsTemporary = true;
}
Optional<SVal> ConstantVal = svalBuilder.getConstantVal(ArgE);
if (!ConstantVal)
ConstantVal = UnknownVal();
const LocationContext *LCtx = Pred->getLocationContext();
for (const auto I : PreVisit) {
ProgramStateRef State = I->getState();
State = State->BindExpr(S, LCtx, *ConstantVal);
if (IsTemporary)
State = createTemporaryRegionIfNeeded(State, LCtx,
cast<Expr>(S),
cast<Expr>(S));
Bldr2.generateNode(S, I, State);
}
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
Bldr.addNodes(Dst);
break;
}
// Cases we evaluate as opaque expressions, conjuring a symbol.
case Stmt::CXXStdInitializerListExprClass:
case Expr::ObjCArrayLiteralClass:
case Expr::ObjCDictionaryLiteralClass:
case Expr::ObjCBoxedExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet preVisit;
getCheckerManager().runCheckersForPreStmt(preVisit, Pred, S, *this);
ExplodedNodeSet Tmp;
StmtNodeBuilder Bldr2(preVisit, Tmp, *currBldrCtx);
const auto *Ex = cast<Expr>(S);
QualType resultType = Ex->getType();
for (const auto N : preVisit) {
const LocationContext *LCtx = N->getLocationContext();
SVal result = svalBuilder.conjureSymbolVal(nullptr, Ex, LCtx,
resultType,
currBldrCtx->blockCount());
ProgramStateRef State = N->getState()->BindExpr(Ex, LCtx, result);
// Escape pointers passed into the list, unless it's an ObjC boxed
// expression which is not a boxable C structure.
if (!(isa<ObjCBoxedExpr>(Ex) &&
!cast<ObjCBoxedExpr>(Ex)->getSubExpr()
->getType()->isRecordType()))
for (auto Child : Ex->children()) {
assert(Child);
SVal Val = State->getSVal(Child, LCtx);
State = escapeValues(State, Val, PSK_EscapeOther);
}
Bldr2.generateNode(S, N, State);
}
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::ArraySubscriptExprClass:
Bldr.takeNodes(Pred);
VisitArraySubscriptExpr(cast<ArraySubscriptExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::MatrixSubscriptExprClass:
llvm_unreachable("Support for MatrixSubscriptExpr is not implemented.");
break;
case Stmt::GCCAsmStmtClass:
Bldr.takeNodes(Pred);
VisitGCCAsmStmt(cast<GCCAsmStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::MSAsmStmtClass:
Bldr.takeNodes(Pred);
VisitMSAsmStmt(cast<MSAsmStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::BlockExprClass:
Bldr.takeNodes(Pred);
VisitBlockExpr(cast<BlockExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::LambdaExprClass:
if (AMgr.options.ShouldInlineLambdas) {
Bldr.takeNodes(Pred);
VisitLambdaExpr(cast<LambdaExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
} else {
const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
Engine.addAbortedBlock(node, currBldrCtx->getBlock());
}
break;
case Stmt::BinaryOperatorClass: {
const auto *B = cast<BinaryOperator>(S);
if (B->isLogicalOp()) {
Bldr.takeNodes(Pred);
VisitLogicalExpr(B, Pred, Dst);
Bldr.addNodes(Dst);
break;
}
else if (B->getOpcode() == BO_Comma) {
ProgramStateRef state = Pred->getState();
Bldr.generateNode(B, Pred,
state->BindExpr(B, Pred->getLocationContext(),
state->getSVal(B->getRHS(),
Pred->getLocationContext())));
break;
}
Bldr.takeNodes(Pred);
if (AMgr.options.ShouldEagerlyAssume &&
(B->isRelationalOp() || B->isEqualityOp())) {
ExplodedNodeSet Tmp;
VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Tmp);
evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, cast<Expr>(S));
}
else
VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXOperatorCallExprClass: {
const auto *OCE = cast<CXXOperatorCallExpr>(S);
// For instance method operators, make sure the 'this' argument has a
// valid region.
const Decl *Callee = OCE->getCalleeDecl();
if (const auto *MD = dyn_cast_or_null<CXXMethodDecl>(Callee)) {
if (MD->isInstance()) {
ProgramStateRef State = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
ProgramStateRef NewState =
createTemporaryRegionIfNeeded(State, LCtx, OCE->getArg(0));
if (NewState != State) {
Pred = Bldr.generateNode(OCE, Pred, NewState, /*tag=*/nullptr,
ProgramPoint::PreStmtKind);
// Did we cache out?
if (!Pred)
break;
}
}
}
// FALLTHROUGH
LLVM_FALLTHROUGH;
}
case Stmt::CallExprClass:
case Stmt::CXXMemberCallExprClass:
case Stmt::UserDefinedLiteralClass:
Bldr.takeNodes(Pred);
VisitCallExpr(cast<CallExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXCatchStmtClass:
Bldr.takeNodes(Pred);
VisitCXXCatchStmt(cast<CXXCatchStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXTemporaryObjectExprClass:
case Stmt::CXXConstructExprClass:
Bldr.takeNodes(Pred);
VisitCXXConstructExpr(cast<CXXConstructExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXInheritedCtorInitExprClass:
Bldr.takeNodes(Pred);
VisitCXXInheritedCtorInitExpr(cast<CXXInheritedCtorInitExpr>(S), Pred,
Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXNewExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet PostVisit;
for (const auto i : PreVisit)
VisitCXXNewExpr(cast<CXXNewExpr>(S), i, PostVisit);
getCheckerManager().runCheckersForPostStmt(Dst, PostVisit, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXDeleteExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
const auto *CDE = cast<CXXDeleteExpr>(S);
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet PostVisit;
getCheckerManager().runCheckersForPostStmt(PostVisit, PreVisit, S, *this);
for (const auto i : PostVisit)
VisitCXXDeleteExpr(CDE, i, Dst);
Bldr.addNodes(Dst);
break;
}
// FIXME: ChooseExpr is really a constant. We need to fix the CFG so that it
// does not model these as explicit control-flow.
case Stmt::ChooseExprClass: { // __builtin_choose_expr
Bldr.takeNodes(Pred);
const auto *C = cast<ChooseExpr>(S);
VisitGuardedExpr(C, C->getLHS(), C->getRHS(), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::CompoundAssignOperatorClass:
Bldr.takeNodes(Pred);
VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CompoundLiteralExprClass:
Bldr.takeNodes(Pred);
VisitCompoundLiteralExpr(cast<CompoundLiteralExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::BinaryConditionalOperatorClass:
case Stmt::ConditionalOperatorClass: { // '?' operator
Bldr.takeNodes(Pred);
const auto *C = cast<AbstractConditionalOperator>(S);
VisitGuardedExpr(C, C->getTrueExpr(), C->getFalseExpr(), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXThisExprClass:
Bldr.takeNodes(Pred);
VisitCXXThisExpr(cast<CXXThisExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::DeclRefExprClass: {
Bldr.takeNodes(Pred);
const auto *DE = cast<DeclRefExpr>(S);
VisitCommonDeclRefExpr(DE, DE->getDecl(), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::DeclStmtClass:
Bldr.takeNodes(Pred);
VisitDeclStmt(cast<DeclStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ImplicitCastExprClass:
case Stmt::CStyleCastExprClass:
case Stmt::CXXStaticCastExprClass:
case Stmt::CXXDynamicCastExprClass:
case Stmt::CXXReinterpretCastExprClass:
case Stmt::CXXConstCastExprClass:
case Stmt::CXXFunctionalCastExprClass:
case Stmt::BuiltinBitCastExprClass:
case Stmt::ObjCBridgedCastExprClass:
case Stmt::CXXAddrspaceCastExprClass: {
Bldr.takeNodes(Pred);
const auto *C = cast<CastExpr>(S);
ExplodedNodeSet dstExpr;
VisitCast(C, C->getSubExpr(), Pred, dstExpr);
// Handle the postvisit checks.
getCheckerManager().runCheckersForPostStmt(Dst, dstExpr, C, *this);
Bldr.addNodes(Dst);
break;
}
case Expr::MaterializeTemporaryExprClass: {
Bldr.takeNodes(Pred);
const auto *MTE = cast<MaterializeTemporaryExpr>(S);
ExplodedNodeSet dstPrevisit;
getCheckerManager().runCheckersForPreStmt(dstPrevisit, Pred, MTE, *this);
ExplodedNodeSet dstExpr;
for (const auto i : dstPrevisit)
CreateCXXTemporaryObject(MTE, i, dstExpr);
getCheckerManager().runCheckersForPostStmt(Dst, dstExpr, MTE, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::InitListExprClass:
Bldr.takeNodes(Pred);
VisitInitListExpr(cast<InitListExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::MemberExprClass:
Bldr.takeNodes(Pred);
VisitMemberExpr(cast<MemberExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::AtomicExprClass:
Bldr.takeNodes(Pred);
VisitAtomicExpr(cast<AtomicExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCIvarRefExprClass:
Bldr.takeNodes(Pred);
VisitLvalObjCIvarRefExpr(cast<ObjCIvarRefExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCForCollectionStmtClass:
Bldr.takeNodes(Pred);
VisitObjCForCollectionStmt(cast<ObjCForCollectionStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCMessageExprClass:
Bldr.takeNodes(Pred);
VisitObjCMessage(cast<ObjCMessageExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCAtThrowStmtClass:
case Stmt::CXXThrowExprClass:
// FIXME: This is not complete. We basically treat @throw as
// an abort.
Bldr.generateSink(S, Pred, Pred->getState());
break;
case Stmt::ReturnStmtClass:
Bldr.takeNodes(Pred);
VisitReturnStmt(cast<ReturnStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::OffsetOfExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet PostVisit;
for (const auto Node : PreVisit)
VisitOffsetOfExpr(cast<OffsetOfExpr>(S), Node, PostVisit);
getCheckerManager().runCheckersForPostStmt(Dst, PostVisit, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::UnaryExprOrTypeTraitExprClass:
Bldr.takeNodes(Pred);
VisitUnaryExprOrTypeTraitExpr(cast<UnaryExprOrTypeTraitExpr>(S),
Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::StmtExprClass: {
const auto *SE = cast<StmtExpr>(S);
if (SE->getSubStmt()->body_empty()) {
// Empty statement expression.
assert(SE->getType() == getContext().VoidTy
&& "Empty statement expression must have void type.");
break;
}
if (const auto *LastExpr =
dyn_cast<Expr>(*SE->getSubStmt()->body_rbegin())) {
ProgramStateRef state = Pred->getState();
Bldr.generateNode(SE, Pred,
state->BindExpr(SE, Pred->getLocationContext(),
state->getSVal(LastExpr,
Pred->getLocationContext())));
}
break;
}
case Stmt::UnaryOperatorClass: {
Bldr.takeNodes(Pred);
const auto *U = cast<UnaryOperator>(S);
if (AMgr.options.ShouldEagerlyAssume && (U->getOpcode() == UO_LNot)) {
ExplodedNodeSet Tmp;
VisitUnaryOperator(U, Pred, Tmp);
evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, U);
}
else
VisitUnaryOperator(U, Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::PseudoObjectExprClass: {
Bldr.takeNodes(Pred);
ProgramStateRef state = Pred->getState();
const auto *PE = cast<PseudoObjectExpr>(S);
if (const Expr *Result = PE->getResultExpr()) {
SVal V = state->getSVal(Result, Pred->getLocationContext());
Bldr.generateNode(S, Pred,
state->BindExpr(S, Pred->getLocationContext(), V));
}
else
Bldr.generateNode(S, Pred,
state->BindExpr(S, Pred->getLocationContext(),
UnknownVal()));
Bldr.addNodes(Dst);
break;
}
case Expr::ObjCIndirectCopyRestoreExprClass: {
// ObjCIndirectCopyRestoreExpr implies passing a temporary for
// correctness of lifetime management. Due to limited analysis
// of ARC, this is implemented as direct arg passing.
Bldr.takeNodes(Pred);
ProgramStateRef state = Pred->getState();
const auto *OIE = cast<ObjCIndirectCopyRestoreExpr>(S);
const Expr *E = OIE->getSubExpr();
SVal V = state->getSVal(E, Pred->getLocationContext());
Bldr.generateNode(S, Pred,
state->BindExpr(S, Pred->getLocationContext(), V));
Bldr.addNodes(Dst);
break;
}
}
}
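/// When inlining a call has exhausted the analysis budget, walk back to the
/// node right before the call was processed and re-enqueue it with the
/// ReplayWithoutInlining flag set, so that the call is re-evaluated
/// conservatively. Returns false if no such node could be found.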
bool ExprEngine::replayWithoutInlining(ExplodedNode *N,
const LocationContext *CalleeLC) {
const StackFrameContext *CalleeSF = CalleeLC->getStackFrame();
const StackFrameContext *CallerSF = CalleeSF->getParent()->getStackFrame();
assert(CalleeSF && CallerSF);
ExplodedNode *BeforeProcessingCall = nullptr;
const Stmt *CE = CalleeSF->getCallSite();
// Find the first node before we started processing the call expression.
while (N) {
ProgramPoint L = N->getLocation();
BeforeProcessingCall = N;
N = N->pred_empty() ? nullptr : *(N->pred_begin());
// Skip the nodes corresponding to the inlined code.
if (L.getStackFrame() != CallerSF)
continue;
// We reached the caller. Find the node right before we started
// processing the call.
if (L.isPurgeKind())
continue;
if (L.getAs<PreImplicitCall>())
continue;
if (L.getAs<CallEnter>())
continue;
if (Optional<StmtPoint> SP = L.getAs<StmtPoint>())
if (SP->getStmt() == CE)
continue;
break;
}
if (!BeforeProcessingCall)
return false;
// TODO: Clean up the unneeded nodes.
// Build an Epsilon node from which we will restart the analysis.
// Note that CE is permitted to be NULL!
ProgramPoint NewNodeLoc =
EpsilonPoint(BeforeProcessingCall->getLocationContext(), CE);
// Add the special flag to GDM to signal retrying with no inlining.
// Note, changing the state ensures that we are not going to cache out.
ProgramStateRef NewNodeState = BeforeProcessingCall->getState();
NewNodeState =
NewNodeState->set<ReplayWithoutInlining>(const_cast<Stmt *>(CE));
// Make the new node a successor of BeforeProcessingCall.
bool IsNew = false;
ExplodedNode *NewNode = G.getNode(NewNodeLoc, NewNodeState, false, &IsNew);
// We cached out at this point. Caching out is common due to us backtracking
// from the inlined function, which might spawn several paths.
if (!IsNew)
return true;
NewNode->addPredecessor(BeforeProcessingCall, G);
// Add the new node to the work list.
Engine.enqueueStmtNode(NewNode, CalleeSF->getCallSiteBlock(),
CalleeSF->getIndex());
NumTimesRetriedWithoutInlining++;
return true;
}
/// Block entrance. (Update counters).
void ExprEngine::processCFGBlockEntrance(const BlockEdge &L,
NodeBuilderWithSinks &nodeBuilder,
ExplodedNode *Pred) {
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
// If we reach a loop which has a known bound (and meets
// other constraints) then consider completely unrolling it.
if(AMgr.options.ShouldUnrollLoops) {
unsigned maxBlockVisitOnPath = AMgr.options.maxBlockVisitOnPath;
const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt();
if (Term) {
ProgramStateRef NewState = updateLoopStack(Term, AMgr.getASTContext(),
Pred, maxBlockVisitOnPath);
if (NewState != Pred->getState()) {
ExplodedNode *UpdatedNode = nodeBuilder.generateNode(NewState, Pred);
if (!UpdatedNode)
return;
Pred = UpdatedNode;
}
}
// If we are inside an unrolled loop then there is no need to check the counters.
if(isUnrolledState(Pred->getState()))
return;
}
// If this block is terminated by a loop and it has already been visited the
// maximum number of times, widen the loop.
unsigned int BlockCount = nodeBuilder.getContext().blockCount();
if (BlockCount == AMgr.options.maxBlockVisitOnPath - 1 &&
AMgr.options.ShouldWidenLoops) {
const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt();
if (!isa_and_nonnull<ForStmt, WhileStmt, DoStmt>(Term))
return;
// Widen.
const LocationContext *LCtx = Pred->getLocationContext();
ProgramStateRef WidenedState =
getWidenedLoopState(Pred->getState(), LCtx, BlockCount, Term);
nodeBuilder.generateNode(WidenedState, Pred);
return;
}
// FIXME: Refactor this into a checker.
if (BlockCount >= AMgr.options.maxBlockVisitOnPath) {
static SimpleProgramPointTag tag(TagProviderName, "Block count exceeded");
const ExplodedNode *Sink =
nodeBuilder.generateSink(Pred->getState(), Pred, &tag);
// Check if we stopped at the top level function or not.
// Root node should have the location context of the top most function.
const LocationContext *CalleeLC = Pred->getLocation().getLocationContext();
const LocationContext *CalleeSF = CalleeLC->getStackFrame();
const LocationContext *RootLC =
(*G.roots_begin())->getLocation().getLocationContext();
if (RootLC->getStackFrame() != CalleeSF) {
Engine.FunctionSummaries->markReachedMaxBlockCount(CalleeSF->getDecl());
// Re-run the call evaluation without inlining it, by storing the
// no-inlining policy in the state and enqueuing the new work item on
// the list. Replay should almost never fail. Use the stats to catch it
// if it does.
if ((!AMgr.options.NoRetryExhausted &&
replayWithoutInlining(Pred, CalleeLC)))
return;
NumMaxBlockCountReachedInInlined++;
} else
NumMaxBlockCountReached++;
// Mark sink nodes as exhausted (for stats) only if the retry failed.
Engine.blocksExhausted.push_back(std::make_pair(L, Sink));
}
}
//===----------------------------------------------------------------------===//
// Branch processing.
//===----------------------------------------------------------------------===//
/// RecoverCastedSymbol - A helper function for ProcessBranch that is used
/// to try to recover some path-sensitivity for casts of symbolic
/// integers that promote their values (which are currently not tracked well).
/// This function returns the SVal bound to Condition->IgnoreCasts if all the
/// cast(s) did was sign-extend the original value.
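///
/// For example, for a branch on '(int)c' where 'c' is a symbolic 'char', the
/// cast only promotes the value, so the SVal bound to 'c' itself can be used
/// to constrain the condition.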
static SVal RecoverCastedSymbol(ProgramStateRef state,
const Stmt *Condition,
const LocationContext *LCtx,
ASTContext &Ctx) {
const auto *Ex = dyn_cast<Expr>(Condition);
if (!Ex)
return UnknownVal();
uint64_t bits = 0;
bool bitsInit = false;
while (const auto *CE = dyn_cast<CastExpr>(Ex)) {
QualType T = CE->getType();
if (!T->isIntegralOrEnumerationType())
return UnknownVal();
uint64_t newBits = Ctx.getTypeSize(T);
if (!bitsInit || newBits < bits) {
bitsInit = true;
bits = newBits;
}
Ex = CE->getSubExpr();
}
// We reached a non-cast. Is it a symbolic value?
QualType T = Ex->getType();
if (!bitsInit || !T->isIntegralOrEnumerationType() ||
Ctx.getTypeSize(T) > bits)
return UnknownVal();
return state->getSVal(Ex, LCtx);
}
#ifndef NDEBUG
static const Stmt *getRightmostLeaf(const Stmt *Condition) {
while (Condition) {
const auto *BO = dyn_cast<BinaryOperator>(Condition);
if (!BO || !BO->isLogicalOp()) {
return Condition;
}
Condition = BO->getRHS()->IgnoreParens();
}
return nullptr;
}
#endif
// Returns the condition the branch at the end of 'B' depends on and whose value
// has been evaluated within 'B'.
// In most cases, the terminator condition of 'B' will be evaluated fully in
// the last statement of 'B'; in those cases, the resolved condition is the
// given 'Condition'.
// If the condition of the branch is a logical binary operator tree, the CFG is
// optimized: in that case, we know that the expression formed by all but the
// rightmost leaf of the logical binary operator tree must be true, and thus
// the branch condition is at this point equivalent to the truth value of that
// rightmost leaf; the CFG block thus only evaluates this rightmost leaf
// expression in its final statement. As the full condition in that case was
// not evaluated, and is thus not in the SVal cache, we need to use that leaf
// expression to evaluate the truth value of the condition in the current state
// space.
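// For example, given 'if (a && b)', the block that feeds the branch only
// evaluates 'b' (short-circuit evaluation already guaranteed that 'a' was
// true on this path), so 'b' is the statement whose SVal must be consulted.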
static const Stmt *ResolveCondition(const Stmt *Condition,
const CFGBlock *B) {
if (const auto *Ex = dyn_cast<Expr>(Condition))
Condition = Ex->IgnoreParens();
const auto *BO = dyn_cast<BinaryOperator>(Condition);
if (!BO || !BO->isLogicalOp())
return Condition;
assert(B->getTerminator().isStmtBranch() &&
"Other kinds of branches are handled separately!");
// For logical operations, we still have the case where some branches
// use the traditional "merge" approach and others sink the branch
// directly into the basic blocks representing the logical operation.
// We need to distinguish between those two cases here.
// The invariants are still shifting, but it is possible that the
// last element in a CFGBlock is not a CFGStmt. Look for the last
// CFGStmt as the value of the condition.
CFGBlock::const_reverse_iterator I = B->rbegin(), E = B->rend();
for (; I != E; ++I) {
CFGElement Elem = *I;
Optional<CFGStmt> CS = Elem.getAs<CFGStmt>();
if (!CS)
continue;
const Stmt *LastStmt = CS->getStmt();
assert(LastStmt == Condition || LastStmt == getRightmostLeaf(Condition));
return LastStmt;
}
llvm_unreachable("could not resolve condition");
}
using ObjCForLctxPair =
std::pair<const ObjCForCollectionStmt *, const LocationContext *>;
REGISTER_MAP_WITH_PROGRAMSTATE(ObjCForHasMoreIterations, ObjCForLctxPair, bool)
ProgramStateRef ExprEngine::setWhetherHasMoreIteration(
ProgramStateRef State, const ObjCForCollectionStmt *O,
const LocationContext *LC, bool HasMoreIteration) {
assert(!State->contains<ObjCForHasMoreIterations>({O, LC}));
return State->set<ObjCForHasMoreIterations>({O, LC}, HasMoreIteration);
}
ProgramStateRef
ExprEngine::removeIterationState(ProgramStateRef State,
const ObjCForCollectionStmt *O,
const LocationContext *LC) {
assert(State->contains<ObjCForHasMoreIterations>({O, LC}));
return State->remove<ObjCForHasMoreIterations>({O, LC});
}
bool ExprEngine::hasMoreIteration(ProgramStateRef State,
const ObjCForCollectionStmt *O,
const LocationContext *LC) {
assert(State->contains<ObjCForHasMoreIterations>({O, LC}));
return *State->get<ObjCForHasMoreIterations>({O, LC});
}
/// Split the state on whether there are any more iterations left for this loop.
/// Returns a (HasMoreIteration, HasNoMoreIteration) pair, or None when the
/// acquisition of the loop condition value failed.
static Optional<std::pair<ProgramStateRef, ProgramStateRef>>
assumeCondition(const Stmt *Condition, ExplodedNode *N) {
ProgramStateRef State = N->getState();
if (const auto *ObjCFor = dyn_cast<ObjCForCollectionStmt>(Condition)) {
bool HasMoreIteration =
ExprEngine::hasMoreIteration(State, ObjCFor, N->getLocationContext());
// Checkers have already run on branch conditions, so the current
// information as to whether the loop has more iterations becomes outdated
// after this point.
State = ExprEngine::removeIterationState(State, ObjCFor,
N->getLocationContext());
if (HasMoreIteration)
return std::pair<ProgramStateRef, ProgramStateRef>{State, nullptr};
else
return std::pair<ProgramStateRef, ProgramStateRef>{nullptr, State};
}
SVal X = State->getSVal(Condition, N->getLocationContext());
if (X.isUnknownOrUndef()) {
// Give it a chance to recover from unknown.
if (const auto *Ex = dyn_cast<Expr>(Condition)) {
if (Ex->getType()->isIntegralOrEnumerationType()) {
// Try to recover some path-sensitivity. Right now casts of symbolic
// integers that promote their values are currently not tracked well.
// If 'Condition' is such an expression, try and recover the
// underlying value and use that instead.
SVal recovered =
RecoverCastedSymbol(State, Condition, N->getLocationContext(),
N->getState()->getStateManager().getContext());
if (!recovered.isUnknown()) {
X = recovered;
}
}
}
}
// If the condition is still unknown, give up.
if (X.isUnknownOrUndef())
return None;
DefinedSVal V = X.castAs<DefinedSVal>();
ProgramStateRef StTrue, StFalse;
return State->assume(V);
}
void ExprEngine::processBranch(const Stmt *Condition,
NodeBuilderContext& BldCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF) {
assert((!Condition || !isa<CXXBindTemporaryExpr>(Condition)) &&
"CXXBindTemporaryExprs are handled by processBindTemporary.");
const LocationContext *LCtx = Pred->getLocationContext();
PrettyStackTraceLocationContext StackCrashInfo(LCtx);
currBldrCtx = &BldCtx;
// Check for NULL conditions; e.g. "for(;;)"
if (!Condition) {
BranchNodeBuilder NullCondBldr(Pred, Dst, BldCtx, DstT, DstF);
NullCondBldr.markInfeasible(false);
NullCondBldr.generateNode(Pred->getState(), true, Pred);
return;
}
if (const auto *Ex = dyn_cast<Expr>(Condition))
Condition = Ex->IgnoreParens();
Condition = ResolveCondition(Condition, BldCtx.getBlock());
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
Condition->getBeginLoc(),
"Error evaluating branch");
ExplodedNodeSet CheckersOutSet;
getCheckerManager().runCheckersForBranchCondition(Condition, CheckersOutSet,
Pred, *this);
// We generated only sinks.
if (CheckersOutSet.empty())
return;
BranchNodeBuilder builder(CheckersOutSet, Dst, BldCtx, DstT, DstF);
for (ExplodedNode *PredN : CheckersOutSet) {
if (PredN->isSink())
continue;
ProgramStateRef PrevState = PredN->getState();
ProgramStateRef StTrue, StFalse;
if (const auto KnownCondValueAssumption = assumeCondition(Condition, PredN))
std::tie(StTrue, StFalse) = *KnownCondValueAssumption;
else {
assert(!isa<ObjCForCollectionStmt>(Condition));
builder.generateNode(PrevState, true, PredN);
builder.generateNode(PrevState, false, PredN);
continue;
}
if (StTrue && StFalse)
assert(!isa<ObjCForCollectionStmt>(Condition));
// Process the true branch.
if (builder.isFeasible(true)) {
if (StTrue)
builder.generateNode(StTrue, true, PredN);
else
builder.markInfeasible(true);
}
// Process the false branch.
if (builder.isFeasible(false)) {
if (StFalse)
builder.generateNode(StFalse, false, PredN);
else
builder.markInfeasible(false);
}
}
currBldrCtx = nullptr;
}
/// The GDM component containing the set of global variables which have been
/// previously initialized with explicit initializers.
REGISTER_TRAIT_WITH_PROGRAMSTATE(InitializedGlobalsSet,
llvm::ImmutableSet<const VarDecl *>)
void ExprEngine::processStaticInitializer(const DeclStmt *DS,
NodeBuilderContext &BuilderCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF) {
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
currBldrCtx = &BuilderCtx;
const auto *VD = cast<VarDecl>(DS->getSingleDecl());
ProgramStateRef state = Pred->getState();
bool initHasRun = state->contains<InitializedGlobalsSet>(VD);
BranchNodeBuilder builder(Pred, Dst, BuilderCtx, DstT, DstF);
if (!initHasRun) {
state = state->add<InitializedGlobalsSet>(VD);
}
builder.generateNode(state, initHasRun, Pred);
builder.markInfeasible(!initHasRun);
currBldrCtx = nullptr;
}
/// processIndirectGoto - Called by CoreEngine. Used to generate successor
/// nodes by processing the 'effects' of a computed goto jump.
void ExprEngine::processIndirectGoto(IndirectGotoNodeBuilder &builder) {
ProgramStateRef state = builder.getState();
SVal V = state->getSVal(builder.getTarget(), builder.getLocationContext());
// Three possibilities:
//
// (1) We know the computed label.
// (2) The label is NULL (or some other constant), or Undefined.
// (3) We have no clue about the label. Dispatch to all targets.
//
using iterator = IndirectGotoNodeBuilder::iterator;
if (Optional<loc::GotoLabel> LV = V.getAs<loc::GotoLabel>()) {
const LabelDecl *L = LV->getLabel();
for (iterator I = builder.begin(), E = builder.end(); I != E; ++I) {
if (I.getLabel() == L) {
builder.generateNode(I, state);
return;
}
}
llvm_unreachable("No block with label.");
}
if (isa<UndefinedVal, loc::ConcreteInt>(V)) {
// Dispatch to the first target and mark it as a sink.
//ExplodedNode* N = builder.generateNode(builder.begin(), state, true);
// FIXME: add checker visit.
// UndefBranches.insert(N);
return;
}
// This is really a catch-all. We don't support symbolics yet.
// FIXME: Implement dispatch for symbolic pointers.
for (iterator I = builder.begin(), E = builder.end(); I != E; ++I)
builder.generateNode(I, state);
}
void ExprEngine::processBeginOfFunction(NodeBuilderContext &BC,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const BlockEdge &L) {
SaveAndRestore<const NodeBuilderContext *> NodeContextRAII(currBldrCtx, &BC);
getCheckerManager().runCheckersForBeginFunction(Dst, L, Pred, *this);
}
/// processEndOfFunction - Called by CoreEngine. Used to generate end-of-path
/// nodes when control reaches the end of a function.
void ExprEngine::processEndOfFunction(NodeBuilderContext& BC,
ExplodedNode *Pred,
const ReturnStmt *RS) {
ProgramStateRef State = Pred->getState();
if (!Pred->getStackFrame()->inTopFrame())
State = finishArgumentConstruction(
State, *getStateManager().getCallEventManager().getCaller(
Pred->getStackFrame(), Pred->getState()));
// FIXME: We currently cannot assert that temporaries are clear, because
// lifetime-extended temporaries are not always modelled correctly. In some
// cases when we materialize the temporary, we do
// createTemporaryRegionIfNeeded(), the region changes, and the respective
// destructor changes from a temporary destructor to an automatic one. So
// for now, clean up the state manually before asserting; ideally, this
// braced block of code should go away.
{
const LocationContext *FromLC = Pred->getLocationContext();
const LocationContext *ToLC = FromLC->getStackFrame()->getParent();
const LocationContext *LC = FromLC;
while (LC != ToLC) {
assert(LC && "ToLC must be a parent of FromLC!");
for (auto I : State->get<ObjectsUnderConstruction>())
if (I.first.getLocationContext() == LC) {
// The comment above only pardons us for not cleaning up a
// temporary destructor. If any other statements are found here,
// it must be a separate problem.
assert(I.first.getItem().getKind() ==
ConstructionContextItem::TemporaryDestructorKind ||
I.first.getItem().getKind() ==
ConstructionContextItem::ElidedDestructorKind);
State = State->remove<ObjectsUnderConstruction>(I.first);
}
LC = LC->getParent();
}
}
// Perform the transition with cleanups.
if (State != Pred->getState()) {
ExplodedNodeSet PostCleanup;
NodeBuilder Bldr(Pred, PostCleanup, BC);
Pred = Bldr.generateNode(Pred->getLocation(), State, Pred);
if (!Pred) {
// The node with clean temporaries already exists. We might have reached
// it on a path on which we initialize different temporaries.
return;
}
}
assert(areAllObjectsFullyConstructed(Pred->getState(),
Pred->getLocationContext(),
Pred->getStackFrame()->getParent()));
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
ExplodedNodeSet Dst;
if (Pred->getLocationContext()->inTopFrame()) {
// Remove dead symbols.
ExplodedNodeSet AfterRemovedDead;
removeDeadOnEndOfFunction(BC, Pred, AfterRemovedDead);
// Notify checkers.
for (const auto I : AfterRemovedDead)
getCheckerManager().runCheckersForEndFunction(BC, Dst, I, *this, RS);
} else {
getCheckerManager().runCheckersForEndFunction(BC, Dst, Pred, *this, RS);
}
Engine.enqueueEndOfFunction(Dst, RS);
}
/// ProcessSwitch - Called by CoreEngine. Used to generate successor
/// nodes by processing the 'effects' of a switch statement.
void ExprEngine::processSwitch(SwitchNodeBuilder& builder) {
using iterator = SwitchNodeBuilder::iterator;
ProgramStateRef state = builder.getState();
const Expr *CondE = builder.getCondition();
SVal CondV_untested = state->getSVal(CondE, builder.getLocationContext());
if (CondV_untested.isUndef()) {
//ExplodedNode* N = builder.generateDefaultCaseNode(state, true);
// FIXME: add checker
//UndefBranches.insert(N);
return;
}
DefinedOrUnknownSVal CondV = CondV_untested.castAs<DefinedOrUnknownSVal>();
ProgramStateRef DefaultSt = state;
iterator I = builder.begin(), EI = builder.end();
bool defaultIsFeasible = I == EI;
for ( ; I != EI; ++I) {
// Successor may be pruned out during CFG construction.
if (!I.getBlock())
continue;
const CaseStmt *Case = I.getCase();
// Evaluate the LHS of the case value.
llvm::APSInt V1 = Case->getLHS()->EvaluateKnownConstInt(getContext());
assert(V1.getBitWidth() == getContext().getIntWidth(CondE->getType()));
// Get the RHS of the case, if it exists.
llvm::APSInt V2;
if (const Expr *E = Case->getRHS())
V2 = E->EvaluateKnownConstInt(getContext());
else
V2 = V1;
ProgramStateRef StateCase;
if (Optional<NonLoc> NL = CondV.getAs<NonLoc>())
std::tie(StateCase, DefaultSt) =
DefaultSt->assumeInclusiveRange(*NL, V1, V2);
else // UnknownVal
StateCase = DefaultSt;
if (StateCase)
builder.generateCaseStmtNode(I, StateCase);
// Now "assume" that the case doesn't match. Add this state
// to the default state (if it is feasible).
if (DefaultSt)
defaultIsFeasible = true;
else {
defaultIsFeasible = false;
break;
}
}
if (!defaultIsFeasible)
return;
// If we have switch(enum value), the default branch is not
// feasible if all of the enum constants not covered by 'case:' statements
// are not feasible values for the switch condition.
//
// Note that this isn't as accurate as it could be. Even if there isn't
// a case for a particular enum value, as long as that enum value isn't
// feasible, it shouldn't be considered for making 'default:' reachable.
const SwitchStmt *SS = builder.getSwitch();
const Expr *CondExpr = SS->getCond()->IgnoreParenImpCasts();
if (CondExpr->getType()->getAs<EnumType>()) {
if (SS->isAllEnumCasesCovered())
return;
}
builder.generateDefaultCaseNode(DefaultSt);
}
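// Example (illustrative) for processSwitch: given
//   enum E { A, B }; switch (e) { case A: ...; case B: ...; }
// isAllEnumCasesCovered() is true, so generateDefaultCaseNode() is skipped
// and the 'default' branch is treated as infeasible.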
//===----------------------------------------------------------------------===//
// Transfer functions: Loads and stores.
//===----------------------------------------------------------------------===//
void ExprEngine::VisitCommonDeclRefExpr(const Expr *Ex, const NamedDecl *D,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
ProgramStateRef state = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
if (const auto *VD = dyn_cast<VarDecl>(D)) {
// C permits "extern void v", and if you cast the address to a valid type,
// you can even do things with it. We simply pretend
assert(Ex->isGLValue() || VD->getType()->isVoidType());
const LocationContext *LocCtxt = Pred->getLocationContext();
const Decl *D = LocCtxt->getDecl();
const auto *MD = dyn_cast_or_null<CXXMethodDecl>(D);
const auto *DeclRefEx = dyn_cast<DeclRefExpr>(Ex);
Optional<std::pair<SVal, QualType>> VInfo;
if (AMgr.options.ShouldInlineLambdas && DeclRefEx &&
DeclRefEx->refersToEnclosingVariableOrCapture() && MD &&
MD->getParent()->isLambda()) {
// Lookup the field of the lambda.
const CXXRecordDecl *CXXRec = MD->getParent();
llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField;
CXXRec->getCaptureFields(LambdaCaptureFields, LambdaThisCaptureField);
// Sema follows a sequence of complex rules to determine whether the
// variable should be captured.
if (const FieldDecl *FD = LambdaCaptureFields[VD]) {
Loc CXXThis =
svalBuilder.getCXXThis(MD, LocCtxt->getStackFrame());
SVal CXXThisVal = state->getSVal(CXXThis);
VInfo = std::make_pair(state->getLValue(FD, CXXThisVal), FD->getType());
}
}
if (!VInfo)
VInfo = std::make_pair(state->getLValue(VD, LocCtxt), VD->getType());
SVal V = VInfo->first;
bool IsReference = VInfo->second->isReferenceType();
// For references, the 'lvalue' is the pointer address stored in the
// reference region.
if (IsReference) {
if (const MemRegion *R = V.getAsRegion())
V = state->getSVal(R);
else
V = UnknownVal();
}
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
return;
}
if (const auto *ED = dyn_cast<EnumConstantDecl>(D)) {
assert(!Ex->isGLValue());
SVal V = svalBuilder.makeIntVal(ED->getInitVal());
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V));
return;
}
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
SVal V = svalBuilder.getFunctionPointer(FD);
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
return;
}
if (isa<FieldDecl, IndirectFieldDecl>(D)) {
// Delegate all work related to pointer to members to the surrounding
// operator&.
return;
}
if (const auto *BD = dyn_cast<BindingDecl>(D)) {
const auto *DD = cast<DecompositionDecl>(BD->getDecomposedDecl());
SVal Base = state->getLValue(DD, LCtx);
if (DD->getType()->isReferenceType()) {
if (const MemRegion *R = Base.getAsRegion())
Base = state->getSVal(R);
else
Base = UnknownVal();
}
SVal V = UnknownVal();
// Handle binding to data members
if (const auto *ME = dyn_cast<MemberExpr>(BD->getBinding())) {
const auto *Field = cast<FieldDecl>(ME->getMemberDecl());
V = state->getLValue(Field, Base);
}
// Handle binding to arrays
else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(BD->getBinding())) {
SVal Idx = state->getSVal(ASE->getIdx(), LCtx);
// Note: the index of an element in a structured binding is synthesized by
// the compiler and uniquely identifies the specific element, so it cannot
// be a value that varies at runtime.
assert(Idx.isConstant() && "BindingDecl array index is not a constant!");
V = state->getLValue(BD->getType(), Idx, Base);
}
// Handle binding to tuple-like structures
else if (const auto *HV = BD->getHoldingVar()) {
V = state->getLValue(HV, LCtx);
if (HV->getType()->isReferenceType()) {
if (const MemRegion *R = V.getAsRegion())
V = state->getSVal(R);
else
V = UnknownVal();
}
} else
llvm_unreachable("An unknown case of structured binding encountered!");
// In case of tuple-like types the references are already handled, so we
// don't want to handle them again.
if (BD->getType()->isReferenceType() && !BD->getHoldingVar()) {
if (const MemRegion *R = V.getAsRegion())
V = state->getSVal(R);
else
V = UnknownVal();
}
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
return;
}
+ if (const auto *TPO = dyn_cast<TemplateParamObjectDecl>(D)) {
+ // FIXME: We should meaningfully implement this.
+ (void)TPO;
+ return;
+ }
+
llvm_unreachable("Support for this Decl not implemented.");
}
/// VisitArrayInitLoopExpr - Transfer function for array init loop.
void ExprEngine::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *Ex,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
ExplodedNodeSet CheckerPreStmt;
getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, Ex, *this);
ExplodedNodeSet EvalSet;
StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
const Expr *Arr = Ex->getCommonExpr()->getSourceExpr();
for (auto *Node : CheckerPreStmt) {
// The constructor visitor has already taken care of everything.
if (auto *CE = dyn_cast<CXXConstructExpr>(Ex->getSubExpr()))
break;
const LocationContext *LCtx = Node->getLocationContext();
ProgramStateRef state = Node->getState();
SVal Base = UnknownVal();
// Since the sub-expressions of this expression are not visited by any
// other transfer function, they are handled here by matching their AST.
// Case of implicit copy or move ctor of object with array member
//
// Note: ExprEngine::VisitMemberExpr is not able to bind the array to the
// environment.
//
// struct S {
// int arr[2];
// };
//
//
// S a;
// S b = a;
//
// The AST in case of a *copy constructor* looks like this:
// ArrayInitLoopExpr
// |-OpaqueValueExpr
// | `-MemberExpr <-- match this
// | `-DeclRefExpr
// ` ...
//
//
// S c;
// S d = std::move(c);
//
// In case of a *move constructor* the resulting AST looks like:
// ArrayInitLoopExpr
// |-OpaqueValueExpr
// | `-MemberExpr <-- match this first
// | `-CXXStaticCastExpr <-- match this after
// | `-DeclRefExpr
// ` ...
if (const auto *ME = dyn_cast<MemberExpr>(Arr)) {
Expr *MEBase = ME->getBase();
// Move ctor
if (auto CXXSCE = dyn_cast<CXXStaticCastExpr>(MEBase)) {
MEBase = CXXSCE->getSubExpr();
}
auto ObjDeclExpr = cast<DeclRefExpr>(MEBase);
SVal Obj = state->getLValue(cast<VarDecl>(ObjDeclExpr->getDecl()), LCtx);
Base = state->getLValue(cast<FieldDecl>(ME->getMemberDecl()), Obj);
}
// Case of lambda capture and decomposition declaration
//
// int arr[2];
//
// [arr]{ int a = arr[0]; }();
// auto[a, b] = arr;
//
// In both of these cases the AST looks like the following:
// ArrayInitLoopExpr
// |-OpaqueValueExpr
// | `-DeclRefExpr <-- match this
// ` ...
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Arr))
Base = state->getLValue(cast<VarDecl>(DRE->getDecl()), LCtx);
// Create a lazy compound value for the original array
if (const MemRegion *R = Base.getAsRegion())
Base = state->getSVal(R);
else
Base = UnknownVal();
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, Base));
}
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, Ex, *this);
}
/// VisitArraySubscriptExpr - Transfer function for array accesses
void ExprEngine::VisitArraySubscriptExpr(const ArraySubscriptExpr *A,
ExplodedNode *Pred,
ExplodedNodeSet &Dst){
const Expr *Base = A->getBase()->IgnoreParens();
const Expr *Idx = A->getIdx()->IgnoreParens();
ExplodedNodeSet CheckerPreStmt;
getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, A, *this);
ExplodedNodeSet EvalSet;
StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
bool IsVectorType = A->getBase()->getType()->isVectorType();
// The "like" case is for situations where C standard prohibits the type to
// be an lvalue, e.g. taking the address of a subscript of an expression of
// type "void *".
bool IsGLValueLike = A->isGLValue() ||
(A->getType().isCForbiddenLValueType() && !AMgr.getLangOpts().CPlusPlus);
for (auto *Node : CheckerPreStmt) {
const LocationContext *LCtx = Node->getLocationContext();
ProgramStateRef state = Node->getState();
if (IsGLValueLike) {
QualType T = A->getType();
// One of the forbidden LValue types! We still need to have sensible
// symbolic locations to represent this stuff. Note that arithmetic on
// void pointers is a GCC extension.
if (T->isVoidType())
T = getContext().CharTy;
SVal V = state->getLValue(T,
state->getSVal(Idx, LCtx),
state->getSVal(Base, LCtx));
Bldr.generateNode(A, Node, state->BindExpr(A, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
} else if (IsVectorType) {
// FIXME: non-glvalue vector reads are not modelled.
Bldr.generateNode(A, Node, state, nullptr);
} else {
llvm_unreachable("Array subscript should be an lValue when not \
a vector and not a forbidden lvalue type");
}
}
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, A, *this);
}
/// VisitMemberExpr - Transfer function for member expressions.
void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
// FIXME: Prechecks eventually go in ::Visit().
ExplodedNodeSet CheckedSet;
getCheckerManager().runCheckersForPreStmt(CheckedSet, Pred, M, *this);
ExplodedNodeSet EvalSet;
ValueDecl *Member = M->getMemberDecl();
// Handle static member variables and enum constants accessed via
// member syntax.
if (isa<VarDecl, EnumConstantDecl>(Member)) {
for (const auto I : CheckedSet)
VisitCommonDeclRefExpr(M, Member, I, EvalSet);
} else {
StmtNodeBuilder Bldr(CheckedSet, EvalSet, *currBldrCtx);
ExplodedNodeSet Tmp;
for (const auto I : CheckedSet) {
ProgramStateRef state = I->getState();
const LocationContext *LCtx = I->getLocationContext();
Expr *BaseExpr = M->getBase();
// Handle C++ method calls.
if (const auto *MD = dyn_cast<CXXMethodDecl>(Member)) {
if (MD->isInstance())
state = createTemporaryRegionIfNeeded(state, LCtx, BaseExpr);
SVal MDVal = svalBuilder.getFunctionPointer(MD);
state = state->BindExpr(M, LCtx, MDVal);
Bldr.generateNode(M, I, state);
continue;
}
// Handle regular struct fields / member variables.
const SubRegion *MR = nullptr;
state = createTemporaryRegionIfNeeded(state, LCtx, BaseExpr,
/*Result=*/nullptr,
/*OutRegionWithAdjustments=*/&MR);
SVal baseExprVal =
MR ? loc::MemRegionVal(MR) : state->getSVal(BaseExpr, LCtx);
const auto *field = cast<FieldDecl>(Member);
SVal L = state->getLValue(field, baseExprVal);
if (M->isGLValue() || M->getType()->isArrayType()) {
// We special-case rvalues of array type because the analyzer cannot
// reason about them, since we expect all regions to be wrapped in Locs.
// We instead treat these as lvalues and assume that they will decay to
// pointers as soon as they are used.
if (!M->isGLValue()) {
assert(M->getType()->isArrayType());
const auto *PE =
dyn_cast<ImplicitCastExpr>(I->getParentMap().getParentIgnoreParens(M));
if (!PE || PE->getCastKind() != CK_ArrayToPointerDecay) {
llvm_unreachable("should always be wrapped in ArrayToPointerDecay");
}
}
if (field->getType()->isReferenceType()) {
if (const MemRegion *R = L.getAsRegion())
L = state->getSVal(R);
else
L = UnknownVal();
}
Bldr.generateNode(M, I, state->BindExpr(M, LCtx, L), nullptr,
ProgramPoint::PostLValueKind);
} else {
Bldr.takeNodes(I);
evalLoad(Tmp, M, M, I, state, L);
Bldr.addNodes(Tmp);
}
}
}
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, M, *this);
}
void ExprEngine::VisitAtomicExpr(const AtomicExpr *AE, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
ExplodedNodeSet AfterPreSet;
getCheckerManager().runCheckersForPreStmt(AfterPreSet, Pred, AE, *this);
// For now, treat all the arguments to C11 atomics as escaping.
// FIXME: Ideally we should model the behavior of the atomics precisely here.
ExplodedNodeSet AfterInvalidateSet;
StmtNodeBuilder Bldr(AfterPreSet, AfterInvalidateSet, *currBldrCtx);
for (const auto I : AfterPreSet) {
ProgramStateRef State = I->getState();
const LocationContext *LCtx = I->getLocationContext();
SmallVector<SVal, 8> ValuesToInvalidate;
for (unsigned SI = 0, Count = AE->getNumSubExprs(); SI != Count; SI++) {
const Expr *SubExpr = AE->getSubExprs()[SI];
SVal SubExprVal = State->getSVal(SubExpr, LCtx);
ValuesToInvalidate.push_back(SubExprVal);
}
State = State->invalidateRegions(ValuesToInvalidate, AE,
currBldrCtx->blockCount(),
LCtx,
/*CausedByPointerEscape*/true,
/*Symbols=*/nullptr);
SVal ResultVal = UnknownVal();
State = State->BindExpr(AE, LCtx, ResultVal);
Bldr.generateNode(AE, I, State, nullptr,
ProgramPoint::PostStmtKind);
}
getCheckerManager().runCheckersForPostStmt(Dst, AfterInvalidateSet, AE, *this);
}
// A value escapes in four possible cases:
// (1) We are binding to something that is not a memory region.
// (2) We are binding to a MemRegion that does not have stack storage.
// (3) We are binding to a top-level parameter region with a non-trivial
// destructor. We won't see the destructor during analysis, but it's there.
// (4) We are binding to a MemRegion with stack storage that the store
// does not understand.
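//
// Example (illustrative) of case (2):
//   static int *g;
//   void f() { int x; g = &x; }
// Binding &x into the global 'g' stores it to a non-stack region, so the
// value escapes and checkers are notified.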
ProgramStateRef ExprEngine::processPointerEscapedOnBind(
ProgramStateRef State, ArrayRef<std::pair<SVal, SVal>> LocAndVals,
const LocationContext *LCtx, PointerEscapeKind Kind,
const CallEvent *Call) {
SmallVector<SVal, 8> Escaped;
for (const std::pair<SVal, SVal> &LocAndVal : LocAndVals) {
// Cases (1) and (2).
const MemRegion *MR = LocAndVal.first.getAsRegion();
if (!MR || !MR->hasStackStorage()) {
Escaped.push_back(LocAndVal.second);
continue;
}
// Case (3).
if (const auto *VR = dyn_cast<VarRegion>(MR->getBaseRegion()))
if (VR->hasStackParametersStorage() && VR->getStackFrame()->inTopFrame())
if (const auto *RD = VR->getValueType()->getAsCXXRecordDecl())
if (!RD->hasTrivialDestructor()) {
Escaped.push_back(LocAndVal.second);
continue;
}
// Case (4): in order to test that, generate a new state with the binding
// added. If it is the same state, then it escapes (since the store cannot
// represent the binding).
// Do this only if we know that the store is not supposed to generate the
// same state.
SVal StoredVal = State->getSVal(MR);
if (StoredVal != LocAndVal.second)
if (State ==
(State->bindLoc(loc::MemRegionVal(MR), LocAndVal.second, LCtx)))
Escaped.push_back(LocAndVal.second);
}
if (Escaped.empty())
return State;
return escapeValues(State, Escaped, Kind, Call);
}
ProgramStateRef
ExprEngine::processPointerEscapedOnBind(ProgramStateRef State, SVal Loc,
SVal Val, const LocationContext *LCtx) {
std::pair<SVal, SVal> LocAndVal(Loc, Val);
return processPointerEscapedOnBind(State, LocAndVal, LCtx, PSK_EscapeOnBind,
nullptr);
}
ProgramStateRef
ExprEngine::notifyCheckersOfPointerEscape(ProgramStateRef State,
const InvalidatedSymbols *Invalidated,
ArrayRef<const MemRegion *> ExplicitRegions,
const CallEvent *Call,
RegionAndSymbolInvalidationTraits &ITraits) {
if (!Invalidated || Invalidated->empty())
return State;
if (!Call)
return getCheckerManager().runCheckersForPointerEscape(State,
*Invalidated,
nullptr,
PSK_EscapeOther,
&ITraits);
// If the symbols were invalidated by a call, we want to find out which ones
// were invalidated directly due to being arguments to the call.
InvalidatedSymbols SymbolsDirectlyInvalidated;
for (const auto I : ExplicitRegions) {
if (const SymbolicRegion *R = I->StripCasts()->getAs<SymbolicRegion>())
SymbolsDirectlyInvalidated.insert(R->getSymbol());
}
InvalidatedSymbols SymbolsIndirectlyInvalidated;
for (const auto &sym : *Invalidated) {
if (SymbolsDirectlyInvalidated.count(sym))
continue;
SymbolsIndirectlyInvalidated.insert(sym);
}
if (!SymbolsDirectlyInvalidated.empty())
State = getCheckerManager().runCheckersForPointerEscape(State,
SymbolsDirectlyInvalidated, Call, PSK_DirectEscapeOnCall, &ITraits);
// Notify about the symbols that get indirectly invalidated by the call.
if (!SymbolsIndirectlyInvalidated.empty())
State = getCheckerManager().runCheckersForPointerEscape(State,
SymbolsIndirectlyInvalidated, Call, PSK_IndirectEscapeOnCall, &ITraits);
return State;
}
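// Example (illustrative): for a call `opaque(p)` where 'p' points to a
// symbolic region, the pointee's symbol escapes with PSK_DirectEscapeOnCall;
// other symbols invalidated by the call (e.g. values reachable only through
// that region) escape with PSK_IndirectEscapeOnCall.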
/// evalBind - Handle the semantics of binding a value to a specific location.
/// This method is used by evalStore and (soon) VisitDeclStmt, and others.
void ExprEngine::evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE,
ExplodedNode *Pred,
SVal location, SVal Val,
bool atDeclInit, const ProgramPoint *PP) {
const LocationContext *LC = Pred->getLocationContext();
PostStmt PS(StoreE, LC);
if (!PP)
PP = &PS;
// Do a previsit of the bind.
ExplodedNodeSet CheckedSet;
getCheckerManager().runCheckersForBind(CheckedSet, Pred, location, Val,
StoreE, *this, *PP);
StmtNodeBuilder Bldr(CheckedSet, Dst, *currBldrCtx);
// If the location is not a 'Loc', it will already be handled by
// the checkers. There is nothing left to do.
if (!isa<Loc>(location)) {
const ProgramPoint L = PostStore(StoreE, LC, /*Loc*/nullptr,
/*tag*/nullptr);
ProgramStateRef state = Pred->getState();
state = processPointerEscapedOnBind(state, location, Val, LC);
Bldr.generateNode(L, state, Pred);
return;
}
for (const auto PredI : CheckedSet) {
ProgramStateRef state = PredI->getState();
state = processPointerEscapedOnBind(state, location, Val, LC);
// When binding the value, pass on the hint that this is an initialization.
// For initializations, we do not need to inform clients of region
// changes.
state = state->bindLoc(location.castAs<Loc>(),
Val, LC, /* notifyChanges = */ !atDeclInit);
const MemRegion *LocReg = nullptr;
if (Optional<loc::MemRegionVal> LocRegVal =
location.getAs<loc::MemRegionVal>()) {
LocReg = LocRegVal->getRegion();
}
const ProgramPoint L = PostStore(StoreE, LC, LocReg, nullptr);
Bldr.generateNode(L, state, PredI);
}
}
/// evalStore - Handle the semantics of a store via an assignment.
/// @param Dst The node set to store generated state nodes
/// @param AssignE The assignment expression if the store happens in an
/// assignment.
/// @param LocationE The location expression that is stored to.
/// @param state The current simulation state
/// @param location The location to store the value
/// @param Val The value to be stored
void ExprEngine::evalStore(ExplodedNodeSet &Dst, const Expr *AssignE,
const Expr *LocationE,
ExplodedNode *Pred,
ProgramStateRef state, SVal location, SVal Val,
const ProgramPointTag *tag) {
// Proceed with the store. We use AssignE as the anchor for the PostStore
// ProgramPoint if it is non-NULL, and LocationE otherwise.
const Expr *StoreE = AssignE ? AssignE : LocationE;
// Evaluate the location (checks for bad dereferences).
ExplodedNodeSet Tmp;
evalLocation(Tmp, AssignE, LocationE, Pred, state, location, false);
if (Tmp.empty())
return;
if (location.isUndef())
return;
for (const auto I : Tmp)
evalBind(Dst, StoreE, I, location, Val, false);
}
void ExprEngine::evalLoad(ExplodedNodeSet &Dst,
const Expr *NodeEx,
const Expr *BoundEx,
ExplodedNode *Pred,
ProgramStateRef state,
SVal location,
const ProgramPointTag *tag,
QualType LoadTy) {
assert(!isa<NonLoc>(location) && "location cannot be a NonLoc.");
assert(NodeEx);
assert(BoundEx);
// Evaluate the location (checks for bad dereferences).
ExplodedNodeSet Tmp;
evalLocation(Tmp, NodeEx, BoundEx, Pred, state, location, true);
if (Tmp.empty())
return;
StmtNodeBuilder Bldr(Tmp, Dst, *currBldrCtx);
if (location.isUndef())
return;
// Proceed with the load.
for (const auto I : Tmp) {
state = I->getState();
const LocationContext *LCtx = I->getLocationContext();
SVal V = UnknownVal();
if (location.isValid()) {
if (LoadTy.isNull())
LoadTy = BoundEx->getType();
V = state->getSVal(location.castAs<Loc>(), LoadTy);
}
Bldr.generateNode(NodeEx, I, state->BindExpr(BoundEx, LCtx, V), tag,
ProgramPoint::PostLoadKind);
}
}
void ExprEngine::evalLocation(ExplodedNodeSet &Dst,
const Stmt *NodeEx,
const Stmt *BoundEx,
ExplodedNode *Pred,
ProgramStateRef state,
SVal location,
bool isLoad) {
StmtNodeBuilder BldrTop(Pred, Dst, *currBldrCtx);
// Early checks for performance reasons.
if (location.isUnknown()) {
return;
}
ExplodedNodeSet Src;
BldrTop.takeNodes(Pred);
StmtNodeBuilder Bldr(Pred, Src, *currBldrCtx);
if (Pred->getState() != state) {
// Associate this new state with an ExplodedNode.
// FIXME: If I pass null tag, the graph is incorrect, e.g for
// int *p;
// p = 0;
// *p = 0xDEADBEEF;
// "p = 0" is not noted as "Null pointer value stored to 'p'" but
// instead "int *p" is noted as
// "Variable 'p' initialized to a null pointer value"
static SimpleProgramPointTag tag(TagProviderName, "Location");
Bldr.generateNode(NodeEx, Pred, state, &tag);
}
ExplodedNodeSet Tmp;
getCheckerManager().runCheckersForLocation(Tmp, Src, location, isLoad,
NodeEx, BoundEx, *this);
BldrTop.addNodes(Tmp);
}
std::pair<const ProgramPointTag *, const ProgramPointTag*>
ExprEngine::geteagerlyAssumeBinOpBifurcationTags() {
static SimpleProgramPointTag
eagerlyAssumeBinOpBifurcationTrue(TagProviderName,
"Eagerly Assume True"),
eagerlyAssumeBinOpBifurcationFalse(TagProviderName,
"Eagerly Assume False");
return std::make_pair(&eagerlyAssumeBinOpBifurcationTrue,
&eagerlyAssumeBinOpBifurcationFalse);
}
void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst,
ExplodedNodeSet &Src,
const Expr *Ex) {
StmtNodeBuilder Bldr(Src, Dst, *currBldrCtx);
for (const auto Pred : Src) {
// Test if the previous node was at the same expression. This can happen
// when the expression fails to evaluate to anything meaningful and
// (as an optimization) we don't generate a node.
ProgramPoint P = Pred->getLocation();
if (!P.getAs<PostStmt>() || P.castAs<PostStmt>().getStmt() != Ex) {
continue;
}
ProgramStateRef state = Pred->getState();
SVal V = state->getSVal(Ex, Pred->getLocationContext());
Optional<nonloc::SymbolVal> SEV = V.getAs<nonloc::SymbolVal>();
if (SEV && SEV->isExpression()) {
const std::pair<const ProgramPointTag *, const ProgramPointTag*> &tags =
geteagerlyAssumeBinOpBifurcationTags();
ProgramStateRef StateTrue, StateFalse;
std::tie(StateTrue, StateFalse) = state->assume(*SEV);
// First assume that the condition is true.
if (StateTrue) {
SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());
StateTrue = StateTrue->BindExpr(Ex, Pred->getLocationContext(), Val);
Bldr.generateNode(Ex, Pred, StateTrue, tags.first);
}
// Next, assume that the condition is false.
if (StateFalse) {
SVal Val = svalBuilder.makeIntVal(0U, Ex->getType());
StateFalse = StateFalse->BindExpr(Ex, Pred->getLocationContext(), Val);
Bldr.generateNode(Ex, Pred, StateFalse, tags.second);
}
}
}
}
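// Example (illustrative): for `int b = (x == 0);` with a symbolic 'x', the
// comparison is bifurcated eagerly: one successor binds 'b' to 1 under the
// assumption x == 0, the other binds 'b' to 0 under x != 0.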
void ExprEngine::VisitGCCAsmStmt(const GCCAsmStmt *A, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
// We have processed both the inputs and the outputs. All of the outputs
// should evaluate to Locs. Nuke all of their values.
// FIXME: Some day in the future it would be nice to allow a "plug-in"
// which interprets the inline asm and stores proper results in the
// outputs.
ProgramStateRef state = Pred->getState();
for (const Expr *O : A->outputs()) {
SVal X = state->getSVal(O, Pred->getLocationContext());
assert(!isa<NonLoc>(X)); // Should be an lvalue, unknown, or undef.
if (Optional<Loc> LV = X.getAs<Loc>())
state = state->bindLoc(*LV, UnknownVal(), Pred->getLocationContext());
}
Bldr.generateNode(A, Pred, state);
}
void ExprEngine::VisitMSAsmStmt(const MSAsmStmt *A, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
Bldr.generateNode(A, Pred, Pred->getState());
}
//===----------------------------------------------------------------------===//
// Visualization.
//===----------------------------------------------------------------------===//
namespace llvm {
template<>
struct DOTGraphTraits<ExplodedGraph*> : public DefaultDOTGraphTraits {
DOTGraphTraits (bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static bool nodeHasBugReport(const ExplodedNode *N) {
BugReporter &BR = static_cast<ExprEngine &>(
N->getState()->getStateManager().getOwningEngine()).getBugReporter();
const auto EQClasses =
llvm::make_range(BR.EQClasses_begin(), BR.EQClasses_end());
for (const auto &EQ : EQClasses) {
for (const auto &I : EQ.getReports()) {
const auto *PR = dyn_cast<PathSensitiveBugReport>(I.get());
if (!PR)
continue;
const ExplodedNode *EN = PR->getErrorNode();
if (EN->getState() == N->getState() &&
EN->getLocation() == N->getLocation())
return true;
}
}
return false;
}
/// \p PreCallback: callback invoked on each node before the stop check.
/// \p PostCallback: callback invoked on each node that is traversed past.
/// \p Stop: stop iteration if it returns @c true.
/// \return Whether @c Stop ever returned @c true.
static bool traverseHiddenNodes(
const ExplodedNode *N,
llvm::function_ref<void(const ExplodedNode *)> PreCallback,
llvm::function_ref<void(const ExplodedNode *)> PostCallback,
llvm::function_ref<bool(const ExplodedNode *)> Stop) {
while (true) {
PreCallback(N);
if (Stop(N))
return true;
if (N->succ_size() != 1 || !isNodeHidden(N->getFirstSucc(), nullptr))
break;
PostCallback(N);
N = N->getFirstSucc();
}
return false;
}
static bool isNodeHidden(const ExplodedNode *N, const ExplodedGraph *G) {
return N->isTrivial();
}
static std::string getNodeLabel(const ExplodedNode *N, ExplodedGraph *G){
std::string Buf;
llvm::raw_string_ostream Out(Buf);
const bool IsDot = true;
const unsigned int Space = 1;
ProgramStateRef State = N->getState();
Out << "{ \"state_id\": " << State->getID()
<< ",\\l";
Indent(Out, Space, IsDot) << "\"program_points\": [\\l";
// Dump program point for all the previously skipped nodes.
traverseHiddenNodes(
N,
[&](const ExplodedNode *OtherNode) {
Indent(Out, Space + 1, IsDot) << "{ ";
OtherNode->getLocation().printJson(Out, /*NL=*/"\\l");
Out << ", \"tag\": ";
if (const ProgramPointTag *Tag = OtherNode->getLocation().getTag())
Out << '\"' << Tag->getTagDescription() << "\"";
else
Out << "null";
Out << ", \"node_id\": " << OtherNode->getID() <<
", \"is_sink\": " << OtherNode->isSink() <<
", \"has_report\": " << nodeHasBugReport(OtherNode) << " }";
},
// Adds a comma and a new-line between each program point.
[&](const ExplodedNode *) { Out << ",\\l"; },
[&](const ExplodedNode *) { return false; });
Out << "\\l"; // Adds a new-line to the last program point.
Indent(Out, Space, IsDot) << "],\\l";
State->printDOT(Out, N->getLocationContext(), Space);
Out << "\\l}\\l";
return Out.str();
}
};
} // namespace llvm
void ExprEngine::ViewGraph(bool trim) {
std::string Filename = DumpGraph(trim);
llvm::DisplayGraph(Filename, false, llvm::GraphProgram::DOT);
}
void ExprEngine::ViewGraph(ArrayRef<const ExplodedNode *> Nodes) {
std::string Filename = DumpGraph(Nodes);
llvm::DisplayGraph(Filename, false, llvm::GraphProgram::DOT);
}
std::string ExprEngine::DumpGraph(bool trim, StringRef Filename) {
if (trim) {
std::vector<const ExplodedNode *> Src;
// Iterate through the reports and get their nodes.
for (BugReporter::EQClasses_iterator
EI = BR.EQClasses_begin(), EE = BR.EQClasses_end(); EI != EE; ++EI) {
const auto *R =
dyn_cast<PathSensitiveBugReport>(EI->getReports()[0].get());
if (!R)
continue;
const auto *N = const_cast<ExplodedNode *>(R->getErrorNode());
Src.push_back(N);
}
return DumpGraph(Src, Filename);
}
return llvm::WriteGraph(&G, "ExprEngine", /*ShortNames=*/false,
/*Title=*/"Exploded Graph",
/*Filename=*/std::string(Filename));
}
std::string ExprEngine::DumpGraph(ArrayRef<const ExplodedNode *> Nodes,
StringRef Filename) {
std::unique_ptr<ExplodedGraph> TrimmedG(G.trim(Nodes));
if (!TrimmedG.get()) {
llvm::errs() << "warning: Trimmed ExplodedGraph is empty.\n";
return "";
}
return llvm::WriteGraph(TrimmedG.get(), "TrimmedExprEngine",
/*ShortNames=*/false,
/*Title=*/"Trimmed Exploded Graph",
/*Filename=*/std::string(Filename));
}
void *ProgramStateTrait<ReplayWithoutInlining>::GDMIndex() {
static int index = 0;
return &index;
}
void ExprEngine::anchor() { }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 9af296b1853a..b29665a63390 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -1,10763 +1,10763 @@
//===-- sanitizer_common_interceptors.inc -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Common function interceptors for tools like AddressSanitizer,
// ThreadSanitizer, MemorySanitizer, etc.
//
// This file should be included into the tool's interceptor file,
// which has to define its own macros:
// COMMON_INTERCEPTOR_ENTER
// COMMON_INTERCEPTOR_ENTER_NOIGNORE
// COMMON_INTERCEPTOR_READ_RANGE
// COMMON_INTERCEPTOR_WRITE_RANGE
// COMMON_INTERCEPTOR_INITIALIZE_RANGE
// COMMON_INTERCEPTOR_DIR_ACQUIRE
// COMMON_INTERCEPTOR_FD_ACQUIRE
// COMMON_INTERCEPTOR_FD_RELEASE
// COMMON_INTERCEPTOR_FD_ACCESS
// COMMON_INTERCEPTOR_SET_THREAD_NAME
// COMMON_INTERCEPTOR_DLOPEN
// COMMON_INTERCEPTOR_ON_EXIT
// COMMON_INTERCEPTOR_MUTEX_PRE_LOCK
// COMMON_INTERCEPTOR_MUTEX_POST_LOCK
// COMMON_INTERCEPTOR_MUTEX_UNLOCK
// COMMON_INTERCEPTOR_MUTEX_REPAIR
// COMMON_INTERCEPTOR_SET_PTHREAD_NAME
// COMMON_INTERCEPTOR_HANDLE_RECVMSG
// COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED
// COMMON_INTERCEPTOR_MEMSET_IMPL
// COMMON_INTERCEPTOR_MEMMOVE_IMPL
// COMMON_INTERCEPTOR_MEMCPY_IMPL
// COMMON_INTERCEPTOR_MMAP_IMPL
// COMMON_INTERCEPTOR_COPY_STRING
// COMMON_INTERCEPTOR_STRNDUP_IMPL
// COMMON_INTERCEPTOR_STRERROR
//===----------------------------------------------------------------------===//
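// Illustrative sketch (hypothetical; not taken from any particular tool): a
// client defines the required macros before including this file, e.g.
//   #define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
//     void *ctx = nullptr; (void)ctx;
//   #define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \
//     MyToolCheckRead(ptr, size)   // hypothetical tool hook
//   #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
//     MyToolCheckWrite(ptr, size)  // hypothetical tool hook
//   ...
//   #include "sanitizer_common_interceptors.inc"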
#include "interception/interception.h"
#include "sanitizer_addrhashmap.h"
#include "sanitizer_errno.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_platform_interceptors.h"
#include "sanitizer_symbolizer.h"
#include "sanitizer_tls_get_addr.h"
#include <stdarg.h>
#if SANITIZER_INTERCEPTOR_HOOKS
#define CALL_WEAK_INTERCEPTOR_HOOK(f, ...) f(__VA_ARGS__);
#define DECLARE_WEAK_INTERCEPTOR_HOOK(f, ...) \
SANITIZER_INTERFACE_WEAK_DEF(void, f, __VA_ARGS__) {}
#else
#define DECLARE_WEAK_INTERCEPTOR_HOOK(f, ...)
#define CALL_WEAK_INTERCEPTOR_HOOK(f, ...)
#endif // SANITIZER_INTERCEPTOR_HOOKS
#if SANITIZER_WINDOWS && !defined(va_copy)
#define va_copy(dst, src) ((dst) = (src))
#endif // _WIN32
#if SANITIZER_FREEBSD
#define pthread_setname_np pthread_set_name_np
#define inet_aton __inet_aton
#define inet_pton __inet_pton
#define iconv __bsd_iconv
#endif
#if SANITIZER_NETBSD
#define clock_getres __clock_getres50
#define clock_gettime __clock_gettime50
#define clock_settime __clock_settime50
#define ctime __ctime50
#define ctime_r __ctime_r50
#define devname __devname50
#define fgetpos __fgetpos50
#define fsetpos __fsetpos50
#define fstatvfs __fstatvfs90
#define fstatvfs1 __fstatvfs190
#define fts_children __fts_children60
#define fts_close __fts_close60
#define fts_open __fts_open60
#define fts_read __fts_read60
#define fts_set __fts_set60
#define getitimer __getitimer50
#define getmntinfo __getmntinfo90
#define getpwent __getpwent50
#define getpwnam __getpwnam50
#define getpwnam_r __getpwnam_r50
#define getpwuid __getpwuid50
#define getpwuid_r __getpwuid_r50
#define getutent __getutent50
#define getutxent __getutxent50
#define getutxid __getutxid50
#define getutxline __getutxline50
#define getvfsstat __getvfsstat90
#define pututxline __pututxline50
#define glob __glob30
#define gmtime __gmtime50
#define gmtime_r __gmtime_r50
#define localtime __localtime50
#define localtime_r __localtime_r50
#define mktime __mktime50
#define lstat __lstat50
#define opendir __opendir30
#define readdir __readdir30
#define readdir_r __readdir_r30
#define scandir __scandir30
#define setitimer __setitimer50
#define setlocale __setlocale50
#define shmctl __shmctl50
#define sigaltstack __sigaltstack14
#define sigemptyset __sigemptyset14
#define sigfillset __sigfillset14
#define sigpending __sigpending14
#define sigprocmask __sigprocmask14
#define sigtimedwait __sigtimedwait50
#define stat __stat50
#define statvfs __statvfs90
#define statvfs1 __statvfs190
#define time __time50
#define times __times13
#define unvis __unvis50
#define wait3 __wait350
#define wait4 __wait450
extern const unsigned short *_ctype_tab_;
extern const short *_toupper_tab_;
extern const short *_tolower_tab_;
#endif
#if SANITIZER_MUSL && \
(defined(__i386__) || defined(__arm__) || SANITIZER_MIPS32 || SANITIZER_PPC32)
// musl 1.2.0 on existing 32-bit architectures uses new symbol names for the
// time-related functions that take 64-bit time_t values. See
// https://musl.libc.org/time64.html
#define adjtime __adjtime64
#define adjtimex __adjtimex_time64
#define aio_suspend __aio_suspend_time64
#define clock_adjtime __clock_adjtime64
#define clock_getres __clock_getres_time64
#define clock_gettime __clock_gettime64
#define clock_nanosleep __clock_nanosleep_time64
#define clock_settime __clock_settime64
#define cnd_timedwait __cnd_timedwait_time64
#define ctime __ctime64
#define ctime_r __ctime64_r
#define difftime __difftime64
#define dlsym __dlsym_time64
#define fstatat __fstatat_time64
#define fstat __fstat_time64
#define ftime __ftime64
#define futimens __futimens_time64
#define futimesat __futimesat_time64
#define futimes __futimes_time64
#define getitimer __getitimer_time64
#define getrusage __getrusage_time64
#define gettimeofday __gettimeofday_time64
#define gmtime __gmtime64
#define gmtime_r __gmtime64_r
#define localtime __localtime64
#define localtime_r __localtime64_r
#define lstat __lstat_time64
#define lutimes __lutimes_time64
#define mktime __mktime64
#define mq_timedreceive __mq_timedreceive_time64
#define mq_timedsend __mq_timedsend_time64
#define mtx_timedlock __mtx_timedlock_time64
#define nanosleep __nanosleep_time64
#define ppoll __ppoll_time64
#define pselect __pselect_time64
#define pthread_cond_timedwait __pthread_cond_timedwait_time64
#define pthread_mutex_timedlock __pthread_mutex_timedlock_time64
#define pthread_rwlock_timedrdlock __pthread_rwlock_timedrdlock_time64
#define pthread_rwlock_timedwrlock __pthread_rwlock_timedwrlock_time64
#define pthread_timedjoin_np __pthread_timedjoin_np_time64
#define recvmmsg __recvmmsg_time64
#define sched_rr_get_interval __sched_rr_get_interval_time64
#define select __select_time64
#define semtimedop __semtimedop_time64
#define sem_timedwait __sem_timedwait_time64
#define setitimer __setitimer_time64
#define settimeofday __settimeofday_time64
#define sigtimedwait __sigtimedwait_time64
#define stat __stat_time64
#define stime __stime64
#define thrd_sleep __thrd_sleep_time64
#define timegm __timegm_time64
#define timerfd_gettime __timerfd_gettime64
#define timerfd_settime __timerfd_settime64
#define timer_gettime __timer_gettime64
#define timer_settime __timer_settime64
#define timespec_get __timespec_get_time64
#define time __time64
#define utimensat __utimensat_time64
#define utimes __utimes_time64
#define utime __utime64
#define wait3 __wait3_time64
#define wait4 __wait4_time64
#endif
// Platform-specific options.
#if SANITIZER_APPLE
#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE 0
#elif SANITIZER_WINDOWS64
#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE 0
#else
#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE 1
#endif // SANITIZER_APPLE
#ifndef COMMON_INTERCEPTOR_INITIALIZE_RANGE
#define COMMON_INTERCEPTOR_INITIALIZE_RANGE(p, size) {}
#endif
#ifndef COMMON_INTERCEPTOR_UNPOISON_PARAM
#define COMMON_INTERCEPTOR_UNPOISON_PARAM(count) {}
#endif
#ifndef COMMON_INTERCEPTOR_FD_ACCESS
#define COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd) {}
#endif
#ifndef COMMON_INTERCEPTOR_MUTEX_PRE_LOCK
#define COMMON_INTERCEPTOR_MUTEX_PRE_LOCK(ctx, m) {}
#endif
#ifndef COMMON_INTERCEPTOR_MUTEX_POST_LOCK
#define COMMON_INTERCEPTOR_MUTEX_POST_LOCK(ctx, m) {}
#endif
#ifndef COMMON_INTERCEPTOR_MUTEX_UNLOCK
#define COMMON_INTERCEPTOR_MUTEX_UNLOCK(ctx, m) {}
#endif
#ifndef COMMON_INTERCEPTOR_MUTEX_REPAIR
#define COMMON_INTERCEPTOR_MUTEX_REPAIR(ctx, m) {}
#endif
#ifndef COMMON_INTERCEPTOR_MUTEX_INVALID
#define COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m) {}
#endif
#ifndef COMMON_INTERCEPTOR_HANDLE_RECVMSG
#define COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, msg) ((void)(msg))
#endif
#ifndef COMMON_INTERCEPTOR_FILE_OPEN
#define COMMON_INTERCEPTOR_FILE_OPEN(ctx, file, path) {}
#endif
#ifndef COMMON_INTERCEPTOR_FILE_CLOSE
#define COMMON_INTERCEPTOR_FILE_CLOSE(ctx, file) {}
#endif
#ifndef COMMON_INTERCEPTOR_LIBRARY_LOADED
#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) {}
#endif
#ifndef COMMON_INTERCEPTOR_LIBRARY_UNLOADED
#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() {}
#endif
#ifndef COMMON_INTERCEPTOR_ENTER_NOIGNORE
#define COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, ...) \
COMMON_INTERCEPTOR_ENTER(ctx, __VA_ARGS__)
#endif
#ifndef COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED
#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (0)
#endif
#define COMMON_INTERCEPTOR_READ_STRING(ctx, s, n) \
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s), \
common_flags()->strict_string_checks ? (internal_strlen(s)) + 1 : (n) )
#ifndef COMMON_INTERCEPTOR_DLOPEN
#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \
({ CheckNoDeepBind(filename, flag); REAL(dlopen)(filename, flag); })
#endif
#ifndef COMMON_INTERCEPTOR_GET_TLS_RANGE
#define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end) *begin = *end = 0;
#endif
#ifndef COMMON_INTERCEPTOR_ACQUIRE
#define COMMON_INTERCEPTOR_ACQUIRE(ctx, u) {}
#endif
#ifndef COMMON_INTERCEPTOR_RELEASE
#define COMMON_INTERCEPTOR_RELEASE(ctx, u) {}
#endif
#ifndef COMMON_INTERCEPTOR_USER_CALLBACK_START
#define COMMON_INTERCEPTOR_USER_CALLBACK_START() {}
#endif
#ifndef COMMON_INTERCEPTOR_USER_CALLBACK_END
#define COMMON_INTERCEPTOR_USER_CALLBACK_END() {}
#endif
#ifdef SANITIZER_NLDBL_VERSION
#define COMMON_INTERCEPT_FUNCTION_LDBL(fn) \
COMMON_INTERCEPT_FUNCTION_VER(fn, SANITIZER_NLDBL_VERSION)
#else
#define COMMON_INTERCEPT_FUNCTION_LDBL(fn) \
COMMON_INTERCEPT_FUNCTION(fn)
#endif
#if SANITIZER_GLIBC
// If we could not find the versioned symbol, fall back to an unversioned
// lookup. This is needed to work around a GLibc bug that causes dlsym
// with RTLD_NEXT to return the oldest versioned symbol.
// See https://sourceware.org/bugzilla/show_bug.cgi?id=14932.
// For certain symbols (e.g. regexec) we have to perform a versioned lookup,
// but that versioned symbol will only exist for architectures where the
// oldest Glibc version pre-dates support for that architecture.
// For example, regexec@GLIBC_2.3.4 exists on x86_64, but not RISC-V.
// See also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98920.
#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(fn, ver) \
COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(fn, ver)
#else
#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(fn, ver) \
COMMON_INTERCEPT_FUNCTION(fn)
#endif
#ifndef COMMON_INTERCEPTOR_MEMSET_IMPL
#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \
{ \
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) \
return internal_memset(dst, v, size); \
COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size); \
if (common_flags()->intercept_intrin) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size); \
return REAL(memset)(dst, v, size); \
}
#endif
#ifndef COMMON_INTERCEPTOR_MEMMOVE_IMPL
#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size) \
{ \
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) \
return internal_memmove(dst, src, size); \
COMMON_INTERCEPTOR_ENTER(ctx, memmove, dst, src, size); \
if (common_flags()->intercept_intrin) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size); \
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size); \
} \
return REAL(memmove)(dst, src, size); \
}
#endif
#ifndef COMMON_INTERCEPTOR_MEMCPY_IMPL
#define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, dst, src, size) \
{ \
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) { \
return internal_memmove(dst, src, size); \
} \
COMMON_INTERCEPTOR_ENTER(ctx, memcpy, dst, src, size); \
if (common_flags()->intercept_intrin) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size); \
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size); \
} \
return REAL(memcpy)(dst, src, size); \
}
#endif
#ifndef COMMON_INTERCEPTOR_MMAP_IMPL
#define COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap, addr, sz, prot, flags, fd, \
off) \
{ return REAL(mmap)(addr, sz, prot, flags, fd, off); }
#endif
#ifndef COMMON_INTERCEPTOR_COPY_STRING
#define COMMON_INTERCEPTOR_COPY_STRING(ctx, to, from, size) {}
#endif
#ifndef COMMON_INTERCEPTOR_STRNDUP_IMPL
#define COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size) \
COMMON_INTERCEPTOR_ENTER(ctx, strndup, s, size); \
uptr copy_length = internal_strnlen(s, size); \
char *new_mem = (char *)WRAP(malloc)(copy_length + 1); \
if (common_flags()->intercept_strndup) { \
COMMON_INTERCEPTOR_READ_STRING(ctx, s, Min(size, copy_length + 1)); \
} \
if (new_mem) { \
COMMON_INTERCEPTOR_COPY_STRING(ctx, new_mem, s, copy_length); \
internal_memcpy(new_mem, s, copy_length); \
new_mem[copy_length] = '\0'; \
} \
return new_mem;
#endif
#ifndef COMMON_INTERCEPTOR_STRERROR
#define COMMON_INTERCEPTOR_STRERROR() {}
#endif
struct FileMetadata {
// For open_memstream().
char **addr;
SIZE_T *size;
};
struct CommonInterceptorMetadata {
enum {
CIMT_INVALID = 0,
CIMT_FILE
} type;
union {
FileMetadata file;
};
};
#if SI_POSIX
typedef AddrHashMap<CommonInterceptorMetadata, 31051> MetadataHashMap;
static MetadataHashMap *interceptor_metadata_map;
UNUSED static void SetInterceptorMetadata(__sanitizer_FILE *addr,
const FileMetadata &file) {
MetadataHashMap::Handle h(interceptor_metadata_map, (uptr)addr);
CHECK(h.created());
h->type = CommonInterceptorMetadata::CIMT_FILE;
h->file = file;
}
UNUSED static const FileMetadata *GetInterceptorMetadata(
__sanitizer_FILE *addr) {
MetadataHashMap::Handle h(interceptor_metadata_map, (uptr)addr,
/* remove */ false,
/* create */ false);
if (addr && h.exists()) {
CHECK(!h.created());
CHECK(h->type == CommonInterceptorMetadata::CIMT_FILE);
return &h->file;
} else {
return 0;
}
}
UNUSED static void DeleteInterceptorMetadata(void *addr) {
MetadataHashMap::Handle h(interceptor_metadata_map, (uptr)addr, true);
CHECK(h.exists());
}
#endif // SI_POSIX
#if SANITIZER_INTERCEPT_STRLEN
INTERCEPTOR(SIZE_T, strlen, const char *s) {
// Sometimes strlen is called prior to InitializeCommonInterceptors,
// in which case the REAL(strlen) typically used in
// COMMON_INTERCEPTOR_ENTER will fail. We use internal_strlen here
// to handle that.
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_strlen(s);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strlen, s);
SIZE_T result = REAL(strlen)(s);
if (common_flags()->intercept_strlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, result + 1);
return result;
}
#define INIT_STRLEN COMMON_INTERCEPT_FUNCTION(strlen)
#else
#define INIT_STRLEN
#endif
#if SANITIZER_INTERCEPT_STRNLEN
INTERCEPTOR(SIZE_T, strnlen, const char *s, SIZE_T maxlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnlen, s, maxlen);
SIZE_T length = REAL(strnlen)(s, maxlen);
if (common_flags()->intercept_strlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, Min(length + 1, maxlen));
return length;
}
#define INIT_STRNLEN COMMON_INTERCEPT_FUNCTION(strnlen)
#else
#define INIT_STRNLEN
#endif
#if SANITIZER_INTERCEPT_STRNDUP
INTERCEPTOR(char*, strndup, const char *s, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size);
}
#define INIT_STRNDUP COMMON_INTERCEPT_FUNCTION(strndup)
#else
#define INIT_STRNDUP
#endif // SANITIZER_INTERCEPT_STRNDUP
#if SANITIZER_INTERCEPT___STRNDUP
INTERCEPTOR(char*, __strndup, const char *s, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size);
}
#define INIT___STRNDUP COMMON_INTERCEPT_FUNCTION(__strndup)
#else
#define INIT___STRNDUP
#endif // SANITIZER_INTERCEPT___STRNDUP
#if SANITIZER_INTERCEPT_TEXTDOMAIN
INTERCEPTOR(char*, textdomain, const char *domainname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, textdomain, domainname);
if (domainname) COMMON_INTERCEPTOR_READ_STRING(ctx, domainname, 0);
char *domain = REAL(textdomain)(domainname);
if (domain) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(domain, internal_strlen(domain) + 1);
}
return domain;
}
#define INIT_TEXTDOMAIN COMMON_INTERCEPT_FUNCTION(textdomain)
#else
#define INIT_TEXTDOMAIN
#endif
#if SANITIZER_INTERCEPT_STRCMP
static inline int CharCmpX(unsigned char c1, unsigned char c2) {
return (c1 == c2) ? 0 : (c1 < c2) ? -1 : 1;
}
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcmp, uptr called_pc,
const char *s1, const char *s2, int result)
INTERCEPTOR(int, strcmp, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strcmp, s1, s2);
unsigned char c1, c2;
uptr i;
for (i = 0;; i++) {
c1 = (unsigned char)s1[i];
c2 = (unsigned char)s2[i];
if (c1 != c2 || c1 == '\0') break;
}
if (common_flags()->intercept_strcmp) {
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, i + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s2, i + 1);
}
int result = CharCmpX(c1, c2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcmp, GET_CALLER_PC(), s1,
s2, result);
return result;
}
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncmp, uptr called_pc,
const char *s1, const char *s2, uptr n,
int result)
INTERCEPTOR(int, strncmp, const char *s1, const char *s2, uptr size) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_strncmp(s1, s2, size);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strncmp, s1, s2, size);
unsigned char c1 = 0, c2 = 0;
uptr i;
for (i = 0; i < size; i++) {
c1 = (unsigned char)s1[i];
c2 = (unsigned char)s2[i];
if (c1 != c2 || c1 == '\0') break;
}
uptr i1 = i;
uptr i2 = i;
if (common_flags()->strict_string_checks) {
for (; i1 < size && s1[i1]; i1++) {}
for (; i2 < size && s2[i2]; i2++) {}
}
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s1), Min(i1 + 1, size));
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s2), Min(i2 + 1, size));
int result = CharCmpX(c1, c2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncmp, GET_CALLER_PC(), s1,
s2, size, result);
return result;
}
#define INIT_STRCMP COMMON_INTERCEPT_FUNCTION(strcmp)
#define INIT_STRNCMP COMMON_INTERCEPT_FUNCTION(strncmp)
#else
#define INIT_STRCMP
#define INIT_STRNCMP
#endif
#if SANITIZER_INTERCEPT_STRCASECMP
static inline int CharCaseCmp(unsigned char c1, unsigned char c2) {
int c1_low = ToLower(c1);
int c2_low = ToLower(c2);
return c1_low - c2_low;
}
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasecmp, uptr called_pc,
const char *s1, const char *s2, int result)
INTERCEPTOR(int, strcasecmp, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strcasecmp, s1, s2);
unsigned char c1 = 0, c2 = 0;
uptr i;
for (i = 0;; i++) {
c1 = (unsigned char)s1[i];
c2 = (unsigned char)s2[i];
if (CharCaseCmp(c1, c2) != 0 || c1 == '\0') break;
}
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, i + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s2, i + 1);
int result = CharCaseCmp(c1, c2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasecmp, GET_CALLER_PC(),
s1, s2, result);
return result;
}
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncasecmp, uptr called_pc,
const char *s1, const char *s2, uptr size,
int result)
INTERCEPTOR(int, strncasecmp, const char *s1, const char *s2, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strncasecmp, s1, s2, size);
unsigned char c1 = 0, c2 = 0;
uptr i;
for (i = 0; i < size; i++) {
c1 = (unsigned char)s1[i];
c2 = (unsigned char)s2[i];
if (CharCaseCmp(c1, c2) != 0 || c1 == '\0') break;
}
uptr i1 = i;
uptr i2 = i;
if (common_flags()->strict_string_checks) {
for (; i1 < size && s1[i1]; i1++) {}
for (; i2 < size && s2[i2]; i2++) {}
}
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s1), Min(i1 + 1, size));
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s2), Min(i2 + 1, size));
int result = CharCaseCmp(c1, c2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncasecmp, GET_CALLER_PC(),
s1, s2, size, result);
return result;
}
#define INIT_STRCASECMP COMMON_INTERCEPT_FUNCTION(strcasecmp)
#define INIT_STRNCASECMP COMMON_INTERCEPT_FUNCTION(strncasecmp)
#else
#define INIT_STRCASECMP
#define INIT_STRNCASECMP
#endif
#if SANITIZER_INTERCEPT_STRSTR || SANITIZER_INTERCEPT_STRCASESTR
static inline void StrstrCheck(void *ctx, char *r, const char *s1,
const char *s2) {
uptr len1 = internal_strlen(s1);
uptr len2 = internal_strlen(s2);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r ? r - s1 + len2 : len1 + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, len2 + 1);
}
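// (Illustrative) When r is non-null at offset k = r - s1, StrstrCheck reports
// a read of k + len2 bytes of s1 (up to and including the match); when r is
// null, all of s1 including its NUL terminator was scanned.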
#endif
#if SANITIZER_INTERCEPT_STRSTR
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strstr, uptr called_pc,
const char *s1, const char *s2, char *result)
INTERCEPTOR(char*, strstr, const char *s1, const char *s2) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_strstr(s1, s2);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strstr, s1, s2);
char *r = REAL(strstr)(s1, s2);
if (common_flags()->intercept_strstr)
StrstrCheck(ctx, r, s1, s2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strstr, GET_CALLER_PC(), s1,
s2, r);
return r;
}
#define INIT_STRSTR COMMON_INTERCEPT_FUNCTION(strstr);
#else
#define INIT_STRSTR
#endif
#if SANITIZER_INTERCEPT_STRCASESTR
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasestr, uptr called_pc,
const char *s1, const char *s2, char *result)
INTERCEPTOR(char*, strcasestr, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strcasestr, s1, s2);
char *r = REAL(strcasestr)(s1, s2);
if (common_flags()->intercept_strstr)
StrstrCheck(ctx, r, s1, s2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasestr, GET_CALLER_PC(),
s1, s2, r);
return r;
}
#define INIT_STRCASESTR COMMON_INTERCEPT_FUNCTION(strcasestr);
#else
#define INIT_STRCASESTR
#endif
#if SANITIZER_INTERCEPT_STRTOK
INTERCEPTOR(char*, strtok, char *str, const char *delimiters) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtok, str, delimiters);
if (!common_flags()->intercept_strtok) {
return REAL(strtok)(str, delimiters);
}
if (common_flags()->strict_string_checks) {
// If strict_string_checks is enabled, we check the whole first argument
// string on the first call (strtok saves this string in a static buffer
// for subsequent calls). We do not need to check strtok's result.
// As the delimiters can change, we check them every call.
if (str != nullptr) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, str, internal_strlen(str) + 1);
}
COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters,
internal_strlen(delimiters) + 1);
return REAL(strtok)(str, delimiters);
} else {
// However, when strict_string_checks is disabled we cannot check the
// whole string on the first call. Instead, we check the result string
// which is guaranteed to be a NULL-terminated substring of the first
// argument. We also conservatively check one character of str and the
// delimiters.
if (str != nullptr) {
COMMON_INTERCEPTOR_READ_STRING(ctx, str, 1);
}
COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters, 1);
char *result = REAL(strtok)(str, delimiters);
if (result != nullptr) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, result, internal_strlen(result) + 1);
} else if (str != nullptr) {
// No delimiters were found, so it's safe to assume that the entire str
// was scanned.
COMMON_INTERCEPTOR_READ_RANGE(ctx, str, internal_strlen(str) + 1);
}
return result;
}
}
#define INIT_STRTOK COMMON_INTERCEPT_FUNCTION(strtok)
#else
#define INIT_STRTOK
#endif
#if SANITIZER_INTERCEPT_MEMMEM
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memmem, uptr called_pc,
const void *s1, SIZE_T len1, const void *s2,
SIZE_T len2, void *result)
INTERCEPTOR(void*, memmem, const void *s1, SIZE_T len1, const void *s2,
SIZE_T len2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, memmem, s1, len1, s2, len2);
void *r = REAL(memmem)(s1, len1, s2, len2);
if (common_flags()->intercept_memmem) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s1, len1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, len2);
}
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memmem, GET_CALLER_PC(),
s1, len1, s2, len2, r);
return r;
}
#define INIT_MEMMEM COMMON_INTERCEPT_FUNCTION(memmem);
#else
#define INIT_MEMMEM
#endif // SANITIZER_INTERCEPT_MEMMEM
#if SANITIZER_INTERCEPT_STRCHR
INTERCEPTOR(char*, strchr, const char *s, int c) {
void *ctx;
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_strchr(s, c);
COMMON_INTERCEPTOR_ENTER(ctx, strchr, s, c);
char *result = REAL(strchr)(s, c);
if (common_flags()->intercept_strchr) {
    // Keep strlen as a macro argument, as the macro may ignore it.
COMMON_INTERCEPTOR_READ_STRING(ctx, s,
(result ? result - s : internal_strlen(s)) + 1);
}
return result;
}
#define INIT_STRCHR COMMON_INTERCEPT_FUNCTION(strchr)
#else
#define INIT_STRCHR
#endif
#if SANITIZER_INTERCEPT_STRCHRNUL
INTERCEPTOR(char*, strchrnul, const char *s, int c) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strchrnul, s, c);
char *result = REAL(strchrnul)(s, c);
uptr len = result - s + 1;
if (common_flags()->intercept_strchr)
COMMON_INTERCEPTOR_READ_STRING(ctx, s, len);
return result;
}
#define INIT_STRCHRNUL COMMON_INTERCEPT_FUNCTION(strchrnul)
#else
#define INIT_STRCHRNUL
#endif
#if SANITIZER_INTERCEPT_STRRCHR
INTERCEPTOR(char*, strrchr, const char *s, int c) {
void *ctx;
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_strrchr(s, c);
COMMON_INTERCEPTOR_ENTER(ctx, strrchr, s, c);
if (common_flags()->intercept_strchr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
return REAL(strrchr)(s, c);
}
#define INIT_STRRCHR COMMON_INTERCEPT_FUNCTION(strrchr)
#else
#define INIT_STRRCHR
#endif
#if SANITIZER_INTERCEPT_STRSPN
INTERCEPTOR(SIZE_T, strspn, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strspn, s1, s2);
SIZE_T r = REAL(strspn)(s1, s2);
if (common_flags()->intercept_strspn) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, internal_strlen(s2) + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r + 1);
}
return r;
}
INTERCEPTOR(SIZE_T, strcspn, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strcspn, s1, s2);
SIZE_T r = REAL(strcspn)(s1, s2);
if (common_flags()->intercept_strspn) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, internal_strlen(s2) + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r + 1);
}
return r;
}
#define INIT_STRSPN \
COMMON_INTERCEPT_FUNCTION(strspn); \
COMMON_INTERCEPT_FUNCTION(strcspn);
#else
#define INIT_STRSPN
#endif
#if SANITIZER_INTERCEPT_STRPBRK
INTERCEPTOR(char *, strpbrk, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strpbrk, s1, s2);
char *r = REAL(strpbrk)(s1, s2);
if (common_flags()->intercept_strpbrk) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, internal_strlen(s2) + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1,
r ? r - s1 + 1 : internal_strlen(s1) + 1);
}
return r;
}
#define INIT_STRPBRK COMMON_INTERCEPT_FUNCTION(strpbrk);
#else
#define INIT_STRPBRK
#endif
#if SANITIZER_INTERCEPT_MEMSET
INTERCEPTOR(void *, memset, void *dst, int v, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size);
}
#define INIT_MEMSET COMMON_INTERCEPT_FUNCTION(memset)
#else
#define INIT_MEMSET
#endif
#if SANITIZER_INTERCEPT_MEMMOVE
INTERCEPTOR(void *, memmove, void *dst, const void *src, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size);
}
#define INIT_MEMMOVE COMMON_INTERCEPT_FUNCTION(memmove)
#else
#define INIT_MEMMOVE
#endif
#if SANITIZER_INTERCEPT_MEMCPY
INTERCEPTOR(void *, memcpy, void *dst, const void *src, uptr size) {
  // On OS X, memcpy and memmove are aliases of the same implementation, so
  // calling internal_memcpy here would cause memory corruption; if this is
  // ever switched to an internal_ routine, it has to be internal_memmove.
void *ctx;
#if PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE
COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, dst, src, size);
#else
COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size);
#endif
}
#define INIT_MEMCPY \
do { \
if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) { \
COMMON_INTERCEPT_FUNCTION(memcpy); \
} else { \
ASSIGN_REAL(memcpy, memmove); \
} \
CHECK(REAL(memcpy)); \
} while (false)
#else
#define INIT_MEMCPY
#endif
#if SANITIZER_INTERCEPT_MEMCMP
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memcmp, uptr called_pc,
const void *s1, const void *s2, uptr n,
int result)
// Common code for `memcmp` and `bcmp`.
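// With strict_memcmp the whole buffers are checked up front; otherwise the
// comparison is replayed byte-by-byte here so that only the bytes actually
// examined (up to and including the first mismatch) are reported as read.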
int MemcmpInterceptorCommon(void *ctx,
int (*real_fn)(const void *, const void *, uptr),
const void *a1, const void *a2, uptr size) {
if (common_flags()->intercept_memcmp) {
if (common_flags()->strict_memcmp) {
// Check the entire regions even if the first bytes of the buffers are
// different.
COMMON_INTERCEPTOR_READ_RANGE(ctx, a1, size);
COMMON_INTERCEPTOR_READ_RANGE(ctx, a2, size);
// Fallthrough to REAL(memcmp) below.
} else {
unsigned char c1 = 0, c2 = 0;
const unsigned char *s1 = (const unsigned char*)a1;
const unsigned char *s2 = (const unsigned char*)a2;
uptr i;
for (i = 0; i < size; i++) {
c1 = s1[i];
c2 = s2[i];
if (c1 != c2) break;
}
COMMON_INTERCEPTOR_READ_RANGE(ctx, s1, Min(i + 1, size));
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, Min(i + 1, size));
int r = CharCmpX(c1, c2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memcmp, GET_CALLER_PC(),
a1, a2, size, r);
return r;
}
}
int result = real_fn(a1, a2, size);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memcmp, GET_CALLER_PC(), a1,
a2, size, result);
return result;
}
INTERCEPTOR(int, memcmp, const void *a1, const void *a2, uptr size) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_memcmp(a1, a2, size);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, memcmp, a1, a2, size);
return MemcmpInterceptorCommon(ctx, REAL(memcmp), a1, a2, size);
}
#define INIT_MEMCMP COMMON_INTERCEPT_FUNCTION(memcmp)
#else
#define INIT_MEMCMP
#endif
#if SANITIZER_INTERCEPT_BCMP
INTERCEPTOR(int, bcmp, const void *a1, const void *a2, uptr size) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_memcmp(a1, a2, size);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, bcmp, a1, a2, size);
return MemcmpInterceptorCommon(ctx, REAL(bcmp), a1, a2, size);
}
#define INIT_BCMP COMMON_INTERCEPT_FUNCTION(bcmp)
#else
#define INIT_BCMP
#endif
#if SANITIZER_INTERCEPT_MEMCHR
INTERCEPTOR(void*, memchr, const void *s, int c, SIZE_T n) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_memchr(s, c, n);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, memchr, s, c, n);
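  // On Windows, REAL(memchr) may not have been resolved yet; fall back to the
  // internal implementation in that case.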
#if SANITIZER_WINDOWS
void *res;
if (REAL(memchr)) {
res = REAL(memchr)(s, c, n);
} else {
res = internal_memchr(s, c, n);
}
#else
void *res = REAL(memchr)(s, c, n);
#endif
uptr len = res ? (char *)res - (const char *)s + 1 : n;
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, len);
return res;
}
#define INIT_MEMCHR COMMON_INTERCEPT_FUNCTION(memchr)
#else
#define INIT_MEMCHR
#endif
#if SANITIZER_INTERCEPT_MEMRCHR
INTERCEPTOR(void*, memrchr, const void *s, int c, SIZE_T n) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, memrchr, s, c, n);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, n);
return REAL(memrchr)(s, c, n);
}
#define INIT_MEMRCHR COMMON_INTERCEPT_FUNCTION(memrchr)
#else
#define INIT_MEMRCHR
#endif
#if SANITIZER_INTERCEPT_FREXP
INTERCEPTOR(double, frexp, double x, int *exp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, frexp, x, exp);
// Assuming frexp() always writes to |exp|.
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, exp, sizeof(*exp));
double res = REAL(frexp)(x, exp);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(exp, sizeof(*exp));
return res;
}
#define INIT_FREXP COMMON_INTERCEPT_FUNCTION(frexp);
#else
#define INIT_FREXP
#endif // SANITIZER_INTERCEPT_FREXP
#if SANITIZER_INTERCEPT_FREXPF_FREXPL
INTERCEPTOR(float, frexpf, float x, int *exp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, frexpf, x, exp);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, exp, sizeof(*exp));
float res = REAL(frexpf)(x, exp);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(exp, sizeof(*exp));
return res;
}
INTERCEPTOR(long double, frexpl, long double x, int *exp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, frexpl, x, exp);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, exp, sizeof(*exp));
long double res = REAL(frexpl)(x, exp);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(exp, sizeof(*exp));
return res;
}
#define INIT_FREXPF_FREXPL \
COMMON_INTERCEPT_FUNCTION(frexpf); \
COMMON_INTERCEPT_FUNCTION_LDBL(frexpl)
#else
#define INIT_FREXPF_FREXPL
#endif // SANITIZER_INTERCEPT_FREXPF_FREXPL
#if SI_POSIX
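// Helpers for the iovec-based interceptors below: walk the iovec array and
// mark each buffer as written (write_iovec) or read (read_iovec), limited to
// maxlen bytes in total, i.e. the number of bytes actually transferred.
// read_iovec additionally checks that the iovec array itself is readable.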
static void write_iovec(void *ctx, struct __sanitizer_iovec *iovec,
SIZE_T iovlen, SIZE_T maxlen) {
for (SIZE_T i = 0; i < iovlen && maxlen; ++i) {
SSIZE_T sz = Min(iovec[i].iov_len, maxlen);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iovec[i].iov_base, sz);
maxlen -= sz;
}
}
static void read_iovec(void *ctx, struct __sanitizer_iovec *iovec,
SIZE_T iovlen, SIZE_T maxlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, iovec, sizeof(*iovec) * iovlen);
for (SIZE_T i = 0; i < iovlen && maxlen; ++i) {
SSIZE_T sz = Min(iovec[i].iov_len, maxlen);
COMMON_INTERCEPTOR_READ_RANGE(ctx, iovec[i].iov_base, sz);
maxlen -= sz;
}
}
#endif
#if SANITIZER_INTERCEPT_READ
INTERCEPTOR(SSIZE_T, read, int fd, void *ptr, SIZE_T count) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, read, fd, ptr, count);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(read)(fd, ptr, count);
if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_READ COMMON_INTERCEPT_FUNCTION(read)
#else
#define INIT_READ
#endif
#if SANITIZER_INTERCEPT_FREAD
INTERCEPTOR(SIZE_T, fread, void *ptr, SIZE_T size, SIZE_T nmemb, void *file) {
// libc file streams can call user-supplied functions, see fopencookie.
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fread, ptr, size, nmemb, file);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(fread)(ptr, size, nmemb, file);
if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res * size);
return res;
}
#define INIT_FREAD COMMON_INTERCEPT_FUNCTION(fread)
#else
#define INIT_FREAD
#endif
#if SANITIZER_INTERCEPT_PREAD
INTERCEPTOR(SSIZE_T, pread, int fd, void *ptr, SIZE_T count, OFF_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pread, fd, ptr, count, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(pread)(fd, ptr, count, offset);
if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_PREAD COMMON_INTERCEPT_FUNCTION(pread)
#else
#define INIT_PREAD
#endif
#if SANITIZER_INTERCEPT_PREAD64
INTERCEPTOR(SSIZE_T, pread64, int fd, void *ptr, SIZE_T count, OFF64_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pread64, fd, ptr, count, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(pread64)(fd, ptr, count, offset);
if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_PREAD64 COMMON_INTERCEPT_FUNCTION(pread64)
#else
#define INIT_PREAD64
#endif
#if SANITIZER_INTERCEPT_READV
INTERCEPTOR_WITH_SUFFIX(SSIZE_T, readv, int fd, __sanitizer_iovec *iov,
int iovcnt) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readv, fd, iov, iovcnt);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
SSIZE_T res = REAL(readv)(fd, iov, iovcnt);
if (res > 0) write_iovec(ctx, iov, iovcnt, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_READV COMMON_INTERCEPT_FUNCTION(readv)
#else
#define INIT_READV
#endif
#if SANITIZER_INTERCEPT_PREADV
INTERCEPTOR(SSIZE_T, preadv, int fd, __sanitizer_iovec *iov, int iovcnt,
OFF_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, preadv, fd, iov, iovcnt, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
SSIZE_T res = REAL(preadv)(fd, iov, iovcnt, offset);
if (res > 0) write_iovec(ctx, iov, iovcnt, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_PREADV COMMON_INTERCEPT_FUNCTION(preadv)
#else
#define INIT_PREADV
#endif
#if SANITIZER_INTERCEPT_PREADV64
INTERCEPTOR(SSIZE_T, preadv64, int fd, __sanitizer_iovec *iov, int iovcnt,
OFF64_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, preadv64, fd, iov, iovcnt, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
SSIZE_T res = REAL(preadv64)(fd, iov, iovcnt, offset);
if (res > 0) write_iovec(ctx, iov, iovcnt, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_PREADV64 COMMON_INTERCEPT_FUNCTION(preadv64)
#else
#define INIT_PREADV64
#endif
#if SANITIZER_INTERCEPT_WRITE
INTERCEPTOR(SSIZE_T, write, int fd, void *ptr, SIZE_T count) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, write, fd, ptr, count);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(write)(fd, ptr, count);
// FIXME: this check should be _before_ the call to REAL(write), not after
if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
return res;
}
#define INIT_WRITE COMMON_INTERCEPT_FUNCTION(write)
#else
#define INIT_WRITE
#endif
#if SANITIZER_INTERCEPT_FWRITE
INTERCEPTOR(SIZE_T, fwrite, const void *p, uptr size, uptr nmemb, void *file) {
// libc file streams can call user-supplied functions, see fopencookie.
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fwrite, p, size, nmemb, file);
SIZE_T res = REAL(fwrite)(p, size, nmemb, file);
if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, p, res * size);
return res;
}
#define INIT_FWRITE COMMON_INTERCEPT_FUNCTION(fwrite)
#else
#define INIT_FWRITE
#endif
#if SANITIZER_INTERCEPT_PWRITE
INTERCEPTOR(SSIZE_T, pwrite, int fd, void *ptr, SIZE_T count, OFF_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pwrite, fd, ptr, count, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(pwrite)(fd, ptr, count, offset);
if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
return res;
}
#define INIT_PWRITE COMMON_INTERCEPT_FUNCTION(pwrite)
#else
#define INIT_PWRITE
#endif
#if SANITIZER_INTERCEPT_PWRITE64
INTERCEPTOR(SSIZE_T, pwrite64, int fd, void *ptr, OFF64_T count,
OFF64_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pwrite64, fd, ptr, count, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(pwrite64)(fd, ptr, count, offset);
if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
return res;
}
#define INIT_PWRITE64 COMMON_INTERCEPT_FUNCTION(pwrite64)
#else
#define INIT_PWRITE64
#endif
#if SANITIZER_INTERCEPT_WRITEV
INTERCEPTOR_WITH_SUFFIX(SSIZE_T, writev, int fd, __sanitizer_iovec *iov,
int iovcnt) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, writev, fd, iov, iovcnt);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(writev)(fd, iov, iovcnt);
if (res > 0) read_iovec(ctx, iov, iovcnt, res);
return res;
}
#define INIT_WRITEV COMMON_INTERCEPT_FUNCTION(writev)
#else
#define INIT_WRITEV
#endif
#if SANITIZER_INTERCEPT_PWRITEV
INTERCEPTOR(SSIZE_T, pwritev, int fd, __sanitizer_iovec *iov, int iovcnt,
OFF_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pwritev, fd, iov, iovcnt, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(pwritev)(fd, iov, iovcnt, offset);
if (res > 0) read_iovec(ctx, iov, iovcnt, res);
return res;
}
#define INIT_PWRITEV COMMON_INTERCEPT_FUNCTION(pwritev)
#else
#define INIT_PWRITEV
#endif
#if SANITIZER_INTERCEPT_PWRITEV64
INTERCEPTOR(SSIZE_T, pwritev64, int fd, __sanitizer_iovec *iov, int iovcnt,
OFF64_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pwritev64, fd, iov, iovcnt, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(pwritev64)(fd, iov, iovcnt, offset);
if (res > 0) read_iovec(ctx, iov, iovcnt, res);
return res;
}
#define INIT_PWRITEV64 COMMON_INTERCEPT_FUNCTION(pwritev64)
#else
#define INIT_PWRITEV64
#endif
#if SANITIZER_INTERCEPT_FGETS
INTERCEPTOR(char *, fgets, char *s, SIZE_T size, void *file) {
// libc file streams can call user-supplied functions, see fopencookie.
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgets, s, size, file);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(fgets)(s, size, file);
if (res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, internal_strlen(s) + 1);
return res;
}
#define INIT_FGETS COMMON_INTERCEPT_FUNCTION(fgets)
#else
#define INIT_FGETS
#endif
#if SANITIZER_INTERCEPT_FPUTS
INTERCEPTOR_WITH_SUFFIX(int, fputs, char *s, void *file) {
// libc file streams can call user-supplied functions, see fopencookie.
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fputs, s, file);
if (!SANITIZER_APPLE || s) { // `fputs(NULL, file)` is supported on Darwin.
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
}
return REAL(fputs)(s, file);
}
#define INIT_FPUTS COMMON_INTERCEPT_FUNCTION(fputs)
#else
#define INIT_FPUTS
#endif
#if SANITIZER_INTERCEPT_PUTS
INTERCEPTOR(int, puts, char *s) {
// libc file streams can call user-supplied functions, see fopencookie.
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, puts, s);
if (!SANITIZER_APPLE || s) { // `puts(NULL)` is supported on Darwin.
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
}
return REAL(puts)(s);
}
#define INIT_PUTS COMMON_INTERCEPT_FUNCTION(puts)
#else
#define INIT_PUTS
#endif
#if SANITIZER_INTERCEPT_PRCTL
INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, prctl, option, arg2, arg3, arg4, arg5);
static const int PR_SET_NAME = 15;
static const int PR_SET_VMA = 0x53564d41;
static const int PR_SCHED_CORE = 62;
static const int PR_SCHED_CORE_GET = 0;
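  // For PR_SET_VMA with arg2 == 0 (PR_SET_VMA_ANON_NAME on Linux), arg5
  // points to a name string; check that it is readable before the real call.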
if (option == PR_SET_VMA && arg2 == 0UL) {
char *name = (char *)arg5;
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
}
  int res = REAL(prctl)(option, arg2, arg3, arg4, arg5);
if (option == PR_SET_NAME) {
char buff[16];
internal_strncpy(buff, (char *)arg2, 15);
buff[15] = 0;
COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, buff);
} else if (res != -1 && option == PR_SCHED_CORE && arg2 == PR_SCHED_CORE_GET) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64*)(arg5), sizeof(u64));
}
return res;
}
#define INIT_PRCTL COMMON_INTERCEPT_FUNCTION(prctl)
#else
#define INIT_PRCTL
#endif // SANITIZER_INTERCEPT_PRCTL
#if SANITIZER_INTERCEPT_TIME
INTERCEPTOR(unsigned long, time, unsigned long *t) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, time, t);
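  // Call the real function with a local variable so that |t| is written (and
  // reported as written) only if the call succeeds.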
unsigned long local_t;
unsigned long res = REAL(time)(&local_t);
if (t && res != (unsigned long)-1) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, t, sizeof(*t));
*t = local_t;
}
return res;
}
#define INIT_TIME COMMON_INTERCEPT_FUNCTION(time);
#else
#define INIT_TIME
#endif // SANITIZER_INTERCEPT_TIME
#if SANITIZER_INTERCEPT_LOCALTIME_AND_FRIENDS
static void unpoison_tm(void *ctx, __sanitizer_tm *tm) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tm, sizeof(*tm));
#if !SANITIZER_SOLARIS
if (tm->tm_zone) {
    // Cannot use COMMON_INTERCEPTOR_WRITE_RANGE here, because tm->tm_zone
// can point to shared memory and tsan would report a data race.
COMMON_INTERCEPTOR_INITIALIZE_RANGE(tm->tm_zone,
internal_strlen(tm->tm_zone) + 1);
}
#endif
}
INTERCEPTOR(__sanitizer_tm *, localtime, unsigned long *timep) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, localtime, timep);
__sanitizer_tm *res = REAL(localtime)(timep);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
unpoison_tm(ctx, res);
}
return res;
}
INTERCEPTOR(__sanitizer_tm *, localtime_r, unsigned long *timep, void *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, localtime_r, timep, result);
__sanitizer_tm *res = REAL(localtime_r)(timep, result);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
unpoison_tm(ctx, res);
}
return res;
}
INTERCEPTOR(__sanitizer_tm *, gmtime, unsigned long *timep) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gmtime, timep);
__sanitizer_tm *res = REAL(gmtime)(timep);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
unpoison_tm(ctx, res);
}
return res;
}
INTERCEPTOR(__sanitizer_tm *, gmtime_r, unsigned long *timep, void *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gmtime_r, timep, result);
__sanitizer_tm *res = REAL(gmtime_r)(timep, result);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
unpoison_tm(ctx, res);
}
return res;
}
INTERCEPTOR(char *, ctime, unsigned long *timep) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ctime, timep);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(ctime)(timep);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
INTERCEPTOR(char *, ctime_r, unsigned long *timep, char *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ctime_r, timep, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(ctime_r)(timep, result);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
INTERCEPTOR(char *, asctime, __sanitizer_tm *tm) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, asctime, tm);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(asctime)(tm);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, tm, sizeof(*tm));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
INTERCEPTOR(char *, asctime_r, __sanitizer_tm *tm, char *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, asctime_r, tm, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(asctime_r)(tm, result);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, tm, sizeof(*tm));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
INTERCEPTOR(long, mktime, __sanitizer_tm *tm) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mktime, tm);
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_sec, sizeof(tm->tm_sec));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_min, sizeof(tm->tm_min));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_hour, sizeof(tm->tm_hour));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_mday, sizeof(tm->tm_mday));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_mon, sizeof(tm->tm_mon));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_year, sizeof(tm->tm_year));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_isdst, sizeof(tm->tm_isdst));
long res = REAL(mktime)(tm);
if (res != -1) unpoison_tm(ctx, tm);
return res;
}
#define INIT_LOCALTIME_AND_FRIENDS \
COMMON_INTERCEPT_FUNCTION(localtime); \
COMMON_INTERCEPT_FUNCTION(localtime_r); \
COMMON_INTERCEPT_FUNCTION(gmtime); \
COMMON_INTERCEPT_FUNCTION(gmtime_r); \
COMMON_INTERCEPT_FUNCTION(ctime); \
COMMON_INTERCEPT_FUNCTION(ctime_r); \
COMMON_INTERCEPT_FUNCTION(asctime); \
COMMON_INTERCEPT_FUNCTION(asctime_r); \
COMMON_INTERCEPT_FUNCTION(mktime);
#else
#define INIT_LOCALTIME_AND_FRIENDS
#endif // SANITIZER_INTERCEPT_LOCALTIME_AND_FRIENDS
#if SANITIZER_INTERCEPT_STRPTIME
INTERCEPTOR(char *, strptime, char *s, char *format, __sanitizer_tm *tm) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strptime, s, format, tm);
if (format)
COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(strptime)(s, format, tm);
COMMON_INTERCEPTOR_READ_STRING(ctx, s, res ? res - s : 0);
if (res && tm) {
    // Do not call unpoison_tm here, because strptime does not, in fact,
    // initialize the entire struct tm. For example, the tm_zone pointer is
    // left uninitialized.
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tm, sizeof(*tm));
}
return res;
}
#define INIT_STRPTIME COMMON_INTERCEPT_FUNCTION(strptime);
#else
#define INIT_STRPTIME
#endif
#if SANITIZER_INTERCEPT_SCANF || SANITIZER_INTERCEPT_PRINTF
#include "sanitizer_common_interceptors_format.inc"
#define FORMAT_INTERCEPTOR_IMPL(name, vname, ...) \
{ \
void *ctx; \
va_list ap; \
va_start(ap, format); \
COMMON_INTERCEPTOR_ENTER(ctx, vname, __VA_ARGS__, ap); \
int res = WRAP(vname)(__VA_ARGS__, ap); \
va_end(ap); \
return res; \
}
#endif
#if SANITIZER_INTERCEPT_SCANF
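// Body of the v*scanf interceptors: call the real function first and, on
// success, let scanf_common() mark the output arguments described by |format|
// (via the copied va_list) as initialized.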
#define VSCANF_INTERCEPTOR_IMPL(vname, allowGnuMalloc, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, vname, __VA_ARGS__); \
va_list aq; \
va_copy(aq, ap); \
int res = REAL(vname)(__VA_ARGS__); \
if (res > 0) \
scanf_common(ctx, res, allowGnuMalloc, format, aq); \
va_end(aq); \
return res; \
}
INTERCEPTOR(int, vscanf, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(vscanf, true, format, ap)
INTERCEPTOR(int, vsscanf, const char *str, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(vsscanf, true, str, format, ap)
INTERCEPTOR(int, vfscanf, void *stream, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(vfscanf, true, stream, format, ap)
#if SANITIZER_INTERCEPT_ISOC99_SCANF
INTERCEPTOR(int, __isoc99_vscanf, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc99_vscanf, false, format, ap)
INTERCEPTOR(int, __isoc99_vsscanf, const char *str, const char *format,
va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc99_vsscanf, false, str, format, ap)
INTERCEPTOR(int, __isoc99_vfscanf, void *stream, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc99_vfscanf, false, stream, format, ap)
#endif // SANITIZER_INTERCEPT_ISOC99_SCANF
INTERCEPTOR(int, scanf, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(scanf, vscanf, format)
INTERCEPTOR(int, fscanf, void *stream, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(fscanf, vfscanf, stream, format)
INTERCEPTOR(int, sscanf, const char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(sscanf, vsscanf, str, format)
#if SANITIZER_INTERCEPT_ISOC99_SCANF
INTERCEPTOR(int, __isoc99_scanf, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_scanf, __isoc99_vscanf, format)
INTERCEPTOR(int, __isoc99_fscanf, void *stream, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_fscanf, __isoc99_vfscanf, stream, format)
INTERCEPTOR(int, __isoc99_sscanf, const char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format)
#endif
#endif
#if SANITIZER_INTERCEPT_SCANF
#define INIT_SCANF \
COMMON_INTERCEPT_FUNCTION_LDBL(scanf); \
COMMON_INTERCEPT_FUNCTION_LDBL(sscanf); \
COMMON_INTERCEPT_FUNCTION_LDBL(fscanf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vscanf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vsscanf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vfscanf);
#else
#define INIT_SCANF
#endif
#if SANITIZER_INTERCEPT_ISOC99_SCANF
#define INIT_ISOC99_SCANF \
COMMON_INTERCEPT_FUNCTION(__isoc99_scanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_sscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_fscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vsscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf);
#else
#define INIT_ISOC99_SCANF
#endif
#if SANITIZER_INTERCEPT_PRINTF
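// Bodies of the v*printf interceptors: optionally check the format string and
// its arguments via printf_common(), call the real function, and for the
// string-producing variants mark the output buffer as written.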
#define VPRINTF_INTERCEPTOR_ENTER(vname, ...) \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, vname, __VA_ARGS__); \
va_list aq; \
va_copy(aq, ap);
#define VPRINTF_INTERCEPTOR_RETURN() \
va_end(aq);
#define VPRINTF_INTERCEPTOR_IMPL(vname, ...) \
{ \
VPRINTF_INTERCEPTOR_ENTER(vname, __VA_ARGS__); \
if (common_flags()->check_printf) \
printf_common(ctx, format, aq); \
int res = REAL(vname)(__VA_ARGS__); \
VPRINTF_INTERCEPTOR_RETURN(); \
return res; \
}
// FIXME: under ASan the REAL() call below may write to freed memory and
// corrupt its metadata. See
// https://github.com/google/sanitizers/issues/321.
#define VSPRINTF_INTERCEPTOR_IMPL(vname, str, ...) \
{ \
VPRINTF_INTERCEPTOR_ENTER(vname, str, __VA_ARGS__) \
if (common_flags()->check_printf) { \
printf_common(ctx, format, aq); \
} \
int res = REAL(vname)(str, __VA_ARGS__); \
if (res >= 0) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, str, res + 1); \
} \
VPRINTF_INTERCEPTOR_RETURN(); \
return res; \
}
// FIXME: under ASan the REAL() call below may write to freed memory and
// corrupt its metadata. See
// https://github.com/google/sanitizers/issues/321.
#define VSNPRINTF_INTERCEPTOR_IMPL(vname, str, size, ...) \
{ \
VPRINTF_INTERCEPTOR_ENTER(vname, str, size, __VA_ARGS__) \
if (common_flags()->check_printf) { \
printf_common(ctx, format, aq); \
} \
int res = REAL(vname)(str, size, __VA_ARGS__); \
if (res >= 0) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, str, Min(size, (SIZE_T)(res + 1))); \
} \
VPRINTF_INTERCEPTOR_RETURN(); \
return res; \
}
// FIXME: under ASan the REAL() call below may write to freed memory and
// corrupt its metadata. See
// https://github.com/google/sanitizers/issues/321.
#define VASPRINTF_INTERCEPTOR_IMPL(vname, strp, ...) \
{ \
VPRINTF_INTERCEPTOR_ENTER(vname, strp, __VA_ARGS__) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, strp, sizeof(char *)); \
if (common_flags()->check_printf) { \
printf_common(ctx, format, aq); \
} \
int res = REAL(vname)(strp, __VA_ARGS__); \
if (res >= 0) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *strp, res + 1); \
} \
VPRINTF_INTERCEPTOR_RETURN(); \
return res; \
}
INTERCEPTOR(int, vprintf, const char *format, va_list ap)
VPRINTF_INTERCEPTOR_IMPL(vprintf, format, ap)
INTERCEPTOR(int, vfprintf, __sanitizer_FILE *stream, const char *format,
va_list ap)
VPRINTF_INTERCEPTOR_IMPL(vfprintf, stream, format, ap)
INTERCEPTOR(int, vsnprintf, char *str, SIZE_T size, const char *format,
va_list ap)
VSNPRINTF_INTERCEPTOR_IMPL(vsnprintf, str, size, format, ap)
#if SANITIZER_INTERCEPT___PRINTF_CHK
INTERCEPTOR(int, __vsnprintf_chk, char *str, SIZE_T size, int flag,
SIZE_T size_to, const char *format, va_list ap)
VSNPRINTF_INTERCEPTOR_IMPL(vsnprintf, str, size, format, ap)
#endif
#if SANITIZER_INTERCEPT_PRINTF_L
INTERCEPTOR(int, vsnprintf_l, char *str, SIZE_T size, void *loc,
const char *format, va_list ap)
VSNPRINTF_INTERCEPTOR_IMPL(vsnprintf_l, str, size, loc, format, ap)
INTERCEPTOR(int, snprintf_l, char *str, SIZE_T size, void *loc,
const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(snprintf_l, vsnprintf_l, str, size, loc, format)
#endif // SANITIZER_INTERCEPT_PRINTF_L
INTERCEPTOR(int, vsprintf, char *str, const char *format, va_list ap)
VSPRINTF_INTERCEPTOR_IMPL(vsprintf, str, format, ap)
#if SANITIZER_INTERCEPT___PRINTF_CHK
INTERCEPTOR(int, __vsprintf_chk, char *str, int flag, SIZE_T size_to,
const char *format, va_list ap)
VSPRINTF_INTERCEPTOR_IMPL(vsprintf, str, format, ap)
#endif
INTERCEPTOR(int, vasprintf, char **strp, const char *format, va_list ap)
VASPRINTF_INTERCEPTOR_IMPL(vasprintf, strp, format, ap)
#if SANITIZER_INTERCEPT_ISOC99_PRINTF
INTERCEPTOR(int, __isoc99_vprintf, const char *format, va_list ap)
VPRINTF_INTERCEPTOR_IMPL(__isoc99_vprintf, format, ap)
INTERCEPTOR(int, __isoc99_vfprintf, __sanitizer_FILE *stream,
const char *format, va_list ap)
VPRINTF_INTERCEPTOR_IMPL(__isoc99_vfprintf, stream, format, ap)
INTERCEPTOR(int, __isoc99_vsnprintf, char *str, SIZE_T size, const char *format,
va_list ap)
VSNPRINTF_INTERCEPTOR_IMPL(__isoc99_vsnprintf, str, size, format, ap)
INTERCEPTOR(int, __isoc99_vsprintf, char *str, const char *format,
va_list ap)
VSPRINTF_INTERCEPTOR_IMPL(__isoc99_vsprintf, str, format,
ap)
#endif // SANITIZER_INTERCEPT_ISOC99_PRINTF
INTERCEPTOR(int, printf, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(printf, vprintf, format)
INTERCEPTOR(int, fprintf, __sanitizer_FILE *stream, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(fprintf, vfprintf, stream, format)
#if SANITIZER_INTERCEPT___PRINTF_CHK
INTERCEPTOR(int, __fprintf_chk, __sanitizer_FILE *stream, SIZE_T size,
const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__fprintf_chk, vfprintf, stream, format)
#endif
INTERCEPTOR(int, sprintf, char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(sprintf, vsprintf, str, format)
#if SANITIZER_INTERCEPT___PRINTF_CHK
INTERCEPTOR(int, __sprintf_chk, char *str, int flag, SIZE_T size_to,
const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__sprintf_chk, vsprintf, str, format)
#endif
INTERCEPTOR(int, snprintf, char *str, SIZE_T size, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(snprintf, vsnprintf, str, size, format)
#if SANITIZER_INTERCEPT___PRINTF_CHK
INTERCEPTOR(int, __snprintf_chk, char *str, SIZE_T size, int flag,
SIZE_T size_to, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__snprintf_chk, vsnprintf, str, size, format)
#endif
INTERCEPTOR(int, asprintf, char **strp, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(asprintf, vasprintf, strp, format)
#if SANITIZER_INTERCEPT_ISOC99_PRINTF
INTERCEPTOR(int, __isoc99_printf, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_printf, __isoc99_vprintf, format)
INTERCEPTOR(int, __isoc99_fprintf, __sanitizer_FILE *stream, const char *format,
...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_fprintf, __isoc99_vfprintf, stream, format)
INTERCEPTOR(int, __isoc99_sprintf, char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_sprintf, __isoc99_vsprintf, str, format)
INTERCEPTOR(int, __isoc99_snprintf, char *str, SIZE_T size,
const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_snprintf, __isoc99_vsnprintf, str, size,
format)
#endif // SANITIZER_INTERCEPT_ISOC99_PRINTF
#endif // SANITIZER_INTERCEPT_PRINTF
#if SANITIZER_INTERCEPT_PRINTF
#define INIT_PRINTF \
COMMON_INTERCEPT_FUNCTION_LDBL(printf); \
COMMON_INTERCEPT_FUNCTION_LDBL(sprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(snprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(asprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(fprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vsprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vsnprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vasprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vfprintf);
#else
#define INIT_PRINTF
#endif
#if SANITIZER_INTERCEPT___PRINTF_CHK
#define INIT___PRINTF_CHK \
COMMON_INTERCEPT_FUNCTION(__sprintf_chk); \
COMMON_INTERCEPT_FUNCTION(__snprintf_chk); \
COMMON_INTERCEPT_FUNCTION(__vsprintf_chk); \
COMMON_INTERCEPT_FUNCTION(__vsnprintf_chk); \
COMMON_INTERCEPT_FUNCTION(__fprintf_chk);
#else
#define INIT___PRINTF_CHK
#endif
#if SANITIZER_INTERCEPT_PRINTF_L
#define INIT_PRINTF_L \
COMMON_INTERCEPT_FUNCTION(snprintf_l); \
COMMON_INTERCEPT_FUNCTION(vsnprintf_l);
#else
#define INIT_PRINTF_L
#endif
#if SANITIZER_INTERCEPT_ISOC99_PRINTF
#define INIT_ISOC99_PRINTF \
COMMON_INTERCEPT_FUNCTION(__isoc99_printf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_sprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_snprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_fprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vsprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vsnprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vfprintf);
#else
#define INIT_ISOC99_PRINTF
#endif
#if SANITIZER_INTERCEPT_IOCTL
#include "sanitizer_common_interceptors_ioctl.inc"
#include "sanitizer_interceptors_ioctl_netbsd.inc"
INTERCEPTOR(int, ioctl, int d, unsigned long request, ...) {
// We need a frame pointer, because we call into ioctl_common_[pre|post] which
// can trigger a report and we need to be able to unwind through this
// function. On Mac in debug mode we might not have a frame pointer, because
// ioctl_common_[pre|post] doesn't get inlined here.
ENABLE_FRAME_POINTER;
void *ctx;
va_list ap;
va_start(ap, request);
void *arg = va_arg(ap, void *);
va_end(ap);
COMMON_INTERCEPTOR_ENTER(ctx, ioctl, d, request, arg);
CHECK(ioctl_initialized);
// Note: TSan does not use common flags, and they are zero-initialized.
// This effectively disables ioctl handling in TSan.
if (!common_flags()->handle_ioctl) return REAL(ioctl)(d, request, arg);
// Although request is unsigned long, the rest of the interceptor uses it
// as just "unsigned" to save space, because we know that all values fit in
// "unsigned" - they are compile-time constants.
const ioctl_desc *desc = ioctl_lookup(request);
ioctl_desc decoded_desc;
if (!desc) {
VPrintf(2, "Decoding unknown ioctl 0x%lx\n", request);
if (!ioctl_decode(request, &decoded_desc))
Printf("WARNING: failed decoding unknown ioctl 0x%lx\n", request);
else
desc = &decoded_desc;
}
if (desc) ioctl_common_pre(ctx, desc, d, request, arg);
int res = REAL(ioctl)(d, request, arg);
// FIXME: some ioctls have different return values for success and failure.
if (desc && res != -1) ioctl_common_post(ctx, desc, res, d, request, arg);
return res;
}
#define INIT_IOCTL \
ioctl_init(); \
COMMON_INTERCEPT_FUNCTION(ioctl);
#else
#define INIT_IOCTL
#endif
#if SANITIZER_POSIX
UNUSED static void unpoison_passwd(void *ctx, __sanitizer_passwd *pwd) {
if (pwd) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd, sizeof(*pwd));
if (pwd->pw_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_name,
internal_strlen(pwd->pw_name) + 1);
if (pwd->pw_passwd)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_passwd,
internal_strlen(pwd->pw_passwd) + 1);
#if !SANITIZER_ANDROID
if (pwd->pw_gecos)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_gecos,
internal_strlen(pwd->pw_gecos) + 1);
#endif
#if SANITIZER_APPLE || SANITIZER_FREEBSD || SANITIZER_NETBSD
if (pwd->pw_class)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_class,
internal_strlen(pwd->pw_class) + 1);
#endif
if (pwd->pw_dir)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_dir,
internal_strlen(pwd->pw_dir) + 1);
if (pwd->pw_shell)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_shell,
internal_strlen(pwd->pw_shell) + 1);
}
}
UNUSED static void unpoison_group(void *ctx, __sanitizer_group *grp) {
if (grp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp, sizeof(*grp));
if (grp->gr_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp->gr_name,
internal_strlen(grp->gr_name) + 1);
if (grp->gr_passwd)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp->gr_passwd,
internal_strlen(grp->gr_passwd) + 1);
char **p = grp->gr_mem;
for (; *p; ++p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, internal_strlen(*p) + 1);
}
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp->gr_mem,
(p - grp->gr_mem + 1) * sizeof(*p));
}
}
#endif // SANITIZER_POSIX
#if SANITIZER_INTERCEPT_GETPWNAM_AND_FRIENDS
INTERCEPTOR(__sanitizer_passwd *, getpwnam, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwnam, name);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
__sanitizer_passwd *res = REAL(getpwnam)(name);
unpoison_passwd(ctx, res);
return res;
}
INTERCEPTOR(__sanitizer_passwd *, getpwuid, u32 uid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwuid, uid);
__sanitizer_passwd *res = REAL(getpwuid)(uid);
unpoison_passwd(ctx, res);
return res;
}
INTERCEPTOR(__sanitizer_group *, getgrnam, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrnam, name);
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
__sanitizer_group *res = REAL(getgrnam)(name);
unpoison_group(ctx, res);
return res;
}
INTERCEPTOR(__sanitizer_group *, getgrgid, u32 gid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrgid, gid);
__sanitizer_group *res = REAL(getgrgid)(gid);
unpoison_group(ctx, res);
return res;
}
#define INIT_GETPWNAM_AND_FRIENDS \
COMMON_INTERCEPT_FUNCTION(getpwnam); \
COMMON_INTERCEPT_FUNCTION(getpwuid); \
COMMON_INTERCEPT_FUNCTION(getgrnam); \
COMMON_INTERCEPT_FUNCTION(getgrgid);
#else
#define INIT_GETPWNAM_AND_FRIENDS
#endif
#if SANITIZER_INTERCEPT_GETPWNAM_R_AND_FRIENDS
INTERCEPTOR(int, getpwnam_r, const char *name, __sanitizer_passwd *pwd,
char *buf, SIZE_T buflen, __sanitizer_passwd **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwnam_r, name, pwd, buf, buflen, result);
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getpwnam_r)(name, pwd, buf, buflen, result);
if (!res && result)
unpoison_passwd(ctx, *result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
INTERCEPTOR(int, getpwuid_r, u32 uid, __sanitizer_passwd *pwd, char *buf,
SIZE_T buflen, __sanitizer_passwd **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwuid_r, uid, pwd, buf, buflen, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getpwuid_r)(uid, pwd, buf, buflen, result);
if (!res && result)
unpoison_passwd(ctx, *result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
INTERCEPTOR(int, getgrnam_r, const char *name, __sanitizer_group *grp,
char *buf, SIZE_T buflen, __sanitizer_group **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrnam_r, name, grp, buf, buflen, result);
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getgrnam_r)(name, grp, buf, buflen, result);
if (!res && result)
unpoison_group(ctx, *result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
INTERCEPTOR(int, getgrgid_r, u32 gid, __sanitizer_group *grp, char *buf,
SIZE_T buflen, __sanitizer_group **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrgid_r, gid, grp, buf, buflen, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getgrgid_r)(gid, grp, buf, buflen, result);
if (!res && result)
unpoison_group(ctx, *result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
#define INIT_GETPWNAM_R_AND_FRIENDS \
COMMON_INTERCEPT_FUNCTION(getpwnam_r); \
COMMON_INTERCEPT_FUNCTION(getpwuid_r); \
COMMON_INTERCEPT_FUNCTION(getgrnam_r); \
COMMON_INTERCEPT_FUNCTION(getgrgid_r);
#else
#define INIT_GETPWNAM_R_AND_FRIENDS
#endif
#if SANITIZER_INTERCEPT_GETPWENT
INTERCEPTOR(__sanitizer_passwd *, getpwent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwent, dummy);
__sanitizer_passwd *res = REAL(getpwent)(dummy);
unpoison_passwd(ctx, res);
return res;
}
INTERCEPTOR(__sanitizer_group *, getgrent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrent, dummy);
__sanitizer_group *res = REAL(getgrent)(dummy);
unpoison_group(ctx, res);
return res;
}
#define INIT_GETPWENT \
COMMON_INTERCEPT_FUNCTION(getpwent); \
COMMON_INTERCEPT_FUNCTION(getgrent);
#else
#define INIT_GETPWENT
#endif
#if SANITIZER_INTERCEPT_FGETPWENT
INTERCEPTOR(__sanitizer_passwd *, fgetpwent, void *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetpwent, fp);
__sanitizer_passwd *res = REAL(fgetpwent)(fp);
unpoison_passwd(ctx, res);
return res;
}
INTERCEPTOR(__sanitizer_group *, fgetgrent, void *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetgrent, fp);
__sanitizer_group *res = REAL(fgetgrent)(fp);
unpoison_group(ctx, res);
return res;
}
#define INIT_FGETPWENT \
COMMON_INTERCEPT_FUNCTION(fgetpwent); \
COMMON_INTERCEPT_FUNCTION(fgetgrent);
#else
#define INIT_FGETPWENT
#endif
#if SANITIZER_INTERCEPT_GETPWENT_R
INTERCEPTOR(int, getpwent_r, __sanitizer_passwd *pwbuf, char *buf,
SIZE_T buflen, __sanitizer_passwd **pwbufp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwent_r, pwbuf, buf, buflen, pwbufp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getpwent_r)(pwbuf, buf, buflen, pwbufp);
if (!res && pwbufp)
unpoison_passwd(ctx, *pwbufp);
if (pwbufp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwbufp, sizeof(*pwbufp));
return res;
}
INTERCEPTOR(int, getgrent_r, __sanitizer_group *pwbuf, char *buf, SIZE_T buflen,
__sanitizer_group **pwbufp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrent_r, pwbuf, buf, buflen, pwbufp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getgrent_r)(pwbuf, buf, buflen, pwbufp);
if (!res && pwbufp)
unpoison_group(ctx, *pwbufp);
if (pwbufp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwbufp, sizeof(*pwbufp));
return res;
}
#define INIT_GETPWENT_R \
COMMON_INTERCEPT_FUNCTION(getpwent_r); \
COMMON_INTERCEPT_FUNCTION(getgrent_r);
#else
#define INIT_GETPWENT_R
#endif
#if SANITIZER_INTERCEPT_FGETPWENT_R
INTERCEPTOR(int, fgetpwent_r, void *fp, __sanitizer_passwd *pwbuf, char *buf,
SIZE_T buflen, __sanitizer_passwd **pwbufp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetpwent_r, fp, pwbuf, buf, buflen, pwbufp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fgetpwent_r)(fp, pwbuf, buf, buflen, pwbufp);
if (!res && pwbufp)
unpoison_passwd(ctx, *pwbufp);
if (pwbufp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwbufp, sizeof(*pwbufp));
return res;
}
#define INIT_FGETPWENT_R \
COMMON_INTERCEPT_FUNCTION(fgetpwent_r);
#else
#define INIT_FGETPWENT_R
#endif
#if SANITIZER_INTERCEPT_FGETGRENT_R
INTERCEPTOR(int, fgetgrent_r, void *fp, __sanitizer_group *pwbuf, char *buf,
SIZE_T buflen, __sanitizer_group **pwbufp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetgrent_r, fp, pwbuf, buf, buflen, pwbufp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fgetgrent_r)(fp, pwbuf, buf, buflen, pwbufp);
if (!res && pwbufp)
unpoison_group(ctx, *pwbufp);
if (pwbufp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwbufp, sizeof(*pwbufp));
return res;
}
#define INIT_FGETGRENT_R \
COMMON_INTERCEPT_FUNCTION(fgetgrent_r);
#else
#define INIT_FGETGRENT_R
#endif
#if SANITIZER_INTERCEPT_SETPWENT
// The only thing these interceptors do is disable any nested interceptors.
// These functions may open NSS modules and call uninstrumented functions from
// them, and we don't want interceptors like strlen() to fire on those calls.
INTERCEPTOR(void, setpwent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setpwent, dummy);
REAL(setpwent)(dummy);
}
INTERCEPTOR(void, endpwent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, endpwent, dummy);
REAL(endpwent)(dummy);
}
INTERCEPTOR(void, setgrent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setgrent, dummy);
REAL(setgrent)(dummy);
}
INTERCEPTOR(void, endgrent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, endgrent, dummy);
REAL(endgrent)(dummy);
}
#define INIT_SETPWENT \
COMMON_INTERCEPT_FUNCTION(setpwent); \
COMMON_INTERCEPT_FUNCTION(endpwent); \
COMMON_INTERCEPT_FUNCTION(setgrent); \
COMMON_INTERCEPT_FUNCTION(endgrent);
#else
#define INIT_SETPWENT
#endif
#if SANITIZER_INTERCEPT_CLOCK_GETTIME
INTERCEPTOR(int, clock_getres, u32 clk_id, void *tp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, clock_getres, clk_id, tp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(clock_getres)(clk_id, tp);
if (!res && tp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tp, struct_timespec_sz);
}
return res;
}
INTERCEPTOR(int, clock_gettime, u32 clk_id, void *tp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, clock_gettime, clk_id, tp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(clock_gettime)(clk_id, tp);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tp, struct_timespec_sz);
}
return res;
}
#if SANITIZER_GLIBC
namespace __sanitizer {
extern "C" {
int real_clock_gettime(u32 clk_id, void *tp) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_clock_gettime(clk_id, tp);
return REAL(clock_gettime)(clk_id, tp);
}
} // extern "C"
} // namespace __sanitizer
#endif
INTERCEPTOR(int, clock_settime, u32 clk_id, const void *tp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, clock_settime, clk_id, tp);
COMMON_INTERCEPTOR_READ_RANGE(ctx, tp, struct_timespec_sz);
return REAL(clock_settime)(clk_id, tp);
}
#define INIT_CLOCK_GETTIME \
COMMON_INTERCEPT_FUNCTION(clock_getres); \
COMMON_INTERCEPT_FUNCTION(clock_gettime); \
COMMON_INTERCEPT_FUNCTION(clock_settime);
#else
#define INIT_CLOCK_GETTIME
#endif
#if SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID
INTERCEPTOR(int, clock_getcpuclockid, pid_t pid,
__sanitizer_clockid_t *clockid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, clock_getcpuclockid, pid, clockid);
int res = REAL(clock_getcpuclockid)(pid, clockid);
if (!res && clockid) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, clockid, sizeof *clockid);
}
return res;
}
INTERCEPTOR(int, pthread_getcpuclockid, uptr thread,
__sanitizer_clockid_t *clockid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_getcpuclockid, thread, clockid);
int res = REAL(pthread_getcpuclockid)(thread, clockid);
if (!res && clockid) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, clockid, sizeof *clockid);
}
return res;
}
#define INIT_CLOCK_GETCPUCLOCKID \
COMMON_INTERCEPT_FUNCTION(clock_getcpuclockid); \
COMMON_INTERCEPT_FUNCTION(pthread_getcpuclockid);
#else
#define INIT_CLOCK_GETCPUCLOCKID
#endif
#if SANITIZER_INTERCEPT_GETITIMER
INTERCEPTOR(int, getitimer, int which, void *curr_value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getitimer, which, curr_value);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getitimer)(which, curr_value);
if (!res && curr_value) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, curr_value, struct_itimerval_sz);
}
return res;
}
INTERCEPTOR(int, setitimer, int which, const void *new_value, void *old_value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setitimer, which, new_value, old_value);
if (new_value) {
// itimerval can contain padding that may be legitimately uninitialized
const struct __sanitizer_itimerval *nv =
(const struct __sanitizer_itimerval *)new_value;
COMMON_INTERCEPTOR_READ_RANGE(ctx, &nv->it_interval.tv_sec,
sizeof(__sanitizer_time_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &nv->it_interval.tv_usec,
sizeof(__sanitizer_suseconds_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &nv->it_value.tv_sec,
sizeof(__sanitizer_time_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &nv->it_value.tv_usec,
sizeof(__sanitizer_suseconds_t));
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(setitimer)(which, new_value, old_value);
if (!res && old_value) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, old_value, struct_itimerval_sz);
}
return res;
}
#define INIT_GETITIMER \
COMMON_INTERCEPT_FUNCTION(getitimer); \
COMMON_INTERCEPT_FUNCTION(setitimer);
#else
#define INIT_GETITIMER
#endif
#if SANITIZER_INTERCEPT_GLOB
static void unpoison_glob_t(void *ctx, __sanitizer_glob_t *pglob) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pglob, sizeof(*pglob));
// +1 for NULL pointer at the end.
if (pglob->gl_pathv)
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, pglob->gl_pathv, (pglob->gl_pathc + 1) * sizeof(*pglob->gl_pathv));
for (SIZE_T i = 0; i < pglob->gl_pathc; ++i) {
char *p = pglob->gl_pathv[i];
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, internal_strlen(p) + 1);
}
}
#if SANITIZER_SOLARIS
INTERCEPTOR(int, glob, const char *pattern, int flags,
int (*errfunc)(const char *epath, int eerrno),
__sanitizer_glob_t *pglob) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, glob, pattern, flags, errfunc, pglob);
COMMON_INTERCEPTOR_READ_STRING(ctx, pattern, 0);
int res = REAL(glob)(pattern, flags, errfunc, pglob);
if ((!res || res == glob_nomatch) && pglob) unpoison_glob_t(ctx, pglob);
return res;
}
#else
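// When the glob_altdirfunc flag (GLOB_ALTDIRFUNC) is set, the user-supplied
// gl_* callbacks are invoked from uninstrumented libc code, so the wrappers
// below unpoison the callback arguments before forwarding to the original
// callbacks saved in pglob_copy.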
static THREADLOCAL __sanitizer_glob_t *pglob_copy;
static void wrapped_gl_closedir(void *dir) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
pglob_copy->gl_closedir(dir);
}
static void *wrapped_gl_readdir(void *dir) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
return pglob_copy->gl_readdir(dir);
}
static void *wrapped_gl_opendir(const char *s) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, internal_strlen(s) + 1);
return pglob_copy->gl_opendir(s);
}
static int wrapped_gl_lstat(const char *s, void *st) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, internal_strlen(s) + 1);
return pglob_copy->gl_lstat(s, st);
}
static int wrapped_gl_stat(const char *s, void *st) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, internal_strlen(s) + 1);
return pglob_copy->gl_stat(s, st);
}
static const __sanitizer_glob_t kGlobCopy = {
0, 0, 0,
0, wrapped_gl_closedir, wrapped_gl_readdir,
wrapped_gl_opendir, wrapped_gl_lstat, wrapped_gl_stat};
INTERCEPTOR(int, glob, const char *pattern, int flags,
int (*errfunc)(const char *epath, int eerrno),
__sanitizer_glob_t *pglob) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, glob, pattern, flags, errfunc, pglob);
COMMON_INTERCEPTOR_READ_STRING(ctx, pattern, 0);
__sanitizer_glob_t glob_copy;
internal_memcpy(&glob_copy, &kGlobCopy, sizeof(glob_copy));
if (flags & glob_altdirfunc) {
Swap(pglob->gl_closedir, glob_copy.gl_closedir);
Swap(pglob->gl_readdir, glob_copy.gl_readdir);
Swap(pglob->gl_opendir, glob_copy.gl_opendir);
Swap(pglob->gl_lstat, glob_copy.gl_lstat);
Swap(pglob->gl_stat, glob_copy.gl_stat);
pglob_copy = &glob_copy;
}
int res = REAL(glob)(pattern, flags, errfunc, pglob);
if (flags & glob_altdirfunc) {
Swap(pglob->gl_closedir, glob_copy.gl_closedir);
Swap(pglob->gl_readdir, glob_copy.gl_readdir);
Swap(pglob->gl_opendir, glob_copy.gl_opendir);
Swap(pglob->gl_lstat, glob_copy.gl_lstat);
Swap(pglob->gl_stat, glob_copy.gl_stat);
}
pglob_copy = 0;
if ((!res || res == glob_nomatch) && pglob) unpoison_glob_t(ctx, pglob);
return res;
}
#endif // SANITIZER_SOLARIS
#define INIT_GLOB \
COMMON_INTERCEPT_FUNCTION(glob);
#else // SANITIZER_INTERCEPT_GLOB
#define INIT_GLOB
#endif // SANITIZER_INTERCEPT_GLOB
#if SANITIZER_INTERCEPT_GLOB64
INTERCEPTOR(int, glob64, const char *pattern, int flags,
int (*errfunc)(const char *epath, int eerrno),
__sanitizer_glob_t *pglob) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, glob64, pattern, flags, errfunc, pglob);
COMMON_INTERCEPTOR_READ_STRING(ctx, pattern, 0);
__sanitizer_glob_t glob_copy;
internal_memcpy(&glob_copy, &kGlobCopy, sizeof(glob_copy));
if (flags & glob_altdirfunc) {
Swap(pglob->gl_closedir, glob_copy.gl_closedir);
Swap(pglob->gl_readdir, glob_copy.gl_readdir);
Swap(pglob->gl_opendir, glob_copy.gl_opendir);
Swap(pglob->gl_lstat, glob_copy.gl_lstat);
Swap(pglob->gl_stat, glob_copy.gl_stat);
pglob_copy = &glob_copy;
}
int res = REAL(glob64)(pattern, flags, errfunc, pglob);
if (flags & glob_altdirfunc) {
Swap(pglob->gl_closedir, glob_copy.gl_closedir);
Swap(pglob->gl_readdir, glob_copy.gl_readdir);
Swap(pglob->gl_opendir, glob_copy.gl_opendir);
Swap(pglob->gl_lstat, glob_copy.gl_lstat);
Swap(pglob->gl_stat, glob_copy.gl_stat);
}
pglob_copy = 0;
if ((!res || res == glob_nomatch) && pglob) unpoison_glob_t(ctx, pglob);
return res;
}
#define INIT_GLOB64 \
COMMON_INTERCEPT_FUNCTION(glob64);
#else // SANITIZER_INTERCEPT_GLOB64
#define INIT_GLOB64
#endif // SANITIZER_INTERCEPT_GLOB64
#if SANITIZER_INTERCEPT___B64_TO
INTERCEPTOR(int, __b64_ntop, unsigned char const *src, SIZE_T srclength,
char *target, SIZE_T targsize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __b64_ntop, src, srclength, target, targsize);
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, srclength);
int res = REAL(__b64_ntop)(src, srclength, target, targsize);
if (res >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, target, res + 1);
return res;
}
INTERCEPTOR(int, __b64_pton, char const *src, char *target, SIZE_T targsize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __b64_pton, src, target, targsize);
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int res = REAL(__b64_pton)(src, target, targsize);
if (res >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, target, res);
return res;
}
#define INIT___B64_TO \
COMMON_INTERCEPT_FUNCTION(__b64_ntop); \
COMMON_INTERCEPT_FUNCTION(__b64_pton);
#else // SANITIZER_INTERCEPT___B64_TO
#define INIT___B64_TO
#endif // SANITIZER_INTERCEPT___B64_TO
#if SANITIZER_INTERCEPT___DN_EXPAND
# if __GLIBC_PREREQ(2, 34)
// Changed with https://sourceware.org/git/?p=glibc.git;h=640bbdf
# define DN_EXPAND_INTERCEPTOR_NAME dn_expand
# else
# define DN_EXPAND_INTERCEPTOR_NAME __dn_expand
# endif
INTERCEPTOR(int, DN_EXPAND_INTERCEPTOR_NAME, unsigned char const *base,
unsigned char const *end, unsigned char const *src, char *dest,
int space) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, DN_EXPAND_INTERCEPTOR_NAME, base, end, src,
dest, space);
// TODO: add a read check if a __dn_comp interceptor is added.
int res = REAL(DN_EXPAND_INTERCEPTOR_NAME)(base, end, src, dest, space);
if (res >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, internal_strlen(dest) + 1);
return res;
}
# define INIT___DN_EXPAND \
COMMON_INTERCEPT_FUNCTION(DN_EXPAND_INTERCEPTOR_NAME);
#else // SANITIZER_INTERCEPT___DN_EXPAND
# define INIT___DN_EXPAND
#endif // SANITIZER_INTERCEPT___DN_EXPAND
#if SANITIZER_INTERCEPT_POSIX_SPAWN
template <class RealSpawnPtr>
static int PosixSpawnImpl(void *ctx, RealSpawnPtr *real_posix_spawn, pid_t *pid,
const char *file_or_path, const void *file_actions,
const void *attrp, char *const argv[],
char *const envp[]) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, file_or_path,
internal_strlen(file_or_path) + 1);
if (argv) {
for (char *const *s = argv; ; ++s) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(*s));
if (!*s) break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1);
}
}
if (envp) {
for (char *const *s = envp; ; ++s) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(*s));
if (!*s) break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1);
}
}
int res =
real_posix_spawn(pid, file_or_path, file_actions, attrp, argv, envp);
if (res == 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pid, sizeof(*pid));
return res;
}
INTERCEPTOR(int, posix_spawn, pid_t *pid, const char *path,
const void *file_actions, const void *attrp, char *const argv[],
char *const envp[]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, posix_spawn, pid, path, file_actions, attrp,
argv, envp);
return PosixSpawnImpl(ctx, REAL(posix_spawn), pid, path, file_actions, attrp,
argv, envp);
}
INTERCEPTOR(int, posix_spawnp, pid_t *pid, const char *file,
const void *file_actions, const void *attrp, char *const argv[],
char *const envp[]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, posix_spawnp, pid, file, file_actions, attrp,
argv, envp);
return PosixSpawnImpl(ctx, REAL(posix_spawnp), pid, file, file_actions, attrp,
argv, envp);
}
# define INIT_POSIX_SPAWN \
COMMON_INTERCEPT_FUNCTION(posix_spawn); \
COMMON_INTERCEPT_FUNCTION(posix_spawnp);
#else // SANITIZER_INTERCEPT_POSIX_SPAWN
# define INIT_POSIX_SPAWN
#endif // SANITIZER_INTERCEPT_POSIX_SPAWN
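// The argv/envp loops in PosixSpawnImpl above walk NULL-terminated pointer
// vectors: each pointer cell is checked as a read (including the final NULL
// cell that terminates the loop), and each non-NULL cell's C string is
// checked up to and including its NUL. pid is marked written only on
// success, matching posix_spawn's contract of returning 0 rather than
// setting errno.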
#if SANITIZER_INTERCEPT_WAIT
// According to sys/wait.h, wait(), waitid(), waitpid() may have symbol version
// suffixes on Darwin. See the declaration of INTERCEPTOR_WITH_SUFFIX for
// details.
INTERCEPTOR_WITH_SUFFIX(int, wait, int *status) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wait, status);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(wait)(status);
if (res != -1 && status)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
return res;
}
// On FreeBSD id_t is always 64-bit wide.
#if SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32)
INTERCEPTOR_WITH_SUFFIX(int, waitid, int idtype, long long id, void *infop,
int options) {
#else
INTERCEPTOR_WITH_SUFFIX(int, waitid, int idtype, int id, void *infop,
int options) {
#endif
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, waitid, idtype, id, infop, options);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(waitid)(idtype, id, infop, options);
if (res != -1 && infop)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, infop, siginfo_t_sz);
return res;
}
INTERCEPTOR_WITH_SUFFIX(int, waitpid, int pid, int *status, int options) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, waitpid, pid, status, options);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(waitpid)(pid, status, options);
if (res != -1 && status)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
return res;
}
INTERCEPTOR(int, wait3, int *status, int options, void *rusage) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wait3, status, options, rusage);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(wait3)(status, options, rusage);
if (res != -1) {
if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
if (rusage) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rusage, struct_rusage_sz);
}
return res;
}
#if SANITIZER_ANDROID
INTERCEPTOR(int, __wait4, int pid, int *status, int options, void *rusage) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __wait4, pid, status, options, rusage);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(__wait4)(pid, status, options, rusage);
if (res != -1) {
if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
if (rusage) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rusage, struct_rusage_sz);
}
return res;
}
#define INIT_WAIT4 COMMON_INTERCEPT_FUNCTION(__wait4);
#else
INTERCEPTOR(int, wait4, int pid, int *status, int options, void *rusage) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wait4, pid, status, options, rusage);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(wait4)(pid, status, options, rusage);
if (res != -1) {
if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
if (rusage) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rusage, struct_rusage_sz);
}
return res;
}
#define INIT_WAIT4 COMMON_INTERCEPT_FUNCTION(wait4);
#endif // SANITIZER_ANDROID
#define INIT_WAIT \
COMMON_INTERCEPT_FUNCTION(wait); \
COMMON_INTERCEPT_FUNCTION(waitid); \
COMMON_INTERCEPT_FUNCTION(waitpid); \
COMMON_INTERCEPT_FUNCTION(wait3);
#else
#define INIT_WAIT
#define INIT_WAIT4
#endif
#if SANITIZER_INTERCEPT_INET
INTERCEPTOR(char *, inet_ntop, int af, const void *src, char *dst, u32 size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, inet_ntop, af, src, dst, size);
uptr sz = __sanitizer_in_addr_sz(af);
if (sz) COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sz);
// FIXME: figure out read size based on the address family.
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(inet_ntop)(af, src, dst, size);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(int, inet_pton, int af, const char *src, void *dst) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, inet_pton, af, src, dst);
COMMON_INTERCEPTOR_READ_STRING(ctx, src, 0);
// FIXME: figure out read size based on the address family.
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(inet_pton)(af, src, dst);
if (res == 1) {
uptr sz = __sanitizer_in_addr_sz(af);
if (sz) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sz);
}
return res;
}
#define INIT_INET \
COMMON_INTERCEPT_FUNCTION(inet_ntop); \
COMMON_INTERCEPT_FUNCTION(inet_pton);
#else
#define INIT_INET
#endif
#if SANITIZER_INTERCEPT_INET
INTERCEPTOR(int, inet_aton, const char *cp, void *dst) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, inet_aton, cp, dst);
if (cp) COMMON_INTERCEPTOR_READ_RANGE(ctx, cp, internal_strlen(cp) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(inet_aton)(cp, dst);
if (res != 0) {
uptr sz = __sanitizer_in_addr_sz(af_inet);
if (sz) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sz);
}
return res;
}
#define INIT_INET_ATON COMMON_INTERCEPT_FUNCTION(inet_aton);
#else
#define INIT_INET_ATON
#endif
#if SANITIZER_INTERCEPT_PTHREAD_GETSCHEDPARAM
INTERCEPTOR(int, pthread_getschedparam, uptr thread, int *policy, int *param) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_getschedparam, thread, policy, param);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(pthread_getschedparam)(thread, policy, param);
if (res == 0) {
if (policy) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, policy, sizeof(*policy));
if (param) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, param, sizeof(*param));
}
return res;
}
#define INIT_PTHREAD_GETSCHEDPARAM \
COMMON_INTERCEPT_FUNCTION(pthread_getschedparam);
#else
#define INIT_PTHREAD_GETSCHEDPARAM
#endif
#if SANITIZER_INTERCEPT_GETADDRINFO
INTERCEPTOR(int, getaddrinfo, char *node, char *service,
struct __sanitizer_addrinfo *hints,
struct __sanitizer_addrinfo **out) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getaddrinfo, node, service, hints, out);
if (node) COMMON_INTERCEPTOR_READ_RANGE(ctx, node, internal_strlen(node) + 1);
if (service)
COMMON_INTERCEPTOR_READ_RANGE(ctx, service, internal_strlen(service) + 1);
if (hints)
COMMON_INTERCEPTOR_READ_RANGE(ctx, hints, sizeof(__sanitizer_addrinfo));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getaddrinfo)(node, service, hints, out);
if (res == 0 && out) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, out, sizeof(*out));
struct __sanitizer_addrinfo *p = *out;
while (p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
if (p->ai_addr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ai_addr, p->ai_addrlen);
if (p->ai_canonname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ai_canonname,
internal_strlen(p->ai_canonname) + 1);
p = p->ai_next;
}
}
return res;
}
#define INIT_GETADDRINFO COMMON_INTERCEPT_FUNCTION(getaddrinfo);
#else
#define INIT_GETADDRINFO
#endif
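// On success, getaddrinfo() above walks the libc-allocated result list via
// ai_next and unpoisons each node, its variable-sized ai_addr (ai_addrlen
// bytes), and the optional ai_canonname string; none of these writes would
// otherwise be visible to the sanitizer.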
#if SANITIZER_INTERCEPT_GETNAMEINFO
INTERCEPTOR(int, getnameinfo, void *sockaddr, unsigned salen, char *host,
unsigned hostlen, char *serv, unsigned servlen, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getnameinfo, sockaddr, salen, host, hostlen,
serv, servlen, flags);
// FIXME: consider adding READ_RANGE(sockaddr, salen); the padding in
// in_addr may make this too noisy.
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res =
REAL(getnameinfo)(sockaddr, salen, host, hostlen, serv, servlen, flags);
if (res == 0) {
if (host && hostlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, host, internal_strlen(host) + 1);
if (serv && servlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, serv, internal_strlen(serv) + 1);
}
return res;
}
#define INIT_GETNAMEINFO COMMON_INTERCEPT_FUNCTION(getnameinfo);
#else
#define INIT_GETNAMEINFO
#endif
#if SANITIZER_INTERCEPT_GETSOCKNAME
INTERCEPTOR(int, getsockname, int sock_fd, void *addr, unsigned *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getsockname, sock_fd, addr, addrlen);
unsigned addr_sz = 0;  // Set only when addrlen is non-null below.
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addr_sz = *addrlen;
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getsockname)(sock_fd, addr, addrlen);
if (!res && addr && addrlen) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen));
}
return res;
}
#define INIT_GETSOCKNAME COMMON_INTERCEPT_FUNCTION(getsockname);
#else
#define INIT_GETSOCKNAME
#endif
#if SANITIZER_INTERCEPT_GETHOSTBYNAME || SANITIZER_INTERCEPT_GETHOSTBYNAME_R
static void write_hostent(void *ctx, struct __sanitizer_hostent *h) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h, sizeof(__sanitizer_hostent));
if (h->h_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h->h_name, internal_strlen(h->h_name) + 1);
char **p = h->h_aliases;
while (*p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, internal_strlen(*p) + 1);
++p;
}
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, h->h_aliases, (p - h->h_aliases + 1) * sizeof(*h->h_aliases));
p = h->h_addr_list;
while (*p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, h->h_length);
++p;
}
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, h->h_addr_list, (p - h->h_addr_list + 1) * sizeof(*h->h_addr_list));
}
#endif
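// write_hostent() above unpoisons a libc-owned hostent: h_aliases and
// h_addr_list are NULL-terminated pointer arrays, so the ranges span
// (p - base + 1) elements to cover the terminating NULL slot as well. Each
// alias is a C string, while each address entry is exactly h_length raw
// bytes (4 for IPv4, 16 for IPv6).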
#if SANITIZER_INTERCEPT_GETHOSTBYNAME
INTERCEPTOR(struct __sanitizer_hostent *, gethostbyname, char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname, name);
struct __sanitizer_hostent *res = REAL(gethostbyname)(name);
if (res) write_hostent(ctx, res);
return res;
}
INTERCEPTOR(struct __sanitizer_hostent *, gethostbyaddr, void *addr, int len,
int type) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyaddr, addr, len, type);
COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, len);
struct __sanitizer_hostent *res = REAL(gethostbyaddr)(addr, len, type);
if (res) write_hostent(ctx, res);
return res;
}
INTERCEPTOR(struct __sanitizer_hostent *, gethostent, int fake) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostent, fake);
struct __sanitizer_hostent *res = REAL(gethostent)(fake);
if (res) write_hostent(ctx, res);
return res;
}
#define INIT_GETHOSTBYNAME \
COMMON_INTERCEPT_FUNCTION(gethostent); \
COMMON_INTERCEPT_FUNCTION(gethostbyaddr); \
COMMON_INTERCEPT_FUNCTION(gethostbyname);
#else
#define INIT_GETHOSTBYNAME
#endif // SANITIZER_INTERCEPT_GETHOSTBYNAME
#if SANITIZER_INTERCEPT_GETHOSTBYNAME2
INTERCEPTOR(struct __sanitizer_hostent *, gethostbyname2, char *name, int af) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname2, name, af);
struct __sanitizer_hostent *res = REAL(gethostbyname2)(name, af);
if (res) write_hostent(ctx, res);
return res;
}
#define INIT_GETHOSTBYNAME2 COMMON_INTERCEPT_FUNCTION(gethostbyname2);
#else
#define INIT_GETHOSTBYNAME2
#endif // SANITIZER_INTERCEPT_GETHOSTBYNAME2
#if SANITIZER_INTERCEPT_GETHOSTBYNAME_R
INTERCEPTOR(int, gethostbyname_r, char *name, struct __sanitizer_hostent *ret,
char *buf, SIZE_T buflen, __sanitizer_hostent **result,
int *h_errnop) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname_r, name, ret, buf, buflen, result,
h_errnop);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(gethostbyname_r)(name, ret, buf, buflen, result, h_errnop);
if (result) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (res == 0 && *result) write_hostent(ctx, *result);
}
if (h_errnop)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
return res;
}
#define INIT_GETHOSTBYNAME_R COMMON_INTERCEPT_FUNCTION(gethostbyname_r);
#else
#define INIT_GETHOSTBYNAME_R
#endif
#if SANITIZER_INTERCEPT_GETHOSTENT_R
INTERCEPTOR(int, gethostent_r, struct __sanitizer_hostent *ret, char *buf,
SIZE_T buflen, __sanitizer_hostent **result, int *h_errnop) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostent_r, ret, buf, buflen, result,
h_errnop);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(gethostent_r)(ret, buf, buflen, result, h_errnop);
if (result) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (res == 0 && *result) write_hostent(ctx, *result);
}
if (h_errnop)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
return res;
}
#define INIT_GETHOSTENT_R \
COMMON_INTERCEPT_FUNCTION(gethostent_r);
#else
#define INIT_GETHOSTENT_R
#endif
#if SANITIZER_INTERCEPT_GETHOSTBYADDR_R
INTERCEPTOR(int, gethostbyaddr_r, void *addr, int len, int type,
struct __sanitizer_hostent *ret, char *buf, SIZE_T buflen,
__sanitizer_hostent **result, int *h_errnop) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyaddr_r, addr, len, type, ret, buf,
buflen, result, h_errnop);
COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, len);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(gethostbyaddr_r)(addr, len, type, ret, buf, buflen, result,
h_errnop);
if (result) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (res == 0 && *result) write_hostent(ctx, *result);
}
if (h_errnop)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
return res;
}
#define INIT_GETHOSTBYADDR_R \
COMMON_INTERCEPT_FUNCTION(gethostbyaddr_r);
#else
#define INIT_GETHOSTBYADDR_R
#endif
#if SANITIZER_INTERCEPT_GETHOSTBYNAME2_R
INTERCEPTOR(int, gethostbyname2_r, char *name, int af,
struct __sanitizer_hostent *ret, char *buf, SIZE_T buflen,
__sanitizer_hostent **result, int *h_errnop) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname2_r, name, af, ret, buf, buflen,
result, h_errnop);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res =
REAL(gethostbyname2_r)(name, af, ret, buf, buflen, result, h_errnop);
if (result) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (res == 0 && *result) write_hostent(ctx, *result);
}
if (h_errnop)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
return res;
}
#define INIT_GETHOSTBYNAME2_R \
COMMON_INTERCEPT_FUNCTION(gethostbyname2_r);
#else
#define INIT_GETHOSTBYNAME2_R
#endif
#if SANITIZER_INTERCEPT_GETSOCKOPT
INTERCEPTOR(int, getsockopt, int sockfd, int level, int optname, void *optval,
int *optlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getsockopt, sockfd, level, optname, optval,
optlen);
if (optlen) COMMON_INTERCEPTOR_READ_RANGE(ctx, optlen, sizeof(*optlen));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getsockopt)(sockfd, level, optname, optval, optlen);
if (res == 0)
if (optval && optlen) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, optval, *optlen);
return res;
}
#define INIT_GETSOCKOPT COMMON_INTERCEPT_FUNCTION(getsockopt);
#else
#define INIT_GETSOCKOPT
#endif
#if SANITIZER_INTERCEPT_ACCEPT
INTERCEPTOR(int, accept, int fd, void *addr, unsigned *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, accept, fd, addr, addrlen);
unsigned addrlen0 = 0;
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addrlen0 = *addrlen;
}
int fd2 = REAL(accept)(fd, addr, addrlen);
if (fd2 >= 0) {
if (fd >= 0) COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, fd2);
if (addr && addrlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(*addrlen, addrlen0));
}
return fd2;
}
#define INIT_ACCEPT COMMON_INTERCEPT_FUNCTION(accept);
#else
#define INIT_ACCEPT
#endif
#if SANITIZER_INTERCEPT_ACCEPT4
INTERCEPTOR(int, accept4, int fd, void *addr, unsigned *addrlen, int f) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, accept4, fd, addr, addrlen, f);
unsigned addrlen0 = 0;
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addrlen0 = *addrlen;
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int fd2 = REAL(accept4)(fd, addr, addrlen, f);
if (fd2 >= 0) {
if (fd >= 0) COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, fd2);
if (addr && addrlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(*addrlen, addrlen0));
}
return fd2;
}
#define INIT_ACCEPT4 COMMON_INTERCEPT_FUNCTION(accept4);
#else
#define INIT_ACCEPT4
#endif
#if SANITIZER_INTERCEPT_PACCEPT
INTERCEPTOR(int, paccept, int fd, void *addr, unsigned *addrlen,
__sanitizer_sigset_t *set, int f) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, paccept, fd, addr, addrlen, set, f);
unsigned addrlen0 = 0;
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addrlen0 = *addrlen;
}
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
int fd2 = REAL(paccept)(fd, addr, addrlen, set, f);
if (fd2 >= 0) {
if (fd >= 0) COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, fd2);
if (addr && addrlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(*addrlen, addrlen0));
}
return fd2;
}
#define INIT_PACCEPT COMMON_INTERCEPT_FUNCTION(paccept);
#else
#define INIT_PACCEPT
#endif
#if SANITIZER_INTERCEPT_MODF
INTERCEPTOR(double, modf, double x, double *iptr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, modf, x, iptr);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
double res = REAL(modf)(x, iptr);
if (iptr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iptr, sizeof(*iptr));
}
return res;
}
INTERCEPTOR(float, modff, float x, float *iptr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, modff, x, iptr);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
float res = REAL(modff)(x, iptr);
if (iptr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iptr, sizeof(*iptr));
}
return res;
}
INTERCEPTOR(long double, modfl, long double x, long double *iptr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, modfl, x, iptr);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
long double res = REAL(modfl)(x, iptr);
if (iptr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iptr, sizeof(*iptr));
}
return res;
}
#define INIT_MODF \
COMMON_INTERCEPT_FUNCTION(modf); \
COMMON_INTERCEPT_FUNCTION(modff); \
COMMON_INTERCEPT_FUNCTION_LDBL(modfl);
#else
#define INIT_MODF
#endif
#if SANITIZER_INTERCEPT_RECVMSG || SANITIZER_INTERCEPT_RECVMMSG
static void write_msghdr(void *ctx, struct __sanitizer_msghdr *msg,
SSIZE_T maxlen) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msg, sizeof(*msg));
if (msg->msg_name && msg->msg_namelen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msg->msg_name, msg->msg_namelen);
if (msg->msg_iov && msg->msg_iovlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msg->msg_iov,
sizeof(*msg->msg_iov) * msg->msg_iovlen);
write_iovec(ctx, msg->msg_iov, msg->msg_iovlen, maxlen);
if (msg->msg_control && msg->msg_controllen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msg->msg_control, msg->msg_controllen);
}
#endif
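// write_msghdr() above receives the byte count the kernel actually
// delivered (recvmsg's return value) as maxlen, so write_iovec() unpoisons
// only the filled prefix of the scatter/gather buffers rather than each
// element's full iov_len capacity.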
#if SANITIZER_INTERCEPT_RECVMSG
INTERCEPTOR(SSIZE_T, recvmsg, int fd, struct __sanitizer_msghdr *msg,
int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, recvmsg, fd, msg, flags);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(recvmsg)(fd, msg, flags);
if (res >= 0) {
if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
if (msg) {
write_msghdr(ctx, msg, res);
COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, msg);
}
}
return res;
}
#define INIT_RECVMSG COMMON_INTERCEPT_FUNCTION(recvmsg);
#else
#define INIT_RECVMSG
#endif
#if SANITIZER_INTERCEPT_RECVMMSG
INTERCEPTOR(int, recvmmsg, int fd, struct __sanitizer_mmsghdr *msgvec,
unsigned int vlen, int flags, void *timeout) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, recvmmsg, fd, msgvec, vlen, flags, timeout);
if (timeout) COMMON_INTERCEPTOR_READ_RANGE(ctx, timeout, struct_timespec_sz);
int res = REAL(recvmmsg)(fd, msgvec, vlen, flags, timeout);
if (res >= 0) {
if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
for (int i = 0; i < res; ++i) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &msgvec[i].msg_len,
sizeof(msgvec[i].msg_len));
write_msghdr(ctx, &msgvec[i].msg_hdr, msgvec[i].msg_len);
COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, &msgvec[i].msg_hdr);
}
}
return res;
}
#define INIT_RECVMMSG COMMON_INTERCEPT_FUNCTION(recvmmsg);
#else
#define INIT_RECVMMSG
#endif
#if SANITIZER_INTERCEPT_SENDMSG || SANITIZER_INTERCEPT_SENDMMSG
static void read_msghdr_control(void *ctx, void *control, uptr controllen) {
const unsigned kCmsgDataOffset =
RoundUpTo(sizeof(__sanitizer_cmsghdr), sizeof(uptr));
char *p = (char *)control;
char *const control_end = p + controllen;
while (true) {
if (p + sizeof(__sanitizer_cmsghdr) > control_end) break;
__sanitizer_cmsghdr *cmsg = (__sanitizer_cmsghdr *)p;
COMMON_INTERCEPTOR_READ_RANGE(ctx, &cmsg->cmsg_len, sizeof(cmsg->cmsg_len));
if (p + RoundUpTo(cmsg->cmsg_len, sizeof(uptr)) > control_end) break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, &cmsg->cmsg_level,
sizeof(cmsg->cmsg_level));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &cmsg->cmsg_type,
sizeof(cmsg->cmsg_type));
if (cmsg->cmsg_len > kCmsgDataOffset) {
char *data = p + kCmsgDataOffset;
unsigned data_len = cmsg->cmsg_len - kCmsgDataOffset;
if (data_len > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, data, data_len);
}
p += RoundUpTo(cmsg->cmsg_len, sizeof(uptr));
}
}
static void read_msghdr(void *ctx, struct __sanitizer_msghdr *msg,
SSIZE_T maxlen) {
#define R(f) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, &msg->msg_##f, sizeof(msg->msg_##f))
R(name);
R(namelen);
R(iov);
R(iovlen);
R(control);
R(controllen);
R(flags);
#undef R
if (msg->msg_name && msg->msg_namelen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, msg->msg_name, msg->msg_namelen);
if (msg->msg_iov && msg->msg_iovlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, msg->msg_iov,
sizeof(*msg->msg_iov) * msg->msg_iovlen);
read_iovec(ctx, msg->msg_iov, msg->msg_iovlen, maxlen);
if (msg->msg_control && msg->msg_controllen)
read_msghdr_control(ctx, msg->msg_control, msg->msg_controllen);
}
#endif
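// read_msghdr_control() above follows the same layout contract as
// CMSG_FIRSTHDR/CMSG_NXTHDR: control data is a sequence of
// __sanitizer_cmsghdr records, each padded to word (uptr) alignment, so the
// cursor advances by RoundUpTo(cmsg_len, sizeof(uptr)) and stops as soon as
// a header or a whole record would overrun control_end. Header fields and
// the cmsg_len - kCmsgDataOffset payload bytes are checked as separate
// reads, which pinpoints which field of a bad message is uninitialized.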
#if SANITIZER_INTERCEPT_SENDMSG
INTERCEPTOR(SSIZE_T, sendmsg, int fd, struct __sanitizer_msghdr *msg,
int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sendmsg, fd, msg, flags);
if (fd >= 0) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
}
SSIZE_T res = REAL(sendmsg)(fd, msg, flags);
if (common_flags()->intercept_send && res >= 0 && msg)
read_msghdr(ctx, msg, res);
return res;
}
#define INIT_SENDMSG COMMON_INTERCEPT_FUNCTION(sendmsg);
#else
#define INIT_SENDMSG
#endif
#if SANITIZER_INTERCEPT_SENDMMSG
INTERCEPTOR(int, sendmmsg, int fd, struct __sanitizer_mmsghdr *msgvec,
unsigned vlen, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sendmmsg, fd, msgvec, vlen, flags);
if (fd >= 0) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
}
int res = REAL(sendmmsg)(fd, msgvec, vlen, flags);
if (res >= 0 && msgvec) {
for (int i = 0; i < res; ++i) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &msgvec[i].msg_len,
sizeof(msgvec[i].msg_len));
if (common_flags()->intercept_send)
read_msghdr(ctx, &msgvec[i].msg_hdr, msgvec[i].msg_len);
}
}
return res;
}
#define INIT_SENDMMSG COMMON_INTERCEPT_FUNCTION(sendmmsg);
#else
#define INIT_SENDMMSG
#endif
#if SANITIZER_INTERCEPT_SYSMSG
INTERCEPTOR(int, msgsnd, int msqid, const void *msgp, SIZE_T msgsz,
int msgflg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, msgsnd, msqid, msgp, msgsz, msgflg);
if (msgp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, msgp, sizeof(long) + msgsz);
int res = REAL(msgsnd)(msqid, msgp, msgsz, msgflg);
return res;
}
INTERCEPTOR(SSIZE_T, msgrcv, int msqid, void *msgp, SIZE_T msgsz,
long msgtyp, int msgflg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, msgrcv, msqid, msgp, msgsz, msgtyp, msgflg);
SSIZE_T len = REAL(msgrcv)(msqid, msgp, msgsz, msgtyp, msgflg);
if (len != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msgp, sizeof(long) + len);
return len;
}
#define INIT_SYSMSG \
COMMON_INTERCEPT_FUNCTION(msgsnd); \
COMMON_INTERCEPT_FUNCTION(msgrcv);
#else
#define INIT_SYSMSG
#endif
#if SANITIZER_INTERCEPT_GETPEERNAME
INTERCEPTOR(int, getpeername, int sockfd, void *addr, unsigned *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpeername, sockfd, addr, addrlen);
unsigned addr_sz = 0;  // Set only when addrlen is non-null below.
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addr_sz = *addrlen;
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getpeername)(sockfd, addr, addrlen);
if (!res && addr && addrlen) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen));
}
return res;
}
#define INIT_GETPEERNAME COMMON_INTERCEPT_FUNCTION(getpeername);
#else
#define INIT_GETPEERNAME
#endif
#if SANITIZER_INTERCEPT_SYSINFO
INTERCEPTOR(int, sysinfo, void *info) {
void *ctx;
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
COMMON_INTERCEPTOR_ENTER(ctx, sysinfo, info);
int res = REAL(sysinfo)(info);
if (!res && info)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, info, struct_sysinfo_sz);
return res;
}
#define INIT_SYSINFO COMMON_INTERCEPT_FUNCTION(sysinfo);
#else
#define INIT_SYSINFO
#endif
#if SANITIZER_INTERCEPT_READDIR
INTERCEPTOR(__sanitizer_dirent *, opendir, const char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, opendir, path);
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
__sanitizer_dirent *res = REAL(opendir)(path);
if (res)
COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path);
return res;
}
INTERCEPTOR(__sanitizer_dirent *, readdir, void *dirp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readdir, dirp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_dirent *res = REAL(readdir)(dirp);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, res->d_reclen);
return res;
}
INTERCEPTOR(int, readdir_r, void *dirp, __sanitizer_dirent *entry,
__sanitizer_dirent **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readdir_r, dirp, entry, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(readdir_r)(dirp, entry, result);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (*result)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *result, (*result)->d_reclen);
}
return res;
}
#define INIT_READDIR \
COMMON_INTERCEPT_FUNCTION(opendir); \
COMMON_INTERCEPT_FUNCTION(readdir); \
COMMON_INTERCEPT_FUNCTION(readdir_r);
#else
#define INIT_READDIR
#endif
#if SANITIZER_INTERCEPT_READDIR64
INTERCEPTOR(__sanitizer_dirent64 *, readdir64, void *dirp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readdir64, dirp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_dirent64 *res = REAL(readdir64)(dirp);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, res->d_reclen);
return res;
}
INTERCEPTOR(int, readdir64_r, void *dirp, __sanitizer_dirent64 *entry,
__sanitizer_dirent64 **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readdir64_r, dirp, entry, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(readdir64_r)(dirp, entry, result);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (*result)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *result, (*result)->d_reclen);
}
return res;
}
#define INIT_READDIR64 \
COMMON_INTERCEPT_FUNCTION(readdir64); \
COMMON_INTERCEPT_FUNCTION(readdir64_r);
#else
#define INIT_READDIR64
#endif
#if SANITIZER_INTERCEPT_PTRACE
INTERCEPTOR(uptr, ptrace, int request, int pid, void *addr, void *data) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ptrace, request, pid, addr, data);
__sanitizer_iovec local_iovec;
if (data) {
if (request == ptrace_setregs) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_regs_struct_sz);
} else if (request == ptrace_setfpregs) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_fpregs_struct_sz);
} else if (request == ptrace_setfpxregs) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_fpxregs_struct_sz);
} else if (request == ptrace_setvfpregs) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_vfpregs_struct_sz);
} else if (request == ptrace_setsiginfo) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, siginfo_t_sz);
// Some kernels may zero iovec::iov_base on an invalid write access.
// In that case, copy the (possibly invalid) address here for later
// inspection.
} else if (request == ptrace_setregset || request == ptrace_getregset) {
__sanitizer_iovec *iovec = (__sanitizer_iovec*)data;
COMMON_INTERCEPTOR_READ_RANGE(ctx, iovec, sizeof(*iovec));
local_iovec = *iovec;
if (request == ptrace_setregset)
COMMON_INTERCEPTOR_READ_RANGE(ctx, iovec->iov_base, iovec->iov_len);
}
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
uptr res = REAL(ptrace)(request, pid, addr, data);
if (!res && data) {
// Note that PEEK* requests assign different meaning to the return value.
// This function does not handle them (nor does it need to).
if (request == ptrace_getregs) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_regs_struct_sz);
} else if (request == ptrace_getfpregs) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_fpregs_struct_sz);
} else if (request == ptrace_getfpxregs) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_fpxregs_struct_sz);
} else if (request == ptrace_getvfpregs) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_vfpregs_struct_sz);
} else if (request == ptrace_getsiginfo) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, siginfo_t_sz);
} else if (request == ptrace_geteventmsg) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, sizeof(unsigned long));
} else if (request == ptrace_getregset) {
__sanitizer_iovec *iovec = (__sanitizer_iovec*)data;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iovec, sizeof(*iovec));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, local_iovec.iov_base,
local_iovec.iov_len);
}
}
return res;
}
#define INIT_PTRACE COMMON_INTERCEPT_FUNCTION(ptrace);
#else
#define INIT_PTRACE
#endif
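// For PTRACE_GETREGSET/SETREGSET above, data points at an iovec that the
// kernel may modify (and, as noted, iov_base may even be zeroed on a bad
// write access), so local_iovec snapshots the original base/length pair
// before the call; the post-call unpoisoning then uses the snapshot rather
// than the possibly-altered user copy.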
#if SANITIZER_INTERCEPT_SETLOCALE
static void unpoison_ctype_arrays(void *ctx) {
#if SANITIZER_NETBSD
// These arrays contain 256 regular elements (the unsigned char range) plus 1 EOF entry.
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, _ctype_tab_, 257 * sizeof(short));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, _toupper_tab_, 257 * sizeof(short));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, _tolower_tab_, 257 * sizeof(short));
#endif
}
INTERCEPTOR(char *, setlocale, int category, char *locale) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setlocale, category, locale);
if (locale)
COMMON_INTERCEPTOR_READ_RANGE(ctx, locale, internal_strlen(locale) + 1);
char *res = REAL(setlocale)(category, locale);
if (res) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
unpoison_ctype_arrays(ctx);
}
return res;
}
#define INIT_SETLOCALE COMMON_INTERCEPT_FUNCTION(setlocale);
#else
#define INIT_SETLOCALE
#endif
#if SANITIZER_INTERCEPT_GETCWD
INTERCEPTOR(char *, getcwd, char *buf, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getcwd, buf, size);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(getcwd)(buf, size);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
#define INIT_GETCWD COMMON_INTERCEPT_FUNCTION(getcwd);
#else
#define INIT_GETCWD
#endif
#if SANITIZER_INTERCEPT_GET_CURRENT_DIR_NAME
INTERCEPTOR(char *, get_current_dir_name, int fake) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, get_current_dir_name, fake);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(get_current_dir_name)(fake);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
#define INIT_GET_CURRENT_DIR_NAME \
COMMON_INTERCEPT_FUNCTION(get_current_dir_name);
#else
#define INIT_GET_CURRENT_DIR_NAME
#endif
UNUSED static inline void FixRealStrtolEndptr(const char *nptr, char **endptr) {
CHECK(endptr);
if (nptr == *endptr) {
// No digits were found by the strtol call, so we need to determine the
// last symbol it accessed on our own. We find it by skipping leading
// blanks and an optional +/- sign.
while (IsSpace(*nptr)) nptr++;
if (*nptr == '+' || *nptr == '-') nptr++;
*endptr = const_cast<char *>(nptr);
}
CHECK(*endptr >= nptr);
}
UNUSED static inline void StrtolFixAndCheck(void *ctx, const char *nptr,
char **endptr, char *real_endptr, int base) {
if (endptr) {
*endptr = real_endptr;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, endptr, sizeof(*endptr));
}
// If base has an unsupported value, strtol can fail with EINVAL without
// reading any characters, so do the additional checks only if base is
// valid.
bool is_valid_base = (base == 0) || (2 <= base && base <= 36);
if (is_valid_base) {
FixRealStrtolEndptr(nptr, &real_endptr);
}
COMMON_INTERCEPTOR_READ_STRING(ctx, nptr, is_valid_base ?
(real_endptr - nptr) + 1 : 0);
}
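// Example of what FixRealStrtolEndptr() corrects for (illustrative input):
// after strtol("  +xyz", &end, 10) libc reports no digits and leaves end at
// the start of the string, yet it really did read the blanks and the sign.
// The fixup advances end past "  +" so COMMON_INTERCEPTOR_READ_STRING
// checks exactly the bytes the real call touched, no more and no fewer.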
#if SANITIZER_INTERCEPT_STRTOIMAX
INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *real_endptr;
INTMAX_T res = REAL(strtoimax)(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
return res;
}
INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtoumax, nptr, endptr, base);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *real_endptr;
UINTMAX_T res = REAL(strtoumax)(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
return res;
}
#define INIT_STRTOIMAX \
COMMON_INTERCEPT_FUNCTION(strtoimax); \
COMMON_INTERCEPT_FUNCTION(strtoumax);
#else
#define INIT_STRTOIMAX
#endif
#if SANITIZER_INTERCEPT_MBSTOWCS
INTERCEPTOR(SIZE_T, mbstowcs, wchar_t *dest, const char *src, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mbstowcs, dest, src, len);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(mbstowcs)(dest, src, len);
if (res != (SIZE_T)-1 && dest) {
SIZE_T write_cnt = res + (res < len);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt * sizeof(wchar_t));
}
return res;
}
INTERCEPTOR(SIZE_T, mbsrtowcs, wchar_t *dest, const char **src, SIZE_T len,
void *ps) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mbsrtowcs, dest, src, len, ps);
if (src) COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(mbsrtowcs)(dest, src, len, ps);
if (res != (SIZE_T)(-1) && dest && src) {
// This function, and several others, may or may not write the terminating
// \0 character. They write it iff they clear *src.
SIZE_T write_cnt = res + !*src;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt * sizeof(wchar_t));
}
return res;
}
#define INIT_MBSTOWCS \
COMMON_INTERCEPT_FUNCTION(mbstowcs); \
COMMON_INTERCEPT_FUNCTION(mbsrtowcs);
#else
#define INIT_MBSTOWCS
#endif
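// In the conversions above, write_cnt = res + (res < len) accounts for the
// terminating L'\0': mbstowcs() appends it only when conversion finishes
// before all len slots are used, i.e. exactly when res < len. For the
// restartable variants the terminator is written iff the whole input was
// consumed, which is exactly when they clear *src, hence res + !*src.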
#if SANITIZER_INTERCEPT_MBSNRTOWCS
INTERCEPTOR(SIZE_T, mbsnrtowcs, wchar_t *dest, const char **src, SIZE_T nms,
SIZE_T len, void *ps) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mbsnrtowcs, dest, src, nms, len, ps);
if (src) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
if (nms) COMMON_INTERCEPTOR_READ_RANGE(ctx, *src, nms);
}
if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(mbsnrtowcs)(dest, src, nms, len, ps);
if (res != (SIZE_T)(-1) && dest && src) {
SIZE_T write_cnt = res + !*src;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt * sizeof(wchar_t));
}
return res;
}
#define INIT_MBSNRTOWCS COMMON_INTERCEPT_FUNCTION(mbsnrtowcs);
#else
#define INIT_MBSNRTOWCS
#endif
#if SANITIZER_INTERCEPT_WCSTOMBS
INTERCEPTOR(SIZE_T, wcstombs, char *dest, const wchar_t *src, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcstombs, dest, src, len);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(wcstombs)(dest, src, len);
if (res != (SIZE_T)-1 && dest) {
SIZE_T write_cnt = res + (res < len);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt);
}
return res;
}
INTERCEPTOR(SIZE_T, wcsrtombs, char *dest, const wchar_t **src, SIZE_T len,
void *ps) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsrtombs, dest, src, len, ps);
if (src) COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(wcsrtombs)(dest, src, len, ps);
if (res != (SIZE_T)-1 && dest && src) {
SIZE_T write_cnt = res + !*src;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt);
}
return res;
}
#define INIT_WCSTOMBS \
COMMON_INTERCEPT_FUNCTION(wcstombs); \
COMMON_INTERCEPT_FUNCTION(wcsrtombs);
#else
#define INIT_WCSTOMBS
#endif
#if SANITIZER_INTERCEPT_WCSNRTOMBS
INTERCEPTOR(SIZE_T, wcsnrtombs, char *dest, const wchar_t **src, SIZE_T nms,
SIZE_T len, void *ps) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsnrtombs, dest, src, nms, len, ps);
if (src) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
if (nms) COMMON_INTERCEPTOR_READ_RANGE(ctx, *src, nms);
}
if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(wcsnrtombs)(dest, src, nms, len, ps);
if (res != ((SIZE_T)-1) && dest && src) {
SIZE_T write_cnt = res + !*src;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt);
}
return res;
}
#define INIT_WCSNRTOMBS COMMON_INTERCEPT_FUNCTION(wcsnrtombs);
#else
#define INIT_WCSNRTOMBS
#endif
#if SANITIZER_INTERCEPT_WCRTOMB
INTERCEPTOR(SIZE_T, wcrtomb, char *dest, wchar_t src, void *ps) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcrtomb, dest, src, ps);
if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
if (!dest)
return REAL(wcrtomb)(dest, src, ps);
char local_dest[32];
SIZE_T res = REAL(wcrtomb)(local_dest, src, ps);
if (res != ((SIZE_T)-1)) {
CHECK_LE(res, sizeof(local_dest));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, res);
REAL(memcpy)(dest, local_dest, res);
}
return res;
}
#define INIT_WCRTOMB COMMON_INTERCEPT_FUNCTION(wcrtomb);
#else
#define INIT_WCRTOMB
#endif
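// wcrtomb() above is routed through a local scratch buffer so that the
// real function never writes into user memory directly; only the res bytes
// it actually produced are then marked written and copied out. The 32-byte
// scratch comfortably exceeds MB_LEN_MAX on supported platforms, and
// CHECK_LE enforces that assumption at runtime.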
#if SANITIZER_INTERCEPT_WCTOMB
INTERCEPTOR(int, wctomb, char *dest, wchar_t src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wctomb, dest, src);
if (!dest)
return REAL(wctomb)(dest, src);
char local_dest[32];
int res = REAL(wctomb)(local_dest, src);
if (res != -1) {
CHECK_LE(res, sizeof(local_dest));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, res);
REAL(memcpy)(dest, local_dest, res);
}
return res;
}
#define INIT_WCTOMB COMMON_INTERCEPT_FUNCTION(wctomb);
#else
#define INIT_WCTOMB
#endif
#if SANITIZER_INTERCEPT_TCGETATTR
INTERCEPTOR(int, tcgetattr, int fd, void *termios_p) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tcgetattr, fd, termios_p);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(tcgetattr)(fd, termios_p);
if (!res && termios_p)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, termios_p, struct_termios_sz);
return res;
}
#define INIT_TCGETATTR COMMON_INTERCEPT_FUNCTION(tcgetattr);
#else
#define INIT_TCGETATTR
#endif
#if SANITIZER_INTERCEPT_REALPATH
INTERCEPTOR(char *, realpath, const char *path, char *resolved_path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, realpath, path, resolved_path);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// Work around a bug in glibc where dlsym(RTLD_NEXT, ...) returns the oldest
// version of a versioned symbol. For realpath(), this gives us something
// (called __old_realpath) that does not handle NULL in the second argument.
// Handle it as part of the interceptor.
char *allocated_path = nullptr;
if (!resolved_path)
allocated_path = resolved_path = (char *)WRAP(malloc)(path_max + 1);
char *res = REAL(realpath)(path, resolved_path);
if (allocated_path && !res)
WRAP(free)(allocated_path);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
# define INIT_REALPATH COMMON_INTERCEPT_FUNCTION(realpath);
#else
#define INIT_REALPATH
#endif
#if SANITIZER_INTERCEPT_CANONICALIZE_FILE_NAME
INTERCEPTOR(char *, canonicalize_file_name, const char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, canonicalize_file_name, path);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
char *res = REAL(canonicalize_file_name)(path);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
#define INIT_CANONICALIZE_FILE_NAME \
COMMON_INTERCEPT_FUNCTION(canonicalize_file_name);
#else
#define INIT_CANONICALIZE_FILE_NAME
#endif
#if SANITIZER_INTERCEPT_CONFSTR
INTERCEPTOR(SIZE_T, confstr, int name, char *buf, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, confstr, name, buf, len);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(confstr)(name, buf, len);
if (buf && res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res < len ? res : len);
return res;
}
#define INIT_CONFSTR COMMON_INTERCEPT_FUNCTION(confstr);
#else
#define INIT_CONFSTR
#endif
#if SANITIZER_INTERCEPT_SCHED_GETAFFINITY
INTERCEPTOR(int, sched_getaffinity, int pid, SIZE_T cpusetsize, void *mask) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sched_getaffinity, pid, cpusetsize, mask);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(sched_getaffinity)(pid, cpusetsize, mask);
if (mask && !res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mask, cpusetsize);
return res;
}
#define INIT_SCHED_GETAFFINITY COMMON_INTERCEPT_FUNCTION(sched_getaffinity);
#else
#define INIT_SCHED_GETAFFINITY
#endif
#if SANITIZER_INTERCEPT_SCHED_GETPARAM
INTERCEPTOR(int, sched_getparam, int pid, void *param) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sched_getparam, pid, param);
int res = REAL(sched_getparam)(pid, param);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, param, struct_sched_param_sz);
return res;
}
#define INIT_SCHED_GETPARAM COMMON_INTERCEPT_FUNCTION(sched_getparam);
#else
#define INIT_SCHED_GETPARAM
#endif
#if SANITIZER_INTERCEPT_STRERROR
INTERCEPTOR(char *, strerror, int errnum) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strerror, errnum);
COMMON_INTERCEPTOR_STRERROR();
char *res = REAL(strerror)(errnum);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_STRERROR COMMON_INTERCEPT_FUNCTION(strerror);
#else
#define INIT_STRERROR
#endif
#if SANITIZER_INTERCEPT_STRERROR_R
// There are two versions of strerror_r:
// * The POSIX version returns 0 on success and an error code on failure,
//   and writes the message to buf.
// * The GNU version returns a message pointer, which points either to buf
//   or to some static storage.
#if ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE) || \
SANITIZER_APPLE || SANITIZER_ANDROID || SANITIZER_NETBSD || \
SANITIZER_FREEBSD
// POSIX version. The spec is not clear on whether buf is null-terminated.
// At least on macOS, buf contents are valid even when the call fails.
INTERCEPTOR(int, strerror_r, int errnum, char *buf, SIZE_T buflen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strerror_r, errnum, buf, buflen);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(strerror_r)(errnum, buf, buflen);
SIZE_T sz = internal_strnlen(buf, buflen);
if (sz < buflen) ++sz;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sz);
return res;
}
#else
// GNU version.
INTERCEPTOR(char *, strerror_r, int errnum, char *buf, SIZE_T buflen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strerror_r, errnum, buf, buflen);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(strerror_r)(errnum, buf, buflen);
if (res == buf)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
else
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#endif // ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) &&
// !_GNU_SOURCE) || SANITIZER_APPLE || SANITIZER_ANDROID ||
// SANITIZER_NETBSD || SANITIZER_FREEBSD
#define INIT_STRERROR_R COMMON_INTERCEPT_FUNCTION(strerror_r);
#else
#define INIT_STRERROR_R
#endif
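// Which variant a build gets follows the usual libc rules; e.g. with glibc,
// compiling with -D_GNU_SOURCE selects the char *-returning GNU strerror_r,
// while _POSIX_C_SOURCE >= 200112L without _GNU_SOURCE selects the
// int-returning POSIX one, matching the #if above.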
#if SANITIZER_INTERCEPT_XPG_STRERROR_R
INTERCEPTOR(int, __xpg_strerror_r, int errnum, char *buf, SIZE_T buflen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __xpg_strerror_r, errnum, buf, buflen);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(__xpg_strerror_r)(errnum, buf, buflen);
// This version always returns a null-terminated string.
if (buf && buflen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
return res;
}
#define INIT_XPG_STRERROR_R COMMON_INTERCEPT_FUNCTION(__xpg_strerror_r);
#else
#define INIT_XPG_STRERROR_R
#endif
#if SANITIZER_INTERCEPT_SCANDIR
typedef int (*scandir_filter_f)(const struct __sanitizer_dirent *);
typedef int (*scandir_compar_f)(const struct __sanitizer_dirent **,
const struct __sanitizer_dirent **);
static THREADLOCAL scandir_filter_f scandir_filter;
static THREADLOCAL scandir_compar_f scandir_compar;
static int wrapped_scandir_filter(const struct __sanitizer_dirent *dir) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(dir, dir->d_reclen);
return scandir_filter(dir);
}
static int wrapped_scandir_compar(const struct __sanitizer_dirent **a,
const struct __sanitizer_dirent **b) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, sizeof(*a));
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*a, (*a)->d_reclen);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, sizeof(*b));
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*b, (*b)->d_reclen);
return scandir_compar(a, b);
}
INTERCEPTOR(int, scandir, char *dirp, __sanitizer_dirent ***namelist,
scandir_filter_f filter, scandir_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, scandir, dirp, namelist, filter, compar);
if (dirp) COMMON_INTERCEPTOR_READ_RANGE(ctx, dirp, internal_strlen(dirp) + 1);
scandir_filter = filter;
scandir_compar = compar;
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(scandir)(dirp, namelist,
filter ? wrapped_scandir_filter : nullptr,
compar ? wrapped_scandir_compar : nullptr);
scandir_filter = nullptr;
scandir_compar = nullptr;
if (namelist && res > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, namelist, sizeof(*namelist));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *namelist, sizeof(**namelist) * res);
for (int i = 0; i < res; ++i)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (*namelist)[i],
(*namelist)[i]->d_reclen);
}
return res;
}
#define INIT_SCANDIR COMMON_INTERCEPT_FUNCTION(scandir);
#else
#define INIT_SCANDIR
#endif
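// The scandir() interception above uses a trampoline pattern: the user's
// filter/compar callbacks are stashed in THREADLOCAL slots and replaced
// with wrapped_scandir_* shims, which unpoison the libc-allocated dirent
// arguments before tail-calling the stashed callback. THREADLOCAL storage
// keeps concurrent scandir() calls on different threads from clobbering
// each other's callback slots (though a callback that itself calls
// scandir() on the same thread would still clobber them).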
#if SANITIZER_INTERCEPT_SCANDIR64
typedef int (*scandir64_filter_f)(const struct __sanitizer_dirent64 *);
typedef int (*scandir64_compar_f)(const struct __sanitizer_dirent64 **,
const struct __sanitizer_dirent64 **);
static THREADLOCAL scandir64_filter_f scandir64_filter;
static THREADLOCAL scandir64_compar_f scandir64_compar;
static int wrapped_scandir64_filter(const struct __sanitizer_dirent64 *dir) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(dir, dir->d_reclen);
return scandir64_filter(dir);
}
static int wrapped_scandir64_compar(const struct __sanitizer_dirent64 **a,
const struct __sanitizer_dirent64 **b) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, sizeof(*a));
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*a, (*a)->d_reclen);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, sizeof(*b));
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*b, (*b)->d_reclen);
return scandir64_compar(a, b);
}
INTERCEPTOR(int, scandir64, char *dirp, __sanitizer_dirent64 ***namelist,
scandir64_filter_f filter, scandir64_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, scandir64, dirp, namelist, filter, compar);
if (dirp) COMMON_INTERCEPTOR_READ_RANGE(ctx, dirp, internal_strlen(dirp) + 1);
scandir64_filter = filter;
scandir64_compar = compar;
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res =
REAL(scandir64)(dirp, namelist,
filter ? wrapped_scandir64_filter : nullptr,
compar ? wrapped_scandir64_compar : nullptr);
scandir64_filter = nullptr;
scandir64_compar = nullptr;
if (namelist && res > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, namelist, sizeof(*namelist));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *namelist, sizeof(**namelist) * res);
for (int i = 0; i < res; ++i)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (*namelist)[i],
(*namelist)[i]->d_reclen);
}
return res;
}
#define INIT_SCANDIR64 COMMON_INTERCEPT_FUNCTION(scandir64);
#else
#define INIT_SCANDIR64
#endif
#if SANITIZER_INTERCEPT_GETGROUPS
INTERCEPTOR(int, getgroups, int size, u32 *lst) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgroups, size, lst);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getgroups)(size, lst);
if (res >= 0 && lst && size > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lst, res * sizeof(*lst));
return res;
}
#define INIT_GETGROUPS COMMON_INTERCEPT_FUNCTION(getgroups);
#else
#define INIT_GETGROUPS
#endif
#if SANITIZER_INTERCEPT_POLL
static void read_pollfd(void *ctx, __sanitizer_pollfd *fds,
__sanitizer_nfds_t nfds) {
for (unsigned i = 0; i < nfds; ++i) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, &fds[i].fd, sizeof(fds[i].fd));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &fds[i].events, sizeof(fds[i].events));
}
}
static void write_pollfd(void *ctx, __sanitizer_pollfd *fds,
__sanitizer_nfds_t nfds) {
for (unsigned i = 0; i < nfds; ++i)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &fds[i].revents,
sizeof(fds[i].revents));
}
INTERCEPTOR(int, poll, __sanitizer_pollfd *fds, __sanitizer_nfds_t nfds,
int timeout) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, poll, fds, nfds, timeout);
if (fds && nfds) read_pollfd(ctx, fds, nfds);
int res = COMMON_INTERCEPTOR_BLOCK_REAL(poll)(fds, nfds, timeout);
if (fds && nfds) write_pollfd(ctx, fds, nfds);
return res;
}
#define INIT_POLL COMMON_INTERCEPT_FUNCTION(poll);
#else
#define INIT_POLL
#endif
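// read_pollfd()/write_pollfd() above encode poll()'s in/out split: fd and
// events are caller-supplied inputs, checked as reads before the call,
// while revents is kernel-filled output, marked written afterwards for
// every entry regardless of how many descriptors became ready.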
#if SANITIZER_INTERCEPT_PPOLL
INTERCEPTOR(int, ppoll, __sanitizer_pollfd *fds, __sanitizer_nfds_t nfds,
void *timeout_ts, __sanitizer_sigset_t *sigmask) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ppoll, fds, nfds, timeout_ts, sigmask);
if (fds && nfds) read_pollfd(ctx, fds, nfds);
if (timeout_ts)
COMMON_INTERCEPTOR_READ_RANGE(ctx, timeout_ts, struct_timespec_sz);
if (sigmask) COMMON_INTERCEPTOR_READ_RANGE(ctx, sigmask, sizeof(*sigmask));
int res =
COMMON_INTERCEPTOR_BLOCK_REAL(ppoll)(fds, nfds, timeout_ts, sigmask);
if (fds && nfds) write_pollfd(ctx, fds, nfds);
return res;
}
#define INIT_PPOLL COMMON_INTERCEPT_FUNCTION(ppoll);
#else
#define INIT_PPOLL
#endif
#if SANITIZER_INTERCEPT_WORDEXP
INTERCEPTOR(int, wordexp, char *s, __sanitizer_wordexp_t *p, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wordexp, s, p, flags);
if (s) COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(wordexp)(s, p, flags);
if (!res && p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
uptr we_wordc =
((flags & wordexp_wrde_dooffs) ? p->we_offs : 0) + p->we_wordc;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->we_wordv,
sizeof(*p->we_wordv) * (we_wordc + 1));
for (uptr i = 0; i < we_wordc; ++i) {
char *w = p->we_wordv[i];
if (w) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, w, internal_strlen(w) + 1);
}
}
return res;
}
#define INIT_WORDEXP COMMON_INTERCEPT_FUNCTION(wordexp);
#else
#define INIT_WORDEXP
#endif
#if SANITIZER_INTERCEPT_SIGWAIT
INTERCEPTOR(int, sigwait, __sanitizer_sigset_t *set, int *sig) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigwait, set, sig);
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = COMMON_INTERCEPTOR_BLOCK_REAL(sigwait)(set, sig);
if (!res && sig) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sig, sizeof(*sig));
return res;
}
#define INIT_SIGWAIT COMMON_INTERCEPT_FUNCTION(sigwait);
#else
#define INIT_SIGWAIT
#endif
#if SANITIZER_INTERCEPT_SIGWAITINFO
INTERCEPTOR(int, sigwaitinfo, __sanitizer_sigset_t *set, void *info) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigwaitinfo, set, info);
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = COMMON_INTERCEPTOR_BLOCK_REAL(sigwaitinfo)(set, info);
if (res > 0 && info) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, info, siginfo_t_sz);
return res;
}
#define INIT_SIGWAITINFO COMMON_INTERCEPT_FUNCTION(sigwaitinfo);
#else
#define INIT_SIGWAITINFO
#endif
#if SANITIZER_INTERCEPT_SIGTIMEDWAIT
INTERCEPTOR(int, sigtimedwait, __sanitizer_sigset_t *set, void *info,
void *timeout) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigtimedwait, set, info, timeout);
if (timeout) COMMON_INTERCEPTOR_READ_RANGE(ctx, timeout, struct_timespec_sz);
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = COMMON_INTERCEPTOR_BLOCK_REAL(sigtimedwait)(set, info, timeout);
if (res > 0 && info) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, info, siginfo_t_sz);
return res;
}
#define INIT_SIGTIMEDWAIT COMMON_INTERCEPT_FUNCTION(sigtimedwait);
#else
#define INIT_SIGTIMEDWAIT
#endif
#if SANITIZER_INTERCEPT_SIGSETOPS
INTERCEPTOR(int, sigemptyset, __sanitizer_sigset_t *set) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigemptyset, set);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(sigemptyset)(set);
if (!res && set) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, set, sizeof(*set));
return res;
}
INTERCEPTOR(int, sigfillset, __sanitizer_sigset_t *set) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigfillset, set);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(sigfillset)(set);
if (!res && set) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, set, sizeof(*set));
return res;
}
#define INIT_SIGSETOPS \
COMMON_INTERCEPT_FUNCTION(sigemptyset); \
COMMON_INTERCEPT_FUNCTION(sigfillset);
#else
#define INIT_SIGSETOPS
#endif
#if SANITIZER_INTERCEPT_SIGSET_LOGICOPS
INTERCEPTOR(int, sigandset, __sanitizer_sigset_t *dst,
__sanitizer_sigset_t *src1, __sanitizer_sigset_t *src2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigandset, dst, src1, src2);
if (src1)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src1, sizeof(*src1));
if (src2)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src2, sizeof(*src2));
int res = REAL(sigandset)(dst, src1, src2);
if (!res && dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst));
return res;
}
INTERCEPTOR(int, sigorset, __sanitizer_sigset_t *dst,
__sanitizer_sigset_t *src1, __sanitizer_sigset_t *src2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigorset, dst, src1, src2);
if (src1)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src1, sizeof(*src1));
if (src2)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src2, sizeof(*src2));
int res = REAL(sigorset)(dst, src1, src2);
if (!res && dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst));
return res;
}
#define INIT_SIGSET_LOGICOPS \
COMMON_INTERCEPT_FUNCTION(sigandset); \
COMMON_INTERCEPT_FUNCTION(sigorset);
#else
#define INIT_SIGSET_LOGICOPS
#endif
#if SANITIZER_INTERCEPT_SIGPENDING
INTERCEPTOR(int, sigpending, __sanitizer_sigset_t *set) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigpending, set);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(sigpending)(set);
if (!res && set) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, set, sizeof(*set));
return res;
}
#define INIT_SIGPENDING COMMON_INTERCEPT_FUNCTION(sigpending);
#else
#define INIT_SIGPENDING
#endif
#if SANITIZER_INTERCEPT_SIGPROCMASK
INTERCEPTOR(int, sigprocmask, int how, __sanitizer_sigset_t *set,
__sanitizer_sigset_t *oldset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigprocmask, how, set, oldset);
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(sigprocmask)(how, set, oldset);
if (!res && oldset)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldset, sizeof(*oldset));
return res;
}
#define INIT_SIGPROCMASK COMMON_INTERCEPT_FUNCTION(sigprocmask);
#else
#define INIT_SIGPROCMASK
#endif
#if SANITIZER_INTERCEPT_PTHREAD_SIGMASK
INTERCEPTOR(int, pthread_sigmask, int how, __sanitizer_sigset_t *set,
__sanitizer_sigset_t *oldset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_sigmask, how, set, oldset);
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(pthread_sigmask)(how, set, oldset);
if (!res && oldset)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldset, sizeof(*oldset));
return res;
}
#define INIT_PTHREAD_SIGMASK COMMON_INTERCEPT_FUNCTION(pthread_sigmask);
#else
#define INIT_PTHREAD_SIGMASK
#endif
#if SANITIZER_INTERCEPT_BACKTRACE
INTERCEPTOR(int, backtrace, void **buffer, int size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, backtrace, buffer, size);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(backtrace)(buffer, size);
if (res && buffer)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buffer, res * sizeof(*buffer));
return res;
}
INTERCEPTOR(char **, backtrace_symbols, void **buffer, int size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, backtrace_symbols, buffer, size);
if (buffer && size)
COMMON_INTERCEPTOR_READ_RANGE(ctx, buffer, size * sizeof(*buffer));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char **res = REAL(backtrace_symbols)(buffer, size);
if (res && size) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, size * sizeof(*res));
for (int i = 0; i < size; ++i)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res[i], internal_strlen(res[i]) + 1);
}
return res;
}
#define INIT_BACKTRACE \
COMMON_INTERCEPT_FUNCTION(backtrace); \
COMMON_INTERCEPT_FUNCTION(backtrace_symbols);
#else
#define INIT_BACKTRACE
#endif
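// A hypothetical caller, for illustration of the ranges unpoisoned above:
//
//   #include <execinfo.h>
//   #include <stdio.h>
//   #include <stdlib.h>
//   void dump_stack(void) {
//     void *pcs[64];
//     int n = backtrace(pcs, 64);  // pcs[0..n) is marked written
//     char **syms = backtrace_symbols(pcs, n);
//     if (syms) {
//       for (int i = 0; i < n; ++i)
//         puts(syms[i]);  // the array and each string are marked written
//       free(syms);
//     }
//   }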
#if SANITIZER_INTERCEPT__EXIT
INTERCEPTOR(void, _exit, int status) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, _exit, status);
COMMON_INTERCEPTOR_USER_CALLBACK_START();
int status1 = COMMON_INTERCEPTOR_ON_EXIT(ctx);
COMMON_INTERCEPTOR_USER_CALLBACK_END();
if (status == 0) status = status1;
REAL(_exit)(status);
}
#define INIT__EXIT COMMON_INTERCEPT_FUNCTION(_exit);
#else
#define INIT__EXIT
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEX
INTERCEPTOR(int, pthread_mutex_lock, void *m) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_mutex_lock, m);
COMMON_INTERCEPTOR_MUTEX_PRE_LOCK(ctx, m);
int res = REAL(pthread_mutex_lock)(m);
if (res == errno_EOWNERDEAD)
COMMON_INTERCEPTOR_MUTEX_REPAIR(ctx, m);
if (res == 0 || res == errno_EOWNERDEAD)
COMMON_INTERCEPTOR_MUTEX_POST_LOCK(ctx, m);
if (res == errno_EINVAL)
COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m);
return res;
}
INTERCEPTOR(int, pthread_mutex_unlock, void *m) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_mutex_unlock, m);
COMMON_INTERCEPTOR_MUTEX_UNLOCK(ctx, m);
int res = REAL(pthread_mutex_unlock)(m);
if (res == errno_EINVAL)
COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m);
return res;
}
#define INIT_PTHREAD_MUTEX_LOCK COMMON_INTERCEPT_FUNCTION(pthread_mutex_lock)
#define INIT_PTHREAD_MUTEX_UNLOCK \
COMMON_INTERCEPT_FUNCTION(pthread_mutex_unlock)
#else
#define INIT_PTHREAD_MUTEX_LOCK
#define INIT_PTHREAD_MUTEX_UNLOCK
#endif
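// EOWNERDEAD (handled above) is returned by robust mutexes when the previous
// owner died while holding the lock; the lock is nevertheless acquired,
// which is why both MUTEX_REPAIR and MUTEX_POST_LOCK fire. A hypothetical
// caller, for illustration:
//
//   #include <errno.h>
//   #include <pthread.h>
//   int lock_robust(pthread_mutex_t *m) {
//     int rc = pthread_mutex_lock(m);
//     if (rc == EOWNERDEAD) {         // we own the lock; state may be stale
//       pthread_mutex_consistent(m);  // mark the mutex usable again
//       rc = 0;
//     }
//     return rc;
//   }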
#if SANITIZER_INTERCEPT___PTHREAD_MUTEX
INTERCEPTOR(int, __pthread_mutex_lock, void *m) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __pthread_mutex_lock, m);
COMMON_INTERCEPTOR_MUTEX_PRE_LOCK(ctx, m);
int res = REAL(__pthread_mutex_lock)(m);
if (res == errno_EOWNERDEAD)
COMMON_INTERCEPTOR_MUTEX_REPAIR(ctx, m);
if (res == 0 || res == errno_EOWNERDEAD)
COMMON_INTERCEPTOR_MUTEX_POST_LOCK(ctx, m);
if (res == errno_EINVAL)
COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m);
return res;
}
INTERCEPTOR(int, __pthread_mutex_unlock, void *m) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __pthread_mutex_unlock, m);
COMMON_INTERCEPTOR_MUTEX_UNLOCK(ctx, m);
int res = REAL(__pthread_mutex_unlock)(m);
if (res == errno_EINVAL)
COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m);
return res;
}
#define INIT___PTHREAD_MUTEX_LOCK \
COMMON_INTERCEPT_FUNCTION(__pthread_mutex_lock)
#define INIT___PTHREAD_MUTEX_UNLOCK \
COMMON_INTERCEPT_FUNCTION(__pthread_mutex_unlock)
#else
#define INIT___PTHREAD_MUTEX_LOCK
#define INIT___PTHREAD_MUTEX_UNLOCK
#endif
#if SANITIZER_INTERCEPT___LIBC_MUTEX
// On NetBSD, the __libc_* names are libc-internal entry points for the
// corresponding pthread functions, so alias them to the same wrappers.
INTERCEPTOR(int, __libc_mutex_lock, void *m)
ALIAS(WRAPPER_NAME(pthread_mutex_lock));
INTERCEPTOR(int, __libc_mutex_unlock, void *m)
ALIAS(WRAPPER_NAME(pthread_mutex_unlock));
INTERCEPTOR(int, __libc_thr_setcancelstate, int state, int *oldstate)
ALIAS(WRAPPER_NAME(pthread_setcancelstate));
#define INIT___LIBC_MUTEX_LOCK COMMON_INTERCEPT_FUNCTION(__libc_mutex_lock)
#define INIT___LIBC_MUTEX_UNLOCK COMMON_INTERCEPT_FUNCTION(__libc_mutex_unlock)
#define INIT___LIBC_THR_SETCANCELSTATE \
COMMON_INTERCEPT_FUNCTION(__libc_thr_setcancelstate)
#else
#define INIT___LIBC_MUTEX_LOCK
#define INIT___LIBC_MUTEX_UNLOCK
#define INIT___LIBC_THR_SETCANCELSTATE
#endif
#if SANITIZER_INTERCEPT_GETMNTENT || SANITIZER_INTERCEPT_GETMNTENT_R
static void write_mntent(void *ctx, __sanitizer_mntent *mnt) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt, sizeof(*mnt));
if (mnt->mnt_fsname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_fsname,
internal_strlen(mnt->mnt_fsname) + 1);
if (mnt->mnt_dir)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_dir,
internal_strlen(mnt->mnt_dir) + 1);
if (mnt->mnt_type)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_type,
internal_strlen(mnt->mnt_type) + 1);
if (mnt->mnt_opts)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_opts,
internal_strlen(mnt->mnt_opts) + 1);
}
#endif
#if SANITIZER_INTERCEPT_GETMNTENT
INTERCEPTOR(__sanitizer_mntent *, getmntent, void *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getmntent, fp);
__sanitizer_mntent *res = REAL(getmntent)(fp);
if (res) write_mntent(ctx, res);
return res;
}
#define INIT_GETMNTENT COMMON_INTERCEPT_FUNCTION(getmntent);
#else
#define INIT_GETMNTENT
#endif
#if SANITIZER_INTERCEPT_GETMNTENT_R
INTERCEPTOR(__sanitizer_mntent *, getmntent_r, void *fp,
__sanitizer_mntent *mntbuf, char *buf, int buflen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getmntent_r, fp, mntbuf, buf, buflen);
__sanitizer_mntent *res = REAL(getmntent_r)(fp, mntbuf, buf, buflen);
if (res) write_mntent(ctx, res);
return res;
}
#define INIT_GETMNTENT_R COMMON_INTERCEPT_FUNCTION(getmntent_r);
#else
#define INIT_GETMNTENT_R
#endif
#if SANITIZER_INTERCEPT_STATFS
INTERCEPTOR(int, statfs, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statfs, path, buf);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(statfs)(path, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs_sz);
return res;
}
INTERCEPTOR(int, fstatfs, int fd, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fstatfs, fd, buf);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fstatfs)(fd, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs_sz);
return res;
}
#define INIT_STATFS \
COMMON_INTERCEPT_FUNCTION(statfs); \
COMMON_INTERCEPT_FUNCTION(fstatfs);
#else
#define INIT_STATFS
#endif
#if SANITIZER_INTERCEPT_STATFS64
INTERCEPTOR(int, statfs64, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statfs64, path, buf);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(statfs64)(path, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs64_sz);
return res;
}
INTERCEPTOR(int, fstatfs64, int fd, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fstatfs64, fd, buf);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fstatfs64)(fd, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs64_sz);
return res;
}
#define INIT_STATFS64 \
COMMON_INTERCEPT_FUNCTION(statfs64); \
COMMON_INTERCEPT_FUNCTION(fstatfs64);
#else
#define INIT_STATFS64
#endif
#if SANITIZER_INTERCEPT_STATVFS
INTERCEPTOR(int, statvfs, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs, path, buf);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(statvfs)(path, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
return res;
}
INTERCEPTOR(int, fstatvfs, int fd, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fstatvfs, fd, buf);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fstatvfs)(fd, buf);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
if (fd >= 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
return res;
}
#define INIT_STATVFS \
COMMON_INTERCEPT_FUNCTION(statvfs); \
COMMON_INTERCEPT_FUNCTION(fstatvfs);
#else
#define INIT_STATVFS
#endif
#if SANITIZER_INTERCEPT_STATVFS64
INTERCEPTOR(int, statvfs64, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs64, path, buf);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(statvfs64)(path, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs64_sz);
return res;
}
INTERCEPTOR(int, fstatvfs64, int fd, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fstatvfs64, fd, buf);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fstatvfs64)(fd, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs64_sz);
return res;
}
#define INIT_STATVFS64 \
COMMON_INTERCEPT_FUNCTION(statvfs64); \
COMMON_INTERCEPT_FUNCTION(fstatvfs64);
#else
#define INIT_STATVFS64
#endif
#if SANITIZER_INTERCEPT_INITGROUPS
INTERCEPTOR(int, initgroups, char *user, u32 group) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, initgroups, user, group);
if (user) COMMON_INTERCEPTOR_READ_RANGE(ctx, user, internal_strlen(user) + 1);
int res = REAL(initgroups)(user, group);
return res;
}
#define INIT_INITGROUPS COMMON_INTERCEPT_FUNCTION(initgroups);
#else
#define INIT_INITGROUPS
#endif
#if SANITIZER_INTERCEPT_ETHER_NTOA_ATON
INTERCEPTOR(char *, ether_ntoa, __sanitizer_ether_addr *addr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_ntoa, addr);
if (addr) COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, sizeof(*addr));
char *res = REAL(ether_ntoa)(addr);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(__sanitizer_ether_addr *, ether_aton, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_aton, buf);
if (buf) COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, internal_strlen(buf) + 1);
__sanitizer_ether_addr *res = REAL(ether_aton)(buf);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, sizeof(*res));
return res;
}
#define INIT_ETHER_NTOA_ATON \
COMMON_INTERCEPT_FUNCTION(ether_ntoa); \
COMMON_INTERCEPT_FUNCTION(ether_aton);
#else
#define INIT_ETHER_NTOA_ATON
#endif
#if SANITIZER_INTERCEPT_ETHER_HOST
INTERCEPTOR(int, ether_ntohost, char *hostname, __sanitizer_ether_addr *addr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_ntohost, hostname, addr);
if (addr) COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, sizeof(*addr));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(ether_ntohost)(hostname, addr);
if (!res && hostname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hostname, internal_strlen(hostname) + 1);
return res;
}
INTERCEPTOR(int, ether_hostton, char *hostname, __sanitizer_ether_addr *addr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_hostton, hostname, addr);
if (hostname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, hostname, internal_strlen(hostname) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(ether_hostton)(hostname, addr);
if (!res && addr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, sizeof(*addr));
return res;
}
INTERCEPTOR(int, ether_line, char *line, __sanitizer_ether_addr *addr,
char *hostname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_line, line, addr, hostname);
if (line) COMMON_INTERCEPTOR_READ_RANGE(ctx, line, internal_strlen(line) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(ether_line)(line, addr, hostname);
if (!res) {
if (addr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, sizeof(*addr));
if (hostname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hostname, internal_strlen(hostname) + 1);
}
return res;
}
#define INIT_ETHER_HOST \
COMMON_INTERCEPT_FUNCTION(ether_ntohost); \
COMMON_INTERCEPT_FUNCTION(ether_hostton); \
COMMON_INTERCEPT_FUNCTION(ether_line);
#else
#define INIT_ETHER_HOST
#endif
#if SANITIZER_INTERCEPT_ETHER_R
INTERCEPTOR(char *, ether_ntoa_r, __sanitizer_ether_addr *addr, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_ntoa_r, addr, buf);
if (addr) COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, sizeof(*addr));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(ether_ntoa_r)(addr, buf);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(__sanitizer_ether_addr *, ether_aton_r, char *buf,
__sanitizer_ether_addr *addr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_aton_r, buf, addr);
if (buf) COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, internal_strlen(buf) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_ether_addr *res = REAL(ether_aton_r)(buf, addr);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, sizeof(*res));
return res;
}
#define INIT_ETHER_R \
COMMON_INTERCEPT_FUNCTION(ether_ntoa_r); \
COMMON_INTERCEPT_FUNCTION(ether_aton_r);
#else
#define INIT_ETHER_R
#endif
#if SANITIZER_INTERCEPT_SHMCTL
INTERCEPTOR(int, shmctl, int shmid, int cmd, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, shmctl, shmid, cmd, buf);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(shmctl)(shmid, cmd, buf);
if (res >= 0) {
unsigned sz = 0;
if (cmd == shmctl_ipc_stat || cmd == shmctl_shm_stat)
sz = sizeof(__sanitizer_shmid_ds);
else if (cmd == shmctl_ipc_info)
sz = struct_shminfo_sz;
else if (cmd == shmctl_shm_info)
sz = struct_shm_info_sz;
if (sz) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sz);
}
return res;
}
#define INIT_SHMCTL COMMON_INTERCEPT_FUNCTION(shmctl);
#else
#define INIT_SHMCTL
#endif
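// The sizes above correspond to what the kernel writes back for each
// command. A hypothetical caller, for illustration:
//
//   #include <sys/shm.h>
//   int stat_segment(int shmid, struct shmid_ds *ds) {
//     // On success the interceptor marks sizeof(*ds) bytes as written,
//     // matching the shmctl_ipc_stat case above.
//     return shmctl(shmid, IPC_STAT, ds);
//   }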
#if SANITIZER_INTERCEPT_RANDOM_R
INTERCEPTOR(int, random_r, void *buf, u32 *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, random_r, buf, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(random_r)(buf, result);
if (!res && result)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
#define INIT_RANDOM_R COMMON_INTERCEPT_FUNCTION(random_r);
#else
#define INIT_RANDOM_R
#endif
// FIXME: under ASan the REAL() call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GET || \
SANITIZER_INTERCEPT_PTHREAD_ATTR_GET_SCHED || \
SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSCHED || \
SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GET || \
SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GET || \
SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GET || \
SANITIZER_INTERCEPT_PTHREAD_BARRIERATTR_GET
#define INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(fn, sz) \
INTERCEPTOR(int, fn, void *attr, void *r) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, fn, attr, r); \
int res = REAL(fn)(attr, r); \
if (!res && r) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, r, sz); \
return res; \
}
#define INTERCEPTOR_PTHREAD_ATTR_GET(what, sz) \
INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_attr_get##what, sz)
#define INTERCEPTOR_PTHREAD_MUTEXATTR_GET(what, sz) \
INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_mutexattr_get##what, sz)
#define INTERCEPTOR_PTHREAD_RWLOCKATTR_GET(what, sz) \
INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_rwlockattr_get##what, sz)
#define INTERCEPTOR_PTHREAD_CONDATTR_GET(what, sz) \
INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_condattr_get##what, sz)
#define INTERCEPTOR_PTHREAD_BARRIERATTR_GET(what, sz) \
INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_barrierattr_get##what, sz)
#endif
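// For example, INTERCEPTOR_PTHREAD_ATTR_GET(detachstate, sizeof(int)) below
// expands (roughly) to:
//
//   INTERCEPTOR(int, pthread_attr_getdetachstate, void *attr, void *r) {
//     void *ctx;
//     COMMON_INTERCEPTOR_ENTER(ctx, pthread_attr_getdetachstate, attr, r);
//     int res = REAL(pthread_attr_getdetachstate)(attr, r);
//     if (!res && r) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, r, sizeof(int));
//     return res;
//   }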
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GET
INTERCEPTOR_PTHREAD_ATTR_GET(detachstate, sizeof(int))
INTERCEPTOR_PTHREAD_ATTR_GET(guardsize, sizeof(SIZE_T))
INTERCEPTOR_PTHREAD_ATTR_GET(scope, sizeof(int))
INTERCEPTOR_PTHREAD_ATTR_GET(stacksize, sizeof(SIZE_T))
INTERCEPTOR(int, pthread_attr_getstack, void *attr, void **addr, SIZE_T *size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_attr_getstack, attr, addr, size);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(pthread_attr_getstack)(attr, addr, size);
if (!res) {
if (addr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, sizeof(*addr));
if (size) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, size, sizeof(*size));
}
return res;
}
// We may need to call the real pthread_attr_getstack from the run-time
// in sanitizer_common, but we don't want to include the interception
// headers there, so just define this function here.
namespace __sanitizer {
extern "C" {
int real_pthread_attr_getstack(void *attr, void **addr, SIZE_T *size) {
return REAL(pthread_attr_getstack)(attr, addr, size);
}
} // extern "C"
} // namespace __sanitizer
#define INIT_PTHREAD_ATTR_GET \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getdetachstate); \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getguardsize); \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getscope); \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getstacksize); \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getstack);
#else
#define INIT_PTHREAD_ATTR_GET
#endif
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GET_SCHED
INTERCEPTOR_PTHREAD_ATTR_GET(schedparam, struct_sched_param_sz)
INTERCEPTOR_PTHREAD_ATTR_GET(schedpolicy, sizeof(int))
#define INIT_PTHREAD_ATTR_GET_SCHED \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getschedparam); \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getschedpolicy);
#else
#define INIT_PTHREAD_ATTR_GET_SCHED
#endif
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSCHED
INTERCEPTOR_PTHREAD_ATTR_GET(inheritsched, sizeof(int))
#define INIT_PTHREAD_ATTR_GETINHERITSCHED \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getinheritsched);
#else
#define INIT_PTHREAD_ATTR_GETINHERITSCHED
#endif
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GETAFFINITY_NP
INTERCEPTOR(int, pthread_attr_getaffinity_np, void *attr, SIZE_T cpusetsize,
void *cpuset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_attr_getaffinity_np, attr, cpusetsize,
cpuset);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(pthread_attr_getaffinity_np)(attr, cpusetsize, cpuset);
if (!res && cpusetsize && cpuset)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cpuset, cpusetsize);
return res;
}
#define INIT_PTHREAD_ATTR_GETAFFINITY_NP \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getaffinity_np);
#else
#define INIT_PTHREAD_ATTR_GETAFFINITY_NP
#endif
#if SANITIZER_INTERCEPT_PTHREAD_GETAFFINITY_NP
INTERCEPTOR(int, pthread_getaffinity_np, void *attr, SIZE_T cpusetsize,
void *cpuset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_getaffinity_np, attr, cpusetsize,
cpuset);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(pthread_getaffinity_np)(attr, cpusetsize, cpuset);
if (!res && cpusetsize && cpuset)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cpuset, cpusetsize);
return res;
}
#define INIT_PTHREAD_GETAFFINITY_NP \
COMMON_INTERCEPT_FUNCTION(pthread_getaffinity_np);
#else
#define INIT_PTHREAD_GETAFFINITY_NP
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPSHARED
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(pshared, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETPSHARED \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getpshared);
#else
#define INIT_PTHREAD_MUTEXATTR_GETPSHARED
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETTYPE
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(type, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETTYPE \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_gettype);
#else
#define INIT_PTHREAD_MUTEXATTR_GETTYPE
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPROTOCOL
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(protocol, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETPROTOCOL \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getprotocol);
#else
#define INIT_PTHREAD_MUTEXATTR_GETPROTOCOL
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPRIOCEILING
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(prioceiling, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETPRIOCEILING \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getprioceiling);
#else
#define INIT_PTHREAD_MUTEXATTR_GETPRIOCEILING
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(robust, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETROBUST \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getrobust);
#else
#define INIT_PTHREAD_MUTEXATTR_GETROBUST
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST_NP
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(robust_np, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETROBUST_NP \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getrobust_np);
#else
#define INIT_PTHREAD_MUTEXATTR_GETROBUST_NP
#endif
#if SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETPSHARED
INTERCEPTOR_PTHREAD_RWLOCKATTR_GET(pshared, sizeof(int))
#define INIT_PTHREAD_RWLOCKATTR_GETPSHARED \
COMMON_INTERCEPT_FUNCTION(pthread_rwlockattr_getpshared);
#else
#define INIT_PTHREAD_RWLOCKATTR_GETPSHARED
#endif
#if SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETKIND_NP
INTERCEPTOR_PTHREAD_RWLOCKATTR_GET(kind_np, sizeof(int))
#define INIT_PTHREAD_RWLOCKATTR_GETKIND_NP \
COMMON_INTERCEPT_FUNCTION(pthread_rwlockattr_getkind_np);
#else
#define INIT_PTHREAD_RWLOCKATTR_GETKIND_NP
#endif
#if SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETPSHARED
INTERCEPTOR_PTHREAD_CONDATTR_GET(pshared, sizeof(int))
#define INIT_PTHREAD_CONDATTR_GETPSHARED \
COMMON_INTERCEPT_FUNCTION(pthread_condattr_getpshared);
#else
#define INIT_PTHREAD_CONDATTR_GETPSHARED
#endif
#if SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETCLOCK
INTERCEPTOR_PTHREAD_CONDATTR_GET(clock, sizeof(int))
#define INIT_PTHREAD_CONDATTR_GETCLOCK \
COMMON_INTERCEPT_FUNCTION(pthread_condattr_getclock);
#else
#define INIT_PTHREAD_CONDATTR_GETCLOCK
#endif
#if SANITIZER_INTERCEPT_PTHREAD_BARRIERATTR_GETPSHARED
INTERCEPTOR_PTHREAD_BARRIERATTR_GET(pshared, sizeof(int))  // not on Mac or Android
#define INIT_PTHREAD_BARRIERATTR_GETPSHARED \
COMMON_INTERCEPT_FUNCTION(pthread_barrierattr_getpshared);
#else
#define INIT_PTHREAD_BARRIERATTR_GETPSHARED
#endif
#if SANITIZER_INTERCEPT_TMPNAM
INTERCEPTOR(char *, tmpnam, char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tmpnam, s);
char *res = REAL(tmpnam)(s);
if (res) {
if (s)
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, internal_strlen(s) + 1);
else
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
#define INIT_TMPNAM COMMON_INTERCEPT_FUNCTION(tmpnam);
#else
#define INIT_TMPNAM
#endif
#if SANITIZER_INTERCEPT_TMPNAM_R
INTERCEPTOR(char *, tmpnam_r, char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tmpnam_r, s);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(tmpnam_r)(s);
if (res && s) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, internal_strlen(s) + 1);
return res;
}
#define INIT_TMPNAM_R COMMON_INTERCEPT_FUNCTION(tmpnam_r);
#else
#define INIT_TMPNAM_R
#endif
#if SANITIZER_INTERCEPT_PTSNAME
INTERCEPTOR(char *, ptsname, int fd) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ptsname, fd);
char *res = REAL(ptsname)(fd);
if (res != nullptr)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_PTSNAME COMMON_INTERCEPT_FUNCTION(ptsname);
#else
#define INIT_PTSNAME
#endif
#if SANITIZER_INTERCEPT_PTSNAME_R
INTERCEPTOR(int, ptsname_r, int fd, char *name, SIZE_T namesize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ptsname_r, fd, name, namesize);
int res = REAL(ptsname_r)(fd, name, namesize);
if (res == 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
return res;
}
#define INIT_PTSNAME_R COMMON_INTERCEPT_FUNCTION(ptsname_r);
#else
#define INIT_PTSNAME_R
#endif
#if SANITIZER_INTERCEPT_TTYNAME
INTERCEPTOR(char *, ttyname, int fd) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ttyname, fd);
char *res = REAL(ttyname)(fd);
if (res != nullptr)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_TTYNAME COMMON_INTERCEPT_FUNCTION(ttyname);
#else
#define INIT_TTYNAME
#endif
#if SANITIZER_INTERCEPT_TTYNAME_R
INTERCEPTOR(int, ttyname_r, int fd, char *name, SIZE_T namesize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ttyname_r, fd, name, namesize);
int res = REAL(ttyname_r)(fd, name, namesize);
if (res == 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
return res;
}
#define INIT_TTYNAME_R COMMON_INTERCEPT_FUNCTION(ttyname_r);
#else
#define INIT_TTYNAME_R
#endif
#if SANITIZER_INTERCEPT_TEMPNAM
INTERCEPTOR(char *, tempnam, char *dir, char *pfx) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tempnam, dir, pfx);
if (dir) COMMON_INTERCEPTOR_READ_RANGE(ctx, dir, internal_strlen(dir) + 1);
if (pfx) COMMON_INTERCEPTOR_READ_RANGE(ctx, pfx, internal_strlen(pfx) + 1);
char *res = REAL(tempnam)(dir, pfx);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_TEMPNAM COMMON_INTERCEPT_FUNCTION(tempnam);
#else
#define INIT_TEMPNAM
#endif
#if SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP && !SANITIZER_NETBSD
INTERCEPTOR(int, pthread_setname_np, uptr thread, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_setname_np, thread, name);
COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name);
return REAL(pthread_setname_np)(thread, name);
}
#define INIT_PTHREAD_SETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_setname_np);
#elif SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP && SANITIZER_NETBSD
INTERCEPTOR(int, pthread_setname_np, uptr thread, const char *name, void *arg) {
void *ctx;
char newname[32]; // PTHREAD_MAX_NAMELEN_NP=32
COMMON_INTERCEPTOR_ENTER(ctx, pthread_setname_np, thread, name, arg);
COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
// On NetBSD, pthread_setname_np() treats 'name' as a printf-like format
// string, with 'arg' as its argument.
internal_snprintf(newname, sizeof(newname), name, arg);
COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, newname);
return REAL(pthread_setname_np)(thread, name, arg);
}
#define INIT_PTHREAD_SETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_setname_np);
#else
#define INIT_PTHREAD_SETNAME_NP
#endif
#if SANITIZER_INTERCEPT_PTHREAD_GETNAME_NP
INTERCEPTOR(int, pthread_getname_np, uptr thread, char *name, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_getname_np, thread, name, len);
int res = REAL(pthread_getname_np)(thread, name, len);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strnlen(name, len) + 1);
return res;
}
#define INIT_PTHREAD_GETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_getname_np);
#else
#define INIT_PTHREAD_GETNAME_NP
#endif
#if SANITIZER_INTERCEPT_SINCOS
INTERCEPTOR(void, sincos, double x, double *sin, double *cos) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sincos, x, sin, cos);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
REAL(sincos)(x, sin, cos);
if (sin) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sin, sizeof(*sin));
if (cos) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cos, sizeof(*cos));
}
INTERCEPTOR(void, sincosf, float x, float *sin, float *cos) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sincosf, x, sin, cos);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
REAL(sincosf)(x, sin, cos);
if (sin) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sin, sizeof(*sin));
if (cos) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cos, sizeof(*cos));
}
INTERCEPTOR(void, sincosl, long double x, long double *sin, long double *cos) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sincosl, x, sin, cos);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
REAL(sincosl)(x, sin, cos);
if (sin) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sin, sizeof(*sin));
if (cos) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cos, sizeof(*cos));
}
#define INIT_SINCOS \
COMMON_INTERCEPT_FUNCTION(sincos); \
COMMON_INTERCEPT_FUNCTION(sincosf); \
COMMON_INTERCEPT_FUNCTION_LDBL(sincosl);
#else
#define INIT_SINCOS
#endif
#if SANITIZER_INTERCEPT_REMQUO
INTERCEPTOR(double, remquo, double x, double y, int *quo) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, remquo, x, y, quo);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
double res = REAL(remquo)(x, y, quo);
if (quo) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, quo, sizeof(*quo));
return res;
}
INTERCEPTOR(float, remquof, float x, float y, int *quo) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, remquof, x, y, quo);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
float res = REAL(remquof)(x, y, quo);
if (quo) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, quo, sizeof(*quo));
return res;
}
#define INIT_REMQUO \
COMMON_INTERCEPT_FUNCTION(remquo); \
COMMON_INTERCEPT_FUNCTION(remquof);
#else
#define INIT_REMQUO
#endif
#if SANITIZER_INTERCEPT_REMQUOL
INTERCEPTOR(long double, remquol, long double x, long double y, int *quo) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, remquol, x, y, quo);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
long double res = REAL(remquol)(x, y, quo);
if (quo) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, quo, sizeof(*quo));
return res;
}
#define INIT_REMQUOL \
COMMON_INTERCEPT_FUNCTION_LDBL(remquol);
#else
#define INIT_REMQUOL
#endif
#if SANITIZER_INTERCEPT_LGAMMA
extern int signgam;
INTERCEPTOR(double, lgamma, double x) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgamma, x);
double res = REAL(lgamma)(x);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &signgam, sizeof(signgam));
return res;
}
INTERCEPTOR(float, lgammaf, float x) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgammaf, x);
float res = REAL(lgammaf)(x);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &signgam, sizeof(signgam));
return res;
}
#define INIT_LGAMMA \
COMMON_INTERCEPT_FUNCTION(lgamma); \
COMMON_INTERCEPT_FUNCTION(lgammaf);
#else
#define INIT_LGAMMA
#endif
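// lgamma() and lgammaf() store the sign of Gamma(x) in the global signgam,
// which is why the interceptors mark it written. A hypothetical caller, for
// illustration:
//
//   #include <math.h>
//   double log_abs_gamma(double x, int *sign) {
//     double r = lgamma(x);
//     *sign = signgam;  // initialized thanks to the WRITE_RANGE above
//     return r;
//   }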
#if SANITIZER_INTERCEPT_LGAMMAL
INTERCEPTOR(long double, lgammal, long double x) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgammal, x);
long double res = REAL(lgammal)(x);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &signgam, sizeof(signgam));
return res;
}
#define INIT_LGAMMAL \
COMMON_INTERCEPT_FUNCTION_LDBL(lgammal);
#else
#define INIT_LGAMMAL
#endif
#if SANITIZER_INTERCEPT_LGAMMA_R
INTERCEPTOR(double, lgamma_r, double x, int *signp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgamma_r, x, signp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
double res = REAL(lgamma_r)(x, signp);
if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
return res;
}
INTERCEPTOR(float, lgammaf_r, float x, int *signp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgammaf_r, x, signp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
float res = REAL(lgammaf_r)(x, signp);
if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
return res;
}
#define INIT_LGAMMA_R \
COMMON_INTERCEPT_FUNCTION(lgamma_r); \
COMMON_INTERCEPT_FUNCTION(lgammaf_r);
#else
#define INIT_LGAMMA_R
#endif
#if SANITIZER_INTERCEPT_LGAMMAL_R
INTERCEPTOR(long double, lgammal_r, long double x, int *signp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgammal_r, x, signp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
long double res = REAL(lgammal_r)(x, signp);
if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
return res;
}
#define INIT_LGAMMAL_R COMMON_INTERCEPT_FUNCTION_LDBL(lgammal_r);
#else
#define INIT_LGAMMAL_R
#endif
#if SANITIZER_INTERCEPT_DRAND48_R
INTERCEPTOR(int, drand48_r, void *buffer, double *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, drand48_r, buffer, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(drand48_r)(buffer, result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
INTERCEPTOR(int, lrand48_r, void *buffer, long *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lrand48_r, buffer, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(lrand48_r)(buffer, result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
#define INIT_DRAND48_R \
COMMON_INTERCEPT_FUNCTION(drand48_r); \
COMMON_INTERCEPT_FUNCTION(lrand48_r);
#else
#define INIT_DRAND48_R
#endif
#if SANITIZER_INTERCEPT_RAND_R
INTERCEPTOR(int, rand_r, unsigned *seedp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, rand_r, seedp);
COMMON_INTERCEPTOR_READ_RANGE(ctx, seedp, sizeof(*seedp));
return REAL(rand_r)(seedp);
}
#define INIT_RAND_R COMMON_INTERCEPT_FUNCTION(rand_r);
#else
#define INIT_RAND_R
#endif
#if SANITIZER_INTERCEPT_GETLINE
INTERCEPTOR(SSIZE_T, getline, char **lineptr, SIZE_T *n, void *stream) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getline, lineptr, n, stream);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(getline)(lineptr, n, stream);
if (res > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lineptr, sizeof(*lineptr));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *lineptr, res + 1);
}
return res;
}
// FIXME: under ASan the call below may write to freed memory and corrupt its
// metadata. See
// https://github.com/google/sanitizers/issues/321.
#define GETDELIM_INTERCEPTOR_IMPL(vname) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, vname, lineptr, n, delim, stream); \
SSIZE_T res = REAL(vname)(lineptr, n, delim, stream); \
if (res > 0) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lineptr, sizeof(*lineptr)); \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n)); \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *lineptr, res + 1); \
} \
return res; \
}
INTERCEPTOR(SSIZE_T, __getdelim, char **lineptr, SIZE_T *n, int delim,
void *stream)
GETDELIM_INTERCEPTOR_IMPL(__getdelim)
// There's no __getdelim() on FreeBSD, so we supply the getdelim() interceptor
// with its own body.
INTERCEPTOR(SSIZE_T, getdelim, char **lineptr, SIZE_T *n, int delim,
void *stream)
GETDELIM_INTERCEPTOR_IMPL(getdelim)
#define INIT_GETLINE \
COMMON_INTERCEPT_FUNCTION(getline); \
COMMON_INTERCEPT_FUNCTION(__getdelim); \
COMMON_INTERCEPT_FUNCTION(getdelim);
#else
#define INIT_GETLINE
#endif
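// The canonical getline() loop that the unpoisoning above is meant to cover
// (hypothetical user code, for illustration):
//
//   #include <stdio.h>
//   #include <stdlib.h>
//   void print_lines(FILE *f) {
//     char *line = NULL;
//     size_t cap = 0;
//     ssize_t len;
//     while ((len = getline(&line, &cap, f)) != -1) {
//       // line, cap and the len + 1 bytes at *line (including the NUL)
//       // were all marked written by the interceptor.
//       fwrite(line, 1, (size_t)len, stdout);
//     }
//     free(line);
//   }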
#if SANITIZER_INTERCEPT_ICONV
INTERCEPTOR(SIZE_T, iconv, void *cd, char **inbuf, SIZE_T *inbytesleft,
char **outbuf, SIZE_T *outbytesleft) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, iconv, cd, inbuf, inbytesleft, outbuf,
outbytesleft);
if (inbytesleft)
COMMON_INTERCEPTOR_READ_RANGE(ctx, inbytesleft, sizeof(*inbytesleft));
if (inbuf && inbytesleft)
COMMON_INTERCEPTOR_READ_RANGE(ctx, *inbuf, *inbytesleft);
if (outbytesleft)
COMMON_INTERCEPTOR_READ_RANGE(ctx, outbytesleft, sizeof(*outbytesleft));
void *outbuf_orig = outbuf ? *outbuf : nullptr;
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(iconv)(cd, inbuf, inbytesleft, outbuf, outbytesleft);
if (outbuf && *outbuf > outbuf_orig) {
SIZE_T sz = (char *)*outbuf - (char *)outbuf_orig;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, outbuf_orig, sz);
}
return res;
}
#define INIT_ICONV COMMON_INTERCEPT_FUNCTION(iconv);
#else
#define INIT_ICONV
#endif
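// The write range above is derived from how far iconv() advanced *outbuf.
// A hypothetical caller, for illustration:
//
//   #include <iconv.h>
//   size_t convert(iconv_t cd, char *in, size_t inlen,
//                  char *out, size_t outlen) {
//     char *inp = in, *outp = out;
//     iconv(cd, &inp, &inlen, &outp, &outlen);
//     // Exactly outp - out bytes of 'out' were unpoisoned, i.e. only what
//     // iconv() actually produced.
//     return (size_t)(outp - out);
//   }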
#if SANITIZER_INTERCEPT_TIMES
INTERCEPTOR(__sanitizer_clock_t, times, void *tms) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, times, tms);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_clock_t res = REAL(times)(tms);
if (res != (__sanitizer_clock_t)-1 && tms)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tms, struct_tms_sz);
return res;
}
#define INIT_TIMES COMMON_INTERCEPT_FUNCTION(times);
#else
#define INIT_TIMES
#endif
#if SANITIZER_S390 && \
(SANITIZER_INTERCEPT_TLS_GET_ADDR || SANITIZER_INTERCEPT_TLS_GET_OFFSET)
extern "C" uptr __tls_get_offset_wrapper(void *arg, uptr (*fn)(void *arg));
DEFINE_REAL(uptr, __tls_get_offset, void *arg)
#endif
#if SANITIZER_INTERCEPT_TLS_GET_ADDR
#if !SANITIZER_S390
#define INIT_TLS_GET_ADDR COMMON_INTERCEPT_FUNCTION(__tls_get_addr)
// If you see any crashes around this function, there are 2 known issues with
// it: 1. __tls_get_addr can be called with a misaligned stack due to:
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
// 2. It can be called recursively if sanitizer code uses __tls_get_addr
// to access thread-local variables (this should not happen normally,
// because sanitizers use the initial-exec TLS model).
INTERCEPTOR(void *, __tls_get_addr, void *arg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __tls_get_addr, arg);
void *res = REAL(__tls_get_addr)(arg);
uptr tls_begin, tls_end;
COMMON_INTERCEPTOR_GET_TLS_RANGE(&tls_begin, &tls_end);
DTLS::DTV *dtv = DTLS_on_tls_get_addr(arg, res, tls_begin, tls_end);
if (dtv) {
// New DTLS block has been allocated.
COMMON_INTERCEPTOR_INITIALIZE_RANGE((void *)dtv->beg, dtv->size);
}
return res;
}
#if SANITIZER_PPC
// On PowerPC, we also need to intercept __tls_get_addr_opt, which has
// mostly the same semantics as __tls_get_addr, but whose presence enables
// some optimizations in the linker (which are safe to ignore here).
extern "C" __attribute__((alias("__interceptor___tls_get_addr"),
visibility("default")))
void *__tls_get_addr_opt(void *arg);
#endif
#else // SANITIZER_S390
// On s390, we have to intercept two functions here:
// - __tls_get_addr_internal, which is a glibc-internal function that is like
// the usual __tls_get_addr, but returns a TP-relative offset instead of
// a proper pointer. It is used by dlsym for TLS symbols.
// - __tls_get_offset, which is like the above, but takes a GOT-relative
// descriptor offset as an argument instead of a pointer. The GOT address
// is passed in r12, so it's necessary to write this one in assembly. This
// is the function used by the compiler.
#define INIT_TLS_GET_ADDR COMMON_INTERCEPT_FUNCTION(__tls_get_offset)
INTERCEPTOR(uptr, __tls_get_addr_internal, void *arg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __tls_get_addr_internal, arg);
uptr res = __tls_get_offset_wrapper(arg, REAL(__tls_get_offset));
uptr tp = reinterpret_cast<uptr>(__builtin_thread_pointer());
void *ptr = reinterpret_cast<void *>(res + tp);
uptr tls_begin, tls_end;
COMMON_INTERCEPTOR_GET_TLS_RANGE(&tls_begin, &tls_end);
DTLS::DTV *dtv = DTLS_on_tls_get_addr(arg, ptr, tls_begin, tls_end);
if (dtv) {
// New DTLS block has been allocated.
COMMON_INTERCEPTOR_INITIALIZE_RANGE((void *)dtv->beg, dtv->size);
}
return res;
}
#endif // SANITIZER_S390
#else
#define INIT_TLS_GET_ADDR
#endif
#if SANITIZER_S390 && \
(SANITIZER_INTERCEPT_TLS_GET_ADDR || SANITIZER_INTERCEPT_TLS_GET_OFFSET)
extern "C" uptr __tls_get_offset(void *arg);
extern "C" uptr __interceptor___tls_get_offset(void *arg);
// We need a hidden symbol aliasing the above, so that we can jump
// directly to it from the assembly below.
extern "C" __attribute__((alias("__interceptor___tls_get_addr_internal"),
visibility("hidden")))
uptr __tls_get_addr_hidden(void *arg);
// Now carefully intercept __tls_get_offset.
asm(
".text\n"
// The __interceptor_ version has to exist, so that gen_dynamic_list.py
// exports our symbol.
".weak __tls_get_offset\n"
".type __tls_get_offset, @function\n"
"__tls_get_offset:\n"
".global __interceptor___tls_get_offset\n"
".type __interceptor___tls_get_offset, @function\n"
"__interceptor___tls_get_offset:\n"
#ifdef __s390x__
"la %r2, 0(%r2,%r12)\n"
"jg __tls_get_addr_hidden\n"
#else
"basr %r3,0\n"
"0: la %r2,0(%r2,%r12)\n"
"l %r4,1f-0b(%r3)\n"
"b 0(%r4,%r3)\n"
"1: .long __tls_get_addr_hidden - 0b\n"
#endif
".size __interceptor___tls_get_offset, .-__interceptor___tls_get_offset\n"
// Assembly wrapper to call REAL(__tls_get_offset)(arg)
".type __tls_get_offset_wrapper, @function\n"
"__tls_get_offset_wrapper:\n"
#ifdef __s390x__
"sgr %r2,%r12\n"
#else
"sr %r2,%r12\n"
#endif
"br %r3\n"
".size __tls_get_offset_wrapper, .-__tls_get_offset_wrapper\n"
);
#endif
#if SANITIZER_INTERCEPT_LISTXATTR
INTERCEPTOR(SSIZE_T, listxattr, const char *path, char *list, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, listxattr, path, list, size);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(listxattr)(path, list, size);
// Here and below, size == 0 is a special case where nothing is written to the
// buffer, and res contains the desired buffer size.
if (size && res > 0 && list) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, list, res);
return res;
}
INTERCEPTOR(SSIZE_T, llistxattr, const char *path, char *list, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, llistxattr, path, list, size);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(llistxattr)(path, list, size);
if (size && res > 0 && list) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, list, res);
return res;
}
INTERCEPTOR(SSIZE_T, flistxattr, int fd, char *list, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, flistxattr, fd, list, size);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(flistxattr)(fd, list, size);
if (size && res > 0 && list) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, list, res);
return res;
}
#define INIT_LISTXATTR \
COMMON_INTERCEPT_FUNCTION(listxattr); \
COMMON_INTERCEPT_FUNCTION(llistxattr); \
COMMON_INTERCEPT_FUNCTION(flistxattr);
#else
#define INIT_LISTXATTR
#endif
#if SANITIZER_INTERCEPT_GETXATTR
INTERCEPTOR(SSIZE_T, getxattr, const char *path, const char *name, char *value,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getxattr, path, name, value, size);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(getxattr)(path, name, value, size);
if (size && res > 0 && value) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, res);
return res;
}
INTERCEPTOR(SSIZE_T, lgetxattr, const char *path, const char *name, char *value,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgetxattr, path, name, value, size);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(lgetxattr)(path, name, value, size);
if (size && res > 0 && value) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, res);
return res;
}
INTERCEPTOR(SSIZE_T, fgetxattr, int fd, const char *name, char *value,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetxattr, fd, name, value, size);
if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(fgetxattr)(fd, name, value, size);
if (size && res > 0 && value) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, res);
return res;
}
#define INIT_GETXATTR \
COMMON_INTERCEPT_FUNCTION(getxattr); \
COMMON_INTERCEPT_FUNCTION(lgetxattr); \
COMMON_INTERCEPT_FUNCTION(fgetxattr);
#else
#define INIT_GETXATTR
#endif
#if SANITIZER_INTERCEPT_GETRESID
INTERCEPTOR(int, getresuid, void *ruid, void *euid, void *suid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getresuid, ruid, euid, suid);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getresuid)(ruid, euid, suid);
if (res >= 0) {
if (ruid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ruid, uid_t_sz);
if (euid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, euid, uid_t_sz);
if (suid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, suid, uid_t_sz);
}
return res;
}
INTERCEPTOR(int, getresgid, void *rgid, void *egid, void *sgid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getresgid, rgid, egid, sgid);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getresgid)(rgid, egid, sgid);
if (res >= 0) {
if (rgid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rgid, gid_t_sz);
if (egid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, egid, gid_t_sz);
if (sgid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sgid, gid_t_sz);
}
return res;
}
#define INIT_GETRESID \
COMMON_INTERCEPT_FUNCTION(getresuid); \
COMMON_INTERCEPT_FUNCTION(getresgid);
#else
#define INIT_GETRESID
#endif
#if SANITIZER_INTERCEPT_GETIFADDRS
// As long as getifaddrs()/freeifaddrs() use calloc()/free(), we don't need to
// intercept freeifaddrs(). If that ceases to be the case, we might need to
// intercept it to poison the memory again.
INTERCEPTOR(int, getifaddrs, __sanitizer_ifaddrs **ifap) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getifaddrs, ifap);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getifaddrs)(ifap);
if (res == 0 && ifap) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ifap, sizeof(void *));
__sanitizer_ifaddrs *p = *ifap;
while (p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(__sanitizer_ifaddrs));
if (p->ifa_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ifa_name,
internal_strlen(p->ifa_name) + 1);
if (p->ifa_addr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ifa_addr, struct_sockaddr_sz);
if (p->ifa_netmask)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ifa_netmask, struct_sockaddr_sz);
// On Linux this is a union, but the other member also points to a
// struct sockaddr, so the following is sufficient.
if (p->ifa_dstaddr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ifa_dstaddr, struct_sockaddr_sz);
// FIXME(smatveev): Unpoison p->ifa_data as well.
p = p->ifa_next;
}
}
return res;
}
#define INIT_GETIFADDRS \
COMMON_INTERCEPT_FUNCTION(getifaddrs);
#else
#define INIT_GETIFADDRS
#endif
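// The loop above mirrors the canonical traversal of the getifaddrs() list.
// A hypothetical caller, for illustration:
//
//   #include <ifaddrs.h>
//   #include <stdio.h>
//   void list_interfaces(void) {
//     struct ifaddrs *ifap;
//     if (getifaddrs(&ifap) == 0) {
//       for (struct ifaddrs *p = ifap; p; p = p->ifa_next)
//         if (p->ifa_name) puts(p->ifa_name);  // unpoisoned above
//       freeifaddrs(ifap);
//     }
//   }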
#if SANITIZER_INTERCEPT_IF_INDEXTONAME
INTERCEPTOR(char *, if_indextoname, unsigned int ifindex, char* ifname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, if_indextoname, ifindex, ifname);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(if_indextoname)(ifindex, ifname);
if (res && ifname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ifname, internal_strlen(ifname) + 1);
return res;
}
INTERCEPTOR(unsigned int, if_nametoindex, const char* ifname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, if_nametoindex, ifname);
if (ifname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ifname, internal_strlen(ifname) + 1);
return REAL(if_nametoindex)(ifname);
}
#define INIT_IF_INDEXTONAME \
COMMON_INTERCEPT_FUNCTION(if_indextoname); \
COMMON_INTERCEPT_FUNCTION(if_nametoindex);
#else
#define INIT_IF_INDEXTONAME
#endif
#if SANITIZER_INTERCEPT_CAPGET
INTERCEPTOR(int, capget, void *hdrp, void *datap) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, capget, hdrp, datap);
if (hdrp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, hdrp, __user_cap_header_struct_sz);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(capget)(hdrp, datap);
if (res == 0 && datap)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, datap, __user_cap_data_struct_sz);
// capget() can also return -1 and write to hdrp->version if the version
// passed in hdrp->version is unsupported. But that's not a trivial condition
// to check, and anyway COMMON_INTERCEPTOR_READ_RANGE protects us to some
// extent.
return res;
}
INTERCEPTOR(int, capset, void *hdrp, const void *datap) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, capset, hdrp, datap);
if (hdrp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, hdrp, __user_cap_header_struct_sz);
if (datap)
COMMON_INTERCEPTOR_READ_RANGE(ctx, datap, __user_cap_data_struct_sz);
return REAL(capset)(hdrp, datap);
}
#define INIT_CAPGET \
COMMON_INTERCEPT_FUNCTION(capget); \
COMMON_INTERCEPT_FUNCTION(capset);
#else
#define INIT_CAPGET
#endif
#if SANITIZER_INTERCEPT_AEABI_MEM
INTERCEPTOR(void *, __aeabi_memmove, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
}
INTERCEPTOR(void *, __aeabi_memmove4, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
}
INTERCEPTOR(void *, __aeabi_memmove8, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
}
INTERCEPTOR(void *, __aeabi_memcpy, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
}
INTERCEPTOR(void *, __aeabi_memcpy4, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
}
INTERCEPTOR(void *, __aeabi_memcpy8, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
}
// Note the argument order.
INTERCEPTOR(void *, __aeabi_memset, void *block, uptr size, int c) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
}
INTERCEPTOR(void *, __aeabi_memset4, void *block, uptr size, int c) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
}
INTERCEPTOR(void *, __aeabi_memset8, void *block, uptr size, int c) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
}
INTERCEPTOR(void *, __aeabi_memclr, void *block, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
}
INTERCEPTOR(void *, __aeabi_memclr4, void *block, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
}
INTERCEPTOR(void *, __aeabi_memclr8, void *block, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
}
#define INIT_AEABI_MEM \
COMMON_INTERCEPT_FUNCTION(__aeabi_memmove); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memmove4); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memmove8); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memcpy); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memcpy4); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memcpy8); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memset); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memset4); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memset8); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memclr); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memclr4); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memclr8);
#else
#define INIT_AEABI_MEM
#endif // SANITIZER_INTERCEPT_AEABI_MEM
#if SANITIZER_INTERCEPT___BZERO
INTERCEPTOR(void *, __bzero, void *block, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
}
#define INIT___BZERO COMMON_INTERCEPT_FUNCTION(__bzero);
#else
#define INIT___BZERO
#endif // SANITIZER_INTERCEPT___BZERO
#if SANITIZER_INTERCEPT_BZERO
INTERCEPTOR(void *, bzero, void *block, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
}
#define INIT_BZERO COMMON_INTERCEPT_FUNCTION(bzero);
#else
#define INIT_BZERO
#endif // SANITIZER_INTERCEPT_BZERO
#if SANITIZER_INTERCEPT_FTIME
INTERCEPTOR(int, ftime, __sanitizer_timeb *tp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ftime, tp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(ftime)(tp);
if (tp)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tp, sizeof(*tp));
return res;
}
#define INIT_FTIME COMMON_INTERCEPT_FUNCTION(ftime);
#else
#define INIT_FTIME
#endif // SANITIZER_INTERCEPT_FTIME
#if SANITIZER_INTERCEPT_XDR
INTERCEPTOR(void, xdrmem_create, __sanitizer_XDR *xdrs, uptr addr,
unsigned size, int op) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdrmem_create, xdrs, addr, size, op);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
REAL(xdrmem_create)(xdrs, addr, size, op);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, xdrs, sizeof(*xdrs));
if (op == __sanitizer_XDR_ENCODE) {
// It's not obvious how much data individual xdr_ routines write.
// Simply unpoison the entire target buffer in advance.
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (void *)addr, size);
}
}
INTERCEPTOR(void, xdrstdio_create, __sanitizer_XDR *xdrs, void *file, int op) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdrstdio_create, xdrs, file, op);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
REAL(xdrstdio_create)(xdrs, file, op);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, xdrs, sizeof(*xdrs));
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
#define XDR_INTERCEPTOR(F, T) \
INTERCEPTOR(int, F, __sanitizer_XDR *xdrs, T *p) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, F, xdrs, p); \
if (p && xdrs->x_op == __sanitizer_XDR_ENCODE) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, p, sizeof(*p)); \
int res = REAL(F)(xdrs, p); \
if (res && p && xdrs->x_op == __sanitizer_XDR_DECODE) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p)); \
return res; \
}
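// For illustration, XDR_INTERCEPTOR(xdr_int, int) expands to roughly the
// following (a mechanical expansion of the macro above):
//   INTERCEPTOR(int, xdr_int, __sanitizer_XDR *xdrs, int *p) {
//     void *ctx;
//     COMMON_INTERCEPTOR_ENTER(ctx, xdr_int, xdrs, p);
//     // Encoding reads the application's value...
//     if (p && xdrs->x_op == __sanitizer_XDR_ENCODE)
//       COMMON_INTERCEPTOR_READ_RANGE(ctx, p, sizeof(*p));
//     int res = REAL(xdr_int)(xdrs, p);
//     // ...decoding writes (and thus initializes) it.
//     if (res && p && xdrs->x_op == __sanitizer_XDR_DECODE)
//       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
//     return res;
//   }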
XDR_INTERCEPTOR(xdr_short, short)
XDR_INTERCEPTOR(xdr_u_short, unsigned short)
XDR_INTERCEPTOR(xdr_int, int)
XDR_INTERCEPTOR(xdr_u_int, unsigned)
XDR_INTERCEPTOR(xdr_long, long)
XDR_INTERCEPTOR(xdr_u_long, unsigned long)
XDR_INTERCEPTOR(xdr_hyper, long long)
XDR_INTERCEPTOR(xdr_u_hyper, unsigned long long)
XDR_INTERCEPTOR(xdr_longlong_t, long long)
XDR_INTERCEPTOR(xdr_u_longlong_t, unsigned long long)
XDR_INTERCEPTOR(xdr_int8_t, u8)
XDR_INTERCEPTOR(xdr_uint8_t, u8)
XDR_INTERCEPTOR(xdr_int16_t, u16)
XDR_INTERCEPTOR(xdr_uint16_t, u16)
XDR_INTERCEPTOR(xdr_int32_t, u32)
XDR_INTERCEPTOR(xdr_uint32_t, u32)
XDR_INTERCEPTOR(xdr_int64_t, u64)
XDR_INTERCEPTOR(xdr_uint64_t, u64)
XDR_INTERCEPTOR(xdr_quad_t, long long)
XDR_INTERCEPTOR(xdr_u_quad_t, unsigned long long)
XDR_INTERCEPTOR(xdr_bool, bool)
XDR_INTERCEPTOR(xdr_enum, int)
XDR_INTERCEPTOR(xdr_char, char)
XDR_INTERCEPTOR(xdr_u_char, unsigned char)
XDR_INTERCEPTOR(xdr_float, float)
XDR_INTERCEPTOR(xdr_double, double)
// FIXME: intercept xdr_array, opaque, union, vector, reference, pointer,
// wrapstring, sizeof
INTERCEPTOR(int, xdr_bytes, __sanitizer_XDR *xdrs, char **p, unsigned *sizep,
unsigned maxsize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdr_bytes, xdrs, p, sizep, maxsize);
if (p && sizep && xdrs->x_op == __sanitizer_XDR_ENCODE) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, p, sizeof(*p));
COMMON_INTERCEPTOR_READ_RANGE(ctx, sizep, sizeof(*sizep));
COMMON_INTERCEPTOR_READ_RANGE(ctx, *p, *sizep);
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(xdr_bytes)(xdrs, p, sizep, maxsize);
if (p && sizep && xdrs->x_op == __sanitizer_XDR_DECODE) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sizep, sizeof(*sizep));
if (res && *p && *sizep) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, *sizep);
}
return res;
}
INTERCEPTOR(int, xdr_string, __sanitizer_XDR *xdrs, char **p,
unsigned maxsize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdr_string, xdrs, p, maxsize);
if (p && xdrs->x_op == __sanitizer_XDR_ENCODE) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, p, sizeof(*p));
COMMON_INTERCEPTOR_READ_RANGE(ctx, *p, internal_strlen(*p) + 1);
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(xdr_string)(xdrs, p, maxsize);
if (p && xdrs->x_op == __sanitizer_XDR_DECODE) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
if (res && *p)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, internal_strlen(*p) + 1);
}
return res;
}
#define INIT_XDR \
COMMON_INTERCEPT_FUNCTION(xdrmem_create); \
COMMON_INTERCEPT_FUNCTION(xdrstdio_create); \
COMMON_INTERCEPT_FUNCTION(xdr_short); \
COMMON_INTERCEPT_FUNCTION(xdr_u_short); \
COMMON_INTERCEPT_FUNCTION(xdr_int); \
COMMON_INTERCEPT_FUNCTION(xdr_u_int); \
COMMON_INTERCEPT_FUNCTION(xdr_long); \
COMMON_INTERCEPT_FUNCTION(xdr_u_long); \
COMMON_INTERCEPT_FUNCTION(xdr_hyper); \
COMMON_INTERCEPT_FUNCTION(xdr_u_hyper); \
COMMON_INTERCEPT_FUNCTION(xdr_longlong_t); \
COMMON_INTERCEPT_FUNCTION(xdr_u_longlong_t); \
COMMON_INTERCEPT_FUNCTION(xdr_int8_t); \
COMMON_INTERCEPT_FUNCTION(xdr_uint8_t); \
COMMON_INTERCEPT_FUNCTION(xdr_int16_t); \
COMMON_INTERCEPT_FUNCTION(xdr_uint16_t); \
COMMON_INTERCEPT_FUNCTION(xdr_int32_t); \
COMMON_INTERCEPT_FUNCTION(xdr_uint32_t); \
COMMON_INTERCEPT_FUNCTION(xdr_int64_t); \
COMMON_INTERCEPT_FUNCTION(xdr_uint64_t); \
COMMON_INTERCEPT_FUNCTION(xdr_quad_t); \
COMMON_INTERCEPT_FUNCTION(xdr_u_quad_t); \
COMMON_INTERCEPT_FUNCTION(xdr_bool); \
COMMON_INTERCEPT_FUNCTION(xdr_enum); \
COMMON_INTERCEPT_FUNCTION(xdr_char); \
COMMON_INTERCEPT_FUNCTION(xdr_u_char); \
COMMON_INTERCEPT_FUNCTION(xdr_float); \
COMMON_INTERCEPT_FUNCTION(xdr_double); \
COMMON_INTERCEPT_FUNCTION(xdr_bytes); \
COMMON_INTERCEPT_FUNCTION(xdr_string);
#else
#define INIT_XDR
#endif // SANITIZER_INTERCEPT_XDR
#if SANITIZER_INTERCEPT_XDRREC
typedef int (*xdrrec_cb)(char*, char*, int);
struct XdrRecWrapper {
char *handle;
xdrrec_cb rd, wr;
};
typedef AddrHashMap<XdrRecWrapper *, 11> XdrRecWrapMap;
static XdrRecWrapMap *xdrrec_wrap_map;
static int xdrrec_wr_wrap(char *handle, char *buf, int count) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(buf, count);
XdrRecWrapper *wrap = (XdrRecWrapper *)handle;
return wrap->wr(wrap->handle, buf, count);
}
static int xdrrec_rd_wrap(char *handle, char *buf, int count) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
XdrRecWrapper *wrap = (XdrRecWrapper *)handle;
return wrap->rd(wrap->handle, buf, count);
}
// This doesn't apply to the Solaris version, which has a different function
// signature.
INTERCEPTOR(void, xdrrec_create, __sanitizer_XDR *xdr, unsigned sndsize,
unsigned rcvsize, char *handle, int (*rd)(char*, char*, int),
int (*wr)(char*, char*, int)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdrrec_create, xdr, sndsize, rcvsize,
handle, rd, wr);
COMMON_INTERCEPTOR_READ_RANGE(ctx, &xdr->x_op, sizeof xdr->x_op);
// We can't allocate a wrapper on the stack, as the handle is used outside
// this stack frame. So we put it on the heap and track it in a hash map
// keyed by x_private. When xdr_destroy is later called, we look the wrapper
// up in the map, free it, and remove the map entry.
XdrRecWrapper *wrap_data =
(XdrRecWrapper *)InternalAlloc(sizeof(XdrRecWrapper));
wrap_data->handle = handle;
wrap_data->rd = rd;
wrap_data->wr = wr;
if (wr)
wr = xdrrec_wr_wrap;
if (rd)
rd = xdrrec_rd_wrap;
handle = (char *)wrap_data;
REAL(xdrrec_create)(xdr, sndsize, rcvsize, handle, rd, wr);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, xdr, sizeof *xdr);
XdrRecWrapMap::Handle wrap(xdrrec_wrap_map, xdr->x_private, false, true);
*wrap = wrap_data;
}
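// Illustrative call flow: once xdrrec_create() returns, a read on the stream
// goes REAL(xdrrec) -> xdrrec_rd_wrap((char *)wrap_data, buf, count)
// -> wrap_data->rd(original handle, buf, count); the write path likewise goes
// through xdrrec_wr_wrap(), which unpoisons buf before the user's callback
// runs. xdr_destroy() below recovers wrap_data via xdr->x_private.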
// We intercept xdr_destroy only so the wrapper's memory can be freed;
// the interception is otherwise unnecessary.
INTERCEPTOR(void, xdr_destroy, __sanitizer_XDR *xdr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdr_destroy, xdr);
XdrRecWrapMap::Handle wrap(xdrrec_wrap_map, xdr->x_private, true);
InternalFree(*wrap);
REAL(xdr_destroy)(xdr);
}
#define INIT_XDRREC_LINUX \
static u64 xdrrec_wrap_mem[sizeof(XdrRecWrapMap) / sizeof(u64) + 1]; \
xdrrec_wrap_map = new ((void *)&xdrrec_wrap_mem) XdrRecWrapMap(); \
COMMON_INTERCEPT_FUNCTION(xdrrec_create); \
COMMON_INTERCEPT_FUNCTION(xdr_destroy);
#else
#define INIT_XDRREC_LINUX
#endif
#if SANITIZER_INTERCEPT_TSEARCH
INTERCEPTOR(void *, tsearch, void *key, void **rootp,
int (*compar)(const void *, const void *)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tsearch, key, rootp, compar);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
void *res = REAL(tsearch)(key, rootp, compar);
if (res && *(void **)res == key)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, sizeof(void *));
return res;
}
#define INIT_TSEARCH COMMON_INTERCEPT_FUNCTION(tsearch);
#else
#define INIT_TSEARCH
#endif
#if SANITIZER_INTERCEPT_LIBIO_INTERNALS || SANITIZER_INTERCEPT_FOPEN || \
SANITIZER_INTERCEPT_OPEN_MEMSTREAM
void unpoison_file(__sanitizer_FILE *fp) {
#if SANITIZER_HAS_STRUCT_FILE
COMMON_INTERCEPTOR_INITIALIZE_RANGE(fp, sizeof(*fp));
#if SANITIZER_NETBSD
if (fp->_bf._base && fp->_bf._size > 0)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(fp->_bf._base,
fp->_bf._size);
#else
if (fp->_IO_read_base && fp->_IO_read_base < fp->_IO_read_end)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(fp->_IO_read_base,
fp->_IO_read_end - fp->_IO_read_base);
if (fp->_IO_write_base && fp->_IO_write_base < fp->_IO_write_end)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(fp->_IO_write_base,
fp->_IO_write_end - fp->_IO_write_base);
#endif
#endif // SANITIZER_HAS_STRUCT_FILE
}
#endif
#if SANITIZER_INTERCEPT_LIBIO_INTERNALS
// These functions are called when a .c source is built with -O2.
INTERCEPTOR(int, __uflow, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __uflow, fp);
int res = REAL(__uflow)(fp);
unpoison_file(fp);
return res;
}
INTERCEPTOR(int, __underflow, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __underflow, fp);
int res = REAL(__underflow)(fp);
unpoison_file(fp);
return res;
}
INTERCEPTOR(int, __overflow, __sanitizer_FILE *fp, int ch) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __overflow, fp, ch);
int res = REAL(__overflow)(fp, ch);
unpoison_file(fp);
return res;
}
INTERCEPTOR(int, __wuflow, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __wuflow, fp);
int res = REAL(__wuflow)(fp);
unpoison_file(fp);
return res;
}
INTERCEPTOR(int, __wunderflow, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __wunderflow, fp);
int res = REAL(__wunderflow)(fp);
unpoison_file(fp);
return res;
}
INTERCEPTOR(int, __woverflow, __sanitizer_FILE *fp, int ch) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __woverflow, fp, ch);
int res = REAL(__woverflow)(fp, ch);
unpoison_file(fp);
return res;
}
#define INIT_LIBIO_INTERNALS \
COMMON_INTERCEPT_FUNCTION(__uflow); \
COMMON_INTERCEPT_FUNCTION(__underflow); \
COMMON_INTERCEPT_FUNCTION(__overflow); \
COMMON_INTERCEPT_FUNCTION(__wuflow); \
COMMON_INTERCEPT_FUNCTION(__wunderflow); \
COMMON_INTERCEPT_FUNCTION(__woverflow);
#else
#define INIT_LIBIO_INTERNALS
#endif
#if SANITIZER_INTERCEPT_FOPEN
INTERCEPTOR(__sanitizer_FILE *, fopen, const char *path, const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fopen, path, mode);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
__sanitizer_FILE *res = REAL(fopen)(path, mode);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
if (res) unpoison_file(res);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, fdopen, int fd, const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fdopen, fd, mode);
COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
__sanitizer_FILE *res = REAL(fdopen)(fd, mode);
if (res) unpoison_file(res);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, freopen, const char *path, const char *mode,
__sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, freopen, path, mode, fp);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
COMMON_INTERCEPTOR_FILE_CLOSE(ctx, fp);
__sanitizer_FILE *res = REAL(freopen)(path, mode, fp);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
if (res) unpoison_file(res);
return res;
}
#define INIT_FOPEN \
COMMON_INTERCEPT_FUNCTION(fopen); \
COMMON_INTERCEPT_FUNCTION(fdopen); \
COMMON_INTERCEPT_FUNCTION(freopen);
#else
#define INIT_FOPEN
#endif
#if SANITIZER_INTERCEPT_FLOPEN
INTERCEPTOR(int, flopen, const char *path, int flags, ...) {
void *ctx;
va_list ap;
va_start(ap, flags);
u16 mode = static_cast<u16>(va_arg(ap, u32));
va_end(ap);
COMMON_INTERCEPTOR_ENTER(ctx, flopen, path, flags, mode);
if (path) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
}
return REAL(flopen)(path, flags, mode);
}
INTERCEPTOR(int, flopenat, int dirfd, const char *path, int flags, ...) {
void *ctx;
va_list ap;
va_start(ap, flags);
u16 mode = static_cast<u16>(va_arg(ap, u32));
va_end(ap);
COMMON_INTERCEPTOR_ENTER(ctx, flopenat, dirfd, path, flags, mode);
if (path) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
}
return REAL(flopenat)(dirfd, path, flags, mode);
}
#define INIT_FLOPEN \
COMMON_INTERCEPT_FUNCTION(flopen); \
COMMON_INTERCEPT_FUNCTION(flopenat);
#else
#define INIT_FLOPEN
#endif
#if SANITIZER_INTERCEPT_FOPEN64
INTERCEPTOR(__sanitizer_FILE *, fopen64, const char *path, const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fopen64, path, mode);
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
__sanitizer_FILE *res = REAL(fopen64)(path, mode);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
if (res) unpoison_file(res);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, freopen64, const char *path, const char *mode,
__sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, freopen64, path, mode, fp);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
COMMON_INTERCEPTOR_FILE_CLOSE(ctx, fp);
__sanitizer_FILE *res = REAL(freopen64)(path, mode, fp);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
if (res) unpoison_file(res);
return res;
}
#define INIT_FOPEN64 \
COMMON_INTERCEPT_FUNCTION(fopen64); \
COMMON_INTERCEPT_FUNCTION(freopen64);
#else
#define INIT_FOPEN64
#endif
#if SANITIZER_INTERCEPT_OPEN_MEMSTREAM
INTERCEPTOR(__sanitizer_FILE *, open_memstream, char **ptr, SIZE_T *sizeloc) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, open_memstream, ptr, sizeloc);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_FILE *res = REAL(open_memstream)(ptr, sizeloc);
if (res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, sizeof(*ptr));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sizeloc, sizeof(*sizeloc));
unpoison_file(res);
FileMetadata file = {ptr, sizeloc};
SetInterceptorMetadata(res, file);
}
return res;
}
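// The saved (ptr, sizeloc) metadata is what lets the fflush()/fclose()
// interceptors further down unpoison the stream's current buffer: both look
// the FILE up with GetInterceptorMetadata() and initialize the range
// [*m->addr, *m->addr + *m->size) once the buffer and its size become
// visible to the program.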
INTERCEPTOR(__sanitizer_FILE *, open_wmemstream, wchar_t **ptr,
SIZE_T *sizeloc) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, open_wmemstream, ptr, sizeloc);
__sanitizer_FILE *res = REAL(open_wmemstream)(ptr, sizeloc);
if (res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, sizeof(*ptr));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sizeloc, sizeof(*sizeloc));
unpoison_file(res);
FileMetadata file = {(char **)ptr, sizeloc};
SetInterceptorMetadata(res, file);
}
return res;
}
INTERCEPTOR(__sanitizer_FILE *, fmemopen, void *buf, SIZE_T size,
const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fmemopen, buf, size, mode);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_FILE *res = REAL(fmemopen)(buf, size, mode);
if (res) unpoison_file(res);
return res;
}
#define INIT_OPEN_MEMSTREAM \
COMMON_INTERCEPT_FUNCTION(open_memstream); \
COMMON_INTERCEPT_FUNCTION(open_wmemstream); \
COMMON_INTERCEPT_FUNCTION(fmemopen);
#else
#define INIT_OPEN_MEMSTREAM
#endif
#if SANITIZER_INTERCEPT_OBSTACK
static void initialize_obstack(__sanitizer_obstack *obstack) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(obstack, sizeof(*obstack));
if (obstack->chunk)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(obstack->chunk,
sizeof(*obstack->chunk));
}
INTERCEPTOR(int, _obstack_begin_1, __sanitizer_obstack *obstack, int sz,
int align, void *(*alloc_fn)(uptr arg, uptr sz),
void (*free_fn)(uptr arg, void *p)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, _obstack_begin_1, obstack, sz, align, alloc_fn,
free_fn);
int res = REAL(_obstack_begin_1)(obstack, sz, align, alloc_fn, free_fn);
if (res) initialize_obstack(obstack);
return res;
}
INTERCEPTOR(int, _obstack_begin, __sanitizer_obstack *obstack, int sz,
int align, void *(*alloc_fn)(uptr sz), void (*free_fn)(void *p)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, _obstack_begin, obstack, sz, align, alloc_fn,
free_fn);
int res = REAL(_obstack_begin)(obstack, sz, align, alloc_fn, free_fn);
if (res) initialize_obstack(obstack);
return res;
}
INTERCEPTOR(void, _obstack_newchunk, __sanitizer_obstack *obstack, int length) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, _obstack_newchunk, obstack, length);
REAL(_obstack_newchunk)(obstack, length);
if (obstack->chunk)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(
obstack->chunk, obstack->next_free - (char *)obstack->chunk);
}
#define INIT_OBSTACK \
COMMON_INTERCEPT_FUNCTION(_obstack_begin_1); \
COMMON_INTERCEPT_FUNCTION(_obstack_begin); \
COMMON_INTERCEPT_FUNCTION(_obstack_newchunk);
#else
#define INIT_OBSTACK
#endif
#if SANITIZER_INTERCEPT_FFLUSH
INTERCEPTOR(int, fflush, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fflush, fp);
if (fp)
unpoison_file(fp);
int res = REAL(fflush)(fp);
// FIXME: handle fp == NULL
if (fp) {
const FileMetadata *m = GetInterceptorMetadata(fp);
if (m) COMMON_INTERCEPTOR_INITIALIZE_RANGE(*m->addr, *m->size);
}
return res;
}
#define INIT_FFLUSH COMMON_INTERCEPT_FUNCTION(fflush);
#else
#define INIT_FFLUSH
#endif
#if SANITIZER_INTERCEPT_FCLOSE
INTERCEPTOR(int, fclose, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fclose, fp);
COMMON_INTERCEPTOR_FILE_CLOSE(ctx, fp);
const FileMetadata *m = GetInterceptorMetadata(fp);
if (fp)
unpoison_file(fp);
int res = REAL(fclose)(fp);
if (m) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*m->addr, *m->size);
DeleteInterceptorMetadata(fp);
}
return res;
}
#define INIT_FCLOSE COMMON_INTERCEPT_FUNCTION(fclose);
#else
#define INIT_FCLOSE
#endif
#if SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, dlopen, filename, flag);
if (filename) COMMON_INTERCEPTOR_READ_STRING(ctx, filename, 0);
void *res = COMMON_INTERCEPTOR_DLOPEN(filename, flag);
Symbolizer::GetOrInit()->InvalidateModuleList();
COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, res);
return res;
}
INTERCEPTOR(int, dlclose, void *handle) {
void *ctx;
COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, dlclose, handle);
int res = REAL(dlclose)(handle);
Symbolizer::GetOrInit()->InvalidateModuleList();
COMMON_INTERCEPTOR_LIBRARY_UNLOADED();
return res;
}
#define INIT_DLOPEN_DLCLOSE \
COMMON_INTERCEPT_FUNCTION(dlopen); \
COMMON_INTERCEPT_FUNCTION(dlclose);
#else
#define INIT_DLOPEN_DLCLOSE
#endif
#if SANITIZER_INTERCEPT_GETPASS
INTERCEPTOR(char *, getpass, const char *prompt) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpass, prompt);
if (prompt)
COMMON_INTERCEPTOR_READ_RANGE(ctx, prompt, internal_strlen(prompt)+1);
char *res = REAL(getpass)(prompt);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res)+1);
return res;
}
#define INIT_GETPASS COMMON_INTERCEPT_FUNCTION(getpass);
#else
#define INIT_GETPASS
#endif
#if SANITIZER_INTERCEPT_TIMERFD
INTERCEPTOR(int, timerfd_settime, int fd, int flags, void *new_value,
void *old_value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, timerfd_settime, fd, flags, new_value,
old_value);
COMMON_INTERCEPTOR_READ_RANGE(ctx, new_value, struct_itimerspec_sz);
int res = REAL(timerfd_settime)(fd, flags, new_value, old_value);
if (res != -1 && old_value)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, old_value, struct_itimerspec_sz);
return res;
}
INTERCEPTOR(int, timerfd_gettime, int fd, void *curr_value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, timerfd_gettime, fd, curr_value);
int res = REAL(timerfd_gettime)(fd, curr_value);
if (res != -1 && curr_value)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, curr_value, struct_itimerspec_sz);
return res;
}
#define INIT_TIMERFD \
COMMON_INTERCEPT_FUNCTION(timerfd_settime); \
COMMON_INTERCEPT_FUNCTION(timerfd_gettime);
#else
#define INIT_TIMERFD
#endif
#if SANITIZER_INTERCEPT_MLOCKX
// The Linux kernel has a bug that can deadlock the kernel if a process
// maps terabytes of memory and then calls mlock().
static void MlockIsUnsupported() {
static atomic_uint8_t printed;
if (atomic_exchange(&printed, 1, memory_order_relaxed))
return;
VPrintf(1, "%s ignores mlock/mlockall/munlock/munlockall\n",
SanitizerToolName);
}
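// Print-once idiom: atomic_exchange() returns the previous value of
// `printed`, so only the first caller observes 0 and emits the message;
// every subsequent call returns early.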
INTERCEPTOR(int, mlock, const void *addr, uptr len) {
MlockIsUnsupported();
return 0;
}
INTERCEPTOR(int, munlock, const void *addr, uptr len) {
MlockIsUnsupported();
return 0;
}
INTERCEPTOR(int, mlockall, int flags) {
MlockIsUnsupported();
return 0;
}
INTERCEPTOR(int, munlockall, void) {
MlockIsUnsupported();
return 0;
}
#define INIT_MLOCKX \
COMMON_INTERCEPT_FUNCTION(mlock); \
COMMON_INTERCEPT_FUNCTION(munlock); \
COMMON_INTERCEPT_FUNCTION(mlockall); \
COMMON_INTERCEPT_FUNCTION(munlockall);
#else
#define INIT_MLOCKX
#endif // SANITIZER_INTERCEPT_MLOCKX
#if SANITIZER_INTERCEPT_FOPENCOOKIE
struct WrappedCookie {
void *real_cookie;
__sanitizer_cookie_io_functions_t real_io_funcs;
};
static uptr wrapped_read(void *cookie, char *buf, uptr size) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedCookie *wrapped_cookie = (WrappedCookie *)cookie;
__sanitizer_cookie_io_read real_read = wrapped_cookie->real_io_funcs.read;
return real_read ? real_read(wrapped_cookie->real_cookie, buf, size) : 0;
}
static uptr wrapped_write(void *cookie, const char *buf, uptr size) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedCookie *wrapped_cookie = (WrappedCookie *)cookie;
__sanitizer_cookie_io_write real_write = wrapped_cookie->real_io_funcs.write;
return real_write ? real_write(wrapped_cookie->real_cookie, buf, size) : size;
}
static int wrapped_seek(void *cookie, u64 *offset, int whence) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(offset, sizeof(*offset));
WrappedCookie *wrapped_cookie = (WrappedCookie *)cookie;
__sanitizer_cookie_io_seek real_seek = wrapped_cookie->real_io_funcs.seek;
return real_seek ? real_seek(wrapped_cookie->real_cookie, offset, whence)
: -1;
}
static int wrapped_close(void *cookie) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
WrappedCookie *wrapped_cookie = (WrappedCookie *)cookie;
__sanitizer_cookie_io_close real_close = wrapped_cookie->real_io_funcs.close;
int res = real_close ? real_close(wrapped_cookie->real_cookie) : 0;
InternalFree(wrapped_cookie);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, fopencookie, void *cookie, const char *mode,
__sanitizer_cookie_io_functions_t io_funcs) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fopencookie, cookie, mode, io_funcs);
WrappedCookie *wrapped_cookie =
(WrappedCookie *)InternalAlloc(sizeof(WrappedCookie));
wrapped_cookie->real_cookie = cookie;
wrapped_cookie->real_io_funcs = io_funcs;
__sanitizer_FILE *res =
REAL(fopencookie)(wrapped_cookie, mode, {wrapped_read, wrapped_write,
wrapped_seek, wrapped_close});
return res;
}
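// Illustration (hypothetical application code; my_read, my_write, my_close
// and my_cookie are made-up names): with this interceptor in place, I/O on
// the returned stream reaches the user callbacks through the wrappers, which
// unpoison their arguments first:
//   __sanitizer_cookie_io_functions_t io = {my_read, my_write, 0, my_close};
//   FILE *f = fopencookie(my_cookie, "w", io);  // cookie gets wrapped here
//   fwrite(buf, 1, n, f);  // ... -> wrapped_write(wrapper, buf, n)
//                          //     -> my_write(my_cookie, buf, n)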
#define INIT_FOPENCOOKIE COMMON_INTERCEPT_FUNCTION(fopencookie);
#else
#define INIT_FOPENCOOKIE
#endif // SANITIZER_INTERCEPT_FOPENCOOKIE
#if SANITIZER_INTERCEPT_SEM
INTERCEPTOR(int, sem_init, __sanitizer_sem_t *s, int pshared, unsigned value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_init, s, pshared, value);
// Workaround a bug in glibc's "old" semaphore implementation by
// zero-initializing the sem_t contents. This has to be done here because
- // interceptors bind to the lowest symbols version by default, hitting the
+ // interceptors bind to the lowest version before glibc 2.36, hitting the
// buggy code path while the non-sanitized build of the same code works fine.
REAL(memset)(s, 0, sizeof(*s));
int res = REAL(sem_init)(s, pshared, value);
return res;
}
INTERCEPTOR(int, sem_destroy, __sanitizer_sem_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_destroy, s);
int res = REAL(sem_destroy)(s);
return res;
}
INTERCEPTOR(int, sem_wait, __sanitizer_sem_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_wait, s);
int res = COMMON_INTERCEPTOR_BLOCK_REAL(sem_wait)(s);
if (res == 0) {
COMMON_INTERCEPTOR_ACQUIRE(ctx, (uptr)s);
}
return res;
}
INTERCEPTOR(int, sem_trywait, __sanitizer_sem_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_trywait, s);
int res = REAL(sem_trywait)(s);
if (res == 0) {
COMMON_INTERCEPTOR_ACQUIRE(ctx, (uptr)s);
}
return res;
}
INTERCEPTOR(int, sem_timedwait, __sanitizer_sem_t *s, void *abstime) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_timedwait, s, abstime);
COMMON_INTERCEPTOR_READ_RANGE(ctx, abstime, struct_timespec_sz);
int res = COMMON_INTERCEPTOR_BLOCK_REAL(sem_timedwait)(s, abstime);
if (res == 0) {
COMMON_INTERCEPTOR_ACQUIRE(ctx, (uptr)s);
}
return res;
}
INTERCEPTOR(int, sem_post, __sanitizer_sem_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_post, s);
COMMON_INTERCEPTOR_RELEASE(ctx, (uptr)s);
int res = REAL(sem_post)(s);
return res;
}
INTERCEPTOR(int, sem_getvalue, __sanitizer_sem_t *s, int *sval) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_getvalue, s, sval);
int res = REAL(sem_getvalue)(s, sval);
if (res == 0) {
COMMON_INTERCEPTOR_ACQUIRE(ctx, (uptr)s);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sval, sizeof(*sval));
}
return res;
}
INTERCEPTOR(__sanitizer_sem_t *, sem_open, const char *name, int oflag, ...) {
void *ctx;
va_list ap;
va_start(ap, oflag);
u32 mode = va_arg(ap, u32);
u32 value = va_arg(ap, u32);
COMMON_INTERCEPTOR_ENTER(ctx, sem_open, name, oflag, mode, value);
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
__sanitizer_sem_t *s = REAL(sem_open)(name, oflag, mode, value);
if (s)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, sizeof(*s));
va_end(ap);
return s;
}
INTERCEPTOR(int, sem_unlink, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_unlink, name);
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
return REAL(sem_unlink)(name);
}
# define INIT_SEM \
COMMON_INTERCEPT_FUNCTION(sem_init); \
COMMON_INTERCEPT_FUNCTION(sem_destroy); \
COMMON_INTERCEPT_FUNCTION(sem_wait); \
COMMON_INTERCEPT_FUNCTION(sem_trywait); \
COMMON_INTERCEPT_FUNCTION(sem_timedwait); \
COMMON_INTERCEPT_FUNCTION(sem_post); \
COMMON_INTERCEPT_FUNCTION(sem_getvalue); \
COMMON_INTERCEPT_FUNCTION(sem_open); \
COMMON_INTERCEPT_FUNCTION(sem_unlink);
#else
# define INIT_SEM
#endif // SANITIZER_INTERCEPT_SEM
#if SANITIZER_INTERCEPT_PTHREAD_SETCANCEL
INTERCEPTOR(int, pthread_setcancelstate, int state, int *oldstate) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_setcancelstate, state, oldstate);
int res = REAL(pthread_setcancelstate)(state, oldstate);
if (res == 0 && oldstate != nullptr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldstate, sizeof(*oldstate));
return res;
}
INTERCEPTOR(int, pthread_setcanceltype, int type, int *oldtype) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_setcanceltype, type, oldtype);
int res = REAL(pthread_setcanceltype)(type, oldtype);
if (res == 0 && oldtype != nullptr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldtype, sizeof(*oldtype));
return res;
}
#define INIT_PTHREAD_SETCANCEL \
COMMON_INTERCEPT_FUNCTION(pthread_setcancelstate); \
COMMON_INTERCEPT_FUNCTION(pthread_setcanceltype);
#else
#define INIT_PTHREAD_SETCANCEL
#endif
#if SANITIZER_INTERCEPT_MINCORE
INTERCEPTOR(int, mincore, void *addr, uptr length, unsigned char *vec) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mincore, addr, length, vec);
int res = REAL(mincore)(addr, length, vec);
if (res == 0) {
uptr page_size = GetPageSizeCached();
uptr vec_size = ((length + page_size - 1) & (~(page_size - 1))) / page_size;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, vec, vec_size);
}
return res;
}
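// The vec_size expression rounds length up to a whole number of pages before
// dividing, since mincore() writes one byte per page covered. For example,
// with page_size == 4096 and length == 8193, three pages are covered, so
// vec_size == 3 and vec[0..2] are unpoisoned.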
#define INIT_MINCORE COMMON_INTERCEPT_FUNCTION(mincore);
#else
#define INIT_MINCORE
#endif
#if SANITIZER_INTERCEPT_PROCESS_VM_READV
INTERCEPTOR(SSIZE_T, process_vm_readv, int pid, __sanitizer_iovec *local_iov,
uptr liovcnt, __sanitizer_iovec *remote_iov, uptr riovcnt,
uptr flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, process_vm_readv, pid, local_iov, liovcnt,
remote_iov, riovcnt, flags);
SSIZE_T res = REAL(process_vm_readv)(pid, local_iov, liovcnt, remote_iov,
riovcnt, flags);
if (res > 0)
write_iovec(ctx, local_iov, liovcnt, res);
return res;
}
INTERCEPTOR(SSIZE_T, process_vm_writev, int pid, __sanitizer_iovec *local_iov,
uptr liovcnt, __sanitizer_iovec *remote_iov, uptr riovcnt,
uptr flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, process_vm_writev, pid, local_iov, liovcnt,
remote_iov, riovcnt, flags);
SSIZE_T res = REAL(process_vm_writev)(pid, local_iov, liovcnt, remote_iov,
riovcnt, flags);
if (res > 0)
read_iovec(ctx, local_iov, liovcnt, res);
return res;
}
#define INIT_PROCESS_VM_READV \
COMMON_INTERCEPT_FUNCTION(process_vm_readv); \
COMMON_INTERCEPT_FUNCTION(process_vm_writev);
#else
#define INIT_PROCESS_VM_READV
#endif
#if SANITIZER_INTERCEPT_CTERMID
INTERCEPTOR(char *, ctermid, char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ctermid, s);
char *res = REAL(ctermid)(s);
if (res) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
#define INIT_CTERMID COMMON_INTERCEPT_FUNCTION(ctermid);
#else
#define INIT_CTERMID
#endif
#if SANITIZER_INTERCEPT_CTERMID_R
INTERCEPTOR(char *, ctermid_r, char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ctermid_r, s);
char *res = REAL(ctermid_r)(s);
if (res) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
#define INIT_CTERMID_R COMMON_INTERCEPT_FUNCTION(ctermid_r);
#else
#define INIT_CTERMID_R
#endif
#if SANITIZER_INTERCEPT_RECV_RECVFROM
INTERCEPTOR(SSIZE_T, recv, int fd, void *buf, SIZE_T len, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, recv, fd, buf, len, flags);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
SSIZE_T res = REAL(recv)(fd, buf, len, flags);
if (res > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, Min((SIZE_T)res, len));
}
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
INTERCEPTOR(SSIZE_T, recvfrom, int fd, void *buf, SIZE_T len, int flags,
void *srcaddr, int *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, recvfrom, fd, buf, len, flags, srcaddr,
addrlen);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
SIZE_T srcaddr_sz;
if (srcaddr) srcaddr_sz = *addrlen;
(void)srcaddr_sz; // prevent "set but not used" warning
SSIZE_T res = REAL(recvfrom)(fd, buf, len, flags, srcaddr, addrlen);
if (res > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, Min((SIZE_T)res, len));
if (res >= 0 && srcaddr)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(srcaddr,
Min((SIZE_T)*addrlen, srcaddr_sz));
return res;
}
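// Note that *addrlen is an in/out parameter: srcaddr_sz snapshots the
// caller-provided capacity before the call, while the kernel's updated
// *addrlen may be larger if the source address was truncated, hence
// unpoisoning only the Min() of the two.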
#define INIT_RECV_RECVFROM \
COMMON_INTERCEPT_FUNCTION(recv); \
COMMON_INTERCEPT_FUNCTION(recvfrom);
#else
#define INIT_RECV_RECVFROM
#endif
#if SANITIZER_INTERCEPT_SEND_SENDTO
INTERCEPTOR(SSIZE_T, send, int fd, void *buf, SIZE_T len, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, send, fd, buf, len, flags);
if (fd >= 0) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
}
SSIZE_T res = REAL(send)(fd, buf, len, flags);
if (common_flags()->intercept_send && res > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, Min((SIZE_T)res, len));
return res;
}
INTERCEPTOR(SSIZE_T, sendto, int fd, void *buf, SIZE_T len, int flags,
void *dstaddr, int addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sendto, fd, buf, len, flags, dstaddr, addrlen);
if (fd >= 0) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
}
// Can't check dstaddr as it may have uninitialized padding at the end.
SSIZE_T res = REAL(sendto)(fd, buf, len, flags, dstaddr, addrlen);
if (common_flags()->intercept_send && res > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, Min((SIZE_T)res, len));
return res;
}
#define INIT_SEND_SENDTO \
COMMON_INTERCEPT_FUNCTION(send); \
COMMON_INTERCEPT_FUNCTION(sendto);
#else
#define INIT_SEND_SENDTO
#endif
#if SANITIZER_INTERCEPT_EVENTFD_READ_WRITE
INTERCEPTOR(int, eventfd_read, int fd, u64 *value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, eventfd_read, fd, value);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
int res = REAL(eventfd_read)(fd, value);
if (res == 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, sizeof(*value));
if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
return res;
}
INTERCEPTOR(int, eventfd_write, int fd, u64 value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, eventfd_write, fd, value);
if (fd >= 0) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
}
int res = REAL(eventfd_write)(fd, value);
return res;
}
#define INIT_EVENTFD_READ_WRITE \
COMMON_INTERCEPT_FUNCTION(eventfd_read); \
COMMON_INTERCEPT_FUNCTION(eventfd_write)
#else
#define INIT_EVENTFD_READ_WRITE
#endif
#if SANITIZER_INTERCEPT_STAT
INTERCEPTOR(int, stat, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, stat, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(stat)(path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
return res;
}
#define INIT_STAT COMMON_INTERCEPT_FUNCTION(stat)
#else
#define INIT_STAT
#endif
#if SANITIZER_INTERCEPT_STAT64
INTERCEPTOR(int, stat64, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, stat64, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(stat64)(path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat64_sz);
return res;
}
#define INIT_STAT64 COMMON_INTERCEPT_FUNCTION(stat64)
#else
#define INIT_STAT64
#endif
#if SANITIZER_INTERCEPT_LSTAT
INTERCEPTOR(int, lstat, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lstat, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(lstat)(path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
return res;
}
#define INIT_LSTAT COMMON_INTERCEPT_FUNCTION(lstat)
#else
#define INIT_LSTAT
#endif
#if SANITIZER_INTERCEPT_STAT64
INTERCEPTOR(int, lstat64, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lstat64, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(lstat64)(path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat64_sz);
return res;
}
#define INIT_LSTAT64 COMMON_INTERCEPT_FUNCTION(lstat64)
#else
#define INIT_LSTAT64
#endif
#if SANITIZER_INTERCEPT___XSTAT
INTERCEPTOR(int, __xstat, int version, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __xstat, version, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(__xstat)(version, path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
return res;
}
#define INIT___XSTAT COMMON_INTERCEPT_FUNCTION(__xstat)
#else
#define INIT___XSTAT
#endif
#if SANITIZER_INTERCEPT___XSTAT64
INTERCEPTOR(int, __xstat64, int version, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __xstat64, version, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(__xstat64)(version, path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat64_sz);
return res;
}
#define INIT___XSTAT64 COMMON_INTERCEPT_FUNCTION(__xstat64)
#else
#define INIT___XSTAT64
#endif
#if SANITIZER_INTERCEPT___LXSTAT
INTERCEPTOR(int, __lxstat, int version, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __lxstat, version, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(__lxstat)(version, path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
return res;
}
#define INIT___LXSTAT COMMON_INTERCEPT_FUNCTION(__lxstat)
#else
#define INIT___LXSTAT
#endif
#if SANITIZER_INTERCEPT___LXSTAT64
INTERCEPTOR(int, __lxstat64, int version, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __lxstat64, version, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(__lxstat64)(version, path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat64_sz);
return res;
}
#define INIT___LXSTAT64 COMMON_INTERCEPT_FUNCTION(__lxstat64)
#else
#define INIT___LXSTAT64
#endif
// FIXME: add other *stat interceptors
#if SANITIZER_INTERCEPT_UTMP
INTERCEPTOR(void *, getutent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutent, dummy);
void *res = REAL(getutent)(dummy);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
return res;
}
INTERCEPTOR(void *, getutid, void *ut) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutid, ut);
void *res = REAL(getutid)(ut);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
return res;
}
INTERCEPTOR(void *, getutline, void *ut) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutline, ut);
void *res = REAL(getutline)(ut);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
return res;
}
#define INIT_UTMP \
COMMON_INTERCEPT_FUNCTION(getutent); \
COMMON_INTERCEPT_FUNCTION(getutid); \
COMMON_INTERCEPT_FUNCTION(getutline);
#else
#define INIT_UTMP
#endif
#if SANITIZER_INTERCEPT_UTMPX
INTERCEPTOR(void *, getutxent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutxent, dummy);
void *res = REAL(getutxent)(dummy);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
return res;
}
INTERCEPTOR(void *, getutxid, void *ut) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutxid, ut);
void *res = REAL(getutxid)(ut);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
return res;
}
INTERCEPTOR(void *, getutxline, void *ut) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutxline, ut);
void *res = REAL(getutxline)(ut);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
return res;
}
INTERCEPTOR(void *, pututxline, const void *ut) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pututxline, ut);
if (ut)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ut, __sanitizer::struct_utmpx_sz);
void *res = REAL(pututxline)(ut);
if (res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, __sanitizer::struct_utmpx_sz);
return res;
}
#define INIT_UTMPX \
COMMON_INTERCEPT_FUNCTION(getutxent); \
COMMON_INTERCEPT_FUNCTION(getutxid); \
COMMON_INTERCEPT_FUNCTION(getutxline); \
COMMON_INTERCEPT_FUNCTION(pututxline);
#else
#define INIT_UTMPX
#endif
#if SANITIZER_INTERCEPT_GETLOADAVG
INTERCEPTOR(int, getloadavg, double *loadavg, int nelem) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getloadavg, loadavg, nelem);
int res = REAL(getloadavg)(loadavg, nelem);
if (res > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, loadavg, res * sizeof(*loadavg));
return res;
}
#define INIT_GETLOADAVG \
COMMON_INTERCEPT_FUNCTION(getloadavg);
#else
#define INIT_GETLOADAVG
#endif
#if SANITIZER_INTERCEPT_MCHECK_MPROBE
INTERCEPTOR(int, mcheck, void (*abortfunc)(int mstatus)) {
return 0;
}
INTERCEPTOR(int, mcheck_pedantic, void (*abortfunc)(int mstatus)) {
return 0;
}
INTERCEPTOR(int, mprobe, void *ptr) {
return 0;
}
#endif
INTERCEPTOR(SIZE_T, wcslen, const wchar_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcslen, s);
SIZE_T res = REAL(wcslen)(s);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(wchar_t) * (res + 1));
return res;
}
INTERCEPTOR(SIZE_T, wcsnlen, const wchar_t *s, SIZE_T n) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsnlen, s, n);
SIZE_T res = REAL(wcsnlen)(s, n);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(wchar_t) * Min(res + 1, n));
return res;
}
#define INIT_WCSLEN \
COMMON_INTERCEPT_FUNCTION(wcslen); \
COMMON_INTERCEPT_FUNCTION(wcsnlen);
#if SANITIZER_INTERCEPT_WCSCAT
INTERCEPTOR(wchar_t *, wcscat, wchar_t *dst, const wchar_t *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcscat, dst, src);
SIZE_T src_size = internal_wcslen(src);
SIZE_T dst_size = internal_wcslen(dst);
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, (src_size + 1) * sizeof(wchar_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, dst, (dst_size + 1) * sizeof(wchar_t));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst + dst_size,
(src_size + 1) * sizeof(wchar_t));
return REAL(wcscat)(dst, src);
}
INTERCEPTOR(wchar_t *, wcsncat, wchar_t *dst, const wchar_t *src, SIZE_T n) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsncat, dst, src, n);
SIZE_T src_size = internal_wcsnlen(src, n);
SIZE_T dst_size = internal_wcslen(dst);
COMMON_INTERCEPTOR_READ_RANGE(ctx, src,
Min(src_size + 1, n) * sizeof(wchar_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, dst, (dst_size + 1) * sizeof(wchar_t));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst + dst_size,
(src_size + 1) * sizeof(wchar_t));
return REAL(wcsncat)(dst, src, n);
}
#define INIT_WCSCAT \
COMMON_INTERCEPT_FUNCTION(wcscat); \
COMMON_INTERCEPT_FUNCTION(wcsncat);
#else
#define INIT_WCSCAT
#endif
#if SANITIZER_INTERCEPT_WCSDUP
INTERCEPTOR(wchar_t *, wcsdup, wchar_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsdup, s);
SIZE_T len = internal_wcslen(s);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(wchar_t) * (len + 1));
wchar_t *result = REAL(wcsdup)(s);
if (result)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(wchar_t) * (len + 1));
return result;
}
#define INIT_WCSDUP COMMON_INTERCEPT_FUNCTION(wcsdup);
#else
#define INIT_WCSDUP
#endif
#if SANITIZER_INTERCEPT_STRXFRM
static SIZE_T RealStrLen(const char *str) { return internal_strlen(str); }
static SIZE_T RealStrLen(const wchar_t *str) { return internal_wcslen(str); }
#define STRXFRM_INTERCEPTOR_IMPL(strxfrm, dest, src, len, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, strxfrm, dest, src, len, ##__VA_ARGS__); \
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, \
sizeof(*src) * (RealStrLen(src) + 1)); \
SIZE_T res = REAL(strxfrm)(dest, src, len, ##__VA_ARGS__); \
if (res < len) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, sizeof(*src) * (res + 1)); \
return res; \
}
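// strxfrm() returns the length the transformed string requires; dest is only
// guaranteed to contain the full result (plus the terminating NUL) when that
// length is strictly less than len, hence the `res < len` guard above.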
INTERCEPTOR(SIZE_T, strxfrm, char *dest, const char *src, SIZE_T len) {
STRXFRM_INTERCEPTOR_IMPL(strxfrm, dest, src, len);
}
INTERCEPTOR(SIZE_T, strxfrm_l, char *dest, const char *src, SIZE_T len,
void *locale) {
STRXFRM_INTERCEPTOR_IMPL(strxfrm_l, dest, src, len, locale);
}
#define INIT_STRXFRM \
COMMON_INTERCEPT_FUNCTION(strxfrm); \
COMMON_INTERCEPT_FUNCTION(strxfrm_l);
#else
#define INIT_STRXFRM
#endif
#if SANITIZER_INTERCEPT___STRXFRM_L
INTERCEPTOR(SIZE_T, __strxfrm_l, char *dest, const char *src, SIZE_T len,
void *locale) {
STRXFRM_INTERCEPTOR_IMPL(__strxfrm_l, dest, src, len, locale);
}
#define INIT___STRXFRM_L COMMON_INTERCEPT_FUNCTION(__strxfrm_l);
#else
#define INIT___STRXFRM_L
#endif
#if SANITIZER_INTERCEPT_WCSXFRM
INTERCEPTOR(SIZE_T, wcsxfrm, wchar_t *dest, const wchar_t *src, SIZE_T len) {
STRXFRM_INTERCEPTOR_IMPL(wcsxfrm, dest, src, len);
}
INTERCEPTOR(SIZE_T, wcsxfrm_l, wchar_t *dest, const wchar_t *src, SIZE_T len,
void *locale) {
STRXFRM_INTERCEPTOR_IMPL(wcsxfrm_l, dest, src, len, locale);
}
#define INIT_WCSXFRM \
COMMON_INTERCEPT_FUNCTION(wcsxfrm); \
COMMON_INTERCEPT_FUNCTION(wcsxfrm_l);
#else
#define INIT_WCSXFRM
#endif
#if SANITIZER_INTERCEPT___WCSXFRM_L
INTERCEPTOR(SIZE_T, __wcsxfrm_l, wchar_t *dest, const wchar_t *src, SIZE_T len,
void *locale) {
STRXFRM_INTERCEPTOR_IMPL(__wcsxfrm_l, dest, src, len, locale);
}
#define INIT___WCSXFRM_L COMMON_INTERCEPT_FUNCTION(__wcsxfrm_l);
#else
#define INIT___WCSXFRM_L
#endif
#if SANITIZER_INTERCEPT_ACCT
INTERCEPTOR(int, acct, const char *file) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, acct, file);
if (file)
COMMON_INTERCEPTOR_READ_RANGE(ctx, file, internal_strlen(file) + 1);
return REAL(acct)(file);
}
#define INIT_ACCT COMMON_INTERCEPT_FUNCTION(acct)
#else
#define INIT_ACCT
#endif
#if SANITIZER_INTERCEPT_USER_FROM_UID
INTERCEPTOR(const char *, user_from_uid, u32 uid, int nouser) {
void *ctx;
const char *user;
COMMON_INTERCEPTOR_ENTER(ctx, user_from_uid, uid, nouser);
user = REAL(user_from_uid)(uid, nouser);
if (user)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, user, internal_strlen(user) + 1);
return user;
}
#define INIT_USER_FROM_UID COMMON_INTERCEPT_FUNCTION(user_from_uid)
#else
#define INIT_USER_FROM_UID
#endif
#if SANITIZER_INTERCEPT_UID_FROM_USER
INTERCEPTOR(int, uid_from_user, const char *name, u32 *uid) {
void *ctx;
int res;
COMMON_INTERCEPTOR_ENTER(ctx, uid_from_user, name, uid);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
res = REAL(uid_from_user)(name, uid);
if (uid)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, uid, sizeof(*uid));
return res;
}
#define INIT_UID_FROM_USER COMMON_INTERCEPT_FUNCTION(uid_from_user)
#else
#define INIT_UID_FROM_USER
#endif
#if SANITIZER_INTERCEPT_GROUP_FROM_GID
INTERCEPTOR(const char *, group_from_gid, u32 gid, int nogroup) {
void *ctx;
const char *group;
COMMON_INTERCEPTOR_ENTER(ctx, group_from_gid, gid, nogroup);
group = REAL(group_from_gid)(gid, nogroup);
if (group)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, group, internal_strlen(group) + 1);
return group;
}
#define INIT_GROUP_FROM_GID COMMON_INTERCEPT_FUNCTION(group_from_gid)
#else
#define INIT_GROUP_FROM_GID
#endif
#if SANITIZER_INTERCEPT_GID_FROM_GROUP
INTERCEPTOR(int, gid_from_group, const char *group, u32 *gid) {
void *ctx;
int res;
COMMON_INTERCEPTOR_ENTER(ctx, gid_from_group, group, gid);
if (group)
COMMON_INTERCEPTOR_READ_RANGE(ctx, group, internal_strlen(group) + 1);
res = REAL(gid_from_group)(group, gid);
if (gid)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, gid, sizeof(*gid));
return res;
}
#define INIT_GID_FROM_GROUP COMMON_INTERCEPT_FUNCTION(gid_from_group)
#else
#define INIT_GID_FROM_GROUP
#endif
#if SANITIZER_INTERCEPT_ACCESS
INTERCEPTOR(int, access, const char *path, int mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, access, path, mode);
if (path)
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
return REAL(access)(path, mode);
}
#define INIT_ACCESS COMMON_INTERCEPT_FUNCTION(access)
#else
#define INIT_ACCESS
#endif
#if SANITIZER_INTERCEPT_FACCESSAT
INTERCEPTOR(int, faccessat, int fd, const char *path, int mode, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, faccessat, fd, path, mode, flags);
if (path)
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
return REAL(faccessat)(fd, path, mode, flags);
}
#define INIT_FACCESSAT COMMON_INTERCEPT_FUNCTION(faccessat)
#else
#define INIT_FACCESSAT
#endif
#if SANITIZER_INTERCEPT_GETGROUPLIST
INTERCEPTOR(int, getgrouplist, const char *name, u32 basegid, u32 *groups,
int *ngroups) {
void *ctx;
int res;
COMMON_INTERCEPTOR_ENTER(ctx, getgrouplist, name, basegid, groups, ngroups);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
if (ngroups)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ngroups, sizeof(*ngroups));
res = REAL(getgrouplist)(name, basegid, groups, ngroups);
if (!res && groups && ngroups) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, groups, sizeof(*groups) * (*ngroups));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ngroups, sizeof(*ngroups));
}
return res;
}
#define INIT_GETGROUPLIST COMMON_INTERCEPT_FUNCTION(getgrouplist);
#else
#define INIT_GETGROUPLIST
#endif
#if SANITIZER_INTERCEPT_GETGROUPMEMBERSHIP
INTERCEPTOR(int, getgroupmembership, const char *name, u32 basegid, u32 *groups,
int maxgrp, int *ngroups) {
void *ctx;
int res;
COMMON_INTERCEPTOR_ENTER(ctx, getgroupmembership, name, basegid, groups,
maxgrp, ngroups);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
res = REAL(getgroupmembership)(name, basegid, groups, maxgrp, ngroups);
if (!res && groups && ngroups) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, groups, sizeof(*groups) * (*ngroups));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ngroups, sizeof(*ngroups));
}
return res;
}
#define INIT_GETGROUPMEMBERSHIP COMMON_INTERCEPT_FUNCTION(getgroupmembership);
#else
#define INIT_GETGROUPMEMBERSHIP
#endif
#if SANITIZER_INTERCEPT_READLINK
INTERCEPTOR(SSIZE_T, readlink, const char *path, char *buf, SIZE_T bufsiz) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readlink, path, buf, bufsiz);
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
SSIZE_T res = REAL(readlink)(path, buf, bufsiz);
if (res > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res);
return res;
}
#define INIT_READLINK COMMON_INTERCEPT_FUNCTION(readlink)
#else
#define INIT_READLINK
#endif
#if SANITIZER_INTERCEPT_READLINKAT
INTERCEPTOR(SSIZE_T, readlinkat, int dirfd, const char *path, char *buf,
SIZE_T bufsiz) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readlinkat, dirfd, path, buf, bufsiz);
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
SSIZE_T res = REAL(readlinkat)(dirfd, path, buf, bufsiz);
if (res > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res);
return res;
}
#define INIT_READLINKAT COMMON_INTERCEPT_FUNCTION(readlinkat)
#else
#define INIT_READLINKAT
#endif
#if SANITIZER_INTERCEPT_NAME_TO_HANDLE_AT
INTERCEPTOR(int, name_to_handle_at, int dirfd, const char *pathname,
struct file_handle *handle, int *mount_id, int flags) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, name_to_handle_at, dirfd, pathname, handle,
mount_id, flags);
COMMON_INTERCEPTOR_READ_RANGE(ctx, pathname, internal_strlen(pathname) + 1);
__sanitizer_file_handle *sanitizer_handle =
reinterpret_cast<__sanitizer_file_handle*>(handle);
COMMON_INTERCEPTOR_READ_RANGE(
ctx, &sanitizer_handle->handle_bytes,
sizeof(sanitizer_handle->handle_bytes));
int res = REAL(name_to_handle_at)(dirfd, pathname, handle, mount_id, flags);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, &sanitizer_handle->handle_bytes,
sizeof(sanitizer_handle->handle_bytes));
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, &sanitizer_handle->handle_type,
sizeof(sanitizer_handle->handle_type));
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, &sanitizer_handle->f_handle, sanitizer_handle->handle_bytes);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mount_id, sizeof(*mount_id));
}
return res;
}
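// Recap of the in/out protocol mirrored above: the caller pre-sets
// handle->handle_bytes to the capacity of f_handle (read before the call);
// on success the kernel updates handle_bytes, sets handle_type and fills
// f_handle, so exactly those byte ranges (plus *mount_id) are unpoisoned
// afterwards.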
#define INIT_NAME_TO_HANDLE_AT COMMON_INTERCEPT_FUNCTION(name_to_handle_at)
#else
#define INIT_NAME_TO_HANDLE_AT
#endif
#if SANITIZER_INTERCEPT_OPEN_BY_HANDLE_AT
INTERCEPTOR(int, open_by_handle_at, int mount_fd, struct file_handle* handle,
int flags) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, open_by_handle_at, mount_fd, handle, flags);
__sanitizer_file_handle *sanitizer_handle =
reinterpret_cast<__sanitizer_file_handle*>(handle);
COMMON_INTERCEPTOR_READ_RANGE(
ctx, &sanitizer_handle->handle_bytes,
sizeof(sanitizer_handle->handle_bytes));
COMMON_INTERCEPTOR_READ_RANGE(
ctx, &sanitizer_handle->handle_type,
sizeof(sanitizer_handle->handle_type));
COMMON_INTERCEPTOR_READ_RANGE(
ctx, &sanitizer_handle->f_handle, sanitizer_handle->handle_bytes);
return REAL(open_by_handle_at)(mount_fd, handle, flags);
}
#define INIT_OPEN_BY_HANDLE_AT COMMON_INTERCEPT_FUNCTION(open_by_handle_at)
#else
#define INIT_OPEN_BY_HANDLE_AT
#endif
#if SANITIZER_INTERCEPT_STRLCPY
INTERCEPTOR(SIZE_T, strlcpy, char *dst, char *src, SIZE_T size) {
void *ctx;
SIZE_T res;
COMMON_INTERCEPTOR_ENTER(ctx, strlcpy, dst, src, size);
if (src) {
// Keep the strnlen call inside the macro argument: if the macro ignores its
// arguments, strnlen is never evaluated.
COMMON_INTERCEPTOR_READ_STRING(
ctx, src, Min(internal_strnlen(src, size), size - 1) + 1);
}
res = REAL(strlcpy)(dst, src, size);
COMMON_INTERCEPTOR_COPY_STRING(ctx, dst, src, internal_strlen(dst) + 1);
return res;
}
INTERCEPTOR(SIZE_T, strlcat, char *dst, char *src, SIZE_T size) {
void *ctx;
SIZE_T len = 0;
COMMON_INTERCEPTOR_ENTER(ctx, strlcat, dst, src, size);
// src is checked in the strlcpy() interceptor
if (dst) {
len = internal_strnlen(dst, size);
COMMON_INTERCEPTOR_READ_STRING(ctx, dst, Min(len, size - 1) + 1);
}
// Reuse the rest of the code in the strlcpy() interceptor
return WRAP(strlcpy)(dst + len, src, size - len) + len;
}
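// Worked example: for dst == "ab" (len == 2) and size == 8, this calls
// WRAP(strlcpy)(dst + 2, src, 6), which returns strlen(src), so strlcat()
// reports strlen(src) + 2: the length of the string it tried to create,
// as the BSD API specifies.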
#define INIT_STRLCPY \
COMMON_INTERCEPT_FUNCTION(strlcpy); \
COMMON_INTERCEPT_FUNCTION(strlcat);
#else
#define INIT_STRLCPY
#endif
#if SANITIZER_INTERCEPT_MMAP
INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags, int fd,
OFF_T off) {
void *ctx;
if (common_flags()->detect_write_exec)
ReportMmapWriteExec(prot, flags);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_ENTER(ctx, mmap, addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap, addr, sz, prot, flags, fd, off);
}
INTERCEPTOR(int, mprotect, void *addr, SIZE_T sz, int prot) {
void *ctx;
if (common_flags()->detect_write_exec)
ReportMmapWriteExec(prot, 0);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (int)internal_mprotect(addr, sz, prot);
COMMON_INTERCEPTOR_ENTER(ctx, mprotect, addr, sz, prot);
MprotectMallocZones(addr, prot);
return REAL(mprotect)(addr, sz, prot);
}
#define INIT_MMAP \
COMMON_INTERCEPT_FUNCTION(mmap); \
COMMON_INTERCEPT_FUNCTION(mprotect);
#else
#define INIT_MMAP
#endif
#if SANITIZER_INTERCEPT_MMAP64
INTERCEPTOR(void *, mmap64, void *addr, SIZE_T sz, int prot, int flags, int fd,
OFF64_T off) {
void *ctx;
if (common_flags()->detect_write_exec)
ReportMmapWriteExec(prot, flags);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_ENTER(ctx, mmap64, addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap64, addr, sz, prot, flags, fd, off);
}
#define INIT_MMAP64 COMMON_INTERCEPT_FUNCTION(mmap64);
#else
#define INIT_MMAP64
#endif
#if SANITIZER_INTERCEPT_DEVNAME
INTERCEPTOR(char *, devname, u64 dev, u32 type) {
void *ctx;
char *name;
COMMON_INTERCEPTOR_ENTER(ctx, devname, dev, type);
name = REAL(devname)(dev, type);
if (name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
return name;
}
#define INIT_DEVNAME COMMON_INTERCEPT_FUNCTION(devname);
#else
#define INIT_DEVNAME
#endif
#if SANITIZER_INTERCEPT_DEVNAME_R
#if SANITIZER_NETBSD
#define DEVNAME_R_RETTYPE int
#define DEVNAME_R_SUCCESS(x) (!(x))
#else
#define DEVNAME_R_RETTYPE char*
#define DEVNAME_R_SUCCESS(x) (x)
#endif
INTERCEPTOR(DEVNAME_R_RETTYPE, devname_r, u64 dev, u32 type, char *path,
uptr len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, devname_r, dev, type, path, len);
DEVNAME_R_RETTYPE res = REAL(devname_r)(dev, type, path, len);
if (DEVNAME_R_SUCCESS(res))
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, path, internal_strlen(path) + 1);
return res;
}
#define INIT_DEVNAME_R COMMON_INTERCEPT_FUNCTION(devname_r);
#else
#define INIT_DEVNAME_R
#endif
#if SANITIZER_INTERCEPT_FGETLN
INTERCEPTOR(char *, fgetln, __sanitizer_FILE *stream, SIZE_T *len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetln, stream, len);
char *str = REAL(fgetln)(stream, len);
if (str && len) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, len, sizeof(*len));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, str, *len);
}
return str;
}
#define INIT_FGETLN COMMON_INTERCEPT_FUNCTION(fgetln)
#else
#define INIT_FGETLN
#endif
#if SANITIZER_INTERCEPT_STRMODE
INTERCEPTOR(void, strmode, u32 mode, char *bp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strmode, mode, bp);
REAL(strmode)(mode, bp);
if (bp)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, bp, internal_strlen(bp) + 1);
}
#define INIT_STRMODE COMMON_INTERCEPT_FUNCTION(strmode)
#else
#define INIT_STRMODE
#endif
#if SANITIZER_INTERCEPT_TTYENT
INTERCEPTOR(struct __sanitizer_ttyent *, getttyent, void) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getttyent);
struct __sanitizer_ttyent *ttyent = REAL(getttyent)();
if (ttyent)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ttyent, struct_ttyent_sz);
return ttyent;
}
INTERCEPTOR(struct __sanitizer_ttyent *, getttynam, char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getttynam, name);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
struct __sanitizer_ttyent *ttyent = REAL(getttynam)(name);
if (ttyent)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ttyent, struct_ttyent_sz);
return ttyent;
}
#define INIT_TTYENT \
COMMON_INTERCEPT_FUNCTION(getttyent); \
COMMON_INTERCEPT_FUNCTION(getttynam);
#else
#define INIT_TTYENT
#endif
#if SANITIZER_INTERCEPT_TTYENTPATH
INTERCEPTOR(int, setttyentpath, char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setttyentpath, path);
if (path)
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
return REAL(setttyentpath)(path);
}
#define INIT_TTYENTPATH COMMON_INTERCEPT_FUNCTION(setttyentpath);
#else
#define INIT_TTYENTPATH
#endif
#if SANITIZER_INTERCEPT_PROTOENT
static void write_protoent(void *ctx, struct __sanitizer_protoent *p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->p_name, internal_strlen(p->p_name) + 1);
SIZE_T pp_size = 1; // One extra slot for the terminating NULL pointer
for (char **pp = p->p_aliases; *pp; ++pp, ++pp_size)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *pp, internal_strlen(*pp) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->p_aliases,
pp_size * sizeof(char **));
}
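// p_aliases is a NULL-pointer-terminated array of C strings, so for an entry
// such as "tcp" it might look like { "TCP", NULL } and pp_size would be 2:
// one alias plus the terminating NULL pointer. write_protoent unpoisons each
// alias string first, then the pointer array itself.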
INTERCEPTOR(struct __sanitizer_protoent *, getprotoent) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotoent);
struct __sanitizer_protoent *p = REAL(getprotoent)();
if (p)
write_protoent(ctx, p);
return p;
}
INTERCEPTOR(struct __sanitizer_protoent *, getprotobyname, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotobyname, name);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
struct __sanitizer_protoent *p = REAL(getprotobyname)(name);
if (p)
write_protoent(ctx, p);
return p;
}
INTERCEPTOR(struct __sanitizer_protoent *, getprotobynumber, int proto) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotobynumber, proto);
struct __sanitizer_protoent *p = REAL(getprotobynumber)(proto);
if (p)
write_protoent(ctx, p);
return p;
}
#define INIT_PROTOENT \
COMMON_INTERCEPT_FUNCTION(getprotoent); \
COMMON_INTERCEPT_FUNCTION(getprotobyname); \
COMMON_INTERCEPT_FUNCTION(getprotobynumber)
#else
#define INIT_PROTOENT
#endif
#if SANITIZER_INTERCEPT_PROTOENT_R
INTERCEPTOR(int, getprotoent_r, struct __sanitizer_protoent *result_buf,
char *buf, SIZE_T buflen, struct __sanitizer_protoent **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotoent_r, result_buf, buf, buflen,
result);
int res = REAL(getprotoent_r)(result_buf, buf, buflen, result);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof *result);
if (!res && *result)
write_protoent(ctx, *result);
return res;
}
INTERCEPTOR(int, getprotobyname_r, const char *name,
struct __sanitizer_protoent *result_buf, char *buf, SIZE_T buflen,
struct __sanitizer_protoent **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotobyname_r, name, result_buf, buf,
buflen, result);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
int res = REAL(getprotobyname_r)(name, result_buf, buf, buflen, result);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof *result);
if (!res && *result)
write_protoent(ctx, *result);
return res;
}
INTERCEPTOR(int, getprotobynumber_r, int num,
struct __sanitizer_protoent *result_buf, char *buf,
SIZE_T buflen, struct __sanitizer_protoent **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotobynumber_r, num, result_buf, buf,
buflen, result);
int res = REAL(getprotobynumber_r)(num, result_buf, buf, buflen, result);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof *result);
if (!res && *result)
write_protoent(ctx, *result);
return res;
}
#define INIT_PROTOENT_R \
COMMON_INTERCEPT_FUNCTION(getprotoent_r); \
COMMON_INTERCEPT_FUNCTION(getprotobyname_r); \
COMMON_INTERCEPT_FUNCTION(getprotobynumber_r);
#else
#define INIT_PROTOENT_R
#endif
#if SANITIZER_INTERCEPT_NETENT
INTERCEPTOR(struct __sanitizer_netent *, getnetent) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getnetent);
struct __sanitizer_netent *n = REAL(getnetent)();
if (n) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, internal_strlen(n->n_name) + 1);
SIZE_T nn_size = 1; // One extra slot for the terminating NULL pointer
for (char **nn = n->n_aliases; *nn; ++nn, ++nn_size)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, internal_strlen(*nn) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_aliases,
nn_size * sizeof(char **));
}
return n;
}
INTERCEPTOR(struct __sanitizer_netent *, getnetbyname, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getnetbyname, name);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
struct __sanitizer_netent *n = REAL(getnetbyname)(name);
if (n) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, internal_strlen(n->n_name) + 1);
SIZE_T nn_size = 1; // One extra slot for the terminating NULL pointer
for (char **nn = n->n_aliases; *nn; ++nn, ++nn_size)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, internal_strlen(*nn) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_aliases,
nn_size * sizeof(char **));
}
return n;
}
INTERCEPTOR(struct __sanitizer_netent *, getnetbyaddr, u32 net, int type) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getnetbyaddr, net, type);
struct __sanitizer_netent *n = REAL(getnetbyaddr)(net, type);
if (n) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, internal_strlen(n->n_name) + 1);
SIZE_T nn_size = 1; // One extra slot for the terminating NULL pointer
for (char **nn = n->n_aliases; *nn; ++nn, ++nn_size)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, internal_strlen(*nn) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_aliases,
nn_size * sizeof(char **));
}
return n;
}
#define INIT_NETENT \
COMMON_INTERCEPT_FUNCTION(getnetent); \
COMMON_INTERCEPT_FUNCTION(getnetbyname); \
COMMON_INTERCEPT_FUNCTION(getnetbyaddr)
#else
#define INIT_NETENT
#endif
#if SANITIZER_INTERCEPT_GETMNTINFO
INTERCEPTOR(int, getmntinfo, void **mntbufp, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getmntinfo, mntbufp, flags);
int cnt = REAL(getmntinfo)(mntbufp, flags);
if (cnt > 0 && mntbufp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mntbufp, sizeof(void *));
if (*mntbufp)
#if SANITIZER_NETBSD
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *mntbufp, cnt * struct_statvfs_sz);
#else
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *mntbufp, cnt * struct_statfs_sz);
#endif
}
return cnt;
}
#define INIT_GETMNTINFO COMMON_INTERCEPT_FUNCTION(getmntinfo)
#else
#define INIT_GETMNTINFO
#endif
#if SANITIZER_INTERCEPT_MI_VECTOR_HASH
INTERCEPTOR(void, mi_vector_hash, const void *key, SIZE_T len, u32 seed,
u32 hashes[3]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mi_vector_hash, key, len, seed, hashes);
if (key)
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, len);
REAL(mi_vector_hash)(key, len, seed, hashes);
if (hashes)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hashes, sizeof(hashes[0]) * 3);
}
#define INIT_MI_VECTOR_HASH COMMON_INTERCEPT_FUNCTION(mi_vector_hash)
#else
#define INIT_MI_VECTOR_HASH
#endif
#if SANITIZER_INTERCEPT_SETVBUF
INTERCEPTOR(int, setvbuf, __sanitizer_FILE *stream, char *buf, int mode,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setvbuf, stream, buf, mode, size);
int ret = REAL(setvbuf)(stream, buf, mode, size);
if (buf)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size);
if (stream)
unpoison_file(stream);
return ret;
}
INTERCEPTOR(void, setbuf, __sanitizer_FILE *stream, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setbuf, stream, buf);
REAL(setbuf)(stream, buf);
if (buf) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer_bufsiz);
}
if (stream)
unpoison_file(stream);
}
INTERCEPTOR(void, setbuffer, __sanitizer_FILE *stream, char *buf, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setbuffer, stream, buf, size);
REAL(setbuffer)(stream, buf, size);
if (buf) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size);
}
if (stream)
unpoison_file(stream);
}
INTERCEPTOR(void, setlinebuf, __sanitizer_FILE *stream) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setlinebuf, stream);
REAL(setlinebuf)(stream);
if (stream)
unpoison_file(stream);
}
#define INIT_SETVBUF COMMON_INTERCEPT_FUNCTION(setvbuf); \
COMMON_INTERCEPT_FUNCTION(setbuf); \
COMMON_INTERCEPT_FUNCTION(setbuffer); \
COMMON_INTERCEPT_FUNCTION(setlinebuf)
#else
#define INIT_SETVBUF
#endif
#if SANITIZER_INTERCEPT_GETVFSSTAT
INTERCEPTOR(int, getvfsstat, void *buf, SIZE_T bufsize, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getvfsstat, buf, bufsize, flags);
int ret = REAL(getvfsstat)(buf, bufsize, flags);
if (buf && ret > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, ret * struct_statvfs_sz);
return ret;
}
#define INIT_GETVFSSTAT COMMON_INTERCEPT_FUNCTION(getvfsstat)
#else
#define INIT_GETVFSSTAT
#endif
#if SANITIZER_INTERCEPT_REGEX
INTERCEPTOR(int, regcomp, void *preg, const char *pattern, int cflags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regcomp, preg, pattern, cflags);
if (pattern)
COMMON_INTERCEPTOR_READ_RANGE(ctx, pattern, internal_strlen(pattern) + 1);
int res = REAL(regcomp)(preg, pattern, cflags);
if (preg)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, preg, struct_regex_sz);
return res;
}
INTERCEPTOR(int, regexec, const void *preg, const char *string, SIZE_T nmatch,
struct __sanitizer_regmatch *pmatch[], int eflags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regexec, preg, string, nmatch, pmatch, eflags);
if (preg)
COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
if (string)
COMMON_INTERCEPTOR_READ_RANGE(ctx, string, internal_strlen(string) + 1);
int res = REAL(regexec)(preg, string, nmatch, pmatch, eflags);
if (!res && pmatch)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pmatch, nmatch * struct_regmatch_sz);
return res;
}
INTERCEPTOR(SIZE_T, regerror, int errcode, const void *preg, char *errbuf,
SIZE_T errbuf_size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regerror, errcode, preg, errbuf, errbuf_size);
if (preg)
COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
SIZE_T res = REAL(regerror)(errcode, preg, errbuf, errbuf_size);
if (errbuf)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, errbuf, internal_strlen(errbuf) + 1);
return res;
}
INTERCEPTOR(void, regfree, const void *preg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regfree, preg);
if (preg)
COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
REAL(regfree)(preg);
}
#define INIT_REGEX \
COMMON_INTERCEPT_FUNCTION(regcomp); \
COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(regexec, "GLIBC_2.3.4"); \
COMMON_INTERCEPT_FUNCTION(regerror); \
COMMON_INTERCEPT_FUNCTION(regfree);
#else
#define INIT_REGEX
#endif
#if SANITIZER_INTERCEPT_REGEXSUB
INTERCEPTOR(SSIZE_T, regnsub, char *buf, SIZE_T bufsiz, const char *sub,
const struct __sanitizer_regmatch *rm, const char *str) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regnsub, buf, bufsiz, sub, rm, str);
if (sub)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, internal_strlen(sub) + 1);
// The implementation requires and hardcodes an array of 10 elements
if (rm)
COMMON_INTERCEPTOR_READ_RANGE(ctx, rm, 10 * struct_regmatch_sz);
if (str)
COMMON_INTERCEPTOR_READ_RANGE(ctx, str, internal_strlen(str) + 1);
SSIZE_T res = REAL(regnsub)(buf, bufsiz, sub, rm, str);
if (res > 0 && buf)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
return res;
}
INTERCEPTOR(SSIZE_T, regasub, char **buf, const char *sub,
const struct __sanitizer_regmatch *rm, const char *sstr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regasub, buf, sub, rm, sstr);
if (sub)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, internal_strlen(sub) + 1);
// As in regnsub above, the implementation hardcodes an array of 10 elements
if (rm)
COMMON_INTERCEPTOR_READ_RANGE(ctx, rm, 10 * struct_regmatch_sz);
if (sstr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sstr, internal_strlen(sstr) + 1);
SSIZE_T res = REAL(regasub)(buf, sub, rm, sstr);
if (res > 0 && buf) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sizeof(char *));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *buf, internal_strlen(*buf) + 1);
}
return res;
}
#define INIT_REGEXSUB \
COMMON_INTERCEPT_FUNCTION(regnsub); \
COMMON_INTERCEPT_FUNCTION(regasub);
#else
#define INIT_REGEXSUB
#endif
#if SANITIZER_INTERCEPT_FTS
INTERCEPTOR(void *, fts_open, char *const *path_argv, int options,
int (*compar)(void **, void **)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fts_open, path_argv, options, compar);
if (path_argv) {
for (char *const *pa = path_argv; ; ++pa) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, pa, sizeof(char **));
if (!*pa)
break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, internal_strlen(*pa) + 1);
}
}
// TODO(kamil): handle compar callback
void *fts = REAL(fts_open)(path_argv, options, compar);
if (fts)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, fts, struct_FTS_sz);
return fts;
}
INTERCEPTOR(void *, fts_read, void *ftsp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fts_read, ftsp);
if (ftsp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ftsp, struct_FTS_sz);
void *ftsent = REAL(fts_read)(ftsp);
if (ftsent)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ftsent, struct_FTSENT_sz);
return ftsent;
}
INTERCEPTOR(void *, fts_children, void *ftsp, int options) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fts_children, ftsp, options);
if (ftsp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ftsp, struct_FTS_sz);
void *ftsent = REAL(fts_children)(ftsp, options);
if (ftsent)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ftsent, struct_FTSENT_sz);
return ftsent;
}
INTERCEPTOR(int, fts_set, void *ftsp, void *f, int options) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fts_set, ftsp, f, options);
if (ftsp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ftsp, struct_FTS_sz);
if (f)
COMMON_INTERCEPTOR_READ_RANGE(ctx, f, struct_FTSENT_sz);
return REAL(fts_set)(ftsp, f, options);
}
INTERCEPTOR(int, fts_close, void *ftsp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fts_close, ftsp);
if (ftsp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ftsp, struct_FTS_sz);
return REAL(fts_close)(ftsp);
}
#define INIT_FTS \
COMMON_INTERCEPT_FUNCTION(fts_open); \
COMMON_INTERCEPT_FUNCTION(fts_read); \
COMMON_INTERCEPT_FUNCTION(fts_children); \
COMMON_INTERCEPT_FUNCTION(fts_set); \
COMMON_INTERCEPT_FUNCTION(fts_close);
#else
#define INIT_FTS
#endif
#if SANITIZER_INTERCEPT_SYSCTL
INTERCEPTOR(int, sysctl, int *name, unsigned int namelen, void *oldp,
SIZE_T *oldlenp, void *newp, SIZE_T newlen) {
void *ctx;
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_sysctl(name, namelen, oldp, oldlenp, newp, newlen);
COMMON_INTERCEPTOR_ENTER(ctx, sysctl, name, namelen, oldp, oldlenp, newp,
newlen);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, namelen * sizeof(*name));
if (oldlenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, oldlenp, sizeof(*oldlenp));
if (newp && newlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, newp, newlen);
int res = REAL(sysctl)(name, namelen, oldp, oldlenp, newp, newlen);
if (!res) {
if (oldlenp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldlenp, sizeof(*oldlenp));
if (oldp)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldp, *oldlenp);
}
}
return res;
}
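// sysctl's oldlenp is an in/out parameter: on entry it holds the capacity of
// oldp, on successful return the number of bytes actually stored, which is
// why it is checked as a read before the call and marked written (together
// with *oldlenp bytes of oldp) afterwards. The classic two-step caller, for
// illustration:
//   SIZE_T sz = 0;
//   sysctl(mib, miblen, nullptr, &sz, nullptr, 0);  // 1. query the size
//   void *buf = malloc(sz);
//   sysctl(mib, miblen, buf, &sz, nullptr, 0);      // 2. fetch the data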
INTERCEPTOR(int, sysctlbyname, char *sname, void *oldp, SIZE_T *oldlenp,
void *newp, SIZE_T newlen) {
void *ctx;
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_sysctlbyname(sname, oldp, oldlenp, newp, newlen);
COMMON_INTERCEPTOR_ENTER(ctx, sysctlbyname, sname, oldp, oldlenp, newp,
newlen);
if (sname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
if (oldlenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, oldlenp, sizeof(*oldlenp));
if (newp && newlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, newp, newlen);
int res = REAL(sysctlbyname)(sname, oldp, oldlenp, newp, newlen);
if (!res) {
if (oldlenp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldlenp, sizeof(*oldlenp));
if (oldp)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldp, *oldlenp);
}
}
return res;
}
INTERCEPTOR(int, sysctlnametomib, const char *sname, int *name,
SIZE_T *namelenp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sysctlnametomib, sname, name, namelenp);
if (sname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
if (namelenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, namelenp, sizeof(*namelenp));
int res = REAL(sysctlnametomib)(sname, name, namelenp);
if (!res) {
if (namelenp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, namelenp, sizeof(*namelenp));
if (name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, *namelenp * sizeof(*name));
}
}
return res;
}
#define INIT_SYSCTL \
COMMON_INTERCEPT_FUNCTION(sysctl); \
COMMON_INTERCEPT_FUNCTION(sysctlbyname); \
COMMON_INTERCEPT_FUNCTION(sysctlnametomib);
#else
#define INIT_SYSCTL
#endif
#if SANITIZER_INTERCEPT_ASYSCTL
INTERCEPTOR(void *, asysctl, const int *name, SIZE_T namelen, SIZE_T *len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, asysctl, name, namelen, len);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, sizeof(*name) * namelen);
void *res = REAL(asysctl)(name, namelen, len);
if (res && len) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, len, sizeof(*len));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, *len);
}
return res;
}
INTERCEPTOR(void *, asysctlbyname, const char *sname, SIZE_T *len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, asysctlbyname, sname, len);
if (sname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
void *res = REAL(asysctlbyname)(sname, len);
if (res && len) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, len, sizeof(*len));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, *len);
}
return res;
}
#define INIT_ASYSCTL \
COMMON_INTERCEPT_FUNCTION(asysctl); \
COMMON_INTERCEPT_FUNCTION(asysctlbyname);
#else
#define INIT_ASYSCTL
#endif
#if SANITIZER_INTERCEPT_SYSCTLGETMIBINFO
INTERCEPTOR(int, sysctlgetmibinfo, char *sname, int *name,
unsigned int *namelenp, char *cname, SIZE_T *csz, void **rnode,
int v) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sysctlgetmibinfo, sname, name, namelenp, cname,
csz, rnode, v);
if (sname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
if (namelenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, namelenp, sizeof(*namelenp));
if (csz)
COMMON_INTERCEPTOR_READ_RANGE(ctx, csz, sizeof(*csz));
// Skip rnode: it is mostly used internally, rarely by callers, and is not
// trivial to sanitize
int res = REAL(sysctlgetmibinfo)(sname, name, namelenp, cname, csz, rnode, v);
if (!res) {
if (namelenp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, namelenp, sizeof(*namelenp));
if (name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, *namelenp * sizeof(*name));
}
if (csz) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, csz, sizeof(*csz));
if (cname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cname, *csz);
}
}
return res;
}
#define INIT_SYSCTLGETMIBINFO \
COMMON_INTERCEPT_FUNCTION(sysctlgetmibinfo);
#else
#define INIT_SYSCTLGETMIBINFO
#endif
#if SANITIZER_INTERCEPT_NL_LANGINFO
INTERCEPTOR(char *, nl_langinfo, long item) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, nl_langinfo, item);
char *ret = REAL(nl_langinfo)(item);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, internal_strlen(ret) + 1);
return ret;
}
#define INIT_NL_LANGINFO COMMON_INTERCEPT_FUNCTION(nl_langinfo)
#else
#define INIT_NL_LANGINFO
#endif
#if SANITIZER_INTERCEPT_MODCTL
INTERCEPTOR(int, modctl, int operation, void *argp) {
void *ctx;
int ret;
COMMON_INTERCEPTOR_ENTER(ctx, modctl, operation, argp);
if (operation == modctl_load) {
if (argp) {
__sanitizer_modctl_load_t *ml = (__sanitizer_modctl_load_t *)argp;
COMMON_INTERCEPTOR_READ_RANGE(ctx, ml, sizeof(*ml));
if (ml->ml_filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ml->ml_filename,
internal_strlen(ml->ml_filename) + 1);
if (ml->ml_props)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ml->ml_props, ml->ml_propslen);
}
ret = REAL(modctl)(operation, argp);
} else if (operation == modctl_unload) {
if (argp) {
const char *name = (const char *)argp;
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
}
ret = REAL(modctl)(operation, argp);
} else if (operation == modctl_stat) {
uptr iov_len;
struct __sanitizer_iovec *iov = (struct __sanitizer_iovec *)argp;
if (iov) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, iov, sizeof(*iov));
iov_len = iov->iov_len;
}
ret = REAL(modctl)(operation, argp);
if (iov)
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, iov->iov_base, Min(iov_len, iov->iov_len));
} else {
// modctl_exists and any other operation pass no user memory to check.
ret = REAL(modctl)(operation, argp);
}
return ret;
}
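// In the modctl_stat branch above, iov->iov_len is captured into iov_len
// before the call because the kernel may overwrite it with the size it
// actually needed; only Min(iov_len, iov->iov_len) bytes of iov_base are
// then marked written -- the caller-provided capacity or the kernel-reported
// length, whichever is smaller.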
#define INIT_MODCTL COMMON_INTERCEPT_FUNCTION(modctl)
#else
#define INIT_MODCTL
#endif
#if SANITIZER_INTERCEPT_STRTONUM
INTERCEPTOR(long long, strtonum, const char *nptr, long long minval,
long long maxval, const char **errstr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtonum, nptr, minval, maxval, errstr);
// TODO(kamil): Implement strtoll as a common interceptor
char *real_endptr;
long long ret = (long long)REAL(strtoimax)(nptr, &real_endptr, 10);
StrtolFixAndCheck(ctx, nptr, nullptr, real_endptr, 10);
ret = REAL(strtonum)(nptr, minval, maxval, errstr);
if (errstr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, errstr, sizeof(const char *));
if (*errstr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *errstr, internal_strlen(*errstr) + 1);
}
return ret;
}
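// strtonum exposes no endptr, so the interceptor first runs REAL(strtoimax)
// purely to learn where parsing stops; StrtolFixAndCheck then uses that end
// pointer to mark the consumed prefix of nptr as read. The result itself is
// recomputed by the real strtonum, so the extra parse only affects
// instrumentation, never the returned value.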
#define INIT_STRTONUM COMMON_INTERCEPT_FUNCTION(strtonum)
#else
#define INIT_STRTONUM
#endif
#if SANITIZER_INTERCEPT_FPARSELN
INTERCEPTOR(char *, fparseln, __sanitizer_FILE *stream, SIZE_T *len,
SIZE_T *lineno, const char delim[3], int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fparseln, stream, len, lineno, delim, flags);
if (lineno)
COMMON_INTERCEPTOR_READ_RANGE(ctx, lineno, sizeof(*lineno));
if (delim)
COMMON_INTERCEPTOR_READ_RANGE(ctx, delim, sizeof(delim[0]) * 3);
char *ret = REAL(fparseln)(stream, len, lineno, delim, flags);
if (ret) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, internal_strlen(ret) + 1);
if (len)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, len, sizeof(*len));
if (lineno)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lineno, sizeof(*lineno));
}
return ret;
}
#define INIT_FPARSELN COMMON_INTERCEPT_FUNCTION(fparseln)
#else
#define INIT_FPARSELN
#endif
#if SANITIZER_INTERCEPT_STATVFS1
INTERCEPTOR(int, statvfs1, const char *path, void *buf, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs1, path, buf, flags);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
int res = REAL(statvfs1)(path, buf, flags);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
return res;
}
INTERCEPTOR(int, fstatvfs1, int fd, void *buf, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fstatvfs1, fd, buf, flags);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
int res = REAL(fstatvfs1)(fd, buf, flags);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
if (fd >= 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
return res;
}
#define INIT_STATVFS1 \
COMMON_INTERCEPT_FUNCTION(statvfs1); \
COMMON_INTERCEPT_FUNCTION(fstatvfs1);
#else
#define INIT_STATVFS1
#endif
#if SANITIZER_INTERCEPT_STRTOI
INTERCEPTOR(INTMAX_T, strtoi, const char *nptr, char **endptr, int base,
INTMAX_T low, INTMAX_T high, int *rstatus) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtoi, nptr, endptr, base, low, high, rstatus);
char *real_endptr;
INTMAX_T ret = REAL(strtoi)(nptr, &real_endptr, base, low, high, rstatus);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
if (rstatus)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rstatus, sizeof(*rstatus));
return ret;
}
INTERCEPTOR(UINTMAX_T, strtou, const char *nptr, char **endptr, int base,
UINTMAX_T low, UINTMAX_T high, int *rstatus) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtou, nptr, endptr, base, low, high, rstatus);
char *real_endptr;
UINTMAX_T ret = REAL(strtou)(nptr, &real_endptr, base, low, high, rstatus);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
if (rstatus)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rstatus, sizeof(*rstatus));
return ret;
}
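// Both interceptors parse through a local real_endptr so that
// StrtolFixAndCheck can check the consumed prefix of nptr and propagate the
// end pointer to the caller's endptr when one was supplied. A hypothetical
// caller, for illustration:
//   int status;
//   char *end;
//   INTMAX_T v = strtoi("42abc", &end, 10, 0, 100, &status);  // end -> "abc"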
#define INIT_STRTOI \
COMMON_INTERCEPT_FUNCTION(strtoi); \
COMMON_INTERCEPT_FUNCTION(strtou)
#else
#define INIT_STRTOI
#endif
#if SANITIZER_INTERCEPT_CAPSICUM
#define CAP_RIGHTS_INIT_INTERCEPTOR(cap_rights_init, rights, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_init, rights, ##__VA_ARGS__); \
if (rights) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights)); \
__sanitizer_cap_rights_t *ret = \
REAL(cap_rights_init)(rights, ##__VA_ARGS__); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, sizeof(*ret)); \
return ret; \
}
#define CAP_RIGHTS_SET_INTERCEPTOR(cap_rights_set, rights, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_set, rights, ##__VA_ARGS__); \
if (rights) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights)); \
__sanitizer_cap_rights_t *ret = \
REAL(cap_rights_set)(rights, ##__VA_ARGS__); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, sizeof(*ret)); \
return ret; \
}
#define CAP_RIGHTS_CLEAR_INTERCEPTOR(cap_rights_clear, rights, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_clear, rights, ##__VA_ARGS__); \
if (rights) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights)); \
__sanitizer_cap_rights_t *ret = \
REAL(cap_rights_clear)(rights, ##__VA_ARGS__); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, sizeof(*ret)); \
return ret; \
}
#define CAP_RIGHTS_IS_SET_INTERCEPTOR(cap_rights_is_set, rights, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_is_set, rights, ##__VA_ARGS__); \
if (rights) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights)); \
return REAL(cap_rights_is_set)(rights, ##__VA_ARGS__); \
}
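// The four macro bodies above share one shape: check the rights object as a
// read, call the real function, and (for the mutating variants) mark the
// returned rights object as written. Passing the function name plus
// ##__VA_ARGS__ lets the same body serve both the fixed-arity wrappers below
// and any variadic cap_rights_* entry points.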
INTERCEPTOR(__sanitizer_cap_rights_t *, cap_rights_init,
__sanitizer_cap_rights_t *rights) {
CAP_RIGHTS_INIT_INTERCEPTOR(cap_rights_init, rights);
}
INTERCEPTOR(__sanitizer_cap_rights_t *, cap_rights_set,
__sanitizer_cap_rights_t *rights) {
CAP_RIGHTS_SET_INTERCEPTOR(cap_rights_set, rights);
}
INTERCEPTOR(__sanitizer_cap_rights_t *, cap_rights_clear,
__sanitizer_cap_rights_t *rights) {
CAP_RIGHTS_CLEAR_INTERCEPTOR(cap_rights_clear, rights);
}
INTERCEPTOR(bool, cap_rights_is_set,
__sanitizer_cap_rights_t *rights) {
CAP_RIGHTS_IS_SET_INTERCEPTOR(cap_rights_is_set, rights);
}
INTERCEPTOR(int, cap_rights_limit, int fd,
const __sanitizer_cap_rights_t *rights) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_limit, fd, rights);
if (rights)
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights));
return REAL(cap_rights_limit)(fd, rights);
}
INTERCEPTOR(int, cap_rights_get, int fd, __sanitizer_cap_rights_t *rights) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_get, fd, rights);
int ret = REAL(cap_rights_get)(fd, rights);
if (!ret && rights)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rights, sizeof(*rights));
return ret;
}
INTERCEPTOR(bool, cap_rights_is_valid, const __sanitizer_cap_rights_t *rights) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_is_valid, rights);
if (rights)
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights));
return REAL(cap_rights_is_valid)(rights);
}
INTERCEPTOR(__sanitizer_cap_rights *, cap_rights_merge,
__sanitizer_cap_rights *dst, const __sanitizer_cap_rights *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_merge, dst, src);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
__sanitizer_cap_rights *ret = REAL(cap_rights_merge)(dst, src);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst));
return ret;
}
INTERCEPTOR(__sanitizer_cap_rights *, cap_rights_remove,
__sanitizer_cap_rights *dst, const __sanitizer_cap_rights *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_remove, dst, src);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
__sanitizer_cap_rights *ret = REAL(cap_rights_remove)(dst, src);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst));
return ret;
}
INTERCEPTOR(bool, cap_rights_contains, const __sanitizer_cap_rights *big,
const __sanitizer_cap_rights *little) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_contains, big, little);
if (little)
COMMON_INTERCEPTOR_READ_RANGE(ctx, little, sizeof(*little));
if (big)
COMMON_INTERCEPTOR_READ_RANGE(ctx, big, sizeof(*big));
return REAL(cap_rights_contains)(big, little);
}
INTERCEPTOR(int, cap_ioctls_limit, int fd, const uptr *cmds, SIZE_T ncmds) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_ioctls_limit, fd, cmds, ncmds);
if (cmds)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cmds, sizeof(*cmds) * ncmds);
return REAL(cap_ioctls_limit)(fd, cmds, ncmds);
}
INTERCEPTOR(int, cap_ioctls_get, int fd, uptr *cmds, SIZE_T maxcmds) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_ioctls_get, fd, cmds, maxcmds);
int ret = REAL(cap_ioctls_get)(fd, cmds, maxcmds);
if (!ret && cmds)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cmds, sizeof(*cmds) * maxcmds);
return ret;
}
#define INIT_CAPSICUM \
COMMON_INTERCEPT_FUNCTION(cap_rights_init); \
COMMON_INTERCEPT_FUNCTION(cap_rights_set); \
COMMON_INTERCEPT_FUNCTION(cap_rights_clear); \
COMMON_INTERCEPT_FUNCTION(cap_rights_is_set); \
COMMON_INTERCEPT_FUNCTION(cap_rights_get); \
COMMON_INTERCEPT_FUNCTION(cap_rights_limit); \
COMMON_INTERCEPT_FUNCTION(cap_rights_contains); \
COMMON_INTERCEPT_FUNCTION(cap_rights_remove); \
COMMON_INTERCEPT_FUNCTION(cap_rights_merge); \
COMMON_INTERCEPT_FUNCTION(cap_rights_is_valid); \
COMMON_INTERCEPT_FUNCTION(cap_ioctls_get); \
COMMON_INTERCEPT_FUNCTION(cap_ioctls_limit)
#else
#define INIT_CAPSICUM
#endif
#if SANITIZER_INTERCEPT_SHA1
INTERCEPTOR(void, SHA1Init, void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1Init, context);
REAL(SHA1Init)(context);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA1_CTX_sz);
}
INTERCEPTOR(void, SHA1Update, void *context, const u8 *data, unsigned len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1Update, context, data, len);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA1_CTX_sz);
REAL(SHA1Update)(context, data, len);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA1_CTX_sz);
}
INTERCEPTOR(void, SHA1Final, u8 digest[20], void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1Final, digest, context);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA1_CTX_sz);
REAL(SHA1Final)(digest, context);
if (digest)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(u8) * 20);
}
INTERCEPTOR(void, SHA1Transform, u32 state[5], u8 buffer[64]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1Transform, state, buffer);
if (state)
COMMON_INTERCEPTOR_READ_RANGE(ctx, state, sizeof(u32) * 5);
if (buffer)
COMMON_INTERCEPTOR_READ_RANGE(ctx, buffer, sizeof(u8) * 64);
REAL(SHA1Transform)(state, buffer);
if (state)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, state, sizeof(u32) * 5);
}
INTERCEPTOR(char *, SHA1End, void *context, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1End, context, buf);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA1_CTX_sz);
char *ret = REAL(SHA1End)(context, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA1_return_length);
return ret;
}
INTERCEPTOR(char *, SHA1File, char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1File, filename, buf);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(SHA1File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA1_return_length);
return ret;
}
INTERCEPTOR(char *, SHA1FileChunk, char *filename, char *buf, OFF_T offset,
OFF_T length) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1FileChunk, filename, buf, offset, length);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(SHA1FileChunk)(filename, buf, offset, length);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA1_return_length);
return ret;
}
INTERCEPTOR(char *, SHA1Data, u8 *data, SIZE_T len, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1Data, data, len, buf);
if (data)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
char *ret = REAL(SHA1Data)(data, len, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA1_return_length);
return ret;
}
#define INIT_SHA1 \
COMMON_INTERCEPT_FUNCTION(SHA1Init); \
COMMON_INTERCEPT_FUNCTION(SHA1Update); \
COMMON_INTERCEPT_FUNCTION(SHA1Final); \
COMMON_INTERCEPT_FUNCTION(SHA1Transform); \
COMMON_INTERCEPT_FUNCTION(SHA1End); \
COMMON_INTERCEPT_FUNCTION(SHA1File); \
COMMON_INTERCEPT_FUNCTION(SHA1FileChunk); \
COMMON_INTERCEPT_FUNCTION(SHA1Data)
#else
#define INIT_SHA1
#endif
#if SANITIZER_INTERCEPT_MD4
INTERCEPTOR(void, MD4Init, void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4Init, context);
REAL(MD4Init)(context);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD4_CTX_sz);
}
INTERCEPTOR(void, MD4Update, void *context, const unsigned char *data,
unsigned int len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4Update, context, data, len);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD4_CTX_sz);
REAL(MD4Update)(context, data, len);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD4_CTX_sz);
}
INTERCEPTOR(void, MD4Final, unsigned char digest[16], void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4Final, digest, context);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD4_CTX_sz);
REAL(MD4Final)(digest, context);
if (digest)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16);
}
INTERCEPTOR(char *, MD4End, void *context, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4End, context, buf);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD4_CTX_sz);
char *ret = REAL(MD4End)(context, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD4_return_length);
return ret;
}
INTERCEPTOR(char *, MD4File, const char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4File, filename, buf);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(MD4File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD4_return_length);
return ret;
}
INTERCEPTOR(char *, MD4Data, const unsigned char *data, unsigned int len,
char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4Data, data, len, buf);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
char *ret = REAL(MD4Data)(data, len, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD4_return_length);
return ret;
}
#define INIT_MD4 \
COMMON_INTERCEPT_FUNCTION(MD4Init); \
COMMON_INTERCEPT_FUNCTION(MD4Update); \
COMMON_INTERCEPT_FUNCTION(MD4Final); \
COMMON_INTERCEPT_FUNCTION(MD4End); \
COMMON_INTERCEPT_FUNCTION(MD4File); \
COMMON_INTERCEPT_FUNCTION(MD4Data)
#else
#define INIT_MD4
#endif
#if SANITIZER_INTERCEPT_RMD160
INTERCEPTOR(void, RMD160Init, void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160Init, context);
REAL(RMD160Init)(context);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, RMD160_CTX_sz);
}
INTERCEPTOR(void, RMD160Update, void *context, const u8 *data, unsigned len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160Update, context, data, len);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, RMD160_CTX_sz);
REAL(RMD160Update)(context, data, len);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, RMD160_CTX_sz);
}
INTERCEPTOR(void, RMD160Final, u8 digest[20], void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160Final, digest, context);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, RMD160_CTX_sz);
REAL(RMD160Final)(digest, context);
if (digest)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(u8) * 20);
}
INTERCEPTOR(void, RMD160Transform, u32 state[5], u16 buffer[16]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160Transform, state, buffer);
if (state)
COMMON_INTERCEPTOR_READ_RANGE(ctx, state, sizeof(u32) * 5);
if (buffer)
COMMON_INTERCEPTOR_READ_RANGE(ctx, buffer, sizeof(u32) * 16);
REAL(RMD160Transform)(state, buffer);
if (state)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, state, sizeof(u32) * 5);
}
INTERCEPTOR(char *, RMD160End, void *context, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160End, context, buf);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, RMD160_CTX_sz);
char *ret = REAL(RMD160End)(context, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, RMD160_return_length);
return ret;
}
INTERCEPTOR(char *, RMD160File, char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160File, filename, buf);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(RMD160File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, RMD160_return_length);
return ret;
}
INTERCEPTOR(char *, RMD160FileChunk, char *filename, char *buf, OFF_T offset,
OFF_T length) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160FileChunk, filename, buf, offset, length);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(RMD160FileChunk)(filename, buf, offset, length);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, RMD160_return_length);
return ret;
}
INTERCEPTOR(char *, RMD160Data, u8 *data, SIZE_T len, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160Data, data, len, buf);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
char *ret = REAL(RMD160Data)(data, len, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, RMD160_return_length);
return ret;
}
#define INIT_RMD160 \
COMMON_INTERCEPT_FUNCTION(RMD160Init); \
COMMON_INTERCEPT_FUNCTION(RMD160Update); \
COMMON_INTERCEPT_FUNCTION(RMD160Final); \
COMMON_INTERCEPT_FUNCTION(RMD160Transform); \
COMMON_INTERCEPT_FUNCTION(RMD160End); \
COMMON_INTERCEPT_FUNCTION(RMD160File); \
COMMON_INTERCEPT_FUNCTION(RMD160FileChunk); \
COMMON_INTERCEPT_FUNCTION(RMD160Data)
#else
#define INIT_RMD160
#endif
#if SANITIZER_INTERCEPT_MD5
INTERCEPTOR(void, MD5Init, void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5Init, context);
REAL(MD5Init)(context);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
}
INTERCEPTOR(void, MD5Update, void *context, const unsigned char *data,
unsigned int len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5Update, context, data, len);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
REAL(MD5Update)(context, data, len);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
}
INTERCEPTOR(void, MD5Final, unsigned char digest[16], void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5Final, digest, context);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
REAL(MD5Final)(digest, context);
if (digest)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16);
}
INTERCEPTOR(char *, MD5End, void *context, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5End, context, buf);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
char *ret = REAL(MD5End)(context, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
return ret;
}
INTERCEPTOR(char *, MD5File, const char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5File, filename, buf);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(MD5File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
return ret;
}
INTERCEPTOR(char *, MD5Data, const unsigned char *data, unsigned int len,
char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5Data, data, len, buf);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
char *ret = REAL(MD5Data)(data, len, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
return ret;
}
#define INIT_MD5 \
COMMON_INTERCEPT_FUNCTION(MD5Init); \
COMMON_INTERCEPT_FUNCTION(MD5Update); \
COMMON_INTERCEPT_FUNCTION(MD5Final); \
COMMON_INTERCEPT_FUNCTION(MD5End); \
COMMON_INTERCEPT_FUNCTION(MD5File); \
COMMON_INTERCEPT_FUNCTION(MD5Data)
#else
#define INIT_MD5
#endif
#if SANITIZER_INTERCEPT_FSEEK
INTERCEPTOR(int, fseek, __sanitizer_FILE *stream, long int offset, int whence) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fseek, stream, offset, whence);
return REAL(fseek)(stream, offset, whence);
}
INTERCEPTOR(int, fseeko, __sanitizer_FILE *stream, OFF_T offset, int whence) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fseeko, stream, offset, whence);
return REAL(fseeko)(stream, offset, whence);
}
INTERCEPTOR(long int, ftell, __sanitizer_FILE *stream) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ftell, stream);
return REAL(ftell)(stream);
}
INTERCEPTOR(OFF_T, ftello, __sanitizer_FILE *stream) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ftello, stream);
return REAL(ftello)(stream);
}
INTERCEPTOR(void, rewind, __sanitizer_FILE *stream) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, rewind, stream);
return REAL(rewind)(stream);
}
INTERCEPTOR(int, fgetpos, __sanitizer_FILE *stream, void *pos) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetpos, stream, pos);
int ret = REAL(fgetpos)(stream, pos);
if (pos && !ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pos, fpos_t_sz);
return ret;
}
INTERCEPTOR(int, fsetpos, __sanitizer_FILE *stream, const void *pos) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fsetpos, stream, pos);
if (pos)
COMMON_INTERCEPTOR_READ_RANGE(ctx, pos, fpos_t_sz);
return REAL(fsetpos)(stream, pos);
}
#define INIT_FSEEK \
COMMON_INTERCEPT_FUNCTION(fseek); \
COMMON_INTERCEPT_FUNCTION(fseeko); \
COMMON_INTERCEPT_FUNCTION(ftell); \
COMMON_INTERCEPT_FUNCTION(ftello); \
COMMON_INTERCEPT_FUNCTION(rewind); \
COMMON_INTERCEPT_FUNCTION(fgetpos); \
COMMON_INTERCEPT_FUNCTION(fsetpos)
#else
#define INIT_FSEEK
#endif
#if SANITIZER_INTERCEPT_MD2
INTERCEPTOR(void, MD2Init, void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2Init, context);
REAL(MD2Init)(context);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD2_CTX_sz);
}
INTERCEPTOR(void, MD2Update, void *context, const unsigned char *data,
unsigned int len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2Update, context, data, len);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD2_CTX_sz);
REAL(MD2Update)(context, data, len);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD2_CTX_sz);
}
INTERCEPTOR(void, MD2Final, unsigned char digest[16], void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2Final, digest, context);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD2_CTX_sz);
REAL(MD2Final)(digest, context);
if (digest)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16);
}
INTERCEPTOR(char *, MD2End, void *context, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2End, context, buf);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD2_CTX_sz);
char *ret = REAL(MD2End)(context, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD2_return_length);
return ret;
}
INTERCEPTOR(char *, MD2File, const char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2File, filename, buf);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(MD2File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD2_return_length);
return ret;
}
INTERCEPTOR(char *, MD2Data, const unsigned char *data, unsigned int len,
char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2Data, data, len, buf);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
char *ret = REAL(MD2Data)(data, len, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD2_return_length);
return ret;
}
#define INIT_MD2 \
COMMON_INTERCEPT_FUNCTION(MD2Init); \
COMMON_INTERCEPT_FUNCTION(MD2Update); \
COMMON_INTERCEPT_FUNCTION(MD2Final); \
COMMON_INTERCEPT_FUNCTION(MD2End); \
COMMON_INTERCEPT_FUNCTION(MD2File); \
COMMON_INTERCEPT_FUNCTION(MD2Data)
#else
#define INIT_MD2
#endif
#if SANITIZER_INTERCEPT_SHA2
#define SHA2_INTERCEPTORS(LEN, SHA2_STATE_T) \
INTERCEPTOR(void, SHA##LEN##_Init, void *context) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Init, context); \
REAL(SHA##LEN##_Init)(context); \
if (context) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
} \
INTERCEPTOR(void, SHA##LEN##_Update, void *context, \
const u8 *data, SIZE_T len) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Update, context, data, len); \
if (data && len > 0) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); \
if (context) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
REAL(SHA##LEN##_Update)(context, data, len); \
if (context) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
} \
INTERCEPTOR(void, SHA##LEN##_Final, u8 digest[LEN/8], \
void *context) { \
void *ctx; \
CHECK_EQ(SHA##LEN##_digest_length, LEN/8); \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Final, digest, context); \
if (context) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
REAL(SHA##LEN##_Final)(digest, context); \
if (digest) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, \
sizeof(digest[0]) * \
SHA##LEN##_digest_length); \
} \
INTERCEPTOR(char *, SHA##LEN##_End, void *context, char *buf) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_End, context, buf); \
if (context) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
char *ret = REAL(SHA##LEN##_End)(context, buf); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
return ret; \
} \
INTERCEPTOR(char *, SHA##LEN##_File, const char *filename, char *buf) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_File, filename, buf); \
if (filename) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\
char *ret = REAL(SHA##LEN##_File)(filename, buf); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
return ret; \
} \
INTERCEPTOR(char *, SHA##LEN##_FileChunk, const char *filename, char *buf, \
OFF_T offset, OFF_T length) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_FileChunk, filename, buf, offset, \
length); \
if (filename) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\
char *ret = REAL(SHA##LEN##_FileChunk)(filename, buf, offset, length); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
return ret; \
} \
INTERCEPTOR(char *, SHA##LEN##_Data, u8 *data, SIZE_T len, char *buf) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Data, data, len, buf); \
if (data && len > 0) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); \
char *ret = REAL(SHA##LEN##_Data)(data, len, buf); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
return ret; \
}
SHA2_INTERCEPTORS(224, u32)
SHA2_INTERCEPTORS(256, u32)
SHA2_INTERCEPTORS(384, u64)
SHA2_INTERCEPTORS(512, u64)
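// Each SHA2_INTERCEPTORS(LEN, ...) expansion token-pastes LEN into the
// function names: SHA2_INTERCEPTORS(256, u32) defines SHA256_Init,
// SHA256_Update, SHA256_Final, SHA256_End, SHA256_File, SHA256_FileChunk and
// SHA256_Data, sized via the matching SHA256_CTX_sz and SHA256_return_length
// constants. (The SHA2_STATE_T parameter is unused in the body above.)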
#define INIT_SHA2_INTERCEPTORS(LEN) \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Init); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Update); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Final); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_End); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_File); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_FileChunk); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Data)
#define INIT_SHA2 \
INIT_SHA2_INTERCEPTORS(224); \
INIT_SHA2_INTERCEPTORS(256); \
INIT_SHA2_INTERCEPTORS(384); \
INIT_SHA2_INTERCEPTORS(512)
#undef SHA2_INTERCEPTORS
#else
#define INIT_SHA2
#endif
#if SANITIZER_INTERCEPT_VIS
INTERCEPTOR(char *, vis, char *dst, int c, int flag, int nextc) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, vis, dst, c, flag, nextc);
char *end = REAL(vis)(dst, c, flag, nextc);
// dst is NUL-terminated and end points at the terminating NUL character
if (dst && end)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, end - dst + 1);
return end;
}
INTERCEPTOR(char *, nvis, char *dst, SIZE_T dlen, int c, int flag, int nextc) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, nvis, dst, dlen, c, flag, nextc);
char *end = REAL(nvis)(dst, dlen, c, flag, nextc);
// nvis does not guarantee that dst is NUL-terminated
if (dst && end)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, end - dst + 1);
return end;
}
INTERCEPTOR(int, strvis, char *dst, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strvis, dst, src, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int len = REAL(strvis)(dst, src, flag);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, len + 1);
return len;
}
INTERCEPTOR(int, stravis, char **dst, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, stravis, dst, src, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int len = REAL(stravis)(dst, src, flag);
if (dst) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(char *));
if (*dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *dst, len + 1);
}
return len;
}
INTERCEPTOR(int, strnvis, char *dst, SIZE_T dlen, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnvis, dst, dlen, src, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int len = REAL(strnvis)(dst, dlen, src, flag);
// The return value is valid even if there was no room for the terminating NUL
if (dst && len > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, len + 1);
return len;
}
INTERCEPTOR(int, strvisx, char *dst, const char *src, SIZE_T len, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strvisx, dst, src, len, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
int ret = REAL(strvisx)(dst, src, len, flag);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strnvisx, char *dst, SIZE_T dlen, const char *src, SIZE_T len,
int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnvisx, dst, dlen, src, len, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
int ret = REAL(strnvisx)(dst, dlen, src, len, flag);
if (dst && ret >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strenvisx, char *dst, SIZE_T dlen, const char *src, SIZE_T len,
int flag, int *cerr_ptr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strenvisx, dst, dlen, src, len, flag, cerr_ptr);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
// FIXME: according to the implementation, cerr_ptr only needs to be checked
// when the VIS_NOLOCALE bit is not set in flag
if (cerr_ptr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cerr_ptr, sizeof(int));
int ret = REAL(strenvisx)(dst, dlen, src, len, flag, cerr_ptr);
if (dst && ret >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
if (cerr_ptr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cerr_ptr, sizeof(int));
return ret;
}
INTERCEPTOR(char *, svis, char *dst, int c, int flag, int nextc,
const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, svis, dst, c, flag, nextc, extra);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
char *end = REAL(svis)(dst, c, flag, nextc, extra);
if (dst && end)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, end - dst + 1);
return end;
}
INTERCEPTOR(char *, snvis, char *dst, SIZE_T dlen, int c, int flag, int nextc,
const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, snvis, dst, dlen, c, flag, nextc, extra);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
char *end = REAL(snvis)(dst, dlen, c, flag, nextc, extra);
if (dst && end)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst,
Min((SIZE_T)(end - dst + 1), dlen));
return end;
}
INTERCEPTOR(int, strsvis, char *dst, const char *src, int flag,
const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsvis, dst, src, flag, extra);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int len = REAL(strsvis)(dst, src, flag, extra);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, len + 1);
return len;
}
INTERCEPTOR(int, strsnvis, char *dst, SIZE_T dlen, const char *src, int flag,
const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsnvis, dst, dlen, src, flag, extra);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int len = REAL(strsnvis)(dst, dlen, src, flag, extra);
// The return value is valid even if there was no room for the terminating NUL
if (dst && len >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, len + 1);
return len;
}
INTERCEPTOR(int, strsvisx, char *dst, const char *src, SIZE_T len, int flag,
const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsvisx, dst, src, len, flag, extra);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int ret = REAL(strsvisx)(dst, src, len, flag, extra);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strsnvisx, char *dst, SIZE_T dlen, const char *src, SIZE_T len,
int flag, const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsnvisx, dst, dlen, src, len, flag, extra);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int ret = REAL(strsnvisx)(dst, dlen, src, len, flag, extra);
if (dst && ret >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strsenvisx, char *dst, SIZE_T dlen, const char *src,
SIZE_T len, int flag, const char *extra, int *cerr_ptr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsenvisx, dst, dlen, src, len, flag, extra,
cerr_ptr);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
// FIXME: according to the implementation, cerr_ptr only needs to be checked
// when the VIS_NOLOCALE bit is not set in flag
if (cerr_ptr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cerr_ptr, sizeof(int));
int ret = REAL(strsenvisx)(dst, dlen, src, len, flag, extra, cerr_ptr);
if (dst && ret >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
if (cerr_ptr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cerr_ptr, sizeof(int));
return ret;
}
INTERCEPTOR(int, unvis, char *cp, int c, int *astate, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, unvis, cp, c, astate, flag);
if (astate)
COMMON_INTERCEPTOR_READ_RANGE(ctx, astate, sizeof(*astate));
int ret = REAL(unvis)(cp, c, astate, flag);
if (ret == unvis_valid || ret == unvis_validpush) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cp, sizeof(*cp));
}
return ret;
}
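// unvis is a byte-at-a-time decoder: astate carries the state machine across
// calls, and only the unvis_valid / unvis_validpush results mean a decoded
// byte was stored in *cp, which is why the write is recorded for those two
// return codes only. The canonical loop, sketched for illustration:
//   int state = 0;
//   char out;
//   for (const char *p = in; *p; ++p)
//     if (unvis(&out, *p, &state, 0) == UNVIS_VALID)
//       consume(out);  // consume() is a hypothetical callback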
INTERCEPTOR(int, strunvis, char *dst, const char *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strunvis, dst, src);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strunvis)(dst, src);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strnunvis, char *dst, SIZE_T dlen, const char *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnunvis, dst, dlen, src);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strnunvis)(dst, dlen, src);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strunvisx, char *dst, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strunvisx, dst, src, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strunvisx)(dst, src, flag);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strnunvisx, char *dst, SIZE_T dlen, const char *src,
int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnunvisx, dst, dlen, src, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strnunvisx)(dst, dlen, src, flag);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
#define INIT_VIS \
COMMON_INTERCEPT_FUNCTION(vis); \
COMMON_INTERCEPT_FUNCTION(nvis); \
COMMON_INTERCEPT_FUNCTION(strvis); \
COMMON_INTERCEPT_FUNCTION(stravis); \
COMMON_INTERCEPT_FUNCTION(strnvis); \
COMMON_INTERCEPT_FUNCTION(strvisx); \
COMMON_INTERCEPT_FUNCTION(strnvisx); \
COMMON_INTERCEPT_FUNCTION(strenvisx); \
COMMON_INTERCEPT_FUNCTION(svis); \
COMMON_INTERCEPT_FUNCTION(snvis); \
COMMON_INTERCEPT_FUNCTION(strsvis); \
COMMON_INTERCEPT_FUNCTION(strsnvis); \
COMMON_INTERCEPT_FUNCTION(strsvisx); \
COMMON_INTERCEPT_FUNCTION(strsnvisx); \
COMMON_INTERCEPT_FUNCTION(strsenvisx); \
COMMON_INTERCEPT_FUNCTION(unvis); \
COMMON_INTERCEPT_FUNCTION(strunvis); \
COMMON_INTERCEPT_FUNCTION(strnunvis); \
COMMON_INTERCEPT_FUNCTION(strunvisx); \
COMMON_INTERCEPT_FUNCTION(strnunvisx)
#else
#define INIT_VIS
#endif
#if SANITIZER_INTERCEPT_CDB
INTERCEPTOR(struct __sanitizer_cdbr *, cdbr_open, const char *path, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_open, path, flags);
if (path)
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
struct __sanitizer_cdbr *cdbr = REAL(cdbr_open)(path, flags);
if (cdbr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbr, sizeof(*cdbr));
return cdbr;
}
INTERCEPTOR(struct __sanitizer_cdbr *, cdbr_open_mem, void *base, SIZE_T size,
int flags, void (*unmap)(void *, void *, SIZE_T), void *cookie) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_open_mem, base, size, flags, unmap,
cookie);
if (base && size)
COMMON_INTERCEPTOR_READ_RANGE(ctx, base, size);
struct __sanitizer_cdbr *cdbr =
REAL(cdbr_open_mem)(base, size, flags, unmap, cookie);
if (cdbr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbr, sizeof(*cdbr));
return cdbr;
}
INTERCEPTOR(u32, cdbr_entries, struct __sanitizer_cdbr *cdbr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_entries, cdbr);
if (cdbr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbr, sizeof(*cdbr));
return REAL(cdbr_entries)(cdbr);
}
INTERCEPTOR(int, cdbr_get, struct __sanitizer_cdbr *cdbr, u32 index,
const void **data, SIZE_T *datalen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_get, cdbr, index, data, datalen);
if (cdbr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbr, sizeof(*cdbr));
int ret = REAL(cdbr_get)(cdbr, index, data, datalen);
if (!ret) {
if (data)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, sizeof(*data));
if (datalen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, datalen, sizeof(*datalen));
if (data && datalen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *data, *datalen);
}
return ret;
}
INTERCEPTOR(int, cdbr_find, struct __sanitizer_cdbr *cdbr, const void *key,
SIZE_T keylen, const void **data, SIZE_T *datalen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_find, cdbr, key, keylen, data, datalen);
if (cdbr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbr, sizeof(*cdbr));
if (key)
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, keylen);
int ret = REAL(cdbr_find)(cdbr, key, keylen, data, datalen);
if (!ret) {
if (data)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, sizeof(*data));
if (datalen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, datalen, sizeof(*datalen));
if (data && datalen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *data, *datalen);
}
return ret;
}
INTERCEPTOR(void, cdbr_close, struct __sanitizer_cdbr *cdbr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_close, cdbr);
if (cdbr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbr, sizeof(*cdbr));
REAL(cdbr_close)(cdbr);
}
INTERCEPTOR(struct __sanitizer_cdbw *, cdbw_open) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_open);
struct __sanitizer_cdbw *ret = REAL(cdbw_open)();
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, sizeof(*ret));
return ret;
}
INTERCEPTOR(int, cdbw_put, struct __sanitizer_cdbw *cdbw, const void *key,
SIZE_T keylen, const void *data, SIZE_T datalen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_put, cdbw, key, keylen, data, datalen);
if (cdbw)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbw, sizeof(*cdbw));
if (data && datalen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, datalen);
if (key && keylen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, keylen);
int ret = REAL(cdbw_put)(cdbw, key, keylen, data, datalen);
if (!ret && cdbw)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbw, sizeof(*cdbw));
return ret;
}
INTERCEPTOR(int, cdbw_put_data, struct __sanitizer_cdbw *cdbw, const void *data,
SIZE_T datalen, u32 *index) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_put_data, cdbw, data, datalen, index);
if (cdbw)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbw, sizeof(*cdbw));
if (data && datalen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, datalen);
int ret = REAL(cdbw_put_data)(cdbw, data, datalen, index);
if (!ret) {
if (index)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, index, sizeof(*index));
if (cdbw)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbw, sizeof(*cdbw));
}
return ret;
}
INTERCEPTOR(int, cdbw_put_key, struct __sanitizer_cdbw *cdbw, const void *key,
SIZE_T keylen, u32 index) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_put_key, cdbw, key, keylen, index);
if (cdbw)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbw, sizeof(*cdbw));
if (key && keylen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, keylen);
int ret = REAL(cdbw_put_key)(cdbw, key, keylen, index);
if (!ret && cdbw)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbw, sizeof(*cdbw));
return ret;
}
INTERCEPTOR(int, cdbw_output, struct __sanitizer_cdbw *cdbw, int output,
const char descr[16], u32 (*seedgen)(void)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_output, cdbw, output, descr, seedgen);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, output);
if (cdbw)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbw, sizeof(*cdbw));
if (descr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, descr, internal_strnlen(descr, 16));
if (seedgen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, (void *)seedgen, sizeof(seedgen));
int ret = REAL(cdbw_output)(cdbw, output, descr, seedgen);
if (!ret) {
if (cdbw)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbw, sizeof(*cdbw));
if (output >= 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, output);
}
return ret;
}
INTERCEPTOR(void, cdbw_close, struct __sanitizer_cdbw *cdbw) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_close, cdbw);
if (cdbw)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbw, sizeof(*cdbw));
REAL(cdbw_close)(cdbw);
}
#define INIT_CDB \
COMMON_INTERCEPT_FUNCTION(cdbr_open); \
COMMON_INTERCEPT_FUNCTION(cdbr_open_mem); \
COMMON_INTERCEPT_FUNCTION(cdbr_entries); \
COMMON_INTERCEPT_FUNCTION(cdbr_get); \
COMMON_INTERCEPT_FUNCTION(cdbr_find); \
COMMON_INTERCEPT_FUNCTION(cdbr_close); \
COMMON_INTERCEPT_FUNCTION(cdbw_open); \
COMMON_INTERCEPT_FUNCTION(cdbw_put); \
COMMON_INTERCEPT_FUNCTION(cdbw_put_data); \
COMMON_INTERCEPT_FUNCTION(cdbw_put_key); \
COMMON_INTERCEPT_FUNCTION(cdbw_output); \
COMMON_INTERCEPT_FUNCTION(cdbw_close)
#else
#define INIT_CDB
#endif
#if SANITIZER_INTERCEPT_GETFSENT
INTERCEPTOR(void *, getfsent) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getfsent);
void *ret = REAL(getfsent)();
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, struct_fstab_sz);
return ret;
}
INTERCEPTOR(void *, getfsspec, const char *spec) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getfsspec, spec);
if (spec)
COMMON_INTERCEPTOR_READ_RANGE(ctx, spec, internal_strlen(spec) + 1);
void *ret = REAL(getfsspec)(spec);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, struct_fstab_sz);
return ret;
}
INTERCEPTOR(void *, getfsfile, const char *file) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getfsfile, file);
if (file)
COMMON_INTERCEPTOR_READ_RANGE(ctx, file, internal_strlen(file) + 1);
void *ret = REAL(getfsfile)(file);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, struct_fstab_sz);
return ret;
}
#define INIT_GETFSENT \
COMMON_INTERCEPT_FUNCTION(getfsent); \
COMMON_INTERCEPT_FUNCTION(getfsspec); \
COMMON_INTERCEPT_FUNCTION(getfsfile);
#else
#define INIT_GETFSENT
#endif
#if SANITIZER_INTERCEPT_ARC4RANDOM
INTERCEPTOR(void, arc4random_buf, void *buf, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, arc4random_buf, buf, len);
REAL(arc4random_buf)(buf, len);
if (buf && len)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, len);
}
INTERCEPTOR(void, arc4random_addrandom, u8 *dat, int datlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, arc4random_addrandom, dat, datlen);
if (dat && datlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, dat, datlen);
REAL(arc4random_addrandom)(dat, datlen);
}
#define INIT_ARC4RANDOM \
COMMON_INTERCEPT_FUNCTION(arc4random_buf); \
COMMON_INTERCEPT_FUNCTION(arc4random_addrandom);
#else
#define INIT_ARC4RANDOM
#endif
#if SANITIZER_INTERCEPT_POPEN
INTERCEPTOR(__sanitizer_FILE *, popen, const char *command, const char *type) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, popen, command, type);
if (command)
COMMON_INTERCEPTOR_READ_RANGE(ctx, command, internal_strlen(command) + 1);
if (type)
COMMON_INTERCEPTOR_READ_RANGE(ctx, type, internal_strlen(type) + 1);
__sanitizer_FILE *res = REAL(popen)(command, type);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, nullptr);
if (res) unpoison_file(res);
return res;
}
#define INIT_POPEN COMMON_INTERCEPT_FUNCTION(popen)
#else
#define INIT_POPEN
#endif
#if SANITIZER_INTERCEPT_POPENVE
INTERCEPTOR(__sanitizer_FILE *, popenve, const char *path,
char *const *argv, char *const *envp, const char *type) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, popenve, path, argv, envp, type);
if (path)
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
if (argv) {
for (char *const *pa = argv; ; ++pa) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, pa, sizeof(char **));
if (!*pa)
break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, internal_strlen(*pa) + 1);
}
}
if (envp) {
for (char *const *pa = envp; ; ++pa) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, pa, sizeof(char **));
if (!*pa)
break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, internal_strlen(*pa) + 1);
}
}
if (type)
COMMON_INTERCEPTOR_READ_RANGE(ctx, type, internal_strlen(type) + 1);
__sanitizer_FILE *res = REAL(popenve)(path, argv, envp, type);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, nullptr);
if (res) unpoison_file(res);
return res;
}
#define INIT_POPENVE COMMON_INTERCEPT_FUNCTION(popenve)
#else
#define INIT_POPENVE
#endif
#if SANITIZER_INTERCEPT_PCLOSE
INTERCEPTOR(int, pclose, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pclose, fp);
COMMON_INTERCEPTOR_FILE_CLOSE(ctx, fp);
const FileMetadata *m = GetInterceptorMetadata(fp);
int res = REAL(pclose)(fp);
if (m) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*m->addr, *m->size);
DeleteInterceptorMetadata(fp);
}
return res;
}
#define INIT_PCLOSE COMMON_INTERCEPT_FUNCTION(pclose);
#else
#define INIT_PCLOSE
#endif
#if SANITIZER_INTERCEPT_FUNOPEN
typedef int (*funopen_readfn)(void *cookie, char *buf, int len);
typedef int (*funopen_writefn)(void *cookie, const char *buf, int len);
typedef OFF_T (*funopen_seekfn)(void *cookie, OFF_T offset, int whence);
typedef int (*funopen_closefn)(void *cookie);
struct WrappedFunopenCookie {
void *real_cookie;
funopen_readfn real_read;
funopen_writefn real_write;
funopen_seekfn real_seek;
funopen_closefn real_close;
};
static int wrapped_funopen_read(void *cookie, char *buf, int len) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopenCookie *wrapped_cookie = (WrappedFunopenCookie *)cookie;
funopen_readfn real_read = wrapped_cookie->real_read;
return real_read(wrapped_cookie->real_cookie, buf, len);
}
static int wrapped_funopen_write(void *cookie, const char *buf, int len) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopenCookie *wrapped_cookie = (WrappedFunopenCookie *)cookie;
funopen_writefn real_write = wrapped_cookie->real_write;
return real_write(wrapped_cookie->real_cookie, buf, len);
}
static OFF_T wrapped_funopen_seek(void *cookie, OFF_T offset, int whence) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopenCookie *wrapped_cookie = (WrappedFunopenCookie *)cookie;
funopen_seekfn real_seek = wrapped_cookie->real_seek;
return real_seek(wrapped_cookie->real_cookie, offset, whence);
}
static int wrapped_funopen_close(void *cookie) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
WrappedFunopenCookie *wrapped_cookie = (WrappedFunopenCookie *)cookie;
funopen_closefn real_close = wrapped_cookie->real_close;
int res = real_close(wrapped_cookie->real_cookie);
InternalFree(wrapped_cookie);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, funopen, void *cookie, funopen_readfn readfn,
funopen_writefn writefn, funopen_seekfn seekfn,
funopen_closefn closefn) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, funopen, cookie, readfn, writefn, seekfn,
closefn);
WrappedFunopenCookie *wrapped_cookie =
(WrappedFunopenCookie *)InternalAlloc(sizeof(WrappedFunopenCookie));
wrapped_cookie->real_cookie = cookie;
wrapped_cookie->real_read = readfn;
wrapped_cookie->real_write = writefn;
wrapped_cookie->real_seek = seekfn;
wrapped_cookie->real_close = closefn;
__sanitizer_FILE *res =
REAL(funopen)(wrapped_cookie,
readfn ? wrapped_funopen_read : nullptr,
writefn ? wrapped_funopen_write : nullptr,
seekfn ? wrapped_funopen_seek : nullptr,
closefn ? wrapped_funopen_close : nullptr);
if (res)
unpoison_file(res);
return res;
}
#define INIT_FUNOPEN COMMON_INTERCEPT_FUNCTION(funopen)
#else
#define INIT_FUNOPEN
#endif
#if SANITIZER_INTERCEPT_FUNOPEN2
typedef SSIZE_T (*funopen2_readfn)(void *cookie, void *buf, SIZE_T len);
typedef SSIZE_T (*funopen2_writefn)(void *cookie, const void *buf, SIZE_T len);
typedef OFF_T (*funopen2_seekfn)(void *cookie, OFF_T offset, int whence);
typedef int (*funopen2_flushfn)(void *cookie);
typedef int (*funopen2_closefn)(void *cookie);
struct WrappedFunopen2Cookie {
void *real_cookie;
funopen2_readfn real_read;
funopen2_writefn real_write;
funopen2_seekfn real_seek;
funopen2_flushfn real_flush;
funopen2_closefn real_close;
};
static SSIZE_T wrapped_funopen2_read(void *cookie, void *buf, SIZE_T len) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopen2Cookie *wrapped_cookie = (WrappedFunopen2Cookie *)cookie;
funopen2_readfn real_read = wrapped_cookie->real_read;
return real_read(wrapped_cookie->real_cookie, buf, len);
}
static SSIZE_T wrapped_funopen2_write(void *cookie, const void *buf,
SIZE_T len) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopen2Cookie *wrapped_cookie = (WrappedFunopen2Cookie *)cookie;
funopen2_writefn real_write = wrapped_cookie->real_write;
return real_write(wrapped_cookie->real_cookie, buf, len);
}
static OFF_T wrapped_funopen2_seek(void *cookie, OFF_T offset, int whence) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopen2Cookie *wrapped_cookie = (WrappedFunopen2Cookie *)cookie;
funopen2_seekfn real_seek = wrapped_cookie->real_seek;
return real_seek(wrapped_cookie->real_cookie, offset, whence);
}
static int wrapped_funopen2_flush(void *cookie) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
WrappedFunopen2Cookie *wrapped_cookie = (WrappedFunopen2Cookie *)cookie;
funopen2_flushfn real_flush = wrapped_cookie->real_flush;
return real_flush(wrapped_cookie->real_cookie);
}
static int wrapped_funopen2_close(void *cookie) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
WrappedFunopen2Cookie *wrapped_cookie = (WrappedFunopen2Cookie *)cookie;
funopen2_closefn real_close = wrapped_cookie->real_close;
int res = real_close(wrapped_cookie->real_cookie);
InternalFree(wrapped_cookie);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, funopen2, void *cookie, funopen2_readfn readfn,
funopen2_writefn writefn, funopen2_seekfn seekfn,
funopen2_flushfn flushfn, funopen2_closefn closefn) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, funopen2, cookie, readfn, writefn, seekfn,
flushfn, closefn);
WrappedFunopen2Cookie *wrapped_cookie =
(WrappedFunopen2Cookie *)InternalAlloc(sizeof(WrappedFunopen2Cookie));
wrapped_cookie->real_cookie = cookie;
wrapped_cookie->real_read = readfn;
wrapped_cookie->real_write = writefn;
wrapped_cookie->real_seek = seekfn;
wrapped_cookie->real_flush = flushfn;
wrapped_cookie->real_close = closefn;
__sanitizer_FILE *res =
REAL(funopen2)(wrapped_cookie,
readfn ? wrapped_funopen2_read : nullptr,
writefn ? wrapped_funopen2_write : nullptr,
seekfn ? wrapped_funopen2_seek : nullptr,
flushfn ? wrapped_funopen2_flush : nullptr,
closefn ? wrapped_funopen2_close : nullptr);
if (res)
unpoison_file(res);
return res;
}
#define INIT_FUNOPEN2 COMMON_INTERCEPT_FUNCTION(funopen2)
#else
#define INIT_FUNOPEN2
#endif
#if SANITIZER_INTERCEPT_FDEVNAME
INTERCEPTOR(char *, fdevname, int fd) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fdevname, fd);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
char *name = REAL(fdevname)(fd);
if (name) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
if (fd > 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
return name;
}
INTERCEPTOR(char *, fdevname_r, int fd, char *buf, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fdevname_r, fd, buf, len);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
char *name = REAL(fdevname_r)(fd, buf, len);
if (name && buf && len > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
if (fd > 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
return name;
}
#define INIT_FDEVNAME \
COMMON_INTERCEPT_FUNCTION(fdevname); \
COMMON_INTERCEPT_FUNCTION(fdevname_r);
#else
#define INIT_FDEVNAME
#endif
#if SANITIZER_INTERCEPT_GETUSERSHELL
INTERCEPTOR(char *, getusershell) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getusershell);
char *res = REAL(getusershell)();
if (res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
#define INIT_GETUSERSHELL COMMON_INTERCEPT_FUNCTION(getusershell);
#else
#define INIT_GETUSERSHELL
#endif
#if SANITIZER_INTERCEPT_SL_INIT
INTERCEPTOR(void *, sl_init) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sl_init);
void *res = REAL(sl_init)();
if (res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, __sanitizer::struct_StringList_sz);
return res;
}
INTERCEPTOR(int, sl_add, void *sl, char *item) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sl_add, sl, item);
if (sl)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
if (item)
COMMON_INTERCEPTOR_READ_RANGE(ctx, item, internal_strlen(item) + 1);
int res = REAL(sl_add)(sl, item);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
return res;
}
INTERCEPTOR(char *, sl_find, void *sl, const char *item) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sl_find, sl, item);
if (sl)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
if (item)
COMMON_INTERCEPTOR_READ_RANGE(ctx, item, internal_strlen(item) + 1);
char *res = REAL(sl_find)(sl, item);
if (res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(void, sl_free, void *sl, int freeall) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sl_free, sl, freeall);
if (sl)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
REAL(sl_free)(sl, freeall);
}
#define INIT_SL_INIT \
COMMON_INTERCEPT_FUNCTION(sl_init); \
COMMON_INTERCEPT_FUNCTION(sl_add); \
COMMON_INTERCEPT_FUNCTION(sl_find); \
COMMON_INTERCEPT_FUNCTION(sl_free);
#else
#define INIT_SL_INIT
#endif
#if SANITIZER_INTERCEPT_GETRANDOM
INTERCEPTOR(SSIZE_T, getrandom, void *buf, SIZE_T buflen, unsigned int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getrandom, buf, buflen, flags);
SSIZE_T n = REAL(getrandom)(buf, buflen, flags);
if (n > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, n);
}
return n;
}
#define INIT_GETRANDOM COMMON_INTERCEPT_FUNCTION(getrandom)
#else
#define INIT_GETRANDOM
#endif
#if SANITIZER_INTERCEPT_CRYPT
INTERCEPTOR(char *, crypt, char *key, char *salt) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, crypt, key, salt);
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, internal_strlen(key) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, salt, internal_strlen(salt) + 1);
char *res = REAL(crypt)(key, salt);
if (res != nullptr)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_CRYPT COMMON_INTERCEPT_FUNCTION(crypt);
#else
#define INIT_CRYPT
#endif
#if SANITIZER_INTERCEPT_CRYPT_R
INTERCEPTOR(char *, crypt_r, char *key, char *salt, void *data) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, crypt_r, key, salt, data);
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, internal_strlen(key) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, salt, internal_strlen(salt) + 1);
char *res = REAL(crypt_r)(key, salt, data);
if (res != nullptr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data,
__sanitizer::struct_crypt_data_sz);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
#define INIT_CRYPT_R COMMON_INTERCEPT_FUNCTION(crypt_r);
#else
#define INIT_CRYPT_R
#endif
#if SANITIZER_INTERCEPT_GETENTROPY
INTERCEPTOR(int, getentropy, void *buf, SIZE_T buflen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getentropy, buf, buflen);
int r = REAL(getentropy)(buf, buflen);
if (r == 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, buflen);
}
return r;
}
#define INIT_GETENTROPY COMMON_INTERCEPT_FUNCTION(getentropy)
#else
#define INIT_GETENTROPY
#endif
#if SANITIZER_INTERCEPT_QSORT_R
typedef int (*qsort_r_compar_f)(const void *, const void *, void *);
struct qsort_r_compar_params {
SIZE_T size;
qsort_r_compar_f compar;
void *arg;
};
static int wrapped_qsort_r_compar(const void *a, const void *b, void *arg) {
qsort_r_compar_params *params = (qsort_r_compar_params *)arg;
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, params->size);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, params->size);
return params->compar(a, b, params->arg);
}
INTERCEPTOR(void, qsort_r, void *base, SIZE_T nmemb, SIZE_T size,
qsort_r_compar_f compar, void *arg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, qsort_r, base, nmemb, size, compar, arg);
// Run the comparator over all array elements to detect any memory issues.
if (nmemb > 1) {
for (SIZE_T i = 0; i < nmemb - 1; ++i) {
void *p = (void *)((char *)base + i * size);
void *q = (void *)((char *)base + (i + 1) * size);
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
compar(p, q, arg);
}
}
qsort_r_compar_params params = {size, compar, arg};
REAL(qsort_r)(base, nmemb, size, wrapped_qsort_r_compar, &params);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, base, nmemb * size);
}
# define INIT_QSORT_R COMMON_INTERCEPT_FUNCTION(qsort_r)
#else
# define INIT_QSORT_R
#endif
#if SANITIZER_INTERCEPT_QSORT && SANITIZER_INTERCEPT_QSORT_R
INTERCEPTOR(void, qsort, void *base, SIZE_T nmemb, SIZE_T size,
qsort_r_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, qsort, base, nmemb, size, compar);
WRAP(qsort_r)(base, nmemb, size, compar, nullptr);
}
# define INIT_QSORT COMMON_INTERCEPT_FUNCTION(qsort)
#elif SANITIZER_INTERCEPT_QSORT && !SANITIZER_INTERCEPT_QSORT_R
// Glibc qsort uses a temporary buffer allocated either on stack or on heap.
// Poisoned memory from there may get copied into the comparator arguments,
// where it needs to be dealt with. But even that is not enough - the results of
// the sort may be copied into the input/output array based on the results of
// the comparator calls, but directly from the temp memory, bypassing the
// unpoisoning done in wrapped_qsort_compar. We deal with this by, again,
// unpoisoning the entire array after the sort is done.
//
// We cannot check that the entire array is initialized at the beginning: it is
// fine for parts of the sorted objects to contain uninitialized memory, e.g.
// padding in structs.
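//
// A minimal sketch (illustrative only, not part of the interceptor) of the
// failure mode described above, assuming an MSan-instrumented build and a
// hypothetical struct with uninitialized padding:
//
//   struct Padded { int key; char pad[4]; };        // pad stays uninitialized
//   Padded a[8];
//   for (int i = 0; i < 8; ++i) a[i].key = 8 - i;   // only key is written
//   qsort(a, 8, sizeof(Padded), [](const void *l, const void *r) {
//     return ((const Padded *)l)->key - ((const Padded *)r)->key;
//   });
//   // glibc may copy whole elements through a temporary buffer, so without the
//   // blanket COMMON_INTERCEPTOR_WRITE_RANGE after REAL(qsort), the poisoned
//   // padding copied back into `a` could trigger false positives later.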
typedef int (*qsort_compar_f)(const void *, const void *);
static THREADLOCAL qsort_compar_f qsort_compar;
static THREADLOCAL SIZE_T qsort_size;
static int wrapped_qsort_compar(const void *a, const void *b) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, qsort_size);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, qsort_size);
return qsort_compar(a, b);
}
INTERCEPTOR(void, qsort, void *base, SIZE_T nmemb, SIZE_T size,
qsort_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, qsort, base, nmemb, size, compar);
// Run the comparator over all array elements to detect any memory issues.
if (nmemb > 1) {
for (SIZE_T i = 0; i < nmemb - 1; ++i) {
void *p = (void *)((char *)base + i * size);
void *q = (void *)((char *)base + (i + 1) * size);
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
compar(p, q);
}
}
qsort_compar_f old_compar = qsort_compar;
SIZE_T old_size = qsort_size;
// Handle qsort() implementations that recurse using an
// interposable function call:
bool already_wrapped = compar == wrapped_qsort_compar;
if (already_wrapped) {
// This case should only happen if the qsort() implementation calls itself
// using a preemptible function call (e.g. the FreeBSD libc version).
// Check that the size and comparator arguments are as expected.
CHECK_NE(compar, qsort_compar);
CHECK_EQ(qsort_size, size);
} else {
qsort_compar = compar;
qsort_size = size;
}
REAL(qsort)(base, nmemb, size, wrapped_qsort_compar);
if (!already_wrapped) {
qsort_compar = old_compar;
qsort_size = old_size;
}
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, base, nmemb * size);
}
# define INIT_QSORT COMMON_INTERCEPT_FUNCTION(qsort)
#else
# define INIT_QSORT
#endif
#if SANITIZER_INTERCEPT_BSEARCH
typedef int (*bsearch_compar_f)(const void *, const void *);
struct bsearch_compar_params {
const void *key;
bsearch_compar_f compar;
};
static int wrapped_bsearch_compar(const void *key, const void *b) {
const bsearch_compar_params *params = (const bsearch_compar_params *)key;
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
return params->compar(params->key, b);
}
INTERCEPTOR(void *, bsearch, const void *key, const void *base, SIZE_T nmemb,
SIZE_T size, bsearch_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, bsearch, key, base, nmemb, size, compar);
bsearch_compar_params params = {key, compar};
return REAL(bsearch)(&params, base, nmemb, size, wrapped_bsearch_compar);
}
# define INIT_BSEARCH COMMON_INTERCEPT_FUNCTION(bsearch)
#else
# define INIT_BSEARCH
#endif
#if SANITIZER_INTERCEPT_SIGALTSTACK
INTERCEPTOR(int, sigaltstack, void *ss, void *oss) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigaltstack, ss, oss);
int r = REAL(sigaltstack)(ss, oss);
if (r == 0 && oss != nullptr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oss, struct_stack_t_sz);
}
return r;
}
#define INIT_SIGALTSTACK COMMON_INTERCEPT_FUNCTION(sigaltstack)
#else
#define INIT_SIGALTSTACK
#endif
#if SANITIZER_INTERCEPT_PROCCTL
INTERCEPTOR(int, procctl, int idtype, u64 id, int cmd, uptr data) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, procctl, idtype, id, cmd, data);
static const int PROC_REAP_ACQUIRE = 2;
static const int PROC_REAP_RELEASE = 3;
static const int PROC_REAP_STATUS = 4;
static const int PROC_REAP_GETPIDS = 5;
static const int PROC_REAP_KILL = 6;
if (cmd < PROC_REAP_ACQUIRE || cmd > PROC_REAP_KILL) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, (void *)data, sizeof(int));
} else {
// reap_acquire/reap_release take no arguments.
if (cmd > PROC_REAP_RELEASE) {
unsigned int reapsz;
switch (cmd) {
case PROC_REAP_STATUS:
reapsz = struct_procctl_reaper_status_sz;
break;
case PROC_REAP_GETPIDS:
reapsz = struct_procctl_reaper_pids_sz;
break;
case PROC_REAP_KILL:
reapsz = struct_procctl_reaper_kill_sz;
break;
}
COMMON_INTERCEPTOR_READ_RANGE(ctx, (void *)data, reapsz);
}
}
return REAL(procctl)(idtype, id, cmd, data);
}
#define INIT_PROCCTL COMMON_INTERCEPT_FUNCTION(procctl)
#else
#define INIT_PROCCTL
#endif
#if SANITIZER_INTERCEPT_UNAME
INTERCEPTOR(int, uname, struct utsname *utsname) {
#if SANITIZER_LINUX
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_uname(utsname);
#endif
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, uname, utsname);
int res = REAL(uname)(utsname);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, utsname,
__sanitizer::struct_utsname_sz);
return res;
}
#define INIT_UNAME COMMON_INTERCEPT_FUNCTION(uname)
#else
#define INIT_UNAME
#endif
#if SANITIZER_INTERCEPT___XUNAME
// FreeBSD's <sys/utsname.h> defines uname() as
// static __inline int uname(struct utsname *name) {
// return __xuname(SYS_NMLN, (void*)name);
// }
INTERCEPTOR(int, __xuname, int size, void *utsname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __xuname, size, utsname);
int res = REAL(__xuname)(size, utsname);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, utsname,
__sanitizer::struct_utsname_sz);
return res;
}
#define INIT___XUNAME COMMON_INTERCEPT_FUNCTION(__xuname)
#else
#define INIT___XUNAME
#endif
#include "sanitizer_common_interceptors_netbsd_compat.inc"
static void InitializeCommonInterceptors() {
#if SI_POSIX
static u64 metadata_mem[sizeof(MetadataHashMap) / sizeof(u64) + 1];
interceptor_metadata_map = new ((void *)&metadata_mem) MetadataHashMap();
#endif
INIT_MMAP;
INIT_MMAP64;
INIT_TEXTDOMAIN;
INIT_STRLEN;
INIT_STRNLEN;
INIT_STRNDUP;
INIT___STRNDUP;
INIT_STRCMP;
INIT_STRNCMP;
INIT_STRCASECMP;
INIT_STRNCASECMP;
INIT_STRSTR;
INIT_STRCASESTR;
INIT_STRCHR;
INIT_STRCHRNUL;
INIT_STRRCHR;
INIT_STRSPN;
INIT_STRTOK;
INIT_STRPBRK;
INIT_STRXFRM;
INIT___STRXFRM_L;
INIT_MEMSET;
INIT_MEMMOVE;
INIT_MEMCPY;
INIT_MEMCHR;
INIT_MEMCMP;
INIT_BCMP;
INIT_MEMRCHR;
INIT_MEMMEM;
INIT_READ;
INIT_FREAD;
INIT_PREAD;
INIT_PREAD64;
INIT_READV;
INIT_PREADV;
INIT_PREADV64;
INIT_WRITE;
INIT_FWRITE;
INIT_PWRITE;
INIT_PWRITE64;
INIT_WRITEV;
INIT_PWRITEV;
INIT_PWRITEV64;
INIT_FGETS;
INIT_FPUTS;
INIT_PUTS;
INIT_PRCTL;
INIT_LOCALTIME_AND_FRIENDS;
INIT_STRPTIME;
INIT_SCANF;
INIT_ISOC99_SCANF;
INIT_PRINTF;
INIT_PRINTF_L;
INIT_ISOC99_PRINTF;
INIT_FREXP;
INIT_FREXPF_FREXPL;
INIT_GETPWNAM_AND_FRIENDS;
INIT_GETPWNAM_R_AND_FRIENDS;
INIT_GETPWENT;
INIT_FGETPWENT;
INIT_GETPWENT_R;
INIT_FGETPWENT_R;
INIT_FGETGRENT_R;
INIT_SETPWENT;
INIT_CLOCK_GETTIME;
INIT_CLOCK_GETCPUCLOCKID;
INIT_GETITIMER;
INIT_TIME;
INIT_GLOB;
INIT_GLOB64;
INIT___B64_TO;
INIT___DN_EXPAND;
INIT_POSIX_SPAWN;
INIT_WAIT;
INIT_WAIT4;
INIT_INET;
INIT_PTHREAD_GETSCHEDPARAM;
INIT_GETADDRINFO;
INIT_GETNAMEINFO;
INIT_GETSOCKNAME;
INIT_GETHOSTBYNAME;
INIT_GETHOSTBYNAME2;
INIT_GETHOSTBYNAME_R;
INIT_GETHOSTBYNAME2_R;
INIT_GETHOSTBYADDR_R;
INIT_GETHOSTENT_R;
INIT_GETSOCKOPT;
INIT_ACCEPT;
INIT_ACCEPT4;
INIT_PACCEPT;
INIT_MODF;
INIT_RECVMSG;
INIT_SENDMSG;
INIT_RECVMMSG;
INIT_SENDMMSG;
INIT_SYSMSG;
INIT_GETPEERNAME;
INIT_IOCTL;
INIT_INET_ATON;
INIT_SYSINFO;
INIT_READDIR;
INIT_READDIR64;
INIT_PTRACE;
INIT_SETLOCALE;
INIT_GETCWD;
INIT_GET_CURRENT_DIR_NAME;
INIT_STRTOIMAX;
INIT_MBSTOWCS;
INIT_MBSNRTOWCS;
INIT_WCSTOMBS;
INIT_WCSNRTOMBS;
INIT_WCRTOMB;
INIT_WCTOMB;
INIT_TCGETATTR;
INIT_REALPATH;
INIT_CANONICALIZE_FILE_NAME;
INIT_CONFSTR;
INIT_SCHED_GETAFFINITY;
INIT_SCHED_GETPARAM;
INIT_STRERROR;
INIT_STRERROR_R;
INIT_XPG_STRERROR_R;
INIT_SCANDIR;
INIT_SCANDIR64;
INIT_GETGROUPS;
INIT_POLL;
INIT_PPOLL;
INIT_WORDEXP;
INIT_SIGWAIT;
INIT_SIGWAITINFO;
INIT_SIGTIMEDWAIT;
INIT_SIGSETOPS;
INIT_SIGSET_LOGICOPS;
INIT_SIGPENDING;
INIT_SIGPROCMASK;
INIT_PTHREAD_SIGMASK;
INIT_BACKTRACE;
INIT__EXIT;
INIT_PTHREAD_MUTEX_LOCK;
INIT_PTHREAD_MUTEX_UNLOCK;
INIT___PTHREAD_MUTEX_LOCK;
INIT___PTHREAD_MUTEX_UNLOCK;
INIT___LIBC_MUTEX_LOCK;
INIT___LIBC_MUTEX_UNLOCK;
INIT___LIBC_THR_SETCANCELSTATE;
INIT_GETMNTENT;
INIT_GETMNTENT_R;
INIT_STATFS;
INIT_STATFS64;
INIT_STATVFS;
INIT_STATVFS64;
INIT_INITGROUPS;
INIT_ETHER_NTOA_ATON;
INIT_ETHER_HOST;
INIT_ETHER_R;
INIT_SHMCTL;
INIT_RANDOM_R;
INIT_PTHREAD_ATTR_GET;
INIT_PTHREAD_ATTR_GET_SCHED;
INIT_PTHREAD_ATTR_GETINHERITSCHED;
INIT_PTHREAD_ATTR_GETAFFINITY_NP;
INIT_PTHREAD_GETAFFINITY_NP;
INIT_PTHREAD_MUTEXATTR_GETPSHARED;
INIT_PTHREAD_MUTEXATTR_GETTYPE;
INIT_PTHREAD_MUTEXATTR_GETPROTOCOL;
INIT_PTHREAD_MUTEXATTR_GETPRIOCEILING;
INIT_PTHREAD_MUTEXATTR_GETROBUST;
INIT_PTHREAD_MUTEXATTR_GETROBUST_NP;
INIT_PTHREAD_RWLOCKATTR_GETPSHARED;
INIT_PTHREAD_RWLOCKATTR_GETKIND_NP;
INIT_PTHREAD_CONDATTR_GETPSHARED;
INIT_PTHREAD_CONDATTR_GETCLOCK;
INIT_PTHREAD_BARRIERATTR_GETPSHARED;
INIT_TMPNAM;
INIT_TMPNAM_R;
INIT_PTSNAME;
INIT_PTSNAME_R;
INIT_TTYNAME;
INIT_TTYNAME_R;
INIT_TEMPNAM;
INIT_PTHREAD_SETNAME_NP;
INIT_PTHREAD_GETNAME_NP;
INIT_SINCOS;
INIT_REMQUO;
INIT_REMQUOL;
INIT_LGAMMA;
INIT_LGAMMAL;
INIT_LGAMMA_R;
INIT_LGAMMAL_R;
INIT_DRAND48_R;
INIT_RAND_R;
INIT_GETLINE;
INIT_ICONV;
INIT_TIMES;
INIT_TLS_GET_ADDR;
INIT_LISTXATTR;
INIT_GETXATTR;
INIT_GETRESID;
INIT_GETIFADDRS;
INIT_IF_INDEXTONAME;
INIT_CAPGET;
INIT_AEABI_MEM;
INIT___BZERO;
INIT_BZERO;
INIT_FTIME;
INIT_XDR;
INIT_XDRREC_LINUX;
INIT_TSEARCH;
INIT_LIBIO_INTERNALS;
INIT_FOPEN;
INIT_FOPEN64;
INIT_FLOPEN;
INIT_OPEN_MEMSTREAM;
INIT_OBSTACK;
INIT_FFLUSH;
INIT_FCLOSE;
INIT_DLOPEN_DLCLOSE;
INIT_GETPASS;
INIT_TIMERFD;
INIT_MLOCKX;
INIT_FOPENCOOKIE;
INIT_SEM;
INIT_PTHREAD_SETCANCEL;
INIT_MINCORE;
INIT_PROCESS_VM_READV;
INIT_CTERMID;
INIT_CTERMID_R;
INIT_RECV_RECVFROM;
INIT_SEND_SENDTO;
INIT_STAT;
INIT_STAT64;
INIT_EVENTFD_READ_WRITE;
INIT_LSTAT;
INIT_LSTAT64;
INIT___XSTAT;
INIT___XSTAT64;
INIT___LXSTAT;
INIT___LXSTAT64;
// FIXME: add other *stat interceptors.
INIT_UTMP;
INIT_UTMPX;
INIT_GETLOADAVG;
INIT_WCSLEN;
INIT_WCSCAT;
INIT_WCSDUP;
INIT_WCSXFRM;
INIT___WCSXFRM_L;
INIT_ACCT;
INIT_USER_FROM_UID;
INIT_UID_FROM_USER;
INIT_GROUP_FROM_GID;
INIT_GID_FROM_GROUP;
INIT_ACCESS;
INIT_FACCESSAT;
INIT_GETGROUPLIST;
INIT_GETGROUPMEMBERSHIP;
INIT_READLINK;
INIT_READLINKAT;
INIT_NAME_TO_HANDLE_AT;
INIT_OPEN_BY_HANDLE_AT;
INIT_STRLCPY;
INIT_DEVNAME;
INIT_DEVNAME_R;
INIT_FGETLN;
INIT_STRMODE;
INIT_TTYENT;
INIT_PROTOENT;
INIT_PROTOENT_R;
INIT_NETENT;
INIT_GETMNTINFO;
INIT_MI_VECTOR_HASH;
INIT_SETVBUF;
INIT_GETVFSSTAT;
INIT_REGEX;
INIT_REGEXSUB;
INIT_FTS;
INIT_SYSCTL;
INIT_ASYSCTL;
INIT_SYSCTLGETMIBINFO;
INIT_NL_LANGINFO;
INIT_MODCTL;
INIT_STRTONUM;
INIT_FPARSELN;
INIT_STATVFS1;
INIT_STRTOI;
INIT_CAPSICUM;
INIT_SHA1;
INIT_MD4;
INIT_RMD160;
INIT_MD5;
INIT_FSEEK;
INIT_MD2;
INIT_SHA2;
INIT_VIS;
INIT_CDB;
INIT_GETFSENT;
INIT_ARC4RANDOM;
INIT_POPEN;
INIT_POPENVE;
INIT_PCLOSE;
INIT_FUNOPEN;
INIT_FUNOPEN2;
INIT_FDEVNAME;
INIT_GETUSERSHELL;
INIT_SL_INIT;
INIT_GETRANDOM;
INIT_CRYPT;
INIT_CRYPT_R;
INIT_GETENTROPY;
INIT_QSORT;
INIT_QSORT_R;
INIT_BSEARCH;
INIT_SIGALTSTACK;
INIT_PROCCTL;
INIT_UNAME;
INIT___XUNAME;
INIT___PRINTF_CHK;
}
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 589b5c3b2241..5f62b974170f 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -1,1231 +1,1231 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___CONFIG
#define _LIBCPP___CONFIG
#include <__config_site>
#if defined(_MSC_VER) && !defined(__clang__)
# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# define _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
# endif
#endif
#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
# pragma GCC system_header
#endif
#if defined(__apple_build_version__)
# define _LIBCPP_COMPILER_CLANG_BASED
# define _LIBCPP_APPLE_CLANG_VER (__apple_build_version__ / 10000)
#elif defined(__clang__)
# define _LIBCPP_COMPILER_CLANG_BASED
# define _LIBCPP_CLANG_VER (__clang_major__ * 100 + __clang_minor__)
#elif defined(__GNUC__)
# define _LIBCPP_COMPILER_GCC
#elif defined(_MSC_VER)
# define _LIBCPP_COMPILER_MSVC
#endif
#ifdef __cplusplus
-# define _LIBCPP_VERSION 15003
+# define _LIBCPP_VERSION 15006
# define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
# define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
// Valid C++ identifier that revs with every libc++ version. This can be used to
// generate identifiers that must be unique for every released libc++ version.
# define _LIBCPP_VERSIONED_IDENTIFIER _LIBCPP_CONCAT(v, _LIBCPP_VERSION)
# if __STDC_HOSTED__ == 0
# define _LIBCPP_FREESTANDING
# endif
# ifndef _LIBCPP_STD_VER
# if __cplusplus <= 201103L
# define _LIBCPP_STD_VER 11
# elif __cplusplus <= 201402L
# define _LIBCPP_STD_VER 14
# elif __cplusplus <= 201703L
# define _LIBCPP_STD_VER 17
# elif __cplusplus <= 202002L
# define _LIBCPP_STD_VER 20
# else
# define _LIBCPP_STD_VER 22 // current year, or date of c++2b ratification
# endif
# endif // _LIBCPP_STD_VER
# if defined(__ELF__)
# define _LIBCPP_OBJECT_FORMAT_ELF 1
# elif defined(__MACH__)
# define _LIBCPP_OBJECT_FORMAT_MACHO 1
# elif defined(_WIN32)
# define _LIBCPP_OBJECT_FORMAT_COFF 1
# elif defined(__wasm__)
# define _LIBCPP_OBJECT_FORMAT_WASM 1
# elif defined(_AIX)
# define _LIBCPP_OBJECT_FORMAT_XCOFF 1
# else
// ... add new file formats here ...
# endif
# if _LIBCPP_ABI_VERSION >= 2
// Change short string representation so that string data starts at offset 0,
// improving its alignment in some cases.
# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
// Fix deque iterator type in order to support incomplete types.
# define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE
// Fix undefined behavior in how std::list stores its linked nodes.
# define _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB
// Fix undefined behavior in how __tree stores its end and parent nodes.
# define _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB
// Fix undefined behavior in how __hash_table stores its pointer types.
# define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB
# define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB
# define _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE
// Define a key function for `bad_function_call` in the library, to centralize
// its vtable and typeinfo to libc++ rather than having all other libraries
// using that class define their own copies.
# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
// Override the default return value of exception::what() for
// bad_function_call::what() with a string that is specific to
// bad_function_call (see http://wg21.link/LWG2233). This is an ABI break
// because it changes the vtable layout of bad_function_call.
# define _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE
// Enable optimized version of __do_get_(un)signed which avoids redundant copies.
# define _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
// Give reverse_iterator<T> one data member of type T, not two.
// Also, in C++17 and later, don't derive iterator types from std::iterator.
# define _LIBCPP_ABI_NO_ITERATOR_BASES
// Use the smallest possible integer type to represent the index of the variant.
// Previously libc++ used "unsigned int" exclusively.
# define _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION
// Unstable attempt to provide a more optimized std::function
# define _LIBCPP_ABI_OPTIMIZED_FUNCTION
// All the regex constants must be distinct and nonzero.
# define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO
// Re-worked external template instantiations for std::string with a focus on
// performance and fast-path inlining.
# define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
// Enable clang::trivial_abi on std::unique_ptr.
# define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI
// Enable clang::trivial_abi on std::shared_ptr and std::weak_ptr
# define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI
// std::random_device holds some state when it uses an implementation that gets
// entropy from a file (see _LIBCPP_USING_DEV_RANDOM). When switching from this
// implementation to another one on a platform that has already shipped
// std::random_device, one needs to retain the same object layout to remain ABI
// compatible. This switch removes these workarounds for platforms that don't care
// about ABI compatibility.
# define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT
// Don't export the legacy __basic_string_common class and its methods from the built library.
# define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON
// Don't export the legacy __vector_base_common class and its methods from the built library.
# define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON
// According to the Standard, `bitset::operator[] const` returns bool
# define _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL
// Remove the base 10 implementation of std::to_chars from the dylib.
// The implementation moved to the header, but we still export the symbols from
// the dylib for backwards compatibility.
# define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10
# elif _LIBCPP_ABI_VERSION == 1
# if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF))
// Enable compiling copies of now inline methods into the dylib to support
// applications compiled against older libraries. This is unnecessary with
// COFF dllexport semantics, since dllexport forces a non-inline definition
// of inline functions to be emitted anyway. Our own non-inline copy would
// conflict with the dllexport-emitted copy, so we disable it. For XCOFF,
// the linker will take issue with the symbols in the shared object if the
// weak inline methods get visibility (such as from -fvisibility-inlines-hidden),
// so disable it.
# define _LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS
# endif
// Feature macros for disabling pre ABI v1 features. All of these options
// are deprecated.
# if defined(__FreeBSD__)
# define _LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR
# endif
# endif
# if defined(_LIBCPP_BUILDING_LIBRARY) || _LIBCPP_ABI_VERSION >= 2
// Enable additional explicit instantiations of iostreams components. This
// reduces the number of weak definitions generated in programs that use
// iostreams by providing a single strong definition in the shared library.
# define _LIBCPP_ABI_ENABLE_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1
// Define a key function for `bad_function_call` in the library, to centralize
// its vtable and typeinfo to libc++ rather than having all other libraries
// using that class define their own copies.
# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
# endif
# define _LIBCPP_TOSTRING2(x) # x
# define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x)
# if __cplusplus < 201103L
# define _LIBCPP_CXX03_LANG
# endif
# ifndef __has_attribute
# define __has_attribute(__x) 0
# endif
# ifndef __has_builtin
# define __has_builtin(__x) 0
# endif
# ifndef __has_extension
# define __has_extension(__x) 0
# endif
# ifndef __has_feature
# define __has_feature(__x) 0
# endif
# ifndef __has_cpp_attribute
# define __has_cpp_attribute(__x) 0
# endif
// '__is_identifier' returns '0' if '__x' is a reserved identifier provided by
// the compiler and '1' otherwise.
# ifndef __is_identifier
# define __is_identifier(__x) 1
# endif
# ifndef __has_declspec_attribute
# define __has_declspec_attribute(__x) 0
# endif
# define __has_keyword(__x) !(__is_identifier(__x))
# ifndef __has_include
# define __has_include(...) 0
# endif
# if !defined(_LIBCPP_COMPILER_CLANG_BASED) && __cplusplus < 201103L
# error "libc++ only supports C++03 with Clang-based compilers. Please enable C++11"
# endif
# ifdef _LIBCPP_COMPILER_MSVC
# error If you successfully use libc++ with MSVC please tell the libc++ developers and consider upstreaming your \
changes. We are not aware of anybody using this configuration and know that at least some code is currently broken. \
If there are users of this configuration we are happy to provide support.
# endif
// FIXME: ABI detection should be done via compiler builtin macros. This
// is just a placeholder until Clang implements such macros. For now assume
// that Windows compilers pretending to be MSVC++ target the Microsoft ABI,
// and allow the user to explicitly specify the ABI to handle cases where this
// heuristic falls short.
# if defined(_LIBCPP_ABI_FORCE_ITANIUM) && defined(_LIBCPP_ABI_FORCE_MICROSOFT)
# error "Only one of _LIBCPP_ABI_FORCE_ITANIUM and _LIBCPP_ABI_FORCE_MICROSOFT can be defined"
# elif defined(_LIBCPP_ABI_FORCE_ITANIUM)
# define _LIBCPP_ABI_ITANIUM
# elif defined(_LIBCPP_ABI_FORCE_MICROSOFT)
# define _LIBCPP_ABI_MICROSOFT
# else
# if defined(_WIN32) && defined(_MSC_VER)
# define _LIBCPP_ABI_MICROSOFT
# else
# define _LIBCPP_ABI_ITANIUM
# endif
# endif
# if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_NO_VCRUNTIME)
# define _LIBCPP_ABI_VCRUNTIME
# endif
# if __has_feature(experimental_library)
# ifndef _LIBCPP_ENABLE_EXPERIMENTAL
# define _LIBCPP_ENABLE_EXPERIMENTAL
# endif
# endif
// Incomplete features get their own specific disabling flags. This makes it
// easier to grep for target specific flags once the feature is complete.
# if !defined(_LIBCPP_ENABLE_EXPERIMENTAL) && !defined(_LIBCPP_BUILDING_LIBRARY)
# define _LIBCPP_HAS_NO_INCOMPLETE_FORMAT
# define _LIBCPP_HAS_NO_INCOMPLETE_RANGES
# endif
// Need to detect which libc we're using if we're on Linux.
# if defined(__linux__)
# include <features.h>
# if defined(__GLIBC_PREREQ)
# define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b)
# else
# define _LIBCPP_GLIBC_PREREQ(a, b) 0
# endif // defined(__GLIBC_PREREQ)
# endif // defined(__linux__)
# if defined(__MVS__)
# include <features.h> // for __NATIVE_ASCII_F
# endif
# ifdef __LITTLE_ENDIAN__
# if __LITTLE_ENDIAN__
# define _LIBCPP_LITTLE_ENDIAN
# endif // __LITTLE_ENDIAN__
# endif // __LITTLE_ENDIAN__
# ifdef __BIG_ENDIAN__
# if __BIG_ENDIAN__
# define _LIBCPP_BIG_ENDIAN
# endif // __BIG_ENDIAN__
# endif // __BIG_ENDIAN__
# ifdef __BYTE_ORDER__
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define _LIBCPP_LITTLE_ENDIAN
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define _LIBCPP_BIG_ENDIAN
# endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# endif // __BYTE_ORDER__
# ifdef __FreeBSD__
# include <sys/endian.h>
# include <osreldate.h>
# if _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# else // _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_BIG_ENDIAN
# endif // _BYTE_ORDER == _LITTLE_ENDIAN
# endif // __FreeBSD__
# if defined(__NetBSD__) || defined(__OpenBSD__)
# include <sys/endian.h>
# if _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# else // _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_BIG_ENDIAN
# endif // _BYTE_ORDER == _LITTLE_ENDIAN
# endif // defined(__NetBSD__) || defined(__OpenBSD__)
# if defined(_WIN32)
# define _LIBCPP_WIN32API
# define _LIBCPP_LITTLE_ENDIAN
# define _LIBCPP_SHORT_WCHAR 1
// Both MinGW and native MSVC provide a "MSVC"-like environment
# define _LIBCPP_MSVCRT_LIKE
// If MinGW is not explicitly detected, assume the MS C runtime is used only if
// an MS compatibility version is specified.
# if defined(_MSC_VER) && !defined(__MINGW32__)
# define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library
# endif
# if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__))
# define _LIBCPP_HAS_BITSCAN64
# endif
# define _LIBCPP_HAS_OPEN_WITH_WCHAR
# endif // defined(_WIN32)
# ifdef __sun__
# include <sys/isa_defs.h>
# ifdef _LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# else
# define _LIBCPP_BIG_ENDIAN
# endif
# endif // __sun__
# if defined(_AIX) && !defined(__64BIT__)
// The size of wchar_t is 2 bytes in 32-bit mode on AIX.
# define _LIBCPP_SHORT_WCHAR 1
# endif
// Libc++ supports various implementations of std::random_device.
//
// _LIBCPP_USING_DEV_RANDOM
// Read entropy from the given file, by default `/dev/urandom`.
// If a token is provided, it is assumed to be the path to a file
// to read entropy from. This is the default behavior if nothing
// else is specified. This implementation requires storing state
// inside `std::random_device`.
//
// _LIBCPP_USING_ARC4_RANDOM
// Use arc4random(). This allows obtaining random data even when
// using sandboxing mechanisms. On some platforms like Apple, this
// is the recommended source of entropy for user-space programs.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_GETENTROPY
// Use getentropy().
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_FUCHSIA_CPRNG
// Use Fuchsia's zx_cprng_draw() system call, which is specified to
// deliver high-quality entropy and cannot fail.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_NACL_RANDOM
// NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access,
// including accesses to the special files under `/dev`. This implementation
// uses the NaCl syscall `nacl_secure_random_init()` to get entropy.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_WIN32_RANDOM
// Use rand_s(), for use on Windows.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
# if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__DragonFly__) || defined(__sun__)
# define _LIBCPP_USING_ARC4_RANDOM
# elif defined(__wasi__) || defined(__EMSCRIPTEN__)
# define _LIBCPP_USING_GETENTROPY
# elif defined(__Fuchsia__)
# define _LIBCPP_USING_FUCHSIA_CPRNG
# elif defined(__native_client__)
# define _LIBCPP_USING_NACL_RANDOM
# elif defined(_LIBCPP_WIN32API)
# define _LIBCPP_USING_WIN32_RANDOM
# else
# define _LIBCPP_USING_DEV_RANDOM
# endif
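// A minimal usage sketch (illustrative, not part of this header): whichever
// backend macro is selected above, user code only ever sees std::random_device.
// With the arc4random/getentropy/Fuchsia/NaCl/Win32 backends the constructor
// token must be "/dev/urandom"; only _LIBCPP_USING_DEV_RANDOM treats the token
// as an actual file path to read entropy from.
//
//   #include <random>
//   unsigned int draw_seed() {
//     std::random_device rd;   // default token is "/dev/urandom"
//     return rd();             // one unsigned int worth of entropy
//   }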
# if !defined(_LIBCPP_LITTLE_ENDIAN) && !defined(_LIBCPP_BIG_ENDIAN)
# include <endian.h>
# if __BYTE_ORDER == __LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# elif __BYTE_ORDER == __BIG_ENDIAN
# define _LIBCPP_BIG_ENDIAN
# else // __BYTE_ORDER == __BIG_ENDIAN
# error unable to determine endian
# endif
# endif // !defined(_LIBCPP_LITTLE_ENDIAN) && !defined(_LIBCPP_BIG_ENDIAN)
# if __has_attribute(__no_sanitize__) && !defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_NO_CFI __attribute__((__no_sanitize__("cfi")))
# else
# define _LIBCPP_NO_CFI
# endif
# ifndef _LIBCPP_CXX03_LANG
# define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp)
# define _ALIGNAS_TYPE(x) alignas(x)
# define _ALIGNAS(x) alignas(x)
# define _LIBCPP_NORETURN [[noreturn]]
# define _NOEXCEPT noexcept
# define _NOEXCEPT_(x) noexcept(x)
# else
# define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp)
# define _ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCPP_ALIGNOF(x))))
# define _ALIGNAS(x) __attribute__((__aligned__(x)))
# define _LIBCPP_NORETURN __attribute__((noreturn))
# define _LIBCPP_HAS_NO_NOEXCEPT
# define nullptr __nullptr
# define _NOEXCEPT throw()
# define _NOEXCEPT_(x)
typedef __char16_t char16_t;
typedef __char32_t char32_t;
# endif
# if !defined(__cpp_exceptions) || __cpp_exceptions < 199711L
# define _LIBCPP_NO_EXCEPTIONS
# endif
# define _LIBCPP_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp)
# if defined(_LIBCPP_COMPILER_CLANG_BASED)
# if defined(__APPLE__) && !defined(__i386__) && !defined(__x86_64__) && (!defined(__arm__) || __ARM_ARCH_7K__ >= 2)
# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
# endif
// Objective-C++ features (opt-in)
# if __has_feature(objc_arc)
# define _LIBCPP_HAS_OBJC_ARC
# endif
# if __has_feature(objc_arc_weak)
# define _LIBCPP_HAS_OBJC_ARC_WEAK
# endif
# if __has_extension(blocks)
# define _LIBCPP_HAS_EXTENSION_BLOCKS
# endif
# if defined(_LIBCPP_HAS_EXTENSION_BLOCKS) && defined(__APPLE__)
# define _LIBCPP_HAS_BLOCKS_RUNTIME
# endif
# if !__has_feature(address_sanitizer)
# define _LIBCPP_HAS_NO_ASAN
# endif
// Allow for build-time disabling of unsigned integer sanitization
# if __has_attribute(no_sanitize)
# define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __attribute__((__no_sanitize__("unsigned-integer-overflow")))
# endif
# define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__))
# define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__
# elif defined(_LIBCPP_COMPILER_GCC)
# if !defined(__SANITIZE_ADDRESS__)
# define _LIBCPP_HAS_NO_ASAN
# endif
# define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__))
# define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__
# elif defined(_LIBCPP_COMPILER_MSVC)
# define _LIBCPP_WARNING(x) __pragma(message(__FILE__ "(" _LIBCPP_TOSTRING(__LINE__) ") : warning note: " x))
# if _MSC_VER < 1900
# error "MSVC versions prior to Visual Studio 2015 are not supported"
# endif
# define _LIBCPP_NORETURN __declspec(noreturn)
# define _LIBCPP_WEAK
# define _LIBCPP_HAS_NO_ASAN
# define _LIBCPP_ALWAYS_INLINE __forceinline
# define _LIBCPP_HAS_NO_VECTOR_EXTENSION
# define _LIBCPP_DISABLE_EXTENSION_WARNING
# endif // _LIBCPP_COMPILER_[CLANG|GCC|MSVC]
# if defined(_LIBCPP_OBJECT_FORMAT_COFF)
# ifdef _DLL
# define _LIBCPP_CRT_FUNC __declspec(dllimport)
# else
# define _LIBCPP_CRT_FUNC
# endif
# if defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) || (defined(__MINGW32__) && !defined(_LIBCPP_BUILDING_LIBRARY))
# define _LIBCPP_DLL_VIS
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
# define _LIBCPP_OVERRIDABLE_FUNC_VIS
# define _LIBCPP_EXPORTED_FROM_ABI
# elif defined(_LIBCPP_BUILDING_LIBRARY)
# define _LIBCPP_DLL_VIS __declspec(dllexport)
# if defined(__MINGW32__)
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
# else
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCPP_DLL_VIS
# endif
# define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllexport)
# else
# define _LIBCPP_DLL_VIS __declspec(dllimport)
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
# define _LIBCPP_OVERRIDABLE_FUNC_VIS
# define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllimport)
# endif
# define _LIBCPP_TYPE_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_FUNC_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_EXCEPTION_ABI _LIBCPP_DLL_VIS
# define _LIBCPP_HIDDEN
# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
# define _LIBCPP_TEMPLATE_VIS
# define _LIBCPP_TEMPLATE_DATA_VIS
# define _LIBCPP_ENUM_VIS
# else
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
# define _LIBCPP_VISIBILITY(vis) __attribute__((__visibility__(vis)))
# else
# define _LIBCPP_VISIBILITY(vis)
# endif
# define _LIBCPP_HIDDEN _LIBCPP_VISIBILITY("hidden")
# define _LIBCPP_FUNC_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_TYPE_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_TEMPLATE_DATA_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_VISIBILITY("default")
# define _LIBCPP_EXCEPTION_ABI _LIBCPP_VISIBILITY("default")
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
// TODO: Make this a proper customization point or remove the option to override it.
# ifndef _LIBCPP_OVERRIDABLE_FUNC_VIS
# define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_VISIBILITY("default")
# endif
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
// The inline should be removed once PR32114 is resolved
# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS inline _LIBCPP_HIDDEN
# else
# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
# endif
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
# if __has_attribute(__type_visibility__)
# define _LIBCPP_TEMPLATE_VIS __attribute__((__type_visibility__("default")))
# else
# define _LIBCPP_TEMPLATE_VIS __attribute__((__visibility__("default")))
# endif
# else
# define _LIBCPP_TEMPLATE_VIS
# endif
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) && __has_attribute(__type_visibility__)
# define _LIBCPP_ENUM_VIS __attribute__((__type_visibility__("default")))
# else
# define _LIBCPP_ENUM_VIS
# endif
# endif // defined(_LIBCPP_OBJECT_FORMAT_COFF)
# if __has_attribute(exclude_from_explicit_instantiation)
# define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION __attribute__((__exclude_from_explicit_instantiation__))
# else
// Try to approximate the effect of exclude_from_explicit_instantiation
// (which is that entities are not assumed to be provided by explicit
// template instantiations in the dylib) by always inlining those entities.
# define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCPP_ALWAYS_INLINE
# endif
// This macro marks a symbol as being hidden from libc++'s ABI. This is achieved
// on two levels:
// 1. The symbol is given hidden visibility, which ensures that users won't start exporting
// symbols from their dynamic library by means of using the libc++ headers. This ensures
// that those symbols stay private to the dynamic library in which they are defined.
//
// 2. The symbol is given an ABI tag that changes with each version of libc++. This ensures
// that no ODR violation can arise from mixing two TUs compiled with different versions
// of libc++ where we would have changed the definition of a symbol. If the symbols shared
// the same name, the ODR would require that their definitions be token-by-token equivalent,
// which basically prevents us from being able to make any change to any function in our
// headers. Using this ABI tag ensures that the symbol name is "bumped" artificially at
// each release, which lets us change the definition of these symbols at our leisure.
// Note that historically, this has been achieved in various ways, including force-inlining
// all functions or giving internal linkage to all functions. Both these (previous) solutions
// suffer from drawbacks that lead notably to code bloat.
//
// Note that we use _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION to ensure that we don't depend
// on _LIBCPP_HIDE_FROM_ABI methods of classes explicitly instantiated in the dynamic library.
//
// TODO: We provide an escape hatch with _LIBCPP_NO_ABI_TAG for folks who want to avoid increasing
// the length of symbols with an ABI tag. In practice, we should remove the escape hatch and
// use compression mangling instead, see https://github.com/itanium-cxx-abi/cxx-abi/issues/70.
# ifndef _LIBCPP_NO_ABI_TAG
# define _LIBCPP_HIDE_FROM_ABI \
_LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION \
__attribute__((__abi_tag__(_LIBCPP_TOSTRING(_LIBCPP_VERSIONED_IDENTIFIER))))
# else
# define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
# endif
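// A minimal sketch (hypothetical code, not part of this header) of what the
// ABI tag achieves: the tag becomes part of the mangled name, so the same
// source-level name yields a fresh symbol in every tagged release.
__attribute__((__abi_tag__("v15000")))
inline int __tagged_identity(int __x) { return __x; }
// Itanium mangling: _Z17__tagged_identityB6v15000i -- the B6v15000 component
// changes whenever _LIBCPP_VERSIONED_IDENTIFIER is bumped, so definitions
// from two releases can never collide under one symbol.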
# ifdef _LIBCPP_BUILDING_LIBRARY
# if _LIBCPP_ABI_VERSION > 1
# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI
# else
# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1
# endif
# else
# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI
# endif
// Just so we can migrate to the new macros gradually.
# define _LIBCPP_INLINE_VISIBILITY _LIBCPP_HIDE_FROM_ABI
// Inline namespaces are available in Clang/GCC/MSVC regardless of C++ dialect.
// clang-format off
# define _LIBCPP_BEGIN_NAMESPACE_STD namespace std { inline namespace _LIBCPP_ABI_NAMESPACE {
# define _LIBCPP_END_NAMESPACE_STD }}
# define _VSTD std
_LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD
# if _LIBCPP_STD_VER > 14
# define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM \
_LIBCPP_BEGIN_NAMESPACE_STD inline namespace __fs { namespace filesystem {
# else
# define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM \
_LIBCPP_BEGIN_NAMESPACE_STD namespace __fs { namespace filesystem {
# endif
# define _LIBCPP_END_NAMESPACE_FILESYSTEM _LIBCPP_END_NAMESPACE_STD }}
// clang-format on
# define _VSTD_FS std::__fs::filesystem
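// Illustrative expansion of the macros above, assuming the default ABI
// namespace __1 (_LIBCPP_ABI_NAMESPACE is typically derived from
// _LIBCPP_ABI_VERSION):
//   namespace std { inline namespace __1 { /* declarations */ } }
// Users spell entities as std::foo, but they mangle as std::__1::foo, which
// keeps libc++'s symbols distinct from those of any other standard library
// implementation in the same process.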
# if __has_attribute(__enable_if__)
# define _LIBCPP_PREFERRED_OVERLOAD __attribute__((__enable_if__(true, "")))
# endif
# ifndef __SIZEOF_INT128__
# define _LIBCPP_HAS_NO_INT128
# endif
# ifdef _LIBCPP_CXX03_LANG
# define static_assert(...) _Static_assert(__VA_ARGS__)
# define decltype(...) __decltype(__VA_ARGS__)
# endif // _LIBCPP_CXX03_LANG
# ifdef _LIBCPP_CXX03_LANG
# define _LIBCPP_CONSTEXPR
# else
# define _LIBCPP_CONSTEXPR constexpr
# endif
# ifndef __cpp_consteval
# define _LIBCPP_CONSTEVAL _LIBCPP_CONSTEXPR
# else
# define _LIBCPP_CONSTEVAL consteval
# endif
# ifdef __GNUC__
# define _LIBCPP_NOALIAS __attribute__((__malloc__))
# else
# define _LIBCPP_NOALIAS
# endif
# if __has_attribute(using_if_exists)
# define _LIBCPP_USING_IF_EXISTS __attribute__((using_if_exists))
# else
# define _LIBCPP_USING_IF_EXISTS
# endif
# ifdef _LIBCPP_CXX03_LANG
# define _LIBCPP_DECLARE_STRONG_ENUM(x) \
struct _LIBCPP_TYPE_VIS x { \
enum __lx
// clang-format off
# define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) \
__lx __v_; \
_LIBCPP_INLINE_VISIBILITY x(__lx __v) : __v_(__v) {} \
_LIBCPP_INLINE_VISIBILITY explicit x(int __v) : __v_(static_cast<__lx>(__v)) {} \
_LIBCPP_INLINE_VISIBILITY operator int() const { return __v_; } \
};
// clang-format on
# else // _LIBCPP_CXX03_LANG
# define _LIBCPP_DECLARE_STRONG_ENUM(x) enum class _LIBCPP_ENUM_VIS x
# define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x)
# endif // _LIBCPP_CXX03_LANG
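// Sketch of how the pair is used (modeled on std::launch in <future>):
//   _LIBCPP_DECLARE_STRONG_ENUM(launch) { async = 1, deferred = 2 };
//   _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(launch)
// In C++11 and later this expands to `enum class launch { async = 1, ... };`.
// In C++03 it expands to the wrapper struct above, whose constructors and
// conversion operator imitate a scoped enum within the older language rules.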
# if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCPP_MSVCRT_LIKE) || defined(__sun__) || \
defined(__NetBSD__)
# define _LIBCPP_LOCALE__L_EXTENSIONS 1
# endif
# ifdef __FreeBSD__
# define _DECLARE_C99_LDBL_MATH 1
# endif
// If we are getting operator new from the MSVC CRT, then allocation overloads
// for align_val_t were added in 19.12, aka VS 2017 version 15.5.
# if defined(_LIBCPP_MSVCRT) && defined(_MSC_VER) && _MSC_VER < 1912
# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
# elif defined(_LIBCPP_ABI_VCRUNTIME) && !defined(__cpp_aligned_new)
// We're deferring to Microsoft's STL to provide aligned new et al. We don't
// have it unless the language feature test macro is defined.
# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
# elif defined(__MVS__)
# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
# endif
# if defined(_LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION) || (!defined(__cpp_aligned_new) || __cpp_aligned_new < 201606)
# define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION
# endif
# if defined(__APPLE__) || defined(__FreeBSD__)
# define _LIBCPP_HAS_DEFAULTRUNELOCALE
# endif
# if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun__)
# define _LIBCPP_WCTYPE_IS_MASK
# endif
# if _LIBCPP_STD_VER <= 17 || !defined(__cpp_char8_t)
# define _LIBCPP_HAS_NO_CHAR8_T
# endif
// Deprecation macros.
//
// Deprecation warnings are always enabled, except when users explicitly opt out
// by defining _LIBCPP_DISABLE_DEPRECATION_WARNINGS.
# if !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS)
# if __has_attribute(deprecated)
# define _LIBCPP_DEPRECATED __attribute__((deprecated))
# define _LIBCPP_DEPRECATED_(m) __attribute__((deprecated(m)))
# elif _LIBCPP_STD_VER > 11
# define _LIBCPP_DEPRECATED [[deprecated]]
# define _LIBCPP_DEPRECATED_(m) [[deprecated(m)]]
# else
# define _LIBCPP_DEPRECATED
# define _LIBCPP_DEPRECATED_(m)
# endif
# else
# define _LIBCPP_DEPRECATED
# define _LIBCPP_DEPRECATED_(m)
# endif
# if !defined(_LIBCPP_CXX03_LANG)
# define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX11
# endif
# if _LIBCPP_STD_VER > 11
# define _LIBCPP_DEPRECATED_IN_CXX14 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX14
# endif
# if _LIBCPP_STD_VER > 14
# define _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX17
# endif
# if _LIBCPP_STD_VER > 17
# define _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX20
# endif
# if !defined(_LIBCPP_HAS_NO_CHAR8_T)
# define _LIBCPP_DEPRECATED_WITH_CHAR8_T _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_WITH_CHAR8_T
# endif
// Macros to enter and leave a state where deprecation warnings are suppressed.
# if defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH \
_Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated\"") \
_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
# define _LIBCPP_SUPPRESS_DEPRECATED_POP _Pragma("GCC diagnostic pop")
# else
# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH
# define _LIBCPP_SUPPRESS_DEPRECATED_POP
# endif
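// Typical use inside the headers (sketch with a hypothetical type): refer to
// a deprecated entity deliberately without disturbing the user's settings.
//   _LIBCPP_SUPPRESS_DEPRECATED_PUSH
//   typedef __some_deprecated_type __compat_alias;
//   _LIBCPP_SUPPRESS_DEPRECATED_POP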
# if _LIBCPP_STD_VER <= 11
# define _LIBCPP_EXPLICIT_AFTER_CXX11
# else
# define _LIBCPP_EXPLICIT_AFTER_CXX11 explicit
# endif
# if _LIBCPP_STD_VER > 11
# define _LIBCPP_CONSTEXPR_AFTER_CXX11 constexpr
# else
# define _LIBCPP_CONSTEXPR_AFTER_CXX11
# endif
# if _LIBCPP_STD_VER > 14
# define _LIBCPP_CONSTEXPR_AFTER_CXX14 constexpr
# else
# define _LIBCPP_CONSTEXPR_AFTER_CXX14
# endif
# if _LIBCPP_STD_VER > 17
# define _LIBCPP_CONSTEXPR_AFTER_CXX17 constexpr
# else
# define _LIBCPP_CONSTEXPR_AFTER_CXX17
# endif
# if __has_cpp_attribute(nodiscard) || defined(_LIBCPP_COMPILER_MSVC)
# define _LIBCPP_NODISCARD [[nodiscard]]
# elif defined(_LIBCPP_COMPILER_CLANG_BASED) && !defined(_LIBCPP_CXX03_LANG)
# define _LIBCPP_NODISCARD [[clang::warn_unused_result]]
# else
// We can't use GCC's [[gnu::warn_unused_result]] and
// __attribute__((warn_unused_result)) because GCC does not silence them via a
// (void) cast.
# define _LIBCPP_NODISCARD
# endif
// _LIBCPP_NODISCARD_EXT may be used to apply [[nodiscard]] to entities that the
// Standard does not specify as such, as a libc++ extension.
# if defined(_LIBCPP_ENABLE_NODISCARD) && !defined(_LIBCPP_DISABLE_NODISCARD_EXT)
# define _LIBCPP_NODISCARD_EXT _LIBCPP_NODISCARD
# else
# define _LIBCPP_NODISCARD_EXT
# endif
# if !defined(_LIBCPP_DISABLE_NODISCARD_AFTER_CXX17) && (_LIBCPP_STD_VER > 17 || defined(_LIBCPP_ENABLE_NODISCARD))
# define _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_NODISCARD
# else
# define _LIBCPP_NODISCARD_AFTER_CXX17
# endif
# if __has_attribute(no_destroy)
# define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__))
# else
# define _LIBCPP_NO_DESTROY
# endif
# ifndef _LIBCPP_HAS_NO_ASAN
extern "C" _LIBCPP_FUNC_VIS void
__sanitizer_annotate_contiguous_container(const void*, const void*, const void*, const void*);
# endif
// Try to find out if RTTI is disabled.
# if !defined(__cpp_rtti) || __cpp_rtti < 199711L
# define _LIBCPP_NO_RTTI
# endif
# ifndef _LIBCPP_WEAK
# define _LIBCPP_WEAK __attribute__((__weak__))
# endif
// Thread API
// clang-format off
# if !defined(_LIBCPP_HAS_NO_THREADS) && \
!defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && \
!defined(_LIBCPP_HAS_THREAD_API_WIN32) && \
!defined(_LIBCPP_HAS_THREAD_API_EXTERNAL)
# if defined(__FreeBSD__) || \
defined(__wasi__) || \
defined(__NetBSD__) || \
defined(__OpenBSD__) || \
defined(__NuttX__) || \
defined(__linux__) || \
defined(__GNU__) || \
defined(__APPLE__) || \
defined(__sun__) || \
defined(__MVS__) || \
defined(_AIX) || \
defined(__EMSCRIPTEN__)
// clang-format on
# define _LIBCPP_HAS_THREAD_API_PTHREAD
# elif defined(__Fuchsia__)
// TODO(44575): Switch to C11 thread API when possible.
# define _LIBCPP_HAS_THREAD_API_PTHREAD
# elif defined(_LIBCPP_WIN32API)
# define _LIBCPP_HAS_THREAD_API_WIN32
# else
# error "No thread API"
# endif // _LIBCPP_HAS_THREAD_API
# endif // _LIBCPP_HAS_NO_THREADS
# if defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
# if defined(__ANDROID__) && __ANDROID_API__ >= 30
# define _LIBCPP_HAS_COND_CLOCKWAIT
# elif defined(_LIBCPP_GLIBC_PREREQ)
# if _LIBCPP_GLIBC_PREREQ(2, 30)
# define _LIBCPP_HAS_COND_CLOCKWAIT
# endif
# endif
# endif
# if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
# error _LIBCPP_HAS_THREAD_API_PTHREAD may only be defined when \
_LIBCPP_HAS_NO_THREADS is not defined.
# endif
# if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_EXTERNAL)
# error _LIBCPP_HAS_THREAD_API_EXTERNAL may not be defined when \
_LIBCPP_HAS_NO_THREADS is defined.
# endif
# if defined(_LIBCPP_HAS_NO_MONOTONIC_CLOCK) && !defined(_LIBCPP_HAS_NO_THREADS)
# error _LIBCPP_HAS_NO_MONOTONIC_CLOCK may only be defined when \
_LIBCPP_HAS_NO_THREADS is defined.
# endif
# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(__STDCPP_THREADS__)
# define __STDCPP_THREADS__ 1
# endif
// The glibc and Bionic implementations of pthreads implement
// pthread_mutex_destroy as a no-op for regular mutexes. Additionally, Win32
// mutexes have no destroy mechanism.
//
// This optimization can't be performed on Apple platforms, where
// pthread_mutex_destroy can allow the kernel to release resources.
// See https://llvm.org/D64298 for details.
//
// TODO(EricWF): Enable this optimization on Bionic after speaking to their
// respective stakeholders.
// clang-format off
# if (defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && defined(__GLIBC__)) || \
(defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || \
defined(_LIBCPP_HAS_THREAD_API_WIN32)
// clang-format on
# define _LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION
# endif
// Destroying a condvar is a nop on Windows.
//
// This optimization can't be performed on Apple platforms, where
// pthread_cond_destroy can allow the kernel to release resources.
// See https://llvm.org/D64298 for details.
//
// TODO(EricWF): This is potentially true for some pthread implementations
// as well.
# if (defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || defined(_LIBCPP_HAS_THREAD_API_WIN32)
# define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION
# endif
// Some systems do not provide gets() in their C library, for security reasons.
# if defined(_LIBCPP_MSVCRT) || (defined(__FreeBSD_version) && __FreeBSD_version >= 1300043) || defined(__OpenBSD__)
# define _LIBCPP_C_HAS_NO_GETS
# endif
# if defined(__BIONIC__) || defined(__NuttX__) || defined(__Fuchsia__) || defined(__wasi__) || \
defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__)
# define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE
# endif
# if __has_feature(cxx_atomic) || __has_extension(c_atomic) || __has_keyword(_Atomic)
# define _LIBCPP_HAS_C_ATOMIC_IMP
# elif defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_HAS_GCC_ATOMIC_IMP
# endif
# if !defined(_LIBCPP_HAS_C_ATOMIC_IMP) && !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) && \
!defined(_LIBCPP_HAS_EXTERNAL_ATOMIC_IMP)
# define _LIBCPP_HAS_NO_ATOMIC_HEADER
# else
# ifndef _LIBCPP_ATOMIC_FLAG_TYPE
# define _LIBCPP_ATOMIC_FLAG_TYPE bool
# endif
# ifdef _LIBCPP_FREESTANDING
# define _LIBCPP_ATOMIC_ONLY_USE_BUILTINS
# endif
# endif
# ifndef _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
# define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
# endif
# if defined(_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS)
# if defined(__clang__) && __has_attribute(acquire_capability)
// Work around the attribute handling in clang. When both __declspec and
// __attribute__ are present, the processing goes awry, preventing the definition
// of the types. In MinGW mode, __declspec evaluates to __attribute__, and thus
// combining the two does work.
# if !defined(_MSC_VER)
# define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS
# endif
# endif
# endif
# ifdef _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS
# define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) __attribute__((x))
# else
# define _LIBCPP_THREAD_SAFETY_ANNOTATION(x)
# endif
# if _LIBCPP_STD_VER > 17
# define _LIBCPP_CONSTINIT constinit
# elif __has_attribute(require_constant_initialization)
# define _LIBCPP_CONSTINIT __attribute__((__require_constant_initialization__))
# else
# define _LIBCPP_CONSTINIT
# endif
# if __has_attribute(diagnose_if) && !defined(_LIBCPP_DISABLE_ADDITIONAL_DIAGNOSTICS)
# define _LIBCPP_DIAGNOSE_WARNING(...) __attribute__((diagnose_if(__VA_ARGS__, "warning")))
# define _LIBCPP_DIAGNOSE_ERROR(...) __attribute__((diagnose_if(__VA_ARGS__, "error")))
# else
# define _LIBCPP_DIAGNOSE_WARNING(...)
# define _LIBCPP_DIAGNOSE_ERROR(...)
# endif
// Use a function like macro to imply that it must be followed by a semicolon
# if __has_cpp_attribute(fallthrough)
# define _LIBCPP_FALLTHROUGH() [[fallthrough]]
# elif __has_attribute(__fallthrough__)
# define _LIBCPP_FALLTHROUGH() __attribute__((__fallthrough__))
# else
# define _LIBCPP_FALLTHROUGH() ((void)0)
# endif
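// A sketch of a use site (hypothetical function): the trailing () forces a
// semicolon, so the macro reads as a statement in every configuration,
// including the no-op one.
inline int __count_weight(int __c) {
    int __weight = 0;
    switch (__c) {
    case 'a':
        ++__weight; // deliberately falls into the next case
        _LIBCPP_FALLTHROUGH();
    case 'b':
        return __weight + 1;
    default:
        return 0;
    }
}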
# if __has_attribute(__nodebug__)
# define _LIBCPP_NODEBUG __attribute__((__nodebug__))
# else
# define _LIBCPP_NODEBUG
# endif
# if __has_attribute(__standalone_debug__)
# define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__))
# else
# define _LIBCPP_STANDALONE_DEBUG
# endif
# if __has_attribute(__preferred_name__)
# define _LIBCPP_PREFERRED_NAME(x) __attribute__((__preferred_name__(x)))
# else
# define _LIBCPP_PREFERRED_NAME(x)
# endif
// We often repeat things just for handling wide characters in the library.
// When wide characters are disabled, it can be useful to have a quick way of
// disabling that code without having to resort to #if-#endif, which has a
// larger impact on readability.
# if defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)
# define _LIBCPP_IF_WIDE_CHARACTERS(...)
# else
# define _LIBCPP_IF_WIDE_CHARACTERS(...) __VA_ARGS__
# endif
# if defined(_LIBCPP_ABI_MICROSOFT) && (defined(_LIBCPP_COMPILER_MSVC) || __has_declspec_attribute(empty_bases))
# define _LIBCPP_DECLSPEC_EMPTY_BASES __declspec(empty_bases)
# else
# define _LIBCPP_DECLSPEC_EMPTY_BASES
# endif
# if defined(_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES)
# define _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR
# define _LIBCPP_ENABLE_CXX17_REMOVED_BINDERS
# define _LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE
# define _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS
# define _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION
# endif // _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES
// Leave the deprecation notices in by default, but don't remove unary_function and
// binary_function entirely just yet. That way, folks will have one release to act
// on the deprecation warnings.
# ifndef _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION
# define _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION
# endif
# if defined(_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES)
# define _LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS
# define _LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_VOID_SPECIALIZATION
# define _LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS
# define _LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS
# define _LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR
# define _LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS
# endif // _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES
# if !defined(__cpp_impl_coroutine) || __cpp_impl_coroutine < 201902L
# define _LIBCPP_HAS_NO_CXX20_COROUTINES
# endif
# define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")")
# define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")")
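// The standard pattern in each header (sketch): save the user's min/max
// macros, strip them for the duration of the header, then restore them.
//   _LIBCPP_PUSH_MACROS
//   #include <__undef_macros>
//   // ... header body can now use min/max as ordinary identifiers ...
//   _LIBCPP_POP_MACROS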
# ifndef _LIBCPP_NO_AUTO_LINK
# if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY)
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
# pragma comment(lib, "c++.lib")
# else
# pragma comment(lib, "libc++.lib")
# endif
# endif // defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY)
# endif // _LIBCPP_NO_AUTO_LINK
// Configures the fopen close-on-exec mode character, if any. This string will
// be appended to any mode string used by fstream for fopen/fdopen.
//
// Not all platforms support this, but it helps avoid fd-leaks on platforms that
// do.
# if defined(__BIONIC__)
# define _LIBCPP_FOPEN_CLOEXEC_MODE "e"
# else
# define _LIBCPP_FOPEN_CLOEXEC_MODE
# endif
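// Sketch of how the mode character gets spliced in: adjacent string-literal
// concatenation appends the macro to the caller's mode string.
//   FILE* __fp = fopen(__name, "rb" _LIBCPP_FOPEN_CLOEXEC_MODE);
//   // expands to fopen(__name, "rbe") on Bionic, fopen(__name, "rb") elsewhere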
// Support for _FILE_OFFSET_BITS=64 landed gradually in Android, so the full set
// of functions used in cstdio may not be available for low API levels when
// using 64-bit file offsets on LP32.
# if defined(__BIONIC__) && defined(__USE_FILE_OFFSET64) && __ANDROID_API__ < 24
# define _LIBCPP_HAS_NO_FGETPOS_FSETPOS
# endif
# if __has_attribute(init_priority)
// TODO: Remove this once we drop support for building libc++ with old Clangs
# if (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 1200) || \
(defined(__apple_build_version__) && __apple_build_version__ < 13000000)
# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((init_priority(101)))
# else
# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((init_priority(100)))
# endif
# else
# define _LIBCPP_INIT_PRIORITY_MAX
# endif
# if defined(__GNUC__) || defined(__clang__)
// The attribute uses 1-based indices for ordinary functions and static member
// functions, and 2-based indices for non-static member functions, because the
// implicit `this` parameter counts as argument 1.
# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \
__attribute__((__format__(archetype, format_string_index, first_format_arg_index)))
# else
# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) /* nothing */
# endif
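// Sketch of the index shift (hypothetical declarations): in a non-static
// member function the implicit `this` occupies position 1, so the format
// string and the first variadic argument each move up by one.
struct __log_sink {
    _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 3)
    void __log(const char* __fmt, ...); // __fmt is argument 2, varargs start at 3
};
_LIBCPP_ATTRIBUTE_FORMAT(__printf__, 1, 2)
void __log_free(const char* __fmt, ...); // __fmt is argument 1, varargs start at 2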
# if __has_cpp_attribute(msvc::no_unique_address)
// MSVC implements [[no_unique_address]] as a silent no-op currently.
// (If/when MSVC breaks its C++ ABI, it will be changed to work as intended.)
// However, MSVC implements [[msvc::no_unique_address]], which does what
// [[no_unique_address]] is supposed to do in general.
// Clang-cl does not yet (14.0) implement either [[no_unique_address]] or
// [[msvc::no_unique_address]]. If/when it does implement
// [[msvc::no_unique_address]], that form should be preferred.
# define _LIBCPP_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]]
# elif __has_cpp_attribute(no_unique_address)
# define _LIBCPP_NO_UNIQUE_ADDRESS [[no_unique_address]]
# else
# define _LIBCPP_NO_UNIQUE_ADDRESS /* nothing */
// Note that this can be replaced by #error as soon as clang-cl
// implements msvc::no_unique_address, since there should be no C++20
// compiler that doesn't support one of the two attributes at that point.
// We generally don't want to use this macro outside of C++20-only code,
// because using it conditionally in one language version only would make
// the ABI inconsistent.
# endif
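// What the attribute enables (sketch with hypothetical types): an empty
// member, such as a stateless allocator, can overlap another member instead
// of adding padding.
struct __no_state {};
struct __packed_pair {
    _LIBCPP_NO_UNIQUE_ADDRESS __no_state __first_;
    void* __second_;
};
// Where the attribute is honored, sizeof(__packed_pair) == sizeof(void*);
// with the empty fallback above, the struct grows by another word.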
# ifdef _LIBCPP_COMPILER_CLANG_BASED
# define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
# define _LIBCPP_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(clang diagnostic ignored str))
# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str)
# elif defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
# define _LIBCPP_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str)
# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(GCC diagnostic ignored str))
# else
# define _LIBCPP_DIAGNOSTIC_PUSH
# define _LIBCPP_DIAGNOSTIC_POP
# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str)
# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str)
# endif
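// One push/pop block serves both compilers (sketch), since each
// *_DIAGNOSTIC_IGNORED macro expands to nothing on the other compiler:
//   _LIBCPP_DIAGNOSTIC_PUSH
//   _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated")
//   _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wdeprecated-declarations")
//   // ... code that intentionally uses a deprecated entity ...
//   _LIBCPP_DIAGNOSTIC_POP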
# if defined(_AIX) && !defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_PACKED_BYTE_FOR_AIX _Pragma("pack(1)")
# define _LIBCPP_PACKED_BYTE_FOR_AIX_END _Pragma("pack(pop)")
# else
# define _LIBCPP_PACKED_BYTE_FOR_AIX /* empty */
# define _LIBCPP_PACKED_BYTE_FOR_AIX_END /* empty */
# endif
# if __has_attribute(__packed__)
# define _LIBCPP_PACKED __attribute__((__packed__))
# else
# define _LIBCPP_PACKED
# endif
#endif // __cplusplus
#endif // _LIBCPP___CONFIG
diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h
index db3af6e24101..55b607f3f804 100644
--- a/libcxx/include/__functional/function.h
+++ b/libcxx/include/__functional/function.h
@@ -1,2813 +1,2823 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___FUNCTIONAL_FUNCTION_H
#define _LIBCPP___FUNCTIONAL_FUNCTION_H
#include <__assert>
#include <__config>
#include <__functional/binary_function.h>
#include <__functional/invoke.h>
#include <__functional/unary_function.h>
#include <__iterator/iterator_traits.h>
#include <__memory/addressof.h>
#include <__memory/allocator_traits.h>
#include <__memory/compressed_pair.h>
#include <__memory/shared_ptr.h>
#include <__utility/forward.h>
#include <__utility/move.h>
#include <__utility/swap.h>
#include <exception>
#include <memory> // TODO: replace with <__memory/__builtin_new_allocator.h>
#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
// bad_function_call
_LIBCPP_DIAGNOSTIC_PUSH
_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wweak-vtables")
class _LIBCPP_EXCEPTION_ABI bad_function_call
: public exception
{
public:
// Note that when a key function is not used, every translation unit that uses
// bad_function_call will end up containing a weak definition of the vtable and
// typeinfo.
#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
virtual ~bad_function_call() _NOEXCEPT;
#else
virtual ~bad_function_call() _NOEXCEPT {}
#endif
#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE
virtual const char* what() const _NOEXCEPT;
#endif
};
_LIBCPP_DIAGNOSTIC_POP
_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
void __throw_bad_function_call()
{
#ifndef _LIBCPP_NO_EXCEPTIONS
throw bad_function_call();
#else
_VSTD::abort();
#endif
}
#if defined(_LIBCPP_CXX03_LANG) && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) && __has_attribute(deprecated)
# define _LIBCPP_DEPRECATED_CXX03_FUNCTION \
__attribute__((deprecated("Using std::function in C++03 is not supported anymore. Please upgrade to C++11 or later, or use a different type")))
#else
# define _LIBCPP_DEPRECATED_CXX03_FUNCTION /* nothing */
#endif
template<class _Fp> class _LIBCPP_DEPRECATED_CXX03_FUNCTION _LIBCPP_TEMPLATE_VIS function; // undefined
namespace __function
{
template<class _Rp>
struct __maybe_derive_from_unary_function
{
};
template<class _Rp, class _A1>
struct __maybe_derive_from_unary_function<_Rp(_A1)>
: public __unary_function<_A1, _Rp>
{
};
template<class _Rp>
struct __maybe_derive_from_binary_function
{
};
template<class _Rp, class _A1, class _A2>
struct __maybe_derive_from_binary_function<_Rp(_A1, _A2)>
: public __binary_function<_A1, _A2, _Rp>
{
};
template <class _Fp>
_LIBCPP_INLINE_VISIBILITY
bool __not_null(_Fp const&) { return true; }
template <class _Fp>
_LIBCPP_INLINE_VISIBILITY
bool __not_null(_Fp* __ptr) { return __ptr; }
template <class _Ret, class _Class>
_LIBCPP_INLINE_VISIBILITY
bool __not_null(_Ret _Class::*__ptr) { return __ptr; }
template <class _Fp>
_LIBCPP_INLINE_VISIBILITY
bool __not_null(function<_Fp> const& __f) { return !!__f; }
#ifdef _LIBCPP_HAS_EXTENSION_BLOCKS
template <class _Rp, class ..._Args>
_LIBCPP_INLINE_VISIBILITY
bool __not_null(_Rp (^__p)(_Args...)) { return __p; }
#endif
} // namespace __function
#ifndef _LIBCPP_CXX03_LANG
namespace __function {
// __alloc_func holds a functor and an allocator.
template <class _Fp, class _Ap, class _FB> class __alloc_func;
template <class _Fp, class _FB>
class __default_alloc_func;
template <class _Fp, class _Ap, class _Rp, class... _ArgTypes>
class __alloc_func<_Fp, _Ap, _Rp(_ArgTypes...)>
{
__compressed_pair<_Fp, _Ap> __f_;
public:
typedef _LIBCPP_NODEBUG _Fp _Target;
typedef _LIBCPP_NODEBUG _Ap _Alloc;
_LIBCPP_INLINE_VISIBILITY
const _Target& __target() const { return __f_.first(); }
// WIN32 APIs may define __allocator, so use __get_allocator instead.
_LIBCPP_INLINE_VISIBILITY
const _Alloc& __get_allocator() const { return __f_.second(); }
_LIBCPP_INLINE_VISIBILITY
explicit __alloc_func(_Target&& __f)
: __f_(piecewise_construct, _VSTD::forward_as_tuple(_VSTD::move(__f)),
_VSTD::forward_as_tuple())
{
}
_LIBCPP_INLINE_VISIBILITY
explicit __alloc_func(const _Target& __f, const _Alloc& __a)
: __f_(piecewise_construct, _VSTD::forward_as_tuple(__f),
_VSTD::forward_as_tuple(__a))
{
}
_LIBCPP_INLINE_VISIBILITY
explicit __alloc_func(const _Target& __f, _Alloc&& __a)
: __f_(piecewise_construct, _VSTD::forward_as_tuple(__f),
_VSTD::forward_as_tuple(_VSTD::move(__a)))
{
}
_LIBCPP_INLINE_VISIBILITY
explicit __alloc_func(_Target&& __f, _Alloc&& __a)
: __f_(piecewise_construct, _VSTD::forward_as_tuple(_VSTD::move(__f)),
_VSTD::forward_as_tuple(_VSTD::move(__a)))
{
}
_LIBCPP_INLINE_VISIBILITY
_Rp operator()(_ArgTypes&&... __arg)
{
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_.first(),
_VSTD::forward<_ArgTypes>(__arg)...);
}
_LIBCPP_INLINE_VISIBILITY
__alloc_func* __clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef
typename __rebind_alloc_helper<__alloc_traits, __alloc_func>::type
_AA;
_AA __a(__f_.second());
typedef __allocator_destructor<_AA> _Dp;
unique_ptr<__alloc_func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __alloc_func(__f_.first(), _Alloc(__a));
return __hold.release();
}
_LIBCPP_INLINE_VISIBILITY
void destroy() _NOEXCEPT { __f_.~__compressed_pair<_Target, _Alloc>(); }
static void __destroy_and_delete(__alloc_func* __f) {
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __alloc_func>::type
_FunAlloc;
_FunAlloc __a(__f->__get_allocator());
__f->destroy();
__a.deallocate(__f, 1);
}
};
template <class _Fp, class _Rp, class... _ArgTypes>
class __default_alloc_func<_Fp, _Rp(_ArgTypes...)> {
_Fp __f_;
public:
typedef _LIBCPP_NODEBUG _Fp _Target;
_LIBCPP_INLINE_VISIBILITY
const _Target& __target() const { return __f_; }
_LIBCPP_INLINE_VISIBILITY
explicit __default_alloc_func(_Target&& __f) : __f_(_VSTD::move(__f)) {}
_LIBCPP_INLINE_VISIBILITY
explicit __default_alloc_func(const _Target& __f) : __f_(__f) {}
_LIBCPP_INLINE_VISIBILITY
_Rp operator()(_ArgTypes&&... __arg) {
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_, _VSTD::forward<_ArgTypes>(__arg)...);
}
_LIBCPP_INLINE_VISIBILITY
__default_alloc_func* __clone() const {
__builtin_new_allocator::__holder_t __hold =
__builtin_new_allocator::__allocate_type<__default_alloc_func>(1);
__default_alloc_func* __res =
::new ((void*)__hold.get()) __default_alloc_func(__f_);
(void)__hold.release();
return __res;
}
_LIBCPP_INLINE_VISIBILITY
void destroy() _NOEXCEPT { __f_.~_Target(); }
static void __destroy_and_delete(__default_alloc_func* __f) {
__f->destroy();
__builtin_new_allocator::__deallocate_type<__default_alloc_func>(__f, 1);
}
};
// __base provides an abstract interface for copyable functors.
template<class _Fp> class _LIBCPP_TEMPLATE_VIS __base;
template<class _Rp, class ..._ArgTypes>
class __base<_Rp(_ArgTypes...)>
{
__base(const __base&);
__base& operator=(const __base&);
public:
_LIBCPP_INLINE_VISIBILITY __base() {}
_LIBCPP_INLINE_VISIBILITY virtual ~__base() {}
virtual __base* __clone() const = 0;
virtual void __clone(__base*) const = 0;
virtual void destroy() _NOEXCEPT = 0;
virtual void destroy_deallocate() _NOEXCEPT = 0;
virtual _Rp operator()(_ArgTypes&& ...) = 0;
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const _NOEXCEPT = 0;
virtual const std::type_info& target_type() const _NOEXCEPT = 0;
#endif // _LIBCPP_NO_RTTI
};
// __func implements __base for a given functor type.
template<class _FD, class _Alloc, class _FB> class __func;
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
class __func<_Fp, _Alloc, _Rp(_ArgTypes...)>
: public __base<_Rp(_ArgTypes...)>
{
__alloc_func<_Fp, _Alloc, _Rp(_ArgTypes...)> __f_;
public:
_LIBCPP_INLINE_VISIBILITY
explicit __func(_Fp&& __f)
: __f_(_VSTD::move(__f)) {}
_LIBCPP_INLINE_VISIBILITY
explicit __func(const _Fp& __f, const _Alloc& __a)
: __f_(__f, __a) {}
_LIBCPP_INLINE_VISIBILITY
explicit __func(const _Fp& __f, _Alloc&& __a)
: __f_(__f, _VSTD::move(__a)) {}
_LIBCPP_INLINE_VISIBILITY
explicit __func(_Fp&& __f, _Alloc&& __a)
: __f_(_VSTD::move(__f), _VSTD::move(__a)) {}
virtual __base<_Rp(_ArgTypes...)>* __clone() const;
virtual void __clone(__base<_Rp(_ArgTypes...)>*) const;
virtual void destroy() _NOEXCEPT;
virtual void destroy_deallocate() _NOEXCEPT;
virtual _Rp operator()(_ArgTypes&&... __arg);
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const _NOEXCEPT;
virtual const std::type_info& target_type() const _NOEXCEPT;
#endif // _LIBCPP_NO_RTTI
};
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
__base<_Rp(_ArgTypes...)>*
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::__clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.__get_allocator());
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __func(__f_.__target(), _Alloc(__a));
return __hold.release();
}
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
void
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::__clone(__base<_Rp(_ArgTypes...)>* __p) const
{
::new ((void*)__p) __func(__f_.__target(), __f_.__get_allocator());
}
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
void
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::destroy() _NOEXCEPT
{
__f_.destroy();
}
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
void
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::destroy_deallocate() _NOEXCEPT
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.__get_allocator());
__f_.destroy();
__a.deallocate(this, 1);
}
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
_Rp
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::operator()(_ArgTypes&& ... __arg)
{
return __f_(_VSTD::forward<_ArgTypes>(__arg)...);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
const void*
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target(const type_info& __ti) const _NOEXCEPT
{
if (__ti == typeid(_Fp))
return _VSTD::addressof(__f_.__target());
return nullptr;
}
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
const std::type_info&
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target_type() const _NOEXCEPT
{
return typeid(_Fp);
}
#endif // _LIBCPP_NO_RTTI
// __value_func creates a value-type from a __func.
template <class _Fp> class __value_func;
template <class _Rp, class... _ArgTypes> class __value_func<_Rp(_ArgTypes...)>
{
typename aligned_storage<3 * sizeof(void*)>::type __buf_;
typedef __base<_Rp(_ArgTypes...)> __func;
__func* __f_;
_LIBCPP_NO_CFI static __func* __as_base(void* __p)
{
return reinterpret_cast<__func*>(__p);
}
public:
_LIBCPP_INLINE_VISIBILITY
__value_func() _NOEXCEPT : __f_(nullptr) {}
template <class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY __value_func(_Fp&& __f, const _Alloc& __a)
: __f_(nullptr)
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef __function::__func<_Fp, _Alloc, _Rp(_ArgTypes...)> _Fun;
typedef typename __rebind_alloc_helper<__alloc_traits, _Fun>::type
_FunAlloc;
if (__function::__not_null(__f))
{
_FunAlloc __af(__a);
if (sizeof(_Fun) <= sizeof(__buf_) &&
is_nothrow_copy_constructible<_Fp>::value &&
is_nothrow_copy_constructible<_FunAlloc>::value)
{
__f_ =
::new ((void*)&__buf_) _Fun(_VSTD::move(__f), _Alloc(__af));
}
else
{
typedef __allocator_destructor<_FunAlloc> _Dp;
unique_ptr<__func, _Dp> __hold(__af.allocate(1), _Dp(__af, 1));
::new ((void*)__hold.get()) _Fun(_VSTD::move(__f), _Alloc(__a));
__f_ = __hold.release();
}
}
}
template <class _Fp,
class = typename enable_if<!is_same<typename decay<_Fp>::type, __value_func>::value>::type>
_LIBCPP_INLINE_VISIBILITY explicit __value_func(_Fp&& __f)
: __value_func(_VSTD::forward<_Fp>(__f), allocator<_Fp>()) {}
_LIBCPP_INLINE_VISIBILITY
__value_func(const __value_func& __f)
{
if (__f.__f_ == nullptr)
__f_ = nullptr;
else if ((void*)__f.__f_ == &__f.__buf_)
{
__f_ = __as_base(&__buf_);
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
_LIBCPP_INLINE_VISIBILITY
__value_func(__value_func&& __f) _NOEXCEPT
{
if (__f.__f_ == nullptr)
__f_ = nullptr;
else if ((void*)__f.__f_ == &__f.__buf_)
{
__f_ = __as_base(&__buf_);
__f.__f_->__clone(__f_);
}
else
{
__f_ = __f.__f_;
__f.__f_ = nullptr;
}
}
_LIBCPP_INLINE_VISIBILITY
~__value_func()
{
if ((void*)__f_ == &__buf_)
__f_->destroy();
else if (__f_)
__f_->destroy_deallocate();
}
_LIBCPP_INLINE_VISIBILITY
__value_func& operator=(__value_func&& __f)
{
*this = nullptr;
if (__f.__f_ == nullptr)
__f_ = nullptr;
else if ((void*)__f.__f_ == &__f.__buf_)
{
__f_ = __as_base(&__buf_);
__f.__f_->__clone(__f_);
}
else
{
__f_ = __f.__f_;
__f.__f_ = nullptr;
}
return *this;
}
_LIBCPP_INLINE_VISIBILITY
__value_func& operator=(nullptr_t)
{
__func* __f = __f_;
__f_ = nullptr;
if ((void*)__f == &__buf_)
__f->destroy();
else if (__f)
__f->destroy_deallocate();
return *this;
}
_LIBCPP_INLINE_VISIBILITY
_Rp operator()(_ArgTypes&&... __args) const
{
if (__f_ == nullptr)
__throw_bad_function_call();
return (*__f_)(_VSTD::forward<_ArgTypes>(__args)...);
}
_LIBCPP_INLINE_VISIBILITY
void swap(__value_func& __f) _NOEXCEPT
{
if (&__f == this)
return;
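// Three cases follow: both targets inline (rotate through a stack-local
// temporary), exactly one inline (clone it across and steal the heap
// pointer), or both heap-allocated (just swap the pointers).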
if ((void*)__f_ == &__buf_ && (void*)__f.__f_ == &__f.__buf_)
{
typename aligned_storage<sizeof(__buf_)>::type __tempbuf;
__func* __t = __as_base(&__tempbuf);
__f_->__clone(__t);
__f_->destroy();
__f_ = nullptr;
__f.__f_->__clone(__as_base(&__buf_));
__f.__f_->destroy();
__f.__f_ = nullptr;
__f_ = __as_base(&__buf_);
__t->__clone(__as_base(&__f.__buf_));
__t->destroy();
__f.__f_ = __as_base(&__f.__buf_);
}
else if ((void*)__f_ == &__buf_)
{
__f_->__clone(__as_base(&__f.__buf_));
__f_->destroy();
__f_ = __f.__f_;
__f.__f_ = __as_base(&__f.__buf_);
}
else if ((void*)__f.__f_ == &__f.__buf_)
{
__f.__f_->__clone(__as_base(&__buf_));
__f.__f_->destroy();
__f.__f_ = __f_;
__f_ = __as_base(&__buf_);
}
else
_VSTD::swap(__f_, __f.__f_);
}
_LIBCPP_INLINE_VISIBILITY
explicit operator bool() const _NOEXCEPT { return __f_ != nullptr; }
#ifndef _LIBCPP_NO_RTTI
_LIBCPP_INLINE_VISIBILITY
const std::type_info& target_type() const _NOEXCEPT
{
if (__f_ == nullptr)
return typeid(void);
return __f_->target_type();
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY const _Tp* target() const _NOEXCEPT
{
if (__f_ == nullptr)
return nullptr;
return (const _Tp*)__f_->target(typeid(_Tp));
}
#endif // _LIBCPP_NO_RTTI
};
// Storage for a functor object, to be used with __policy to manage copy and
// destruction.
union __policy_storage
{
mutable char __small[sizeof(void*) * 2];
void* __large;
};
// True if _Fun can safely be held in __policy_storage.__small.
template <typename _Fun>
struct __use_small_storage
: public integral_constant<
bool, sizeof(_Fun) <= sizeof(__policy_storage) &&
_LIBCPP_ALIGNOF(_Fun) <= _LIBCPP_ALIGNOF(__policy_storage) &&
is_trivially_copy_constructible<_Fun>::value &&
is_trivially_destructible<_Fun>::value> {};
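// Consequence (illustrative check; in the header the trait is applied to the
// __alloc_func/__default_alloc_func wrappers, here to a raw pointer): a
// function pointer is trivial and pointer-sized, so it always qualifies for
// the small buffer, while large or non-trivially-copyable functors are
// routed to heap storage via __clone/__destroy.
static_assert(__use_small_storage<int (*)(int)>::value,
              "function pointers avoid allocation");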
// Policy contains information about how to copy, destroy, and move the
// underlying functor. You can think of it as a vtable of sorts.
struct __policy
{
// Used to copy or destroy __large values. null for trivial objects.
void* (*const __clone)(const void*);
void (*const __destroy)(void*);
// True if this is the null policy (no value).
const bool __is_null;
// The target type. May be null if RTTI is disabled.
const std::type_info* const __type_info;
// Returns a pointer to a static policy object suitable for the functor
// type.
template <typename _Fun>
_LIBCPP_INLINE_VISIBILITY static const __policy* __create()
{
return __choose_policy<_Fun>(__use_small_storage<_Fun>());
}
_LIBCPP_INLINE_VISIBILITY
static const __policy* __create_empty()
{
static const _LIBCPP_CONSTEXPR __policy __policy_ = {nullptr, nullptr,
true,
#ifndef _LIBCPP_NO_RTTI
&typeid(void)
#else
nullptr
#endif
};
return &__policy_;
}
private:
template <typename _Fun> static void* __large_clone(const void* __s)
{
const _Fun* __f = static_cast<const _Fun*>(__s);
return __f->__clone();
}
template <typename _Fun>
static void __large_destroy(void* __s) {
_Fun::__destroy_and_delete(static_cast<_Fun*>(__s));
}
template <typename _Fun>
_LIBCPP_INLINE_VISIBILITY static const __policy*
__choose_policy(/* is_small = */ false_type) {
static const _LIBCPP_CONSTEXPR __policy __policy_ = {
&__large_clone<_Fun>, &__large_destroy<_Fun>, false,
#ifndef _LIBCPP_NO_RTTI
&typeid(typename _Fun::_Target)
#else
nullptr
#endif
};
return &__policy_;
}
template <typename _Fun>
_LIBCPP_INLINE_VISIBILITY static const __policy*
__choose_policy(/* is_small = */ true_type)
{
static const _LIBCPP_CONSTEXPR __policy __policy_ = {
nullptr, nullptr, false,
#ifndef _LIBCPP_NO_RTTI
&typeid(typename _Fun::_Target)
#else
nullptr
#endif
};
return &__policy_;
}
};
// Used to choose between perfect forwarding or pass-by-value. Pass-by-value is
// faster for types that can be passed in registers.
template <typename _Tp>
using __fast_forward =
typename conditional<is_scalar<_Tp>::value, _Tp, _Tp&&>::type;
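// Illustrative checks: scalars collapse to plain pass-by-value, class types
// to an rvalue reference.
static_assert(is_same<__fast_forward<int>, int>::value, "");
static_assert(is_same<__fast_forward<bad_function_call>, bad_function_call&&>::value, "");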
// __policy_invoker calls an instance of __alloc_func held in __policy_storage.
template <class _Fp> struct __policy_invoker;
template <class _Rp, class... _ArgTypes>
struct __policy_invoker<_Rp(_ArgTypes...)>
{
typedef _Rp (*__Call)(const __policy_storage*,
__fast_forward<_ArgTypes>...);
__Call __call_;
// Creates an invoker that throws bad_function_call.
_LIBCPP_INLINE_VISIBILITY
__policy_invoker() : __call_(&__call_empty) {}
// Creates an invoker that calls the given instance of __alloc_func or
// __default_alloc_func held in the __policy_storage.
template <typename _Fun>
_LIBCPP_INLINE_VISIBILITY static __policy_invoker __create()
{
return __policy_invoker(&__call_impl<_Fun>);
}
private:
_LIBCPP_INLINE_VISIBILITY
explicit __policy_invoker(__Call __c) : __call_(__c) {}
static _Rp __call_empty(const __policy_storage*,
__fast_forward<_ArgTypes>...)
{
__throw_bad_function_call();
}
template <typename _Fun>
static _Rp __call_impl(const __policy_storage* __buf,
__fast_forward<_ArgTypes>... __args)
{
_Fun* __f = reinterpret_cast<_Fun*>(__use_small_storage<_Fun>::value
? &__buf->__small
: __buf->__large);
return (*__f)(_VSTD::forward<_ArgTypes>(__args)...);
}
};
// __policy_func uses a __policy and __policy_invoker to create a type-erased,
// copyable functor.
template <class _Fp> class __policy_func;
template <class _Rp, class... _ArgTypes> class __policy_func<_Rp(_ArgTypes...)>
{
// Inline storage for small objects.
__policy_storage __buf_;
// Calls the value stored in __buf_. This could technically be part of
// policy, but storing it here eliminates a level of indirection inside
// operator().
typedef __function::__policy_invoker<_Rp(_ArgTypes...)> __invoker;
__invoker __invoker_;
// The policy that describes how to move / copy / destroy __buf_. Never
// null, even if the function is empty.
const __policy* __policy_;
public:
_LIBCPP_INLINE_VISIBILITY
__policy_func() : __policy_(__policy::__create_empty()) {}
template <class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY __policy_func(_Fp&& __f, const _Alloc& __a)
: __policy_(__policy::__create_empty())
{
typedef __alloc_func<_Fp, _Alloc, _Rp(_ArgTypes...)> _Fun;
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, _Fun>::type
_FunAlloc;
if (__function::__not_null(__f))
{
__invoker_ = __invoker::template __create<_Fun>();
__policy_ = __policy::__create<_Fun>();
_FunAlloc __af(__a);
if (__use_small_storage<_Fun>())
{
::new ((void*)&__buf_.__small)
_Fun(_VSTD::move(__f), _Alloc(__af));
}
else
{
typedef __allocator_destructor<_FunAlloc> _Dp;
unique_ptr<_Fun, _Dp> __hold(__af.allocate(1), _Dp(__af, 1));
::new ((void*)__hold.get())
_Fun(_VSTD::move(__f), _Alloc(__af));
__buf_.__large = __hold.release();
}
}
}
template <class _Fp, class = typename enable_if<!is_same<typename decay<_Fp>::type, __policy_func>::value>::type>
_LIBCPP_INLINE_VISIBILITY explicit __policy_func(_Fp&& __f)
: __policy_(__policy::__create_empty()) {
typedef __default_alloc_func<_Fp, _Rp(_ArgTypes...)> _Fun;
if (__function::__not_null(__f)) {
__invoker_ = __invoker::template __create<_Fun>();
__policy_ = __policy::__create<_Fun>();
if (__use_small_storage<_Fun>()) {
::new ((void*)&__buf_.__small) _Fun(_VSTD::move(__f));
} else {
__builtin_new_allocator::__holder_t __hold =
__builtin_new_allocator::__allocate_type<_Fun>(1);
__buf_.__large = ::new ((void*)__hold.get()) _Fun(_VSTD::move(__f));
(void)__hold.release();
}
}
}
_LIBCPP_INLINE_VISIBILITY
__policy_func(const __policy_func& __f)
: __buf_(__f.__buf_), __invoker_(__f.__invoker_),
__policy_(__f.__policy_)
{
if (__policy_->__clone)
__buf_.__large = __policy_->__clone(__f.__buf_.__large);
}
_LIBCPP_INLINE_VISIBILITY
__policy_func(__policy_func&& __f)
: __buf_(__f.__buf_), __invoker_(__f.__invoker_),
__policy_(__f.__policy_)
{
if (__policy_->__destroy)
{
__f.__policy_ = __policy::__create_empty();
__f.__invoker_ = __invoker();
}
}
_LIBCPP_INLINE_VISIBILITY
~__policy_func()
{
if (__policy_->__destroy)
__policy_->__destroy(__buf_.__large);
}
_LIBCPP_INLINE_VISIBILITY
__policy_func& operator=(__policy_func&& __f)
{
*this = nullptr;
__buf_ = __f.__buf_;
__invoker_ = __f.__invoker_;
__policy_ = __f.__policy_;
__f.__policy_ = __policy::__create_empty();
__f.__invoker_ = __invoker();
return *this;
}
_LIBCPP_INLINE_VISIBILITY
__policy_func& operator=(nullptr_t)
{
const __policy* __p = __policy_;
__policy_ = __policy::__create_empty();
__invoker_ = __invoker();
if (__p->__destroy)
__p->__destroy(__buf_.__large);
return *this;
}
_LIBCPP_INLINE_VISIBILITY
_Rp operator()(_ArgTypes&&... __args) const
{
return __invoker_.__call_(_VSTD::addressof(__buf_),
_VSTD::forward<_ArgTypes>(__args)...);
}
_LIBCPP_INLINE_VISIBILITY
void swap(__policy_func& __f)
{
_VSTD::swap(__invoker_, __f.__invoker_);
_VSTD::swap(__policy_, __f.__policy_);
_VSTD::swap(__buf_, __f.__buf_);
}
_LIBCPP_INLINE_VISIBILITY
explicit operator bool() const _NOEXCEPT
{
return !__policy_->__is_null;
}
#ifndef _LIBCPP_NO_RTTI
_LIBCPP_INLINE_VISIBILITY
const std::type_info& target_type() const _NOEXCEPT
{
return *__policy_->__type_info;
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY const _Tp* target() const _NOEXCEPT
{
if (__policy_->__is_null || typeid(_Tp) != *__policy_->__type_info)
return nullptr;
if (__policy_->__clone) // Out of line storage.
return reinterpret_cast<const _Tp*>(__buf_.__large);
else
return reinterpret_cast<const _Tp*>(&__buf_.__small);
}
#endif // _LIBCPP_NO_RTTI
};
-#if defined(_LIBCPP_HAS_BLOCKS_RUNTIME) && !defined(_LIBCPP_HAS_OBJC_ARC)
+#if defined(_LIBCPP_HAS_BLOCKS_RUNTIME)
extern "C" void *_Block_copy(const void *);
extern "C" void _Block_release(const void *);
template<class _Rp1, class ..._ArgTypes1, class _Alloc, class _Rp, class ..._ArgTypes>
class __func<_Rp1(^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)>
: public __base<_Rp(_ArgTypes...)>
{
typedef _Rp1(^__block_type)(_ArgTypes1...);
__block_type __f_;
public:
_LIBCPP_INLINE_VISIBILITY
explicit __func(__block_type const& __f)
+#ifdef _LIBCPP_HAS_OBJC_ARC
+ : __f_(__f)
+#else
: __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr))
+#endif
{ }
// [TODO] add && to save on a retain
_LIBCPP_INLINE_VISIBILITY
explicit __func(__block_type __f, const _Alloc& /* unused */)
+#ifdef _LIBCPP_HAS_OBJC_ARC
+ : __f_(__f)
+#else
: __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr))
+#endif
{ }
virtual __base<_Rp(_ArgTypes...)>* __clone() const {
_LIBCPP_ASSERT(false,
"Block pointers are just pointers, so they should always fit into "
"std::function's small buffer optimization. This function should "
"never be invoked.");
return nullptr;
}
virtual void __clone(__base<_Rp(_ArgTypes...)>* __p) const {
::new ((void*)__p) __func(__f_);
}
virtual void destroy() _NOEXCEPT {
+#ifndef _LIBCPP_HAS_OBJC_ARC
if (__f_)
_Block_release(__f_);
+#endif
__f_ = 0;
}
virtual void destroy_deallocate() _NOEXCEPT {
_LIBCPP_ASSERT(false,
"Block pointers are just pointers, so they should always fit into "
"std::function's small buffer optimization. This function should "
"never be invoked.");
}
virtual _Rp operator()(_ArgTypes&& ... __arg) {
return _VSTD::__invoke(__f_, _VSTD::forward<_ArgTypes>(__arg)...);
}
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(type_info const& __ti) const _NOEXCEPT {
if (__ti == typeid(__func::__block_type))
return &__f_;
return (const void*)nullptr;
}
virtual const std::type_info& target_type() const _NOEXCEPT {
return typeid(__func::__block_type);
}
#endif // _LIBCPP_NO_RTTI
};
-#endif // _LIBCPP_HAS_EXTENSION_BLOCKS && !_LIBCPP_HAS_OBJC_ARC
+#endif // _LIBCPP_HAS_BLOCKS_RUNTIME
} // namespace __function
template<class _Rp, class ..._ArgTypes>
class _LIBCPP_TEMPLATE_VIS function<_Rp(_ArgTypes...)>
: public __function::__maybe_derive_from_unary_function<_Rp(_ArgTypes...)>,
public __function::__maybe_derive_from_binary_function<_Rp(_ArgTypes...)>
{
#ifndef _LIBCPP_ABI_OPTIMIZED_FUNCTION
typedef __function::__value_func<_Rp(_ArgTypes...)> __func;
#else
typedef __function::__policy_func<_Rp(_ArgTypes...)> __func;
#endif
__func __f_;
template <class _Fp, bool = _And<
_IsNotSame<__uncvref_t<_Fp>, function>,
__invokable<_Fp, _ArgTypes...>
>::value>
struct __callable;
template <class _Fp>
struct __callable<_Fp, true>
{
static const bool value = is_void<_Rp>::value ||
__is_core_convertible<typename __invoke_of<_Fp, _ArgTypes...>::type,
_Rp>::value;
};
template <class _Fp>
struct __callable<_Fp, false>
{
static const bool value = false;
};
template <class _Fp>
using _EnableIfLValueCallable = typename enable_if<__callable<_Fp&>::value>::type;
public:
typedef _Rp result_type;
// construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY
function() _NOEXCEPT { }
_LIBCPP_INLINE_VISIBILITY
function(nullptr_t) _NOEXCEPT {}
function(const function&);
function(function&&) _NOEXCEPT;
template<class _Fp, class = _EnableIfLValueCallable<_Fp>>
function(_Fp);
#if _LIBCPP_STD_VER <= 14
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&) _NOEXCEPT {}
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&, nullptr_t) _NOEXCEPT {}
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, const function&);
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, function&&);
template<class _Fp, class _Alloc, class = _EnableIfLValueCallable<_Fp>>
function(allocator_arg_t, const _Alloc& __a, _Fp __f);
#endif
function& operator=(const function&);
function& operator=(function&&) _NOEXCEPT;
function& operator=(nullptr_t) _NOEXCEPT;
template<class _Fp, class = _EnableIfLValueCallable<typename decay<_Fp>::type>>
function& operator=(_Fp&&);
~function();
// function modifiers:
void swap(function&) _NOEXCEPT;
#if _LIBCPP_STD_VER <= 14
template<class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY
void assign(_Fp&& __f, const _Alloc& __a)
{function(allocator_arg, __a, _VSTD::forward<_Fp>(__f)).swap(*this);}
#endif
// function capacity:
_LIBCPP_INLINE_VISIBILITY
explicit operator bool() const _NOEXCEPT {
return static_cast<bool>(__f_);
}
// deleted overloads close a possible hole in the type system
template<class _R2, class... _ArgTypes2>
bool operator==(const function<_R2(_ArgTypes2...)>&) const = delete;
template<class _R2, class... _ArgTypes2>
bool operator!=(const function<_R2(_ArgTypes2...)>&) const = delete;
public:
// function invocation:
_Rp operator()(_ArgTypes...) const;
#ifndef _LIBCPP_NO_RTTI
// function target access:
const std::type_info& target_type() const _NOEXCEPT;
template <typename _Tp> _Tp* target() _NOEXCEPT;
template <typename _Tp> const _Tp* target() const _NOEXCEPT;
#endif // _LIBCPP_NO_RTTI
};
#if _LIBCPP_STD_VER >= 17
template<class _Rp, class ..._Ap>
function(_Rp(*)(_Ap...)) -> function<_Rp(_Ap...)>;
template<class _Fp>
struct __strip_signature;
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...)> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) &> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const &> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile &> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile &> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) & noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const & noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile & noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile & noexcept> { using type = _Rp(_Ap...); };
template<class _Fp, class _Stripped = typename __strip_signature<decltype(&_Fp::operator())>::type>
function(_Fp) -> function<_Stripped>;
#endif // _LIBCPP_STD_VER >= 17
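// What these guides deduce (sketch with hypothetical names):
//   int __plus(int, int);
//   function __f = &__plus;                       // function<int(int, int)>
//   auto __lam = [](double __d) { return __d; };
//   function __g = __lam;                         // function<double(double)>,
//   // via __strip_signature<decltype(&decltype(__lam)::operator())>, which
//   // peels the implicit `const` off the lambda's call operator.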
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>::function(const function& __f) : __f_(__f.__f_) {}
#if _LIBCPP_STD_VER <= 14
template<class _Rp, class ..._ArgTypes>
template <class _Alloc>
function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc&,
const function& __f) : __f_(__f.__f_) {}
#endif
template <class _Rp, class... _ArgTypes>
function<_Rp(_ArgTypes...)>::function(function&& __f) _NOEXCEPT
: __f_(_VSTD::move(__f.__f_)) {}
#if _LIBCPP_STD_VER <= 14
template<class _Rp, class ..._ArgTypes>
template <class _Alloc>
function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc&,
function&& __f)
: __f_(_VSTD::move(__f.__f_)) {}
#endif
template <class _Rp, class... _ArgTypes>
template <class _Fp, class>
function<_Rp(_ArgTypes...)>::function(_Fp __f) : __f_(_VSTD::move(__f)) {}
#if _LIBCPP_STD_VER <= 14
template <class _Rp, class... _ArgTypes>
template <class _Fp, class _Alloc, class>
function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc& __a,
_Fp __f)
: __f_(_VSTD::move(__f), __a) {}
#endif
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>&
function<_Rp(_ArgTypes...)>::operator=(const function& __f)
{
function(__f).swap(*this);
return *this;
}
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>&
function<_Rp(_ArgTypes...)>::operator=(function&& __f) _NOEXCEPT
{
__f_ = _VSTD::move(__f.__f_);
return *this;
}
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>&
function<_Rp(_ArgTypes...)>::operator=(nullptr_t) _NOEXCEPT
{
__f_ = nullptr;
return *this;
}
template<class _Rp, class ..._ArgTypes>
template <class _Fp, class>
function<_Rp(_ArgTypes...)>&
function<_Rp(_ArgTypes...)>::operator=(_Fp&& __f)
{
function(_VSTD::forward<_Fp>(__f)).swap(*this);
return *this;
}
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>::~function() {}
template<class _Rp, class ..._ArgTypes>
void
function<_Rp(_ArgTypes...)>::swap(function& __f) _NOEXCEPT
{
__f_.swap(__f.__f_);
}
template<class _Rp, class ..._ArgTypes>
_Rp
function<_Rp(_ArgTypes...)>::operator()(_ArgTypes... __arg) const
{
return __f_(_VSTD::forward<_ArgTypes>(__arg)...);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Rp, class ..._ArgTypes>
const std::type_info&
function<_Rp(_ArgTypes...)>::target_type() const _NOEXCEPT
{
return __f_.target_type();
}
template<class _Rp, class ..._ArgTypes>
template <typename _Tp>
_Tp*
function<_Rp(_ArgTypes...)>::target() _NOEXCEPT
{
return (_Tp*)(__f_.template target<_Tp>());
}
template<class _Rp, class ..._ArgTypes>
template <typename _Tp>
const _Tp*
function<_Rp(_ArgTypes...)>::target() const _NOEXCEPT
{
return __f_.template target<_Tp>();
}
#endif // _LIBCPP_NO_RTTI
template <class _Rp, class... _ArgTypes>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const function<_Rp(_ArgTypes...)>& __f, nullptr_t) _NOEXCEPT {return !__f;}
template <class _Rp, class... _ArgTypes>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(nullptr_t, const function<_Rp(_ArgTypes...)>& __f) _NOEXCEPT {return !__f;}
template <class _Rp, class... _ArgTypes>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const function<_Rp(_ArgTypes...)>& __f, nullptr_t) _NOEXCEPT {return (bool)__f;}
template <class _Rp, class... _ArgTypes>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(nullptr_t, const function<_Rp(_ArgTypes...)>& __f) _NOEXCEPT {return (bool)__f;}
template <class _Rp, class... _ArgTypes>
inline _LIBCPP_INLINE_VISIBILITY
void
swap(function<_Rp(_ArgTypes...)>& __x, function<_Rp(_ArgTypes...)>& __y) _NOEXCEPT
{return __x.swap(__y);}
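// Usage (sketch): the overloads above make emptiness checks read naturally.
//   function<void()> __task;
//   bool __is_empty = (__task == nullptr); // true until a target is installed
//   __task = [] {};
//   bool __engaged = (__task != nullptr);  // now true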
#elif defined(_LIBCPP_ENABLE_CXX03_FUNCTION)
namespace __function {
template<class _Fp> class __base;
template<class _Rp>
class __base<_Rp()>
{
__base(const __base&);
__base& operator=(const __base&);
public:
__base() {}
virtual ~__base() {}
virtual __base* __clone() const = 0;
virtual void __clone(__base*) const = 0;
virtual void destroy() = 0;
virtual void destroy_deallocate() = 0;
virtual _Rp operator()() = 0;
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const = 0;
virtual const std::type_info& target_type() const = 0;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0>
class __base<_Rp(_A0)>
{
__base(const __base&);
__base& operator=(const __base&);
public:
__base() {}
virtual ~__base() {}
virtual __base* __clone() const = 0;
virtual void __clone(__base*) const = 0;
virtual void destroy() = 0;
virtual void destroy_deallocate() = 0;
virtual _Rp operator()(_A0) = 0;
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const = 0;
virtual const std::type_info& target_type() const = 0;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0, class _A1>
class __base<_Rp(_A0, _A1)>
{
__base(const __base&);
__base& operator=(const __base&);
public:
__base() {}
virtual ~__base() {}
virtual __base* __clone() const = 0;
virtual void __clone(__base*) const = 0;
virtual void destroy() = 0;
virtual void destroy_deallocate() = 0;
virtual _Rp operator()(_A0, _A1) = 0;
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const = 0;
virtual const std::type_info& target_type() const = 0;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0, class _A1, class _A2>
class __base<_Rp(_A0, _A1, _A2)>
{
__base(const __base&);
__base& operator=(const __base&);
public:
__base() {}
virtual ~__base() {}
virtual __base* __clone() const = 0;
virtual void __clone(__base*) const = 0;
virtual void destroy() = 0;
virtual void destroy_deallocate() = 0;
virtual _Rp operator()(_A0, _A1, _A2) = 0;
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const = 0;
virtual const std::type_info& target_type() const = 0;
#endif // _LIBCPP_NO_RTTI
};
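// __base<_Rp(_A0...)> is the type-erased interface: callers only see the
// virtual call, clone, and destroy operations. __func (below) is the one
// concrete implementation, parameterized on the stored callable _Fp and
// its allocator, held together in a __compressed_pair. __clone() makes a
// heap-allocated copy, while __clone(__base*) placement-constructs a copy
// into caller-provided storage (used for the small-buffer case).
// destroy() runs only the destructor; destroy_deallocate() also returns
// the memory to the allocator.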
template<class _FD, class _Alloc, class _FB> class __func;
template<class _Fp, class _Alloc, class _Rp>
class __func<_Fp, _Alloc, _Rp()>
: public __base<_Rp()>
{
__compressed_pair<_Fp, _Alloc> __f_;
public:
explicit __func(_Fp __f) : __f_(_VSTD::move(__f), __default_init_tag()) {}
explicit __func(_Fp __f, _Alloc __a) : __f_(_VSTD::move(__f), _VSTD::move(__a)) {}
virtual __base<_Rp()>* __clone() const;
virtual void __clone(__base<_Rp()>*) const;
virtual void destroy();
virtual void destroy_deallocate();
virtual _Rp operator()();
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const;
virtual const std::type_info& target_type() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Fp, class _Alloc, class _Rp>
__base<_Rp()>*
__func<_Fp, _Alloc, _Rp()>::__clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a));
return __hold.release();
}
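// Note the exception-safety pattern above: the freshly allocated storage
// is owned by a unique_ptr whose __allocator_destructor deleter returns
// it to the allocator if the placement-new of the copied callable throws;
// release() is reached only after construction succeeds.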
template<class _Fp, class _Alloc, class _Rp>
void
__func<_Fp, _Alloc, _Rp()>::__clone(__base<_Rp()>* __p) const
{
::new ((void*)__p) __func(__f_.first(), __f_.second());
}
template<class _Fp, class _Alloc, class _Rp>
void
__func<_Fp, _Alloc, _Rp()>::destroy()
{
__f_.~__compressed_pair<_Fp, _Alloc>();
}
template<class _Fp, class _Alloc, class _Rp>
void
__func<_Fp, _Alloc, _Rp()>::destroy_deallocate()
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
__f_.~__compressed_pair<_Fp, _Alloc>();
__a.deallocate(this, 1);
}
template<class _Fp, class _Alloc, class _Rp>
_Rp
__func<_Fp, _Alloc, _Rp()>::operator()()
{
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_.first());
}
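// __invoke_void_return_wrapper lets one definition serve every _Rp: when
// _Rp is void it discards the callable's return value, and otherwise it
// forwards the result unchanged.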
#ifndef _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp>
const void*
__func<_Fp, _Alloc, _Rp()>::target(const type_info& __ti) const
{
if (__ti == typeid(_Fp))
return _VSTD::addressof(__f_.first());
return (const void*)0;
}
template<class _Fp, class _Alloc, class _Rp>
const std::type_info&
__func<_Fp, _Alloc, _Rp()>::target_type() const
{
return typeid(_Fp);
}
#endif // _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0>
class __func<_Fp, _Alloc, _Rp(_A0)>
: public __base<_Rp(_A0)>
{
__compressed_pair<_Fp, _Alloc> __f_;
public:
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f) : __f_(_VSTD::move(__f), __default_init_tag()) {}
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f, _Alloc __a)
: __f_(_VSTD::move(__f), _VSTD::move(__a)) {}
virtual __base<_Rp(_A0)>* __clone() const;
virtual void __clone(__base<_Rp(_A0)>*) const;
virtual void destroy();
virtual void destroy_deallocate();
virtual _Rp operator()(_A0);
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const;
virtual const std::type_info& target_type() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Fp, class _Alloc, class _Rp, class _A0>
__base<_Rp(_A0)>*
__func<_Fp, _Alloc, _Rp(_A0)>::__clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a));
return __hold.release();
}
template<class _Fp, class _Alloc, class _Rp, class _A0>
void
__func<_Fp, _Alloc, _Rp(_A0)>::__clone(__base<_Rp(_A0)>* __p) const
{
::new ((void*)__p) __func(__f_.first(), __f_.second());
}
template<class _Fp, class _Alloc, class _Rp, class _A0>
void
__func<_Fp, _Alloc, _Rp(_A0)>::destroy()
{
__f_.~__compressed_pair<_Fp, _Alloc>();
}
template<class _Fp, class _Alloc, class _Rp, class _A0>
void
__func<_Fp, _Alloc, _Rp(_A0)>::destroy_deallocate()
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
__f_.~__compressed_pair<_Fp, _Alloc>();
__a.deallocate(this, 1);
}
template<class _Fp, class _Alloc, class _Rp, class _A0>
_Rp
__func<_Fp, _Alloc, _Rp(_A0)>::operator()(_A0 __a0)
{
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_.first(), __a0);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0>
const void*
__func<_Fp, _Alloc, _Rp(_A0)>::target(const type_info& __ti) const
{
if (__ti == typeid(_Fp))
return &__f_.first();
return (const void*)0;
}
template<class _Fp, class _Alloc, class _Rp, class _A0>
const std::type_info&
__func<_Fp, _Alloc, _Rp(_A0)>::target_type() const
{
return typeid(_Fp);
}
#endif // _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
class __func<_Fp, _Alloc, _Rp(_A0, _A1)>
: public __base<_Rp(_A0, _A1)>
{
__compressed_pair<_Fp, _Alloc> __f_;
public:
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f) : __f_(_VSTD::move(__f), __default_init_tag()) {}
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f, _Alloc __a)
: __f_(_VSTD::move(__f), _VSTD::move(__a)) {}
virtual __base<_Rp(_A0, _A1)>* __clone() const;
virtual void __clone(__base<_Rp(_A0, _A1)>*) const;
virtual void destroy();
virtual void destroy_deallocate();
virtual _Rp operator()(_A0, _A1);
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const;
virtual const std::type_info& target_type() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
__base<_Rp(_A0, _A1)>*
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::__clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a));
return __hold.release();
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::__clone(__base<_Rp(_A0, _A1)>* __p) const
{
::new ((void*)__p) __func(__f_.first(), __f_.second());
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::destroy()
{
__f_.~__compressed_pair<_Fp, _Alloc>();
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::destroy_deallocate()
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
__f_.~__compressed_pair<_Fp, _Alloc>();
__a.deallocate(this, 1);
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
_Rp
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::operator()(_A0 __a0, _A1 __a1)
{
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_.first(), __a0, __a1);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
const void*
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::target(const type_info& __ti) const
{
if (__ti == typeid(_Fp))
return &__f_.first();
return (const void*)0;
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
const std::type_info&
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::target_type() const
{
return typeid(_Fp);
}
#endif // _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
class __func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>
: public __base<_Rp(_A0, _A1, _A2)>
{
__compressed_pair<_Fp, _Alloc> __f_;
public:
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f) : __f_(_VSTD::move(__f), __default_init_tag()) {}
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f, _Alloc __a)
: __f_(_VSTD::move(__f), _VSTD::move(__a)) {}
virtual __base<_Rp(_A0, _A1, _A2)>* __clone() const;
virtual void __clone(__base<_Rp(_A0, _A1, _A2)>*) const;
virtual void destroy();
virtual void destroy_deallocate();
virtual _Rp operator()(_A0, _A1, _A2);
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const;
virtual const std::type_info& target_type() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
__base<_Rp(_A0, _A1, _A2)>*
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::__clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a));
return __hold.release();
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::__clone(__base<_Rp(_A0, _A1, _A2)>* __p) const
{
::new ((void*)__p) __func(__f_.first(), __f_.second());
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::destroy()
{
__f_.~__compressed_pair<_Fp, _Alloc>();
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::destroy_deallocate()
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
__f_.~__compressed_pair<_Fp, _Alloc>();
__a.deallocate(this, 1);
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
_Rp
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::operator()(_A0 __a0, _A1 __a1, _A2 __a2)
{
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_.first(), __a0, __a1, __a2);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
const void*
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::target(const type_info& __ti) const
{
if (__ti == typeid(_Fp))
return &__f_.first();
return (const void*)0;
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
const std::type_info&
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::target_type() const
{
return typeid(_Fp);
}
#endif // _LIBCPP_NO_RTTI
} // namespace __function
template<class _Rp>
class _LIBCPP_TEMPLATE_VIS function<_Rp()>
{
typedef __function::__base<_Rp()> __base;
aligned_storage<3*sizeof(void*)>::type __buf_;
__base* __f_;
public:
typedef _Rp result_type;
// 20.7.16.2.1, construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY explicit function() : __f_(0) {}
_LIBCPP_INLINE_VISIBILITY function(nullptr_t) : __f_(0) {}
function(const function&);
template<class _Fp>
function(_Fp,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&) : __f_(0) {}
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&, nullptr_t) : __f_(0) {}
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, const function&);
template<class _Fp, class _Alloc>
function(allocator_arg_t, const _Alloc& __a, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
function& operator=(const function&);
function& operator=(nullptr_t);
template<class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function&
>::type
operator=(_Fp);
~function();
// 20.7.16.2.2, function modifiers:
void swap(function&);
template<class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY
void assign(_Fp __f, const _Alloc& __a)
{function(allocator_arg, __a, __f).swap(*this);}
// 20.7.16.2.3, function capacity:
_LIBCPP_INLINE_VISIBILITY explicit operator bool() const {return __f_;}
template<class _R2>
bool operator==(const function<_R2()>&) const = delete;
template<class _R2>
bool operator!=(const function<_R2()>&) const = delete;
// 20.7.16.2.4, function invocation:
_Rp operator()() const;
#ifndef _LIBCPP_NO_RTTI
// 20.7.16.2.5, function target access:
const std::type_info& target_type() const;
template <typename _Tp> _Tp* target();
template <typename _Tp> const _Tp* target() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp>
function<_Rp()>::function(const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp>
template<class _Alloc>
function<_Rp()>::function(allocator_arg_t, const _Alloc&, const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp>
template <class _Fp>
function<_Rp()>::function(_Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, allocator<_Fp>, _Rp()> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f);
}
else
{
typedef allocator<_FF> _Ap;
_Ap __a;
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a));
__f_ = __hold.release();
}
}
}
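// The constructor above implements the small-object optimization: when
// the concrete wrapper _FF (vtable pointer plus the stored callable) fits
// into the three-pointer buffer __buf_, it is placement-constructed in
// place and no allocation happens; larger callables are allocated on the
// heap through allocator<_FF>. A hypothetical sketch of the two cases:
//
//   struct Small { void operator()() const {} };        // stored inline
//   struct Big { char payload[4 * sizeof(void*)];
//                void operator()() const {} };          // heap-allocated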
template<class _Rp>
template <class _Fp, class _Alloc>
function<_Rp()>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
typedef allocator_traits<_Alloc> __alloc_traits;
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, _Alloc, _Rp()> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f, __a0);
}
else
{
typedef typename __rebind_alloc_helper<__alloc_traits, _FF>::type _Ap;
_Ap __a(__a0);
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, _Alloc(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp>
function<_Rp()>&
function<_Rp()>::operator=(const function& __f)
{
if (__f)
function(__f).swap(*this);
else
*this = nullptr;
return *this;
}
template<class _Rp>
function<_Rp()>&
function<_Rp()>::operator=(nullptr_t)
{
__base* __t = __f_;
__f_ = 0;
if (__t == (__base*)&__buf_)
__t->destroy();
else if (__t)
__t->destroy_deallocate();
return *this;
}
template<class _Rp>
template <class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function<_Rp()>&
>::type
function<_Rp()>::operator=(_Fp __f)
{
function(_VSTD::move(__f)).swap(*this);
return *this;
}
template<class _Rp>
function<_Rp()>::~function()
{
if (__f_ == (__base*)&__buf_)
__f_->destroy();
else if (__f_)
__f_->destroy_deallocate();
}
template<class _Rp>
void
function<_Rp()>::swap(function& __f)
{
if (_VSTD::addressof(__f) == this)
return;
if (__f_ == (__base*)&__buf_ && __f.__f_ == (__base*)&__f.__buf_)
{
typename aligned_storage<sizeof(__buf_)>::type __tempbuf;
__base* __t = (__base*)&__tempbuf;
__f_->__clone(__t);
__f_->destroy();
__f_ = 0;
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = 0;
__f_ = (__base*)&__buf_;
__t->__clone((__base*)&__f.__buf_);
__t->destroy();
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f_ == (__base*)&__buf_)
{
__f_->__clone((__base*)&__f.__buf_);
__f_->destroy();
__f_ = __f.__f_;
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f.__f_ == (__base*)&__f.__buf_)
{
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = __f_;
__f_ = (__base*)&__buf_;
}
else
_VSTD::swap(__f_, __f.__f_);
}
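// The swap above distinguishes three cases because objects stored in the
// small buffer cannot have their pointers exchanged:
//  1. both targets are inline: clone through a temporary buffer on the
//     stack, destroying each source as it is copied out;
//  2. exactly one target is inline: clone it into the other function's
//     buffer and take over the other's heap pointer;
//  3. both targets are on the heap: a plain pointer swap suffices.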
template<class _Rp>
_Rp
function<_Rp()>::operator()() const
{
if (__f_ == 0)
__throw_bad_function_call();
return (*__f_)();
}
#ifndef _LIBCPP_NO_RTTI
template<class _Rp>
const std::type_info&
function<_Rp()>::target_type() const
{
if (__f_ == 0)
return typeid(void);
return __f_->target_type();
}
template<class _Rp>
template <typename _Tp>
_Tp*
function<_Rp()>::target()
{
if (__f_ == 0)
return (_Tp*)0;
return (_Tp*) const_cast<void *>(__f_->target(typeid(_Tp)));
}
template<class _Rp>
template <typename _Tp>
const _Tp*
function<_Rp()>::target() const
{
if (__f_ == 0)
return (const _Tp*)0;
return (const _Tp*)__f_->target(typeid(_Tp));
}
#endif // _LIBCPP_NO_RTTI
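// Illustrative use of the RTTI-based target access above (Adder is a
// hypothetical callable, not part of the library):
//
//   struct Adder { int operator()(int x) const { return x + 1; } };
//   function<int(int)> f = Adder();
//   assert(f.target_type() == typeid(Adder));
//   Adder* p = f.target<Adder>();          // non-null: types match
//   assert(f.target<int(*)(int)>() == 0);  // null: wrong target type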
template<class _Rp, class _A0>
class _LIBCPP_TEMPLATE_VIS function<_Rp(_A0)>
: public unary_function<_A0, _Rp>
{
typedef __function::__base<_Rp(_A0)> __base;
aligned_storage<3*sizeof(void*)>::type __buf_;
__base* __f_;
public:
typedef _Rp result_type;
// 20.7.16.2.1, construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY explicit function() : __f_(0) {}
_LIBCPP_INLINE_VISIBILITY function(nullptr_t) : __f_(0) {}
function(const function&);
template<class _Fp>
function(_Fp,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&) : __f_(0) {}
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&, nullptr_t) : __f_(0) {}
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, const function&);
template<class _Fp, class _Alloc>
function(allocator_arg_t, const _Alloc& __a, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
function& operator=(const function&);
function& operator=(nullptr_t);
template<class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function&
>::type
operator=(_Fp);
~function();
// 20.7.16.2.2, function modifiers:
void swap(function&);
template<class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY
void assign(_Fp __f, const _Alloc& __a)
{function(allocator_arg, __a, __f).swap(*this);}
// 20.7.16.2.3, function capacity:
_LIBCPP_INLINE_VISIBILITY explicit operator bool() const {return __f_;}
template<class _R2, class _B0>
bool operator==(const function<_R2(_B0)>&) const = delete;
template<class _R2, class _B0>
bool operator!=(const function<_R2(_B0)>&) const = delete;
// 20.7.16.2.4, function invocation:
_Rp operator()(_A0) const;
#ifndef _LIBCPP_NO_RTTI
// 20.7.16.2.5, function target access:
const std::type_info& target_type() const;
template <typename _Tp> _Tp* target();
template <typename _Tp> const _Tp* target() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0>
function<_Rp(_A0)>::function(const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0>
template<class _Alloc>
function<_Rp(_A0)>::function(allocator_arg_t, const _Alloc&, const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0>
template <class _Fp>
function<_Rp(_A0)>::function(_Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, allocator<_Fp>, _Rp(_A0)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f);
}
else
{
typedef allocator<_FF> _Ap;
_Ap __a;
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0>
template <class _Fp, class _Alloc>
function<_Rp(_A0)>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
typedef allocator_traits<_Alloc> __alloc_traits;
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, _Alloc, _Rp(_A0)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f, __a0);
}
else
{
typedef typename __rebind_alloc_helper<__alloc_traits, _FF>::type _Ap;
_Ap __a(__a0);
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, _Alloc(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0>
function<_Rp(_A0)>&
function<_Rp(_A0)>::operator=(const function& __f)
{
if (__f)
function(__f).swap(*this);
else
*this = nullptr;
return *this;
}
template<class _Rp, class _A0>
function<_Rp(_A0)>&
function<_Rp(_A0)>::operator=(nullptr_t)
{
__base* __t = __f_;
__f_ = 0;
if (__t == (__base*)&__buf_)
__t->destroy();
else if (__t)
__t->destroy_deallocate();
return *this;
}
template<class _Rp, class _A0>
template <class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function<_Rp(_A0)>&
>::type
function<_Rp(_A0)>::operator=(_Fp __f)
{
function(_VSTD::move(__f)).swap(*this);
return *this;
}
template<class _Rp, class _A0>
function<_Rp(_A0)>::~function()
{
if (__f_ == (__base*)&__buf_)
__f_->destroy();
else if (__f_)
__f_->destroy_deallocate();
}
template<class _Rp, class _A0>
void
function<_Rp(_A0)>::swap(function& __f)
{
if (_VSTD::addressof(__f) == this)
return;
if (__f_ == (__base*)&__buf_ && __f.__f_ == (__base*)&__f.__buf_)
{
typename aligned_storage<sizeof(__buf_)>::type __tempbuf;
__base* __t = (__base*)&__tempbuf;
__f_->__clone(__t);
__f_->destroy();
__f_ = 0;
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = 0;
__f_ = (__base*)&__buf_;
__t->__clone((__base*)&__f.__buf_);
__t->destroy();
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f_ == (__base*)&__buf_)
{
__f_->__clone((__base*)&__f.__buf_);
__f_->destroy();
__f_ = __f.__f_;
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f.__f_ == (__base*)&__f.__buf_)
{
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = __f_;
__f_ = (__base*)&__buf_;
}
else
_VSTD::swap(__f_, __f.__f_);
}
template<class _Rp, class _A0>
_Rp
function<_Rp(_A0)>::operator()(_A0 __a0) const
{
if (__f_ == 0)
__throw_bad_function_call();
return (*__f_)(__a0);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Rp, class _A0>
const std::type_info&
function<_Rp(_A0)>::target_type() const
{
if (__f_ == 0)
return typeid(void);
return __f_->target_type();
}
template<class _Rp, class _A0>
template <typename _Tp>
_Tp*
function<_Rp(_A0)>::target()
{
if (__f_ == 0)
return (_Tp*)0;
return (_Tp*) const_cast<void *>(__f_->target(typeid(_Tp)));
}
template<class _Rp, class _A0>
template <typename _Tp>
const _Tp*
function<_Rp(_A0)>::target() const
{
if (__f_ == 0)
return (const _Tp*)0;
return (const _Tp*)__f_->target(typeid(_Tp));
}
#endif // _LIBCPP_NO_RTTI
template<class _Rp, class _A0, class _A1>
class _LIBCPP_TEMPLATE_VIS function<_Rp(_A0, _A1)>
: public binary_function<_A0, _A1, _Rp>
{
typedef __function::__base<_Rp(_A0, _A1)> __base;
aligned_storage<3*sizeof(void*)>::type __buf_;
__base* __f_;
public:
typedef _Rp result_type;
// 20.7.16.2.1, construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY explicit function() : __f_(0) {}
_LIBCPP_INLINE_VISIBILITY function(nullptr_t) : __f_(0) {}
function(const function&);
template<class _Fp>
function(_Fp,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&) : __f_(0) {}
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&, nullptr_t) : __f_(0) {}
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, const function&);
template<class _Fp, class _Alloc>
function(allocator_arg_t, const _Alloc& __a, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
function& operator=(const function&);
function& operator=(nullptr_t);
template<class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function&
>::type
operator=(_Fp);
~function();
// 20.7.16.2.2, function modifiers:
void swap(function&);
template<class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY
void assign(_Fp __f, const _Alloc& __a)
{function(allocator_arg, __a, __f).swap(*this);}
// 20.7.16.2.3, function capacity:
_LIBCPP_INLINE_VISIBILITY explicit operator bool() const {return __f_;}
template<class _R2, class _B0, class _B1>
bool operator==(const function<_R2(_B0, _B1)>&) const = delete;
template<class _R2, class _B0, class _B1>
bool operator!=(const function<_R2(_B0, _B1)>&) const = delete;
// 20.7.16.2.4, function invocation:
_Rp operator()(_A0, _A1) const;
#ifndef _LIBCPP_NO_RTTI
// 20.7.16.2.5, function target access:
const std::type_info& target_type() const;
template <typename _Tp> _Tp* target();
template <typename _Tp> const _Tp* target() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0, class _A1>
function<_Rp(_A0, _A1)>::function(const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0, class _A1>
template<class _Alloc>
function<_Rp(_A0, _A1)>::function(allocator_arg_t, const _Alloc&, const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0, class _A1>
template <class _Fp>
function<_Rp(_A0, _A1)>::function(_Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, allocator<_Fp>, _Rp(_A0, _A1)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f);
}
else
{
typedef allocator<_FF> _Ap;
_Ap __a;
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0, class _A1>
template <class _Fp, class _Alloc>
function<_Rp(_A0, _A1)>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
typedef allocator_traits<_Alloc> __alloc_traits;
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, _Alloc, _Rp(_A0, _A1)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f, __a0);
}
else
{
typedef typename __rebind_alloc_helper<__alloc_traits, _FF>::type _Ap;
_Ap __a(__a0);
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, _Alloc(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0, class _A1>
function<_Rp(_A0, _A1)>&
function<_Rp(_A0, _A1)>::operator=(const function& __f)
{
if (__f)
function(__f).swap(*this);
else
*this = nullptr;
return *this;
}
template<class _Rp, class _A0, class _A1>
function<_Rp(_A0, _A1)>&
function<_Rp(_A0, _A1)>::operator=(nullptr_t)
{
__base* __t = __f_;
__f_ = 0;
if (__t == (__base*)&__buf_)
__t->destroy();
else if (__t)
__t->destroy_deallocate();
return *this;
}
template<class _Rp, class _A0, class _A1>
template <class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function<_Rp(_A0, _A1)>&
>::type
function<_Rp(_A0, _A1)>::operator=(_Fp __f)
{
function(_VSTD::move(__f)).swap(*this);
return *this;
}
template<class _Rp, class _A0, class _A1>
function<_Rp(_A0, _A1)>::~function()
{
if (__f_ == (__base*)&__buf_)
__f_->destroy();
else if (__f_)
__f_->destroy_deallocate();
}
template<class _Rp, class _A0, class _A1>
void
function<_Rp(_A0, _A1)>::swap(function& __f)
{
if (_VSTD::addressof(__f) == this)
return;
if (__f_ == (__base*)&__buf_ && __f.__f_ == (__base*)&__f.__buf_)
{
typename aligned_storage<sizeof(__buf_)>::type __tempbuf;
__base* __t = (__base*)&__tempbuf;
__f_->__clone(__t);
__f_->destroy();
__f_ = 0;
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = 0;
__f_ = (__base*)&__buf_;
__t->__clone((__base*)&__f.__buf_);
__t->destroy();
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f_ == (__base*)&__buf_)
{
__f_->__clone((__base*)&__f.__buf_);
__f_->destroy();
__f_ = __f.__f_;
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f.__f_ == (__base*)&__f.__buf_)
{
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = __f_;
__f_ = (__base*)&__buf_;
}
else
_VSTD::swap(__f_, __f.__f_);
}
template<class _Rp, class _A0, class _A1>
_Rp
function<_Rp(_A0, _A1)>::operator()(_A0 __a0, _A1 __a1) const
{
if (__f_ == 0)
__throw_bad_function_call();
return (*__f_)(__a0, __a1);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Rp, class _A0, class _A1>
const std::type_info&
function<_Rp(_A0, _A1)>::target_type() const
{
if (__f_ == 0)
return typeid(void);
return __f_->target_type();
}
template<class _Rp, class _A0, class _A1>
template <typename _Tp>
_Tp*
function<_Rp(_A0, _A1)>::target()
{
if (__f_ == 0)
return (_Tp*)0;
return (_Tp*) const_cast<void *>(__f_->target(typeid(_Tp)));
}
template<class _Rp, class _A0, class _A1>
template <typename _Tp>
const _Tp*
function<_Rp(_A0, _A1)>::target() const
{
if (__f_ == 0)
return (const _Tp*)0;
return (const _Tp*)__f_->target(typeid(_Tp));
}
#endif // _LIBCPP_NO_RTTI
template<class _Rp, class _A0, class _A1, class _A2>
class _LIBCPP_TEMPLATE_VIS function<_Rp(_A0, _A1, _A2)>
{
typedef __function::__base<_Rp(_A0, _A1, _A2)> __base;
aligned_storage<3*sizeof(void*)>::type __buf_;
__base* __f_;
public:
typedef _Rp result_type;
// 20.7.16.2.1, construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY explicit function() : __f_(0) {}
_LIBCPP_INLINE_VISIBILITY function(nullptr_t) : __f_(0) {}
function(const function&);
template<class _Fp>
function(_Fp,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&) : __f_(0) {}
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&, nullptr_t) : __f_(0) {}
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, const function&);
template<class _Fp, class _Alloc>
function(allocator_arg_t, const _Alloc& __a, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
function& operator=(const function&);
function& operator=(nullptr_t);
template<class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function&
>::type
operator=(_Fp);
~function();
// 20.7.16.2.2, function modifiers:
void swap(function&);
template<class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY
void assign(_Fp __f, const _Alloc& __a)
{function(allocator_arg, __a, __f).swap(*this);}
// 20.7.16.2.3, function capacity:
_LIBCPP_INLINE_VISIBILITY explicit operator bool() const {return __f_;}
template<class _R2, class _B0, class _B1, class _B2>
bool operator==(const function<_R2(_B0, _B1, _B2)>&) const = delete;
template<class _R2, class _B0, class _B1, class _B2>
bool operator!=(const function<_R2(_B0, _B1, _B2)>&) const = delete;
// 20.7.16.2.4, function invocation:
_Rp operator()(_A0, _A1, _A2) const;
#ifndef _LIBCPP_NO_RTTI
// 20.7.16.2.5, function target access:
const std::type_info& target_type() const;
template <typename _Tp> _Tp* target();
template <typename _Tp> const _Tp* target() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0, class _A1, class _A2>
function<_Rp(_A0, _A1, _A2)>::function(const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0, class _A1, class _A2>
template<class _Alloc>
function<_Rp(_A0, _A1, _A2)>::function(allocator_arg_t, const _Alloc&,
const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0, class _A1, class _A2>
template <class _Fp>
function<_Rp(_A0, _A1, _A2)>::function(_Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, allocator<_Fp>, _Rp(_A0, _A1, _A2)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f);
}
else
{
typedef allocator<_FF> _Ap;
_Ap __a;
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0, class _A1, class _A2>
template <class _Fp, class _Alloc>
function<_Rp(_A0, _A1, _A2)>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
typedef allocator_traits<_Alloc> __alloc_traits;
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f, __a0);
}
else
{
typedef typename __rebind_alloc_helper<__alloc_traits, _FF>::type _Ap;
_Ap __a(__a0);
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, _Alloc(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0, class _A1, class _A2>
function<_Rp(_A0, _A1, _A2)>&
function<_Rp(_A0, _A1, _A2)>::operator=(const function& __f)
{
if (__f)
function(__f).swap(*this);
else
*this = nullptr;
return *this;
}
template<class _Rp, class _A0, class _A1, class _A2>
function<_Rp(_A0, _A1, _A2)>&
function<_Rp(_A0, _A1, _A2)>::operator=(nullptr_t)
{
__base* __t = __f_;
__f_ = 0;
if (__t == (__base*)&__buf_)
__t->destroy();
else if (__t)
__t->destroy_deallocate();
return *this;
}
template<class _Rp, class _A0, class _A1, class _A2>
template <class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function<_Rp(_A0, _A1, _A2)>&
>::type
function<_Rp(_A0, _A1, _A2)>::operator=(_Fp __f)
{
function(_VSTD::move(__f)).swap(*this);
return *this;
}
template<class _Rp, class _A0, class _A1, class _A2>
function<_Rp(_A0, _A1, _A2)>::~function()
{
if (__f_ == (__base*)&__buf_)
__f_->destroy();
else if (__f_)
__f_->destroy_deallocate();
}
template<class _Rp, class _A0, class _A1, class _A2>
void
function<_Rp(_A0, _A1, _A2)>::swap(function& __f)
{
if (_VSTD::addressof(__f) == this)
return;
if (__f_ == (__base*)&__buf_ && __f.__f_ == (__base*)&__f.__buf_)
{
typename aligned_storage<sizeof(__buf_)>::type __tempbuf;
__base* __t = (__base*)&__tempbuf;
__f_->__clone(__t);
__f_->destroy();
__f_ = 0;
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = 0;
__f_ = (__base*)&__buf_;
__t->__clone((__base*)&__f.__buf_);
__t->destroy();
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f_ == (__base*)&__buf_)
{
__f_->__clone((__base*)&__f.__buf_);
__f_->destroy();
__f_ = __f.__f_;
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f.__f_ == (__base*)&__f.__buf_)
{
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = __f_;
__f_ = (__base*)&__buf_;
}
else
_VSTD::swap(__f_, __f.__f_);
}
template<class _Rp, class _A0, class _A1, class _A2>
_Rp
function<_Rp(_A0, _A1, _A2)>::operator()(_A0 __a0, _A1 __a1, _A2 __a2) const
{
if (__f_ == 0)
__throw_bad_function_call();
return (*__f_)(__a0, __a1, __a2);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Rp, class _A0, class _A1, class _A2>
const std::type_info&
function<_Rp(_A0, _A1, _A2)>::target_type() const
{
if (__f_ == 0)
return typeid(void);
return __f_->target_type();
}
template<class _Rp, class _A0, class _A1, class _A2>
template <typename _Tp>
_Tp*
function<_Rp(_A0, _A1, _A2)>::target()
{
if (__f_ == 0)
return (_Tp*)0;
return (_Tp*) const_cast<void *>(__f_->target(typeid(_Tp)));
}
template<class _Rp, class _A0, class _A1, class _A2>
template <typename _Tp>
const _Tp*
function<_Rp(_A0, _A1, _A2)>::target() const
{
if (__f_ == 0)
return (const _Tp*)0;
return (const _Tp*)__f_->target(typeid(_Tp));
}
#endif // _LIBCPP_NO_RTTI
template <class _Fp>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const function<_Fp>& __f, nullptr_t) {return !__f;}
template <class _Fp>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(nullptr_t, const function<_Fp>& __f) {return !__f;}
template <class _Fp>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const function<_Fp>& __f, nullptr_t) {return (bool)__f;}
template <class _Fp>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(nullptr_t, const function<_Fp>& __f) {return (bool)__f;}
template <class _Fp>
inline _LIBCPP_INLINE_VISIBILITY
void
swap(function<_Fp>& __x, function<_Fp>& __y)
{return __x.swap(__y);}
#endif // _LIBCPP_CXX03_LANG
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___FUNCTIONAL_FUNCTION_H
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 296fb4220012..58d863776430 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1,2800 +1,2803 @@
//===- Driver.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The driver drives the entire linking process. It is responsible for
// parsing command line options and doing whatever it is instructed to do.
//
// One notable difference between LLD's driver and other linkers is
// that LLD's driver is agnostic about the host operating system.
// Other linkers usually have implicit default values (such as a dynamic
// linker path or library paths) for each host OS.
//
// I don't think implicit default values are useful because they are
// usually explicitly specified by the compiler driver. They can even
// be harmful when you are doing cross-linking. Therefore, in LLD, we
// simply trust the compiler driver to pass all required options and
// make no effort to guess them on our side.
//
//===----------------------------------------------------------------------===//
#include "Driver.h"
#include "Config.h"
#include "ICF.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "LinkerScript.h"
#include "MarkLive.h"
#include "OutputSections.h"
#include "ScriptParser.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Writer.h"
#include "lld/Common/Args.h"
#include "lld/Common/Driver.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Filesystem.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Strings.h"
#include "lld/Common/TargetOptionsCommandFlags.h"
#include "lld/Common/Version.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <utility>
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys;
using namespace llvm::support;
using namespace lld;
using namespace lld::elf;
std::unique_ptr<Configuration> elf::config;
std::unique_ptr<Ctx> elf::ctx;
std::unique_ptr<LinkerDriver> elf::driver;
static void setConfigs(opt::InputArgList &args);
static void readConfigs(opt::InputArgList &args);
void elf::errorOrWarn(const Twine &msg) {
if (config->noinhibitExec)
warn(msg);
else
error(msg);
}
bool elf::link(ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
llvm::raw_ostream &stderrOS, bool exitEarly,
bool disableOutput) {
// This driver-specific context will be freed later by lldMain().
auto *ctx = new CommonLinkerContext;
ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
ctx->e.cleanupCallback = []() {
inputSections.clear();
outputSections.clear();
symAux.clear();
tar = nullptr;
in.reset();
partitions.clear();
partitions.emplace_back();
SharedFile::vernauxNum = 0;
};
ctx->e.logName = args::getFilenameWithoutExe(args[0]);
ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now (use "
"--error-limit=0 to see all errors)";
config = std::make_unique<Configuration>();
elf::ctx = std::make_unique<Ctx>();
driver = std::make_unique<LinkerDriver>();
script = std::make_unique<LinkerScript>();
symtab = std::make_unique<SymbolTable>();
partitions.clear();
partitions.emplace_back();
config->progName = args[0];
driver->linkerMain(args);
return errorCount() == 0;
}
// Parses a linker -m option.
static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef emul) {
uint8_t osabi = 0;
StringRef s = emul;
if (s.endswith("_fbsd")) {
s = s.drop_back(5);
osabi = ELFOSABI_FREEBSD;
}
std::pair<ELFKind, uint16_t> ret =
StringSwitch<std::pair<ELFKind, uint16_t>>(s)
.Cases("aarch64elf", "aarch64linux", {ELF64LEKind, EM_AARCH64})
.Cases("aarch64elfb", "aarch64linuxb", {ELF64BEKind, EM_AARCH64})
.Cases("armelf", "armelf_linux_eabi", {ELF32LEKind, EM_ARM})
.Case("elf32_x86_64", {ELF32LEKind, EM_X86_64})
.Cases("elf32btsmip", "elf32btsmipn32", {ELF32BEKind, EM_MIPS})
.Cases("elf32ltsmip", "elf32ltsmipn32", {ELF32LEKind, EM_MIPS})
.Case("elf32lriscv", {ELF32LEKind, EM_RISCV})
.Cases("elf32ppc", "elf32ppclinux", {ELF32BEKind, EM_PPC})
.Cases("elf32lppc", "elf32lppclinux", {ELF32LEKind, EM_PPC})
.Case("elf64btsmip", {ELF64BEKind, EM_MIPS})
.Case("elf64ltsmip", {ELF64LEKind, EM_MIPS})
.Case("elf64lriscv", {ELF64LEKind, EM_RISCV})
.Case("elf64ppc", {ELF64BEKind, EM_PPC64})
.Case("elf64lppc", {ELF64LEKind, EM_PPC64})
.Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64})
.Case("elf_i386", {ELF32LEKind, EM_386})
.Case("elf_iamcu", {ELF32LEKind, EM_IAMCU})
.Case("elf64_sparc", {ELF64BEKind, EM_SPARCV9})
.Case("msp430elf", {ELF32LEKind, EM_MSP430})
.Default({ELFNoneKind, EM_NONE});
if (ret.first == ELFNoneKind)
error("unknown emulation: " + emul);
if (ret.second == EM_MSP430)
osabi = ELFOSABI_STANDALONE;
return std::make_tuple(ret.first, ret.second, osabi);
}
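// For example (following the table above):
//   -m elf_x86_64    -> (ELF64LEKind, EM_X86_64, osabi 0)
//   -m elf_i386_fbsd -> (ELF32LEKind, EM_386, ELFOSABI_FREEBSD)
//   -m msp430elf     -> (ELF32LEKind, EM_MSP430, ELFOSABI_STANDALONE)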
// Returns slices of MB by parsing MB as an archive file.
// Each slice consists of a member file in the archive.
std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers(
MemoryBufferRef mb) {
std::unique_ptr<Archive> file =
CHECK(Archive::create(mb),
mb.getBufferIdentifier() + ": failed to parse archive");
std::vector<std::pair<MemoryBufferRef, uint64_t>> v;
Error err = Error::success();
bool addToTar = file->isThin() && tar;
for (const Archive::Child &c : file->children(err)) {
MemoryBufferRef mbref =
CHECK(c.getMemoryBufferRef(),
mb.getBufferIdentifier() +
": could not get the buffer for a child of the archive");
if (addToTar)
tar->append(relativeToRoot(check(c.getFullName())), mbref.getBuffer());
v.push_back(std::make_pair(mbref, c.getChildOffset()));
}
if (err)
fatal(mb.getBufferIdentifier() + ": Archive::children failed: " +
toString(std::move(err)));
// Take ownership of memory buffers created for members of thin archives.
std::vector<std::unique_ptr<MemoryBuffer>> mbs = file->takeThinBuffers();
std::move(mbs.begin(), mbs.end(), std::back_inserter(ctx->memoryBuffers));
return v;
}
static bool isBitcode(MemoryBufferRef mb) {
return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
}
// Opens a file and creates a file object. The path has to be resolved already.
void LinkerDriver::addFile(StringRef path, bool withLOption) {
using namespace sys::fs;
Optional<MemoryBufferRef> buffer = readFile(path);
if (!buffer)
return;
MemoryBufferRef mbref = *buffer;
if (config->formatBinary) {
files.push_back(make<BinaryFile>(mbref));
return;
}
switch (identify_magic(mbref.getBuffer())) {
case file_magic::unknown:
readLinkerScript(mbref);
return;
case file_magic::archive: {
if (inWholeArchive) {
for (const auto &p : getArchiveMembers(mbref)) {
if (isBitcode(p.first))
files.push_back(make<BitcodeFile>(p.first, path, p.second, false));
else
files.push_back(createObjFile(p.first, path));
}
return;
}
auto members = getArchiveMembers(mbref);
archiveFiles.emplace_back(path, members.size());
// Handle archives and --start-lib/--end-lib using the same code path. This
// scans all the ELF relocatable object files and bitcode files in the
// archive rather than just the index file, with the benefit that the
// symbols are only loaded once. For many projects, archives see high
// utilization rates, and this is a net performance win. --start-lib scans
// symbols in the same order that llvm-ar adds them to the index, so in the
// common case the semantics are identical. If the archive symbol table was
// created in a different order, or is incomplete, this strategy has
// different semantics. Such output differences are considered user error.
//
// All files within the archive get the same group ID to allow mutual
// references for --warn-backrefs.
bool saved = InputFile::isInGroup;
InputFile::isInGroup = true;
for (const std::pair<MemoryBufferRef, uint64_t> &p : members) {
auto magic = identify_magic(p.first.getBuffer());
if (magic == file_magic::elf_relocatable)
files.push_back(createObjFile(p.first, path, true));
else if (magic == file_magic::bitcode)
files.push_back(make<BitcodeFile>(p.first, path, p.second, true));
else
warn(path + ": archive member '" + p.first.getBufferIdentifier() +
"' is neither ET_REL nor LLVM bitcode");
}
InputFile::isInGroup = saved;
if (!saved)
++InputFile::nextGroupId;
return;
}
case file_magic::elf_shared_object:
if (config->isStatic || config->relocatable) {
error("attempted static link of dynamic object " + path);
return;
}
// Shared objects are identified by soname. soname is (if specified)
// DT_SONAME and falls back to filename. If a file was specified by -lfoo,
// the directory part is ignored. Note that path may be a temporary and
// cannot be stored into SharedFile::soName.
path = mbref.getBufferIdentifier();
files.push_back(
make<SharedFile>(mbref, withLOption ? path::filename(path) : path));
return;
case file_magic::bitcode:
files.push_back(make<BitcodeFile>(mbref, "", 0, inLib));
break;
case file_magic::elf_relocatable:
files.push_back(createObjFile(mbref, "", inLib));
break;
default:
error(path + ": unknown file type");
}
}
// Add a given library by searching it from input search paths.
void LinkerDriver::addLibrary(StringRef name) {
if (Optional<std::string> path = searchLibrary(name))
addFile(saver().save(*path), /*withLOption=*/true);
else
error("unable to find library -l" + name, ErrorTag::LibNotFound, {name});
}
// This function is called on startup. We need this for LTO since
// LTO calls LLVM functions to compile bitcode files to native code.
// Technically this can be delayed until we read bitcode files, but
// we don't bother to do it lazily because the initialization is fast.
static void initLLVM() {
InitializeAllTargets();
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
InitializeAllAsmParsers();
}
// Some command line options or some combinations of them are not allowed.
// This function checks for such errors.
static void checkOptions() {
// The MIPS ABI as of 2016 does not support the GNU-style symbol lookup
// table which is a relatively new feature.
if (config->emachine == EM_MIPS && config->gnuHash)
error("the .gnu.hash section is not compatible with the MIPS target");
if (config->fixCortexA53Errata843419 && config->emachine != EM_AARCH64)
error("--fix-cortex-a53-843419 is only supported on AArch64 targets");
if (config->fixCortexA8 && config->emachine != EM_ARM)
error("--fix-cortex-a8 is only supported on ARM targets");
if (config->tocOptimize && config->emachine != EM_PPC64)
error("--toc-optimize is only supported on PowerPC64 targets");
if (config->pcRelOptimize && config->emachine != EM_PPC64)
error("--pcrel-optimize is only supported on PowerPC64 targets");
if (config->pie && config->shared)
error("-shared and -pie may not be used together");
if (!config->shared && !config->filterList.empty())
error("-F may not be used without -shared");
if (!config->shared && !config->auxiliaryList.empty())
error("-f may not be used without -shared");
if (config->strip == StripPolicy::All && config->emitRelocs)
error("--strip-all and --emit-relocs may not be used together");
if (config->zText && config->zIfuncNoplt)
error("-z text and -z ifunc-noplt may not be used together");
if (config->relocatable) {
if (config->shared)
error("-r and -shared may not be used together");
if (config->gdbIndex)
error("-r and --gdb-index may not be used together");
if (config->icf != ICFLevel::None)
error("-r and --icf may not be used together");
if (config->pie)
error("-r and -pie may not be used together");
if (config->exportDynamic)
error("-r and --export-dynamic may not be used together");
}
if (config->executeOnly) {
if (config->emachine != EM_AARCH64)
error("--execute-only is only supported on AArch64 targets");
if (config->singleRoRx && !script->hasSectionsCommand)
error("--execute-only and --no-rosegment cannot be used together");
}
if (config->zRetpolineplt && config->zForceIbt)
error("-z force-ibt may not be used with -z retpolineplt");
if (config->emachine != EM_AARCH64) {
if (config->zPacPlt)
error("-z pac-plt only supported on AArch64");
if (config->zForceBti)
error("-z force-bti only supported on AArch64");
if (config->zBtiReport != "none")
error("-z bti-report only supported on AArch64");
}
if (config->emachine != EM_386 && config->emachine != EM_X86_64 &&
config->zCetReport != "none")
error("-z cet-report only supported on X86 and X86_64");
}
static const char *getReproduceOption(opt::InputArgList &args) {
if (auto *arg = args.getLastArg(OPT_reproduce))
return arg->getValue();
return getenv("LLD_REPRODUCE");
}
static bool hasZOption(opt::InputArgList &args, StringRef key) {
for (auto *arg : args.filtered(OPT_z))
if (key == arg->getValue())
return true;
return false;
}
static bool getZFlag(opt::InputArgList &args, StringRef k1, StringRef k2,
bool Default) {
for (auto *arg : args.filtered_reverse(OPT_z)) {
if (k1 == arg->getValue())
return true;
if (k2 == arg->getValue())
return false;
}
return Default;
}
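// For example, getZFlag(args, "text", "notext", false) walks the -z
// values from last to first, so "-z text -z notext" returns false (the
// later "notext" wins) and a command line mentioning neither returns the
// default.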
static SeparateSegmentKind getZSeparate(opt::InputArgList &args) {
for (auto *arg : args.filtered_reverse(OPT_z)) {
StringRef v = arg->getValue();
if (v == "noseparate-code")
return SeparateSegmentKind::None;
if (v == "separate-code")
return SeparateSegmentKind::Code;
if (v == "separate-loadable-segments")
return SeparateSegmentKind::Loadable;
}
return SeparateSegmentKind::None;
}
static GnuStackKind getZGnuStack(opt::InputArgList &args) {
for (auto *arg : args.filtered_reverse(OPT_z)) {
if (StringRef("execstack") == arg->getValue())
return GnuStackKind::Exec;
if (StringRef("noexecstack") == arg->getValue())
return GnuStackKind::NoExec;
if (StringRef("nognustack") == arg->getValue())
return GnuStackKind::None;
}
return GnuStackKind::NoExec;
}
static uint8_t getZStartStopVisibility(opt::InputArgList &args) {
for (auto *arg : args.filtered_reverse(OPT_z)) {
std::pair<StringRef, StringRef> kv = StringRef(arg->getValue()).split('=');
if (kv.first == "start-stop-visibility") {
if (kv.second == "default")
return STV_DEFAULT;
else if (kv.second == "internal")
return STV_INTERNAL;
else if (kv.second == "hidden")
return STV_HIDDEN;
else if (kv.second == "protected")
return STV_PROTECTED;
error("unknown -z start-stop-visibility= value: " + StringRef(kv.second));
}
}
return STV_PROTECTED;
}
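// For example, "-z start-stop-visibility=hidden" returns STV_HIDDEN;
// when the option is absent, the __start_*/__stop_* section symbols get
// the STV_PROTECTED default.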
constexpr const char *knownZFlags[] = {
"combreloc",
"copyreloc",
"defs",
"execstack",
"force-bti",
"force-ibt",
"global",
"hazardplt",
"ifunc-noplt",
"initfirst",
"interpose",
"keep-text-section-prefix",
"lazy",
"muldefs",
"nocombreloc",
"nocopyreloc",
"nodefaultlib",
"nodelete",
"nodlopen",
"noexecstack",
"nognustack",
"nokeep-text-section-prefix",
"nopack-relative-relocs",
"norelro",
"noseparate-code",
"nostart-stop-gc",
"notext",
"now",
"origin",
"pac-plt",
"pack-relative-relocs",
"rel",
"rela",
"relro",
"retpolineplt",
"rodynamic",
"separate-code",
"separate-loadable-segments",
"shstk",
"start-stop-gc",
"text",
"undefs",
"wxneeded",
};
static bool isKnownZFlag(StringRef s) {
return llvm::is_contained(knownZFlags, s) ||
s.startswith("common-page-size=") || s.startswith("bti-report=") ||
s.startswith("cet-report=") ||
s.startswith("dead-reloc-in-nonalloc=") ||
s.startswith("max-page-size=") || s.startswith("stack-size=") ||
s.startswith("start-stop-visibility=");
}
// Report a warning for an unknown -z option.
static void checkZOptions(opt::InputArgList &args) {
for (auto *arg : args.filtered(OPT_z))
if (!isKnownZFlag(arg->getValue()))
warn("unknown -z value: " + StringRef(arg->getValue()));
}
constexpr const char *saveTempsValues[] = {
"resolution", "preopt", "promote", "internalize", "import",
"opt", "precodegen", "prelink", "combinedindex"};
void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
ELFOptTable parser;
opt::InputArgList args = parser.parse(argsArr.slice(1));
// Interpret these flags early because error()/warn() depend on them.
errorHandler().errorLimit = args::getInteger(args, OPT_error_limit, 20);
errorHandler().fatalWarnings =
args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false);
checkZOptions(args);
// Handle -help
if (args.hasArg(OPT_help)) {
printHelp();
return;
}
// Handle -v or -version.
//
// A note about "compatible with GNU linkers" message: this is a hack for
// scripts generated by GNU Libtool up to 2021-10 to recognize LLD as
// a GNU compatible linker. See
// <https://lists.gnu.org/archive/html/libtool/2017-01/msg00007.html>.
//
// This is a somewhat ugly hack, but in reality, we had no choice other
// than doing this. Considering the very long release cycle of Libtool,
// it is not easy to improve it to recognize LLD as a GNU-compatible
// linker in a timely manner. Even if we could, there are still a lot
// of "configure" scripts out there that were generated by old versions
// of Libtool. We cannot convince every software developer to migrate to
// the latest version and regenerate their scripts. So we have this hack.
if (args.hasArg(OPT_v) || args.hasArg(OPT_version))
message(getLLDVersion() + " (compatible with GNU linkers)");
if (const char *path = getReproduceOption(args)) {
// Note that --reproduce is a debug option so you can ignore it
// if you are trying to understand the whole picture of the code.
Expected<std::unique_ptr<TarWriter>> errOrWriter =
TarWriter::create(path, path::stem(path));
if (errOrWriter) {
tar = std::move(*errOrWriter);
tar->append("response.txt", createResponseFile(args));
tar->append("version.txt", getLLDVersion() + "\n");
StringRef ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
if (!ltoSampleProfile.empty())
readFile(ltoSampleProfile);
} else {
error("--reproduce: " + toString(errOrWriter.takeError()));
}
}
readConfigs(args);
// The behavior of -v or --version is a bit strange, but this is
// needed for compatibility with GNU linkers.
if (args.hasArg(OPT_v) && !args.hasArg(OPT_INPUT))
return;
if (args.hasArg(OPT_version))
return;
// Initialize time trace profiler.
if (config->timeTraceEnabled)
timeTraceProfilerInitialize(config->timeTraceGranularity, config->progName);
{
llvm::TimeTraceScope timeScope("ExecuteLinker");
initLLVM();
createFiles(args);
if (errorCount())
return;
inferMachineType();
setConfigs(args);
checkOptions();
if (errorCount())
return;
link(args);
}
if (config->timeTraceEnabled) {
checkError(timeTraceProfilerWrite(
args.getLastArgValue(OPT_time_trace_eq).str(), config->outputFile));
timeTraceProfilerCleanup();
}
}
static std::string getRpath(opt::InputArgList &args) {
std::vector<StringRef> v = args::getStrings(args, OPT_rpath);
return llvm::join(v.begin(), v.end(), ":");
}
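// For example, "-rpath /usr/local/lib -rpath /opt/lib" collapses into
// the single search-path string "/usr/local/lib:/opt/lib".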
// Determines what we should do if there are remaining unresolved
// symbols after name resolution.
static void setUnresolvedSymbolPolicy(opt::InputArgList &args) {
UnresolvedPolicy errorOrWarn = args.hasFlag(OPT_error_unresolved_symbols,
OPT_warn_unresolved_symbols, true)
? UnresolvedPolicy::ReportError
: UnresolvedPolicy::Warn;
// -shared implies --unresolved-symbols=ignore-all because missing
// symbols are likely to be resolved at runtime.
bool diagRegular = !config->shared, diagShlib = !config->shared;
for (const opt::Arg *arg : args) {
switch (arg->getOption().getID()) {
case OPT_unresolved_symbols: {
StringRef s = arg->getValue();
if (s == "ignore-all") {
diagRegular = false;
diagShlib = false;
} else if (s == "ignore-in-object-files") {
diagRegular = false;
diagShlib = true;
} else if (s == "ignore-in-shared-libs") {
diagRegular = true;
diagShlib = false;
} else if (s == "report-all") {
diagRegular = true;
diagShlib = true;
} else {
error("unknown --unresolved-symbols value: " + s);
}
break;
}
case OPT_no_undefined:
diagRegular = true;
break;
case OPT_z:
if (StringRef(arg->getValue()) == "defs")
diagRegular = true;
else if (StringRef(arg->getValue()) == "undefs")
diagRegular = false;
break;
case OPT_allow_shlib_undefined:
diagShlib = false;
break;
case OPT_no_allow_shlib_undefined:
diagShlib = true;
break;
}
}
config->unresolvedSymbols =
diagRegular ? errorOrWarn : UnresolvedPolicy::Ignore;
config->unresolvedSymbolsInShlib =
diagShlib ? errorOrWarn : UnresolvedPolicy::Ignore;
}
static Target2Policy getTarget2(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_target2, "got-rel");
if (s == "rel")
return Target2Policy::Rel;
if (s == "abs")
return Target2Policy::Abs;
if (s == "got-rel")
return Target2Policy::GotRel;
error("unknown --target2 option: " + s);
return Target2Policy::GotRel;
}
static bool isOutputFormatBinary(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_oformat, "elf");
if (s == "binary")
return true;
if (!s.startswith("elf"))
error("unknown --oformat value: " + s);
return false;
}
static DiscardPolicy getDiscard(opt::InputArgList &args) {
auto *arg =
args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none);
if (!arg)
return DiscardPolicy::Default;
if (arg->getOption().getID() == OPT_discard_all)
return DiscardPolicy::All;
if (arg->getOption().getID() == OPT_discard_locals)
return DiscardPolicy::Locals;
return DiscardPolicy::None;
}
static StringRef getDynamicLinker(opt::InputArgList &args) {
auto *arg = args.getLastArg(OPT_dynamic_linker, OPT_no_dynamic_linker);
if (!arg)
return "";
if (arg->getOption().getID() == OPT_no_dynamic_linker) {
// --no-dynamic-linker suppresses undefined weak symbols in .dynsym
config->noDynamicLinker = true;
return "";
}
return arg->getValue();
}
static int getMemtagMode(opt::InputArgList &args) {
StringRef memtagModeArg = args.getLastArgValue(OPT_android_memtag_mode);
if (!config->androidMemtagHeap && !config->androidMemtagStack) {
if (!memtagModeArg.empty())
error("when using --android-memtag-mode, at least one of "
"--android-memtag-heap or "
"--android-memtag-stack is required");
return ELF::NT_MEMTAG_LEVEL_NONE;
}
if (memtagModeArg == "sync" || memtagModeArg.empty())
return ELF::NT_MEMTAG_LEVEL_SYNC;
if (memtagModeArg == "async")
return ELF::NT_MEMTAG_LEVEL_ASYNC;
if (memtagModeArg == "none")
return ELF::NT_MEMTAG_LEVEL_NONE;
error("unknown --android-memtag-mode value: \"" + memtagModeArg +
"\", should be one of {async, sync, none}");
return ELF::NT_MEMTAG_LEVEL_NONE;
}
static ICFLevel getICF(opt::InputArgList &args) {
auto *arg = args.getLastArg(OPT_icf_none, OPT_icf_safe, OPT_icf_all);
if (!arg || arg->getOption().getID() == OPT_icf_none)
return ICFLevel::None;
if (arg->getOption().getID() == OPT_icf_safe)
return ICFLevel::Safe;
return ICFLevel::All;
}
static StripPolicy getStrip(opt::InputArgList &args) {
if (args.hasArg(OPT_relocatable))
return StripPolicy::None;
auto *arg = args.getLastArg(OPT_strip_all, OPT_strip_debug);
if (!arg)
return StripPolicy::None;
if (arg->getOption().getID() == OPT_strip_all)
return StripPolicy::All;
return StripPolicy::Debug;
}
static uint64_t parseSectionAddress(StringRef s, opt::InputArgList &args,
const opt::Arg &arg) {
uint64_t va = 0;
if (s.startswith("0x"))
s = s.drop_front(2);
if (!to_integer(s, va, 16))
error("invalid argument: " + arg.getAsString(args));
return va;
}
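// Parses --section-start and the -Ttext/-Tdata/-Tbss shorthands into a map
// from section name to virtual address. For example (addresses are
// illustrative), --section-start=.rodata=0x200000 places .rodata at
// 0x200000, and -Ttext=0x100000 has the same effect as
// --section-start=.text=0x100000. Addresses are parsed as hexadecimal,
// with or without a leading "0x".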
static StringMap<uint64_t> getSectionStartMap(opt::InputArgList &args) {
StringMap<uint64_t> ret;
for (auto *arg : args.filtered(OPT_section_start)) {
StringRef name;
StringRef addr;
std::tie(name, addr) = StringRef(arg->getValue()).split('=');
ret[name] = parseSectionAddress(addr, args, *arg);
}
if (auto *arg = args.getLastArg(OPT_Ttext))
ret[".text"] = parseSectionAddress(arg->getValue(), args, *arg);
if (auto *arg = args.getLastArg(OPT_Tdata))
ret[".data"] = parseSectionAddress(arg->getValue(), args, *arg);
if (auto *arg = args.getLastArg(OPT_Tbss))
ret[".bss"] = parseSectionAddress(arg->getValue(), args, *arg);
return ret;
}
static SortSectionPolicy getSortSection(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_sort_section);
if (s == "alignment")
return SortSectionPolicy::Alignment;
if (s == "name")
return SortSectionPolicy::Name;
if (!s.empty())
error("unknown --sort-section rule: " + s);
return SortSectionPolicy::Default;
}
static OrphanHandlingPolicy getOrphanHandling(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_orphan_handling, "place");
if (s == "warn")
return OrphanHandlingPolicy::Warn;
if (s == "error")
return OrphanHandlingPolicy::Error;
if (s != "place")
error("unknown --orphan-handling mode: " + s);
return OrphanHandlingPolicy::Place;
}
// Parse --build-id or --build-id=<style>. We handle "tree" as a
// synonym for "sha1" because all our hash functions including
// --build-id=sha1 are actually tree hashes for performance reasons.
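// For example, --build-id=sha1 computes a (tree-hashed) SHA-1 build ID,
// and --build-id=0x12345678 (an illustrative hex string) embeds the given
// bytes verbatim.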
static std::pair<BuildIdKind, std::vector<uint8_t>>
getBuildId(opt::InputArgList &args) {
auto *arg = args.getLastArg(OPT_build_id);
if (!arg)
return {BuildIdKind::None, {}};
StringRef s = arg->getValue();
if (s == "fast")
return {BuildIdKind::Fast, {}};
if (s == "md5")
return {BuildIdKind::Md5, {}};
if (s == "sha1" || s == "tree")
return {BuildIdKind::Sha1, {}};
if (s == "uuid")
return {BuildIdKind::Uuid, {}};
if (s.startswith("0x"))
return {BuildIdKind::Hexstring, parseHex(s.substr(2))};
if (s != "none")
error("unknown --build-id style: " + s);
return {BuildIdKind::None, {}};
}
static std::pair<bool, bool> getPackDynRelocs(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_pack_dyn_relocs, "none");
if (s == "android")
return {true, false};
if (s == "relr")
return {false, true};
if (s == "android+relr")
return {true, true};
if (s != "none")
error("unknown --pack-dyn-relocs format: " + s);
return {false, false};
}
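// Reads a call graph profile file (--call-graph-ordering-file). Each
// non-empty line must contain exactly three space-separated fields:
//
//   <from-symbol> <to-symbol> <count>
//
// For example (symbol names are illustrative):
//
//   main foo 100
//   foo bar 32
//
// Counts for repeated (from, to) section pairs accumulate.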
static void readCallGraph(MemoryBufferRef mb) {
// Build a map from symbol name to symbol
DenseMap<StringRef, Symbol *> map;
for (ELFFileBase *file : ctx->objectFiles)
for (Symbol *sym : file->getSymbols())
map[sym->getName()] = sym;
auto findSection = [&](StringRef name) -> InputSectionBase * {
Symbol *sym = map.lookup(name);
if (!sym) {
if (config->warnSymbolOrdering)
warn(mb.getBufferIdentifier() + ": no such symbol: " + name);
return nullptr;
}
maybeWarnUnorderableSymbol(sym);
if (Defined *dr = dyn_cast_or_null<Defined>(sym))
return dyn_cast_or_null<InputSectionBase>(dr->section);
return nullptr;
};
for (StringRef line : args::getLines(mb)) {
SmallVector<StringRef, 3> fields;
line.split(fields, ' ');
uint64_t count;
if (fields.size() != 3 || !to_integer(fields[2], count)) {
error(mb.getBufferIdentifier() + ": parse error");
return;
}
if (InputSectionBase *from = findSection(fields[0]))
if (InputSectionBase *to = findSection(fields[1]))
config->callGraphProfile[std::make_pair(from, to)] += count;
}
}
// If SHT_LLVM_CALL_GRAPH_PROFILE and its relocation section exist, returns
// true and populates cgProfile and symbolIndices.
template <class ELFT>
static bool
processCallGraphRelocations(SmallVector<uint32_t, 32> &symbolIndices,
ArrayRef<typename ELFT::CGProfile> &cgProfile,
ObjFile<ELFT> *inputObj) {
if (inputObj->cgProfileSectionIndex == SHN_UNDEF)
return false;
ArrayRef<Elf_Shdr_Impl<ELFT>> objSections =
inputObj->template getELFShdrs<ELFT>();
symbolIndices.clear();
const ELFFile<ELFT> &obj = inputObj->getObj();
cgProfile =
check(obj.template getSectionContentsAsArray<typename ELFT::CGProfile>(
objSections[inputObj->cgProfileSectionIndex]));
for (size_t i = 0, e = objSections.size(); i < e; ++i) {
const Elf_Shdr_Impl<ELFT> &sec = objSections[i];
if (sec.sh_info == inputObj->cgProfileSectionIndex) {
if (sec.sh_type == SHT_RELA) {
ArrayRef<typename ELFT::Rela> relas =
CHECK(obj.relas(sec), "could not retrieve cg profile rela section");
for (const typename ELFT::Rela &rel : relas)
symbolIndices.push_back(rel.getSymbol(config->isMips64EL));
break;
}
if (sec.sh_type == SHT_REL) {
ArrayRef<typename ELFT::Rel> rels =
CHECK(obj.rels(sec), "could not retrieve cg profile rel section");
for (const typename ELFT::Rel &rel : rels)
symbolIndices.push_back(rel.getSymbol(config->isMips64EL));
break;
}
}
}
if (symbolIndices.empty())
warn("SHT_LLVM_CALL_GRAPH_PROFILE exists, but relocation section doesn't");
return !symbolIndices.empty();
}
template <class ELFT> static void readCallGraphsFromObjectFiles() {
SmallVector<uint32_t, 32> symbolIndices;
ArrayRef<typename ELFT::CGProfile> cgProfile;
for (auto file : ctx->objectFiles) {
auto *obj = cast<ObjFile<ELFT>>(file);
if (!processCallGraphRelocations(symbolIndices, cgProfile, obj))
continue;
if (symbolIndices.size() != cgProfile.size() * 2)
fatal("number of relocations doesn't match Weights");
for (uint32_t i = 0, size = cgProfile.size(); i < size; ++i) {
const Elf_CGProfile_Impl<ELFT> &cgpe = cgProfile[i];
uint32_t fromIndex = symbolIndices[i * 2];
uint32_t toIndex = symbolIndices[i * 2 + 1];
auto *fromSym = dyn_cast<Defined>(&obj->getSymbol(fromIndex));
auto *toSym = dyn_cast<Defined>(&obj->getSymbol(toIndex));
if (!fromSym || !toSym)
continue;
auto *from = dyn_cast_or_null<InputSectionBase>(fromSym->section);
auto *to = dyn_cast_or_null<InputSectionBase>(toSym->section);
if (from && to)
config->callGraphProfile[{from, to}] += cgpe.cgp_weight;
}
}
}
static bool getCompressDebugSections(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_compress_debug_sections, "none");
if (s == "none")
return false;
if (s != "zlib")
error("unknown --compress-debug-sections value: " + s);
if (!compression::zlib::isAvailable())
error("--compress-debug-sections: zlib is not available");
return true;
}
static StringRef getAliasSpelling(opt::Arg *arg) {
if (const opt::Arg *alias = arg->getAlias())
return alias->getSpelling();
return arg->getSpelling();
}
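// Parses an option whose value must have the form "<old>;<new>", e.g.
// --thinlto-prefix-replace='old/;new/' (the paths are illustrative).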
static std::pair<StringRef, StringRef> getOldNewOptions(opt::InputArgList &args,
unsigned id) {
auto *arg = args.getLastArg(id);
if (!arg)
return {"", ""};
StringRef s = arg->getValue();
std::pair<StringRef, StringRef> ret = s.split(';');
if (ret.second.empty())
error(getAliasSpelling(arg) + " expects 'old;new' format, but got " + s);
return ret;
}
// Parse the symbol ordering file and warn for any duplicate entries.
static std::vector<StringRef> getSymbolOrderingFile(MemoryBufferRef mb) {
SetVector<StringRef> names;
for (StringRef s : args::getLines(mb))
if (!names.insert(s) && config->warnSymbolOrdering)
warn(mb.getBufferIdentifier() + ": duplicate ordered symbol: " + s);
return names.takeVector();
}
static bool getIsRela(opt::InputArgList &args) {
// If -z rel or -z rela is specified, use the last option.
for (auto *arg : args.filtered_reverse(OPT_z)) {
StringRef s(arg->getValue());
if (s == "rel")
return false;
if (s == "rela")
return true;
}
// Otherwise, use the psABI-defined relocation entry format.
uint16_t m = config->emachine;
return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || m == EM_PPC ||
m == EM_PPC64 || m == EM_RISCV || m == EM_X86_64;
}
static void parseClangOption(StringRef opt, const Twine &msg) {
std::string err;
raw_string_ostream os(err);
const char *argv[] = {config->progName.data(), opt.data()};
if (cl::ParseCommandLineOptions(2, argv, "", &os))
return;
os.flush();
error(msg + ": " + StringRef(err).trim());
}
// Checks the parameter of the bti-report and cet-report options.
static bool isValidReportString(StringRef arg) {
return arg == "none" || arg == "warning" || arg == "error";
}
// Initializes Config members from the command line options.
static void readConfigs(opt::InputArgList &args) {
errorHandler().verbose = args.hasArg(OPT_verbose);
errorHandler().vsDiagnostics =
args.hasArg(OPT_visual_studio_diagnostics_format, false);
config->allowMultipleDefinition =
args.hasFlag(OPT_allow_multiple_definition,
OPT_no_allow_multiple_definition, false) ||
hasZOption(args, "muldefs");
config->androidMemtagHeap =
args.hasFlag(OPT_android_memtag_heap, OPT_no_android_memtag_heap, false);
config->androidMemtagStack = args.hasFlag(OPT_android_memtag_stack,
OPT_no_android_memtag_stack, false);
config->androidMemtagMode = getMemtagMode(args);
config->auxiliaryList = args::getStrings(args, OPT_auxiliary);
if (opt::Arg *arg =
args.getLastArg(OPT_Bno_symbolic, OPT_Bsymbolic_non_weak_functions,
OPT_Bsymbolic_functions, OPT_Bsymbolic)) {
if (arg->getOption().matches(OPT_Bsymbolic_non_weak_functions))
config->bsymbolic = BsymbolicKind::NonWeakFunctions;
else if (arg->getOption().matches(OPT_Bsymbolic_functions))
config->bsymbolic = BsymbolicKind::Functions;
else if (arg->getOption().matches(OPT_Bsymbolic))
config->bsymbolic = BsymbolicKind::All;
}
config->checkSections =
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
config->chroot = args.getLastArgValue(OPT_chroot);
config->compressDebugSections = getCompressDebugSections(args);
config->cref = args.hasArg(OPT_cref);
config->optimizeBBJumps =
args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false);
config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true);
config->dependencyFile = args.getLastArgValue(OPT_dependency_file);
config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
config->disableVerify = args.hasArg(OPT_disable_verify);
config->discard = getDiscard(args);
config->dwoDir = args.getLastArgValue(OPT_plugin_opt_dwo_dir_eq);
config->dynamicLinker = getDynamicLinker(args);
config->ehFrameHdr =
args.hasFlag(OPT_eh_frame_hdr, OPT_no_eh_frame_hdr, false);
config->emitLLVM = args.hasArg(OPT_plugin_opt_emit_llvm, false);
config->emitRelocs = args.hasArg(OPT_emit_relocs);
config->callGraphProfileSort = args.hasFlag(
OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
config->enableNewDtags =
args.hasFlag(OPT_enable_new_dtags, OPT_disable_new_dtags, true);
config->entry = args.getLastArgValue(OPT_entry);
errorHandler().errorHandlingScript =
args.getLastArgValue(OPT_error_handling_script);
config->executeOnly =
args.hasFlag(OPT_execute_only, OPT_no_execute_only, false);
config->exportDynamic =
args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, false) ||
args.hasArg(OPT_shared);
config->filterList = args::getStrings(args, OPT_filter);
config->fini = args.getLastArgValue(OPT_fini, "_fini");
config->fixCortexA53Errata843419 = args.hasArg(OPT_fix_cortex_a53_843419) &&
!args.hasArg(OPT_relocatable);
config->fixCortexA8 =
args.hasArg(OPT_fix_cortex_a8) && !args.hasArg(OPT_relocatable);
config->fortranCommon =
args.hasFlag(OPT_fortran_common, OPT_no_fortran_common, false);
config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
config->gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);
config->icf = getICF(args);
config->ignoreDataAddressEquality =
args.hasArg(OPT_ignore_data_address_equality);
config->ignoreFunctionAddressEquality =
args.hasArg(OPT_ignore_function_address_equality);
config->init = args.getLastArgValue(OPT_init, "_init");
config->ltoAAPipeline = args.getLastArgValue(OPT_lto_aa_pipeline);
config->ltoCSProfileGenerate = args.hasArg(OPT_lto_cs_profile_generate);
config->ltoCSProfileFile = args.getLastArgValue(OPT_lto_cs_profile_file);
config->ltoPGOWarnMismatch = args.hasFlag(OPT_lto_pgo_warn_mismatch,
OPT_no_lto_pgo_warn_mismatch, true);
config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager);
config->ltoEmitAsm = args.hasArg(OPT_lto_emit_asm);
config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes);
config->ltoWholeProgramVisibility =
args.hasFlag(OPT_lto_whole_program_visibility,
OPT_no_lto_whole_program_visibility, false);
config->ltoo = args::getInteger(args, OPT_lto_O, 2);
config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
config->ltoBasicBlockSections =
args.getLastArgValue(OPT_lto_basic_block_sections);
config->ltoUniqueBasicBlockSectionNames =
args.hasFlag(OPT_lto_unique_basic_block_section_names,
OPT_no_lto_unique_basic_block_section_names, false);
config->mapFile = args.getLastArgValue(OPT_Map);
config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0);
config->mergeArmExidx =
args.hasFlag(OPT_merge_exidx_entries, OPT_no_merge_exidx_entries, true);
config->mmapOutputFile =
args.hasFlag(OPT_mmap_output_file, OPT_no_mmap_output_file, true);
config->nmagic = args.hasFlag(OPT_nmagic, OPT_no_nmagic, false);
config->noinhibitExec = args.hasArg(OPT_noinhibit_exec);
config->nostdlib = args.hasArg(OPT_nostdlib);
config->oFormatBinary = isOutputFormatBinary(args);
config->omagic = args.hasFlag(OPT_omagic, OPT_no_omagic, false);
config->opaquePointers = args.hasFlag(
OPT_plugin_opt_opaque_pointers, OPT_plugin_opt_no_opaque_pointers, true);
config->optRemarksFilename = args.getLastArgValue(OPT_opt_remarks_filename);
config->optStatsFilename = args.getLastArgValue(OPT_plugin_opt_stats_file);
// Parse the remarks hotness threshold. A valid value is an integer or 'auto'.
if (auto *arg = args.getLastArg(OPT_opt_remarks_hotness_threshold)) {
auto resultOrErr = remarks::parseHotnessThresholdOption(arg->getValue());
if (!resultOrErr)
error(arg->getSpelling() + ": invalid argument '" + arg->getValue() +
"', only integer or 'auto' is supported");
else
config->optRemarksHotnessThreshold = *resultOrErr;
}
config->optRemarksPasses = args.getLastArgValue(OPT_opt_remarks_passes);
config->optRemarksWithHotness = args.hasArg(OPT_opt_remarks_with_hotness);
config->optRemarksFormat = args.getLastArgValue(OPT_opt_remarks_format);
config->optimize = args::getInteger(args, OPT_O, 1);
config->orphanHandling = getOrphanHandling(args);
config->outputFile = args.getLastArgValue(OPT_o);
config->packageMetadata = args.getLastArgValue(OPT_package_metadata);
config->pie = args.hasFlag(OPT_pie, OPT_no_pie, false);
config->printIcfSections =
args.hasFlag(OPT_print_icf_sections, OPT_no_print_icf_sections, false);
config->printGcSections =
args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false);
config->printArchiveStats = args.getLastArgValue(OPT_print_archive_stats);
config->printSymbolOrder =
args.getLastArgValue(OPT_print_symbol_order);
config->relax = args.hasFlag(OPT_relax, OPT_no_relax, true);
config->rpath = getRpath(args);
config->relocatable = args.hasArg(OPT_relocatable);
if (args.hasArg(OPT_save_temps)) {
// --save-temps implies saving all temps.
for (const char *s : saveTempsValues)
config->saveTempsArgs.insert(s);
} else {
for (auto *arg : args.filtered(OPT_save_temps_eq)) {
StringRef s = arg->getValue();
if (llvm::is_contained(saveTempsValues, s))
config->saveTempsArgs.insert(s);
else
error("unknown --save-temps value: " + s);
}
}
config->searchPaths = args::getStrings(args, OPT_library_path);
config->sectionStartMap = getSectionStartMap(args);
config->shared = args.hasArg(OPT_shared);
config->singleRoRx = !args.hasFlag(OPT_rosegment, OPT_no_rosegment, true);
config->soName = args.getLastArgValue(OPT_soname);
config->sortSection = getSortSection(args);
config->splitStackAdjustSize = args::getInteger(args, OPT_split_stack_adjust_size, 16384);
config->strip = getStrip(args);
config->sysroot = args.getLastArgValue(OPT_sysroot);
config->target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false);
config->target2 = getTarget2(args);
config->thinLTOCacheDir = args.getLastArgValue(OPT_thinlto_cache_dir);
config->thinLTOCachePolicy = CHECK(
parseCachePruningPolicy(args.getLastArgValue(OPT_thinlto_cache_policy)),
"--thinlto-cache-policy: invalid cache policy");
config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files);
config->thinLTOEmitIndexFiles = args.hasArg(OPT_thinlto_emit_index_files) ||
args.hasArg(OPT_thinlto_index_only) ||
args.hasArg(OPT_thinlto_index_only_eq);
config->thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) ||
args.hasArg(OPT_thinlto_index_only_eq);
config->thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_eq);
config->thinLTOObjectSuffixReplace =
getOldNewOptions(args, OPT_thinlto_object_suffix_replace_eq);
config->thinLTOPrefixReplace =
getOldNewOptions(args, OPT_thinlto_prefix_replace_eq);
if (config->thinLTOEmitIndexFiles && !config->thinLTOIndexOnly) {
if (args.hasArg(OPT_thinlto_object_suffix_replace_eq))
error("--thinlto-object-suffix-replace is not supported with "
"--thinlto-emit-index-files");
else if (args.hasArg(OPT_thinlto_prefix_replace_eq))
error("--thinlto-prefix-replace is not supported with "
"--thinlto-emit-index-files");
}
config->thinLTOModulesToCompile =
args::getStrings(args, OPT_thinlto_single_module_eq);
config->timeTraceEnabled = args.hasArg(OPT_time_trace_eq);
config->timeTraceGranularity =
args::getInteger(args, OPT_time_trace_granularity, 500);
config->trace = args.hasArg(OPT_trace);
config->undefined = args::getStrings(args, OPT_undefined);
config->undefinedVersion =
args.hasFlag(OPT_undefined_version, OPT_no_undefined_version, true);
config->unique = args.hasArg(OPT_unique);
config->useAndroidRelrTags = args.hasFlag(
OPT_use_android_relr_tags, OPT_no_use_android_relr_tags, false);
config->warnBackrefs =
args.hasFlag(OPT_warn_backrefs, OPT_no_warn_backrefs, false);
config->warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false);
config->warnSymbolOrdering =
args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
config->whyExtract = args.getLastArgValue(OPT_why_extract);
config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
config->zForceBti = hasZOption(args, "force-bti");
config->zForceIbt = hasZOption(args, "force-ibt");
config->zGlobal = hasZOption(args, "global");
config->zGnustack = getZGnuStack(args);
config->zHazardplt = hasZOption(args, "hazardplt");
config->zIfuncNoplt = hasZOption(args, "ifunc-noplt");
config->zInitfirst = hasZOption(args, "initfirst");
config->zInterpose = hasZOption(args, "interpose");
config->zKeepTextSectionPrefix = getZFlag(
args, "keep-text-section-prefix", "nokeep-text-section-prefix", false);
config->zNodefaultlib = hasZOption(args, "nodefaultlib");
config->zNodelete = hasZOption(args, "nodelete");
config->zNodlopen = hasZOption(args, "nodlopen");
config->zNow = getZFlag(args, "now", "lazy", false);
config->zOrigin = hasZOption(args, "origin");
config->zPacPlt = hasZOption(args, "pac-plt");
config->zRelro = getZFlag(args, "relro", "norelro", true);
config->zRetpolineplt = hasZOption(args, "retpolineplt");
config->zRodynamic = hasZOption(args, "rodynamic");
config->zSeparate = getZSeparate(args);
config->zShstk = hasZOption(args, "shstk");
config->zStackSize = args::getZOptionValue(args, OPT_z, "stack-size", 0);
config->zStartStopGC =
getZFlag(args, "start-stop-gc", "nostart-stop-gc", true);
config->zStartStopVisibility = getZStartStopVisibility(args);
config->zText = getZFlag(args, "text", "notext", true);
config->zWxneeded = hasZOption(args, "wxneeded");
setUnresolvedSymbolPolicy(args);
config->power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no";
if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) {
if (arg->getOption().matches(OPT_eb))
config->optEB = true;
else
config->optEL = true;
}
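// Parse --shuffle-sections=<section-glob>=<seed>. For example (the glob is
// illustrative), --shuffle-sections='.text.*=42' shuffles sections matching
// the glob using seed 42, and a seed of -1 reverses their order.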
for (opt::Arg *arg : args.filtered(OPT_shuffle_sections)) {
constexpr StringRef errPrefix = "--shuffle-sections=: ";
std::pair<StringRef, StringRef> kv = StringRef(arg->getValue()).split('=');
if (kv.first.empty() || kv.second.empty()) {
error(errPrefix + "expected <section_glob>=<seed>, but got '" +
arg->getValue() + "'");
continue;
}
// Signed so that <section_glob>=-1 is allowed.
int64_t v;
if (!to_integer(kv.second, v))
error(errPrefix + "expected an integer, but got '" + kv.second + "'");
else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first))
config->shuffleSections.emplace_back(std::move(*pat), uint32_t(v));
else
error(errPrefix + toString(pat.takeError()));
}
auto reports = {std::make_pair("bti-report", &config->zBtiReport),
std::make_pair("cet-report", &config->zCetReport)};
for (opt::Arg *arg : args.filtered(OPT_z)) {
std::pair<StringRef, StringRef> option =
StringRef(arg->getValue()).split('=');
for (auto reportArg : reports) {
if (option.first != reportArg.first)
continue;
if (!isValidReportString(option.second)) {
error(Twine("-z ") + reportArg.first + "= parameter " + option.second +
" is not recognized");
continue;
}
*reportArg.second = option.second;
}
}
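// Parse -z dead-reloc-in-nonalloc=<section-glob>=<value>. For example (the
// glob and value are illustrative), -z dead-reloc-in-nonalloc=.debug_loc=1
// resolves relocations referencing discarded symbols in matching
// non-SHF_ALLOC sections to the value 1.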
for (opt::Arg *arg : args.filtered(OPT_z)) {
std::pair<StringRef, StringRef> option =
StringRef(arg->getValue()).split('=');
if (option.first != "dead-reloc-in-nonalloc")
continue;
constexpr StringRef errPrefix = "-z dead-reloc-in-nonalloc=: ";
std::pair<StringRef, StringRef> kv = option.second.split('=');
if (kv.first.empty() || kv.second.empty()) {
error(errPrefix + "expected <section_glob>=<value>");
continue;
}
uint64_t v;
if (!to_integer(kv.second, v))
error(errPrefix + "expected a non-negative integer, but got '" +
kv.second + "'");
else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first))
config->deadRelocInNonAlloc.emplace_back(std::move(*pat), v);
else
error(errPrefix + toString(pat.takeError()));
}
cl::ResetAllOptionOccurrences();
// Parse LTO options.
if (auto *arg = args.getLastArg(OPT_plugin_opt_mcpu_eq))
parseClangOption(saver().save("-mcpu=" + StringRef(arg->getValue())),
arg->getSpelling());
for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq_minus))
parseClangOption(std::string("-") + arg->getValue(), arg->getSpelling());
// GCC collect2 passes -plugin-opt=path/to/lto-wrapper with an absolute or
- // relative path. Just ignore. If not ended with "lto-wrapper", consider it an
+ // relative path. Just ignore. If not ended with "lto-wrapper" (or
+ // "lto-wrapper.exe" for GCC cross-compiled for Windows), consider it an
// unsupported LLVMgold.so option and error.
- for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq))
- if (!StringRef(arg->getValue()).endswith("lto-wrapper"))
+ for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq)) {
+ StringRef v(arg->getValue());
+ if (!v.endswith("lto-wrapper") && !v.endswith("lto-wrapper.exe"))
error(arg->getSpelling() + ": unknown plugin option '" + arg->getValue() +
"'");
+ }
config->passPlugins = args::getStrings(args, OPT_load_pass_plugins);
// Parse -mllvm options.
for (auto *arg : args.filtered(OPT_mllvm))
parseClangOption(arg->getValue(), arg->getSpelling());
// --threads= takes a positive integer and provides the default value for
// --thinlto-jobs=.
if (auto *arg = args.getLastArg(OPT_threads)) {
StringRef v(arg->getValue());
unsigned threads = 0;
if (!llvm::to_integer(v, threads, 0) || threads == 0)
error(arg->getSpelling() + ": expected a positive integer, but got '" +
arg->getValue() + "'");
parallel::strategy = hardware_concurrency(threads);
config->thinLTOJobs = v;
}
if (auto *arg = args.getLastArg(OPT_thinlto_jobs))
config->thinLTOJobs = arg->getValue();
if (config->ltoo > 3)
error("invalid optimization level for LTO: " + Twine(config->ltoo));
if (config->ltoPartitions == 0)
error("--lto-partitions: number of threads must be > 0");
if (!get_threadpool_strategy(config->thinLTOJobs))
error("--thinlto-jobs: invalid job count: " + config->thinLTOJobs);
if (config->splitStackAdjustSize < 0)
error("--split-stack-adjust-size: size must be >= 0");
// The text segment is traditionally the first segment, whose address equals
// the base address. However, lld places the R PT_LOAD first. -Ttext-segment
// is an old-fashioned option that does not play well with lld's layout.
// Suggest --image-base as a likely alternative.
if (args.hasArg(OPT_Ttext_segment))
error("-Ttext-segment is not supported. Use --image-base if you "
"intend to set the base address");
// Parse ELF{32,64}{LE,BE} and CPU type.
if (auto *arg = args.getLastArg(OPT_m)) {
StringRef s = arg->getValue();
std::tie(config->ekind, config->emachine, config->osabi) =
parseEmulation(s);
config->mipsN32Abi =
(s.startswith("elf32btsmipn32") || s.startswith("elf32ltsmipn32"));
config->emulation = s;
}
// Parse --hash-style={sysv,gnu,both}.
if (auto *arg = args.getLastArg(OPT_hash_style)) {
StringRef s = arg->getValue();
if (s == "sysv")
config->sysvHash = true;
else if (s == "gnu")
config->gnuHash = true;
else if (s == "both")
config->sysvHash = config->gnuHash = true;
else
error("unknown --hash-style: " + s);
}
if (args.hasArg(OPT_print_map))
config->mapFile = "-";
// Page alignment can be disabled by the -n (--nmagic) and -N (--omagic)
// options. As PT_GNU_RELRO relies on paging, do not create it when paging
// has been disabled.
if (config->nmagic || config->omagic)
config->zRelro = false;
std::tie(config->buildId, config->buildIdVector) = getBuildId(args);
if (getZFlag(args, "pack-relative-relocs", "nopack-relative-relocs", false)) {
config->relrGlibc = true;
config->relrPackDynRelocs = true;
} else {
std::tie(config->androidPackDynRelocs, config->relrPackDynRelocs) =
getPackDynRelocs(args);
}
if (auto *arg = args.getLastArg(OPT_symbol_ordering_file)){
if (args.hasArg(OPT_call_graph_ordering_file))
error("--symbol-ordering-file and --call-graph-order-file "
"may not be used together");
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue())){
config->symbolOrderingFile = getSymbolOrderingFile(*buffer);
// We also need to disable call graph profile sorting to prevent
// LLD from reordering symbols based on CGProfile data.
config->callGraphProfileSort = false;
}
}
assert(config->versionDefinitions.empty());
config->versionDefinitions.push_back(
{"local", (uint16_t)VER_NDX_LOCAL, {}, {}});
config->versionDefinitions.push_back(
{"global", (uint16_t)VER_NDX_GLOBAL, {}, {}});
// If --retain-symbol-file is used, we'll keep only the symbols listed in
// the file and discard all others.
if (auto *arg = args.getLastArg(OPT_retain_symbols_file)) {
config->versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back(
{"*", /*isExternCpp=*/false, /*hasWildcard=*/true});
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
for (StringRef s : args::getLines(*buffer))
config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(
{s, /*isExternCpp=*/false, /*hasWildcard=*/false});
}
for (opt::Arg *arg : args.filtered(OPT_warn_backrefs_exclude)) {
StringRef pattern(arg->getValue());
if (Expected<GlobPattern> pat = GlobPattern::create(pattern))
config->warnBackrefsExclude.push_back(std::move(*pat));
else
error(arg->getSpelling() + ": " + toString(pat.takeError()));
}
// For -no-pie and -pie, --export-dynamic-symbol specifies defined symbols
// which should be exported. For -shared, references to matched non-local
// STV_DEFAULT symbols are not bound to definitions within the shared object,
// even if other options express a symbolic intention: -Bsymbolic,
// -Bsymbolic-functions (if STT_FUNC), --dynamic-list.
for (auto *arg : args.filtered(OPT_export_dynamic_symbol))
config->dynamicList.push_back(
{arg->getValue(), /*isExternCpp=*/false,
/*hasWildcard=*/hasWildcard(arg->getValue())});
// --export-dynamic-symbol-list specifies a list of --export-dynamic-symbol
// patterns. --dynamic-list is --export-dynamic-symbol-list plus
// -Bsymbolic-like semantics.
config->symbolic =
config->bsymbolic == BsymbolicKind::All || args.hasArg(OPT_dynamic_list);
for (auto *arg :
args.filtered(OPT_dynamic_list, OPT_export_dynamic_symbol_list))
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
readDynamicList(*buffer);
for (auto *arg : args.filtered(OPT_version_script))
if (Optional<std::string> path = searchScript(arg->getValue())) {
if (Optional<MemoryBufferRef> buffer = readFile(*path))
readVersionScript(*buffer);
} else {
error(Twine("cannot find version script ") + arg->getValue());
}
}
// Some Config members do not directly correspond to any particular
// command line option, but are computed from other Config values.
// This function initializes such members. See Config.h for the details
// of these values.
static void setConfigs(opt::InputArgList &args) {
ELFKind k = config->ekind;
uint16_t m = config->emachine;
config->copyRelocs = (config->relocatable || config->emitRelocs);
config->is64 = (k == ELF64LEKind || k == ELF64BEKind);
config->isLE = (k == ELF32LEKind || k == ELF64LEKind);
config->endianness = config->isLE ? endianness::little : endianness::big;
config->isMips64EL = (k == ELF64LEKind && m == EM_MIPS);
config->isPic = config->pie || config->shared;
config->picThunk = args.hasArg(OPT_pic_veneer, config->isPic);
config->wordsize = config->is64 ? 8 : 4;
// ELF defines two different ways to store relocation addends as shown below:
//
// Rel: Addends are stored to the location where relocations are applied. It
// cannot pack the full range of addend values for all relocation types, but
// this only affects relocation types that we don't support emitting as
// dynamic relocations (see getDynRel).
// Rela: Addends are stored as part of relocation entry.
//
// In other words, Rela makes it easy to read addends at the price of an
// extra 4 or 8 bytes per relocation entry.
//
// We pick the format for dynamic relocations according to the psABI for each
// processor, but a contrary choice can be made if the dynamic loader
// supports it.
config->isRela = getIsRela(args);
// If the output uses REL relocations we must store the dynamic relocation
// addends to the output sections. We also store addends for RELA relocations
// if --apply-dynamic-relocs is used.
// We default to not writing the addends when using RELA relocations since
// any standard-conforming tool can find them in r_addend.
config->writeAddends = args.hasFlag(OPT_apply_dynamic_relocs,
OPT_no_apply_dynamic_relocs, false) ||
!config->isRela;
// Validation of dynamic relocation addends is on by default for assertions
// builds (for supported targets) and disabled otherwise. Ideally we would
// enable the debug checks for all targets, but currently not all targets
// have support for reading Elf_Rel addends, so we only enable for a subset.
#ifndef NDEBUG
bool checkDynamicRelocsDefault = m == EM_ARM || m == EM_386 || m == EM_MIPS ||
m == EM_X86_64 || m == EM_RISCV;
#else
bool checkDynamicRelocsDefault = false;
#endif
config->checkDynamicRelocs =
args.hasFlag(OPT_check_dynamic_relocations,
OPT_no_check_dynamic_relocations, checkDynamicRelocsDefault);
config->tocOptimize =
args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, m == EM_PPC64);
config->pcRelOptimize =
args.hasFlag(OPT_pcrel_optimize, OPT_no_pcrel_optimize, m == EM_PPC64);
}
static bool isFormatBinary(StringRef s) {
if (s == "binary")
return true;
if (s == "elf" || s == "default")
return false;
error("unknown --format value: " + s +
" (supported formats: elf, default, binary)");
return false;
}
void LinkerDriver::createFiles(opt::InputArgList &args) {
llvm::TimeTraceScope timeScope("Load input files");
// For --{push,pop}-state.
std::vector<std::tuple<bool, bool, bool>> stack;
// Iterate over argv to process input files and positional arguments.
InputFile::isInGroup = false;
bool hasInput = false;
for (auto *arg : args) {
switch (arg->getOption().getID()) {
case OPT_library:
addLibrary(arg->getValue());
hasInput = true;
break;
case OPT_INPUT:
addFile(arg->getValue(), /*withLOption=*/false);
hasInput = true;
break;
case OPT_defsym: {
StringRef from;
StringRef to;
std::tie(from, to) = StringRef(arg->getValue()).split('=');
if (from.empty() || to.empty())
error("--defsym: syntax error: " + StringRef(arg->getValue()));
else
readDefsym(from, MemoryBufferRef(to, "--defsym"));
break;
}
case OPT_script:
if (Optional<std::string> path = searchScript(arg->getValue())) {
if (Optional<MemoryBufferRef> mb = readFile(*path))
readLinkerScript(*mb);
break;
}
error(Twine("cannot find linker script ") + arg->getValue());
break;
case OPT_as_needed:
config->asNeeded = true;
break;
case OPT_format:
config->formatBinary = isFormatBinary(arg->getValue());
break;
case OPT_no_as_needed:
config->asNeeded = false;
break;
case OPT_Bstatic:
case OPT_omagic:
case OPT_nmagic:
config->isStatic = true;
break;
case OPT_Bdynamic:
config->isStatic = false;
break;
case OPT_whole_archive:
inWholeArchive = true;
break;
case OPT_no_whole_archive:
inWholeArchive = false;
break;
case OPT_just_symbols:
if (Optional<MemoryBufferRef> mb = readFile(arg->getValue())) {
files.push_back(createObjFile(*mb));
files.back()->justSymbols = true;
}
break;
case OPT_start_group:
if (InputFile::isInGroup)
error("nested --start-group");
InputFile::isInGroup = true;
break;
case OPT_end_group:
if (!InputFile::isInGroup)
error("stray --end-group");
InputFile::isInGroup = false;
++InputFile::nextGroupId;
break;
case OPT_start_lib:
if (inLib)
error("nested --start-lib");
if (InputFile::isInGroup)
error("may not nest --start-lib in --start-group");
inLib = true;
InputFile::isInGroup = true;
break;
case OPT_end_lib:
if (!inLib)
error("stray --end-lib");
inLib = false;
InputFile::isInGroup = false;
++InputFile::nextGroupId;
break;
case OPT_push_state:
stack.emplace_back(config->asNeeded, config->isStatic, inWholeArchive);
break;
case OPT_pop_state:
if (stack.empty()) {
error("unbalanced --push-state/--pop-state");
break;
}
std::tie(config->asNeeded, config->isStatic, inWholeArchive) = stack.back();
stack.pop_back();
break;
}
}
if (files.empty() && !hasInput && errorCount() == 0)
error("no input files");
}
// If -m <machine_type> was not given, infer it from object files.
void LinkerDriver::inferMachineType() {
if (config->ekind != ELFNoneKind)
return;
for (InputFile *f : files) {
if (f->ekind == ELFNoneKind)
continue;
config->ekind = f->ekind;
config->emachine = f->emachine;
config->osabi = f->osabi;
config->mipsN32Abi = config->emachine == EM_MIPS && isMipsN32Abi(f);
return;
}
error("target emulation unknown: -m or at least one .o file required");
}
// Parse -z max-page-size=<value>. The default value is defined by
// each target.
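// For example, -z max-page-size=0x10000 (an illustrative value) sets it to
// 64 KiB; the value must be a power of 2.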
static uint64_t getMaxPageSize(opt::InputArgList &args) {
uint64_t val = args::getZOptionValue(args, OPT_z, "max-page-size",
target->defaultMaxPageSize);
if (!isPowerOf2_64(val)) {
error("max-page-size: value isn't a power of 2");
return target->defaultMaxPageSize;
}
if (config->nmagic || config->omagic) {
if (val != target->defaultMaxPageSize)
warn("-z max-page-size set, but paging disabled by omagic or nmagic");
return 1;
}
return val;
}
// Parse -z common-page-size=<value>. The default value is defined by
// each target.
static uint64_t getCommonPageSize(opt::InputArgList &args) {
uint64_t val = args::getZOptionValue(args, OPT_z, "common-page-size",
target->defaultCommonPageSize);
if (!isPowerOf2_64(val)) {
error("common-page-size: value isn't a power of 2");
return target->defaultCommonPageSize;
}
if (config->nmagic || config->omagic) {
if (val != target->defaultCommonPageSize)
warn("-z common-page-size set, but paging disabled by omagic or nmagic");
return 1;
}
// commonPageSize can't be larger than maxPageSize.
if (val > config->maxPageSize)
val = config->maxPageSize;
return val;
}
// Parses --image-base option.
static Optional<uint64_t> getImageBase(opt::InputArgList &args) {
// Because we are using config->maxPageSize here, this function has to be
// called after the variable is initialized.
auto *arg = args.getLastArg(OPT_image_base);
if (!arg)
return None;
StringRef s = arg->getValue();
uint64_t v;
if (!to_integer(s, v)) {
error("--image-base: number expected, but got " + s);
return 0;
}
if ((v % config->maxPageSize) != 0)
warn("--image-base: address isn't multiple of page size: " + s);
return v;
}
// Parses `--exclude-libs=lib,lib,...`.
// The library names may be delimited by commas or colons.
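// For example, --exclude-libs=libfoo.a:libbar.a (the archive names are
// illustrative) parses into the set {libfoo.a, libbar.a}.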
static DenseSet<StringRef> getExcludeLibs(opt::InputArgList &args) {
DenseSet<StringRef> ret;
for (auto *arg : args.filtered(OPT_exclude_libs)) {
StringRef s = arg->getValue();
for (;;) {
size_t pos = s.find_first_of(",:");
if (pos == StringRef::npos)
break;
ret.insert(s.substr(0, pos));
s = s.substr(pos + 1);
}
ret.insert(s);
}
return ret;
}
// Handles the --exclude-libs option. If a static library file is specified
// by the --exclude-libs option, all public symbols from the archive become
// private unless otherwise specified by version scripts or similar means.
// A special library name "ALL" means all archive files.
//
// This is not a popular option, but some programs such as bionic libc use it.
static void excludeLibs(opt::InputArgList &args) {
DenseSet<StringRef> libs = getExcludeLibs(args);
bool all = libs.count("ALL");
auto visit = [&](InputFile *file) {
if (file->archiveName.empty() ||
!(all || libs.count(path::filename(file->archiveName))))
return;
ArrayRef<Symbol *> symbols = file->getSymbols();
if (isa<ELFFileBase>(file))
symbols = cast<ELFFileBase>(file)->getGlobalSymbols();
for (Symbol *sym : symbols)
if (!sym->isUndefined() && sym->file == file)
sym->versionId = VER_NDX_LOCAL;
};
for (ELFFileBase *file : ctx->objectFiles)
visit(file);
for (BitcodeFile *file : ctx->bitcodeFiles)
visit(file);
}
// Force sym to be entered in the output.
static void handleUndefined(Symbol *sym, const char *option) {
// Since a symbol may not be used inside the program, LTO may
// eliminate it. Mark the symbol as "used" to prevent it.
sym->isUsedInRegularObj = true;
if (!sym->isLazy())
return;
sym->extract();
if (!config->whyExtract.empty())
ctx->whyExtractRecords.emplace_back(option, sym->file, *sym);
}
// As an extension to GNU linkers, lld supports a variant of `-u`
// which accepts wildcard patterns. All symbols that match a given
// pattern are handled as if they were given by `-u`.
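// For example, --undefined-glob='foo*' (the pattern is illustrative) acts
// like -u for every known symbol whose name starts with "foo".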
static void handleUndefinedGlob(StringRef arg) {
Expected<GlobPattern> pat = GlobPattern::create(arg);
if (!pat) {
error("--undefined-glob: " + toString(pat.takeError()));
return;
}
// Calling sym->extract() in the loop is not safe because it may add new
// symbols to the symbol table, invalidating the current iterator.
SmallVector<Symbol *, 0> syms;
for (Symbol *sym : symtab->symbols())
if (!sym->isPlaceholder() && pat->match(sym->getName()))
syms.push_back(sym);
for (Symbol *sym : syms)
handleUndefined(sym, "--undefined-glob");
}
static void handleLibcall(StringRef name) {
Symbol *sym = symtab->find(name);
if (!sym || !sym->isLazy())
return;
MemoryBufferRef mb;
mb = cast<LazyObject>(sym)->file->mb;
if (isBitcode(mb))
sym->extract();
}
static void writeArchiveStats() {
if (config->printArchiveStats.empty())
return;
std::error_code ec;
raw_fd_ostream os(config->printArchiveStats, ec, sys::fs::OF_None);
if (ec) {
error("--print-archive-stats=: cannot open " + config->printArchiveStats +
": " + ec.message());
return;
}
os << "members\textracted\tarchive\n";
SmallVector<StringRef, 0> archives;
DenseMap<CachedHashStringRef, unsigned> all, extracted;
for (ELFFileBase *file : ctx->objectFiles)
if (file->archiveName.size())
++extracted[CachedHashStringRef(file->archiveName)];
for (BitcodeFile *file : ctx->bitcodeFiles)
if (file->archiveName.size())
++extracted[CachedHashStringRef(file->archiveName)];
for (std::pair<StringRef, unsigned> f : driver->archiveFiles) {
unsigned &v = extracted[CachedHashString(f.first)];
os << f.second << '\t' << v << '\t' << f.first << '\n';
// If the archive occurs multiple times, other instances have a count of 0.
v = 0;
}
}
static void writeWhyExtract() {
if (config->whyExtract.empty())
return;
std::error_code ec;
raw_fd_ostream os(config->whyExtract, ec, sys::fs::OF_None);
if (ec) {
error("cannot open --why-extract= file " + config->whyExtract + ": " +
ec.message());
return;
}
os << "reference\textracted\tsymbol\n";
for (auto &entry : ctx->whyExtractRecords) {
os << std::get<0>(entry) << '\t' << toString(std::get<1>(entry)) << '\t'
<< toString(std::get<2>(entry)) << '\n';
}
}
static void reportBackrefs() {
for (auto &ref : ctx->backwardReferences) {
const Symbol &sym = *ref.first;
std::string to = toString(ref.second.second);
// Some libraries have known problems and can cause noise. Filter them out
// with --warn-backrefs-exclude=. The value may look like (for --start-lib)
// *.o or (archive member) *.a(*.o).
bool exclude = false;
for (const llvm::GlobPattern &pat : config->warnBackrefsExclude)
if (pat.match(to)) {
exclude = true;
break;
}
if (!exclude)
warn("backward reference detected: " + sym.getName() + " in " +
toString(ref.second.first) + " refers to " + to);
}
}
// Handle --dependency-file=<path>. If that option is given, lld creates a
// file at the given path with the following contents:
//
// <output-file>: <input-file> ...
//
// <input-file>:
//
// where <output-file> is the pathname of the output file and <input-file>
// ... is a list of pathnames of all input files. The `make` command can
// read a file in the above format and interpret it as dependency info. We
// write phony targets for every <input-file> to avoid an error when that
// file is removed.
//
// This option is useful if you want your final executable to depend on all
// input files including system libraries. Here is why.
//
// When you write a Makefile, you usually write it so that the final
// executable depends on all user-generated object files. Normally, you
// don't make your executable depend on system libraries (such as libc)
// because you don't know their exact paths, even though system libraries
// that are statically linked into your executable are technically part of
// your program. By using the --dependency-file option, you can make lld
// dump dependency info so that you can maintain exact dependencies easily.
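//
// As a sketch, a link like "ld.lld -o foo foo.o bar.o --dependency-file=foo.d"
// (the file names are illustrative) is expected to produce foo.d containing:
//
//   foo: \
//    foo.o \
//    bar.o
//
//   foo.o:
//
//   bar.o: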
static void writeDependencyFile() {
std::error_code ec;
raw_fd_ostream os(config->dependencyFile, ec, sys::fs::OF_None);
if (ec) {
error("cannot open " + config->dependencyFile + ": " + ec.message());
return;
}
// We use the same escape rules as Clang/GCC which are accepted by Make/Ninja:
// * A space is escaped by a backslash which itself must be escaped.
// * A hash sign is escaped by a single backslash.
// * $ is escaped as $$.
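// For example (illustrative paths), "a b.o" is written as "a\ b.o" and
// "gen$.o" as "gen$$.o".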
auto printFilename = [](raw_fd_ostream &os, StringRef filename) {
llvm::SmallString<256> nativePath;
llvm::sys::path::native(filename.str(), nativePath);
llvm::sys::path::remove_dots(nativePath, /*remove_dot_dot=*/true);
for (unsigned i = 0, e = nativePath.size(); i != e; ++i) {
if (nativePath[i] == '#') {
os << '\\';
} else if (nativePath[i] == ' ') {
os << '\\';
unsigned j = i;
while (j > 0 && nativePath[--j] == '\\')
os << '\\';
} else if (nativePath[i] == '$') {
os << '$';
}
os << nativePath[i];
}
};
os << config->outputFile << ":";
for (StringRef path : config->dependencyFiles) {
os << " \\\n ";
printFilename(os, path);
}
os << "\n";
for (StringRef path : config->dependencyFiles) {
os << "\n";
printFilename(os, path);
os << ":\n";
}
}
// Replaces common symbols with defined symbols that reside in .bss sections.
// This function is called after all symbol names are resolved. As a
// result, the passes after symbol resolution won't see any
// symbols of type CommonSymbol.
static void replaceCommonSymbols() {
llvm::TimeTraceScope timeScope("Replace common symbols");
for (ELFFileBase *file : ctx->objectFiles) {
if (!file->hasCommonSyms)
continue;
for (Symbol *sym : file->getGlobalSymbols()) {
auto *s = dyn_cast<CommonSymbol>(sym);
if (!s)
continue;
auto *bss = make<BssSection>("COMMON", s->size, s->alignment);
bss->file = s->file;
inputSections.push_back(bss);
s->replace(Defined{s->file, StringRef(), s->binding, s->stOther, s->type,
/*value=*/0, s->size, bss});
}
}
}
// If all references to a DSO happen to be weak, the DSO is not added to
// DT_NEEDED. If that happens, replace SharedSymbol with Undefined to avoid
// dangling references to an unneeded DSO. Use a weak binding to avoid
// --no-allow-shlib-undefined diagnostics. Similarly, demote lazy symbols.
static void demoteSharedAndLazySymbols() {
llvm::TimeTraceScope timeScope("Demote shared and lazy symbols");
for (Symbol *sym : symtab->symbols()) {
auto *s = dyn_cast<SharedSymbol>(sym);
if (!(s && !cast<SharedFile>(s->file)->isNeeded) && !sym->isLazy())
continue;
bool used = sym->used;
uint8_t binding = sym->isLazy() ? sym->binding : uint8_t(STB_WEAK);
sym->replace(
Undefined{nullptr, sym->getName(), binding, sym->stOther, sym->type});
sym->used = used;
sym->versionId = VER_NDX_GLOBAL;
}
}
// The section referred to by `s` is considered address-significant. Set the
// keepUnique flag on the section if appropriate.
static void markAddrsig(Symbol *s) {
if (auto *d = dyn_cast_or_null<Defined>(s))
if (d->section)
// We don't need to keep text sections unique under --icf=all even if they
// are address-significant.
if (config->icf == ICFLevel::Safe || !(d->section->flags & SHF_EXECINSTR))
d->section->keepUnique = true;
}
// Record sections that define symbols mentioned in --keep-unique <symbol>
// and symbols referred to by address-significance tables. These sections are
// ineligible for ICF.
template <class ELFT>
static void findKeepUniqueSections(opt::InputArgList &args) {
for (auto *arg : args.filtered(OPT_keep_unique)) {
StringRef name = arg->getValue();
auto *d = dyn_cast_or_null<Defined>(symtab->find(name));
if (!d || !d->section) {
warn("could not find symbol " + name + " to keep unique");
continue;
}
d->section->keepUnique = true;
}
// --icf=all --ignore-data-address-equality means that we can ignore
// the dynsym and address-significance tables entirely.
if (config->icf == ICFLevel::All && config->ignoreDataAddressEquality)
return;
// Symbols in the dynsym could be address-significant in other executables
// or DSOs, so we conservatively mark them as address-significant.
for (Symbol *sym : symtab->symbols())
if (sym->includeInDynsym())
markAddrsig(sym);
// Visit the address-significance table in each object file and mark each
// referenced symbol as address-significant.
for (InputFile *f : ctx->objectFiles) {
auto *obj = cast<ObjFile<ELFT>>(f);
ArrayRef<Symbol *> syms = obj->getSymbols();
if (obj->addrsigSec) {
ArrayRef<uint8_t> contents =
check(obj->getObj().getSectionContents(*obj->addrsigSec));
const uint8_t *cur = contents.begin();
while (cur != contents.end()) {
unsigned size;
const char *err;
uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err);
if (err)
fatal(toString(f) + ": could not decode addrsig section: " + err);
markAddrsig(syms[symIndex]);
cur += size;
}
} else {
// If an object file does not have an address-significance table,
// conservatively mark all of its symbols as address-significant.
for (Symbol *s : syms)
markAddrsig(s);
}
}
}
// This function reads a symbol partition specification section. These sections
// are used to control which partition a symbol is allocated to. See
// https://lld.llvm.org/Partitions.html for more details on partitions.
template <typename ELFT>
static void readSymbolPartitionSection(InputSectionBase *s) {
// Read the relocation that refers to the partition's entry point symbol.
Symbol *sym;
const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>();
if (rels.areRelocsRel())
sym = &s->getFile<ELFT>()->getRelocTargetSym(rels.rels[0]);
else
sym = &s->getFile<ELFT>()->getRelocTargetSym(rels.relas[0]);
if (!isa<Defined>(sym) || !sym->includeInDynsym())
return;
StringRef partName = reinterpret_cast<const char *>(s->rawData.data());
for (Partition &part : partitions) {
if (part.name == partName) {
sym->partition = part.getNumber();
return;
}
}
// Forbid partitions from being used on incompatible targets, and forbid them
// from being used together with various linker features that assume a single
// set of output sections.
if (script->hasSectionsCommand)
error(toString(s->file) +
": partitions cannot be used with the SECTIONS command");
if (script->hasPhdrsCommands())
error(toString(s->file) +
": partitions cannot be used with the PHDRS command");
if (!config->sectionStartMap.empty())
error(toString(s->file) + ": partitions cannot be used with "
"--section-start, -Ttext, -Tdata or -Tbss");
if (config->emachine == EM_MIPS)
error(toString(s->file) + ": partitions cannot be used on this target");
// Impose a limit of no more than 254 partitions. This limit comes from the
// sizes of the Partition fields in InputSectionBase and Symbol, as well as
// the amount of space devoted to the partition number in RankFlags.
if (partitions.size() == 254)
fatal("may not have more than 254 partitions");
partitions.emplace_back();
Partition &newPart = partitions.back();
newPart.name = partName;
sym->partition = newPart.getNumber();
}
static Symbol *addUnusedUndefined(StringRef name,
uint8_t binding = STB_GLOBAL) {
return symtab->addSymbol(Undefined{nullptr, name, binding, STV_DEFAULT, 0});
}
static void markBuffersAsDontNeed(bool skipLinkedOutput) {
// With --thinlto-index-only, all buffers are nearly unused from now on
// (except symbol/section names used by infrequent passes). Mark input file
// buffers as MADV_DONTNEED so that these pages can be reused by the expensive
// thin link, saving memory.
if (skipLinkedOutput) {
for (MemoryBuffer &mb : llvm::make_pointee_range(ctx->memoryBuffers))
mb.dontNeedIfMmap();
return;
}
// Otherwise, just mark MemoryBuffers backing BitcodeFiles.
DenseSet<const char *> bufs;
for (BitcodeFile *file : ctx->bitcodeFiles)
bufs.insert(file->mb.getBufferStart());
for (BitcodeFile *file : ctx->lazyBitcodeFiles)
bufs.insert(file->mb.getBufferStart());
for (MemoryBuffer &mb : llvm::make_pointee_range(ctx->memoryBuffers))
if (bufs.count(mb.getBufferStart()))
mb.dontNeedIfMmap();
}
// This function is where link-time optimization (LTO) takes place. When
// LTO is in use, some input files are not in native object file format
// but in the LLVM bitcode format. This function compiles bitcode files
// into a few big native files using LLVM functions and replaces bitcode
// symbols with the results. Because all bitcode files that the program
// consists of are passed to the compiler at once, it can do
// whole-program optimization.
template <class ELFT>
void LinkerDriver::compileBitcodeFiles(bool skipLinkedOutput) {
llvm::TimeTraceScope timeScope("LTO");
// Compile bitcode files and replace bitcode symbols.
lto.reset(new BitcodeCompiler);
for (BitcodeFile *file : ctx->bitcodeFiles)
lto->add(*file);
if (!ctx->bitcodeFiles.empty())
markBuffersAsDontNeed(skipLinkedOutput);
for (InputFile *file : lto->compile()) {
auto *obj = cast<ObjFile<ELFT>>(file);
obj->parse(/*ignoreComdats=*/true);
// Parse '@' in symbol names for non-relocatable output.
if (!config->relocatable)
for (Symbol *sym : obj->getGlobalSymbols())
if (sym->hasVersionSuffix)
sym->parseSymbolVersion();
ctx->objectFiles.push_back(obj);
}
}
// The --wrap option is a feature to rename symbols so that you can write
// wrappers for existing functions. If you pass `--wrap=foo`, all
// occurrences of symbol `foo` are resolved to `__wrap_foo` (so you are
// expected to write a `__wrap_foo` function as a wrapper). The original
// symbol becomes accessible as `__real_foo`, so you can call it from your
// wrapper.
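//
// For example, with --wrap=malloc a user may define a wrapper like this
// (an illustrative sketch):
//
//   void *__wrap_malloc(size_t size) {
//     fprintf(stderr, "allocating %zu bytes\n", size);
//     return __real_malloc(size);
//   }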
//
// This data structure is instantiated for each --wrap option.
struct WrappedSymbol {
Symbol *sym;
Symbol *real;
Symbol *wrap;
};
// Handles --wrap option.
//
// This function instantiates wrapper symbols. At this point, they seem
// like they are not being used at all, so we explicitly set some flags so
// that LTO won't eliminate them.
static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {
std::vector<WrappedSymbol> v;
DenseSet<StringRef> seen;
for (auto *arg : args.filtered(OPT_wrap)) {
StringRef name = arg->getValue();
if (!seen.insert(name).second)
continue;
Symbol *sym = symtab->find(name);
if (!sym)
continue;
Symbol *real = addUnusedUndefined(saver().save("__real_" + name));
Symbol *wrap =
addUnusedUndefined(saver().save("__wrap_" + name), sym->binding);
v.push_back({sym, real, wrap});
// We want to tell LTO not to inline symbols that are going to be
// overwritten, because LTO doesn't know the final symbol contents after
// renaming.
real->scriptDefined = true;
sym->scriptDefined = true;
// If a symbol is referenced in any object file, bitcode file or shared
// object, mark its redirection target (foo for __real_foo and __wrap_foo
// for foo) as referenced after redirection, which will be used to tell LTO
// to not eliminate the redirection target. If the object file defining the
// symbol also references it, we cannot easily distinguish the case from
// cases where the symbol is not referenced. Retain the redirection target
// in this case because we choose to wrap symbol references regardless of
// whether the symbol is defined
// (https://sourceware.org/bugzilla/show_bug.cgi?id=26358).
if (real->referenced || real->isDefined())
sym->referencedAfterWrap = true;
if (sym->referenced || sym->isDefined())
wrap->referencedAfterWrap = true;
}
return v;
}
// Do renaming for --wrap and foo@v1 by updating pointers to symbols.
//
// When this function is executed, only InputFiles and symbol table
// contain pointers to symbol objects. We visit them to replace pointers,
// so that wrapped symbols are swapped as instructed by the command line.
static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) {
llvm::TimeTraceScope timeScope("Redirect symbols");
DenseMap<Symbol *, Symbol *> map;
for (const WrappedSymbol &w : wrapped) {
map[w.sym] = w.wrap;
map[w.real] = w.sym;
}
for (Symbol *sym : symtab->symbols()) {
// Enumerate symbols with a non-default version (foo@v1). hasVersionSuffix
// filters out most symbols but is not sufficient.
if (!sym->hasVersionSuffix)
continue;
const char *suffix1 = sym->getVersionSuffix();
if (suffix1[0] != '@' || suffix1[1] == '@')
continue;
// Check the existing symbol foo. We have two special cases to handle:
//
// * There is a definition of foo@v1 and foo@@v1.
// * There is a definition of foo@v1 and foo.
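//
// Illustrative example (GNU as syntax, for illustration only): if `foo` is
// defined and the file also contains `.symver foo, foo@v1`, the assembler
// emits both `foo` and `foo@v1`; similarly, separate `.symver` directives may
// produce both `foo@v1` and the default-version `foo@@v1`. Both shapes are
// normalized below.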
Defined *sym2 = dyn_cast_or_null<Defined>(symtab->find(sym->getName()));
if (!sym2)
continue;
const char *suffix2 = sym2->getVersionSuffix();
if (suffix2[0] == '@' && suffix2[1] == '@' &&
strcmp(suffix1 + 1, suffix2 + 2) == 0) {
// foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1.
map.try_emplace(sym, sym2);
// If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
// definition error.
if (sym->isDefined())
sym2->checkDuplicate(cast<Defined>(*sym));
sym2->resolve(*sym);
// Eliminate foo@v1 from the symbol table.
sym->symbolKind = Symbol::PlaceholderKind;
sym->isUsedInRegularObj = false;
} else if (auto *sym1 = dyn_cast<Defined>(sym)) {
if (sym2->versionId > VER_NDX_GLOBAL
? config->versionDefinitions[sym2->versionId].name == suffix1 + 1
: sym1->section == sym2->section && sym1->value == sym2->value) {
// Due to an assembler design flaw, if foo is defined, .symver foo,
// foo@v1 defines both foo and foo@v1. Unless foo is bound to a
// different version, GNU ld makes foo@v1 canonical and eliminates foo.
// Emulate its behavior; otherwise we would have foo or foo@@v1 beside
// foo@v1. foo@v1 and foo combining does not apply if they are not
// defined in the same place.
map.try_emplace(sym2, sym);
sym2->symbolKind = Symbol::PlaceholderKind;
sym2->isUsedInRegularObj = false;
}
}
}
if (map.empty())
return;
// Update pointers in input files.
parallelForEach(ctx->objectFiles, [&](ELFFileBase *file) {
for (Symbol *&sym : file->getMutableGlobalSymbols())
if (Symbol *s = map.lookup(sym))
sym = s;
});
// Update pointers in the symbol table.
for (const WrappedSymbol &w : wrapped)
symtab->wrap(w.sym, w.real, w.wrap);
}
static void checkAndReportMissingFeature(StringRef config, uint32_t features,
uint32_t mask, const Twine &report) {
if (!(features & mask)) {
if (config == "error")
error(report);
else if (config == "warning")
warn(report);
}
}
// To enable CET (x86's hardware-assisted control flow enforcement), each
// source file must be compiled with -fcf-protection. Object files compiled
// with the flag contain feature flags indicating that they are compatible
// with CET. We enable the feature only when all object files are compatible
// with CET.
//
// This is also the case with AArch64's BTI and PAC, which use a similar
// GNU_PROPERTY_AARCH64_FEATURE_1_AND mechanism.
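//
// Illustrative usage (assumption: typical toolchain flags): objects built with
// `clang -fcf-protection=full` carry the x86 IBT/SHSTK property bits, and
// objects built with `clang -mbranch-protection=bti+pac-ret` carry the AArch64
// BTI/PAC bits; `-z cet-report=...`, `-z bti-report=...`, `-z force-bti` and
// `-z force-ibt` control how missing bits are handled below.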
static uint32_t getAndFeatures() {
if (config->emachine != EM_386 && config->emachine != EM_X86_64 &&
config->emachine != EM_AARCH64)
return 0;
uint32_t ret = -1;
for (ELFFileBase *f : ctx->objectFiles) {
uint32_t features = f->andFeatures;
checkAndReportMissingFeature(
config->zBtiReport, features, GNU_PROPERTY_AARCH64_FEATURE_1_BTI,
toString(f) + ": -z bti-report: file does not have "
"GNU_PROPERTY_AARCH64_FEATURE_1_BTI property");
checkAndReportMissingFeature(
config->zCetReport, features, GNU_PROPERTY_X86_FEATURE_1_IBT,
toString(f) + ": -z cet-report: file does not have "
"GNU_PROPERTY_X86_FEATURE_1_IBT property");
checkAndReportMissingFeature(
config->zCetReport, features, GNU_PROPERTY_X86_FEATURE_1_SHSTK,
toString(f) + ": -z cet-report: file does not have "
"GNU_PROPERTY_X86_FEATURE_1_SHSTK property");
if (config->zForceBti && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) {
features |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
if (config->zBtiReport == "none")
warn(toString(f) + ": -z force-bti: file does not have "
"GNU_PROPERTY_AARCH64_FEATURE_1_BTI property");
} else if (config->zForceIbt &&
!(features & GNU_PROPERTY_X86_FEATURE_1_IBT)) {
if (config->zCetReport == "none")
warn(toString(f) + ": -z force-ibt: file does not have "
"GNU_PROPERTY_X86_FEATURE_1_IBT property");
features |= GNU_PROPERTY_X86_FEATURE_1_IBT;
}
if (config->zPacPlt && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_PAC)) {
warn(toString(f) + ": -z pac-plt: file does not have "
"GNU_PROPERTY_AARCH64_FEATURE_1_PAC property");
features |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
}
ret &= features;
}
// Force enable Shadow Stack.
if (config->zShstk)
ret |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;
return ret;
}
static void initializeLocalSymbols(ELFFileBase *file) {
switch (config->ekind) {
case ELF32LEKind:
cast<ObjFile<ELF32LE>>(file)->initializeLocalSymbols();
break;
case ELF32BEKind:
cast<ObjFile<ELF32BE>>(file)->initializeLocalSymbols();
break;
case ELF64LEKind:
cast<ObjFile<ELF64LE>>(file)->initializeLocalSymbols();
break;
case ELF64BEKind:
cast<ObjFile<ELF64BE>>(file)->initializeLocalSymbols();
break;
default:
llvm_unreachable("");
}
}
static void postParseObjectFile(ELFFileBase *file) {
switch (config->ekind) {
case ELF32LEKind:
cast<ObjFile<ELF32LE>>(file)->postParse();
break;
case ELF32BEKind:
cast<ObjFile<ELF32BE>>(file)->postParse();
break;
case ELF64LEKind:
cast<ObjFile<ELF64LE>>(file)->postParse();
break;
case ELF64BEKind:
cast<ObjFile<ELF64BE>>(file)->postParse();
break;
default:
llvm_unreachable("");
}
}
// Do actual linking. Note that when this function is called,
// all linker scripts have already been parsed.
void LinkerDriver::link(opt::InputArgList &args) {
llvm::TimeTraceScope timeScope("Link", StringRef("LinkerDriver::Link"));
// If a --hash-style option was not given, set it to a default value,
// which varies depending on the target.
if (!args.hasArg(OPT_hash_style)) {
if (config->emachine == EM_MIPS)
config->sysvHash = true;
else
config->sysvHash = config->gnuHash = true;
}
// The default output filename is "a.out", per Unix tradition.
if (config->outputFile.empty())
config->outputFile = "a.out";
// Fail early if the output file or map file is not writable. If a user has a
// long link, e.g. due to a large LTO link, they do not want to run it only to
// find that it failed because of a mistake in their command line.
{
llvm::TimeTraceScope timeScope("Create output files");
if (auto e = tryCreateFile(config->outputFile))
error("cannot open output file " + config->outputFile + ": " +
e.message());
if (auto e = tryCreateFile(config->mapFile))
error("cannot open map file " + config->mapFile + ": " + e.message());
if (auto e = tryCreateFile(config->whyExtract))
error("cannot open --why-extract= file " + config->whyExtract + ": " +
e.message());
}
if (errorCount())
return;
// Use the default entry point name if no name was given via the command
// line or linker scripts. For some reason, the MIPS entry point name is
// different from the others.
config->warnMissingEntry =
(!config->entry.empty() || (!config->shared && !config->relocatable));
if (config->entry.empty() && !config->relocatable)
config->entry = (config->emachine == EM_MIPS) ? "__start" : "_start";
// Handle --trace-symbol.
for (auto *arg : args.filtered(OPT_trace_symbol))
symtab->insert(arg->getValue())->traced = true;
// Handle -u/--undefined before input files. If both a.a and b.so define foo,
// -u foo a.a b.so will extract a.a.
for (StringRef name : config->undefined)
addUnusedUndefined(name)->referenced = true;
// Add all files to the symbol table. This will add almost all
// symbols that we need to the symbol table. This process might add more
// files to the link via autolinking; such files are always appended to the
// Files vector.
{
llvm::TimeTraceScope timeScope("Parse input files");
for (size_t i = 0; i < files.size(); ++i) {
llvm::TimeTraceScope timeScope("Parse input files", files[i]->getName());
parseFile(files[i]);
}
}
// Now that we have every file, we can decide if we will need a
// dynamic symbol table.
// We need one if we were asked to export dynamic symbols or if we are
// producing a shared library.
// We also need one if any shared libraries are used and for pie executables
// (probably because the dynamic linker needs it).
config->hasDynSymTab =
!ctx->sharedFiles.empty() || config->isPic || config->exportDynamic;
// Some symbols (such as __ehdr_start) are defined lazily only when there
// are undefined symbols for them, so we add these to trigger that logic.
for (StringRef name : script->referencedSymbols) {
Symbol *sym = addUnusedUndefined(name);
sym->isUsedInRegularObj = true;
sym->referenced = true;
}
// Prevent LTO from removing any definition referenced by -u.
for (StringRef name : config->undefined)
if (Defined *sym = dyn_cast_or_null<Defined>(symtab->find(name)))
sym->isUsedInRegularObj = true;
// If an entry symbol is in a static archive, pull out that file now.
if (Symbol *sym = symtab->find(config->entry))
handleUndefined(sym, "--entry");
// Handle the `--undefined-glob <pattern>` options.
for (StringRef pat : args::getStrings(args, OPT_undefined_glob))
handleUndefinedGlob(pat);
// Mark -init and -fini symbols so that LTO doesn't eliminate them.
if (Symbol *sym = dyn_cast_or_null<Defined>(symtab->find(config->init)))
sym->isUsedInRegularObj = true;
if (Symbol *sym = dyn_cast_or_null<Defined>(symtab->find(config->fini)))
sym->isUsedInRegularObj = true;
// If any of our inputs are bitcode files, the LTO code generator may create
// references to certain library functions that might not be explicit in the
// bitcode file's symbol table. If any of those library functions are defined
// in a bitcode file in an archive member, we need to arrange to use LTO to
// compile those archive members by adding them to the link beforehand.
//
// However, adding all libcall symbols to the link can have undesired
// consequences. For example, the libgcc implementation of
// __sync_val_compare_and_swap_8 on 32-bit ARM pulls in an .init_array entry
// that aborts the program if the Linux kernel does not support 64-bit
// atomics, which would prevent the program from running even if it does not
// use 64-bit atomics.
//
// Therefore, we only add libcall symbols to the link before LTO if we have
// to, i.e. if the symbol's definition is in bitcode. Any other required
// libcall symbols will be added to the link after LTO when we add the LTO
// object file to the link.
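//
// For example (illustrative): LTO may lower a large struct copy into a call
// to memcpy; if memcpy is only defined in a bitcode archive member, that
// member must be added to the link now so it participates in LTO.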
if (!ctx->bitcodeFiles.empty())
for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
handleLibcall(s);
// Archive members defining __wrap symbols may be extracted.
std::vector<WrappedSymbol> wrapped = addWrappedSymbols(args);
// No more lazy bitcode can be extracted at this point. Do post parse work
// like checking duplicate symbols.
parallelForEach(ctx->objectFiles, initializeLocalSymbols);
parallelForEach(ctx->objectFiles, postParseObjectFile);
parallelForEach(ctx->bitcodeFiles,
[](BitcodeFile *file) { file->postParse(); });
for (auto &it : ctx->nonPrevailingSyms) {
Symbol &sym = *it.first;
sym.replace(Undefined{sym.file, sym.getName(), sym.binding, sym.stOther,
sym.type, it.second});
cast<Undefined>(sym).nonPrevailing = true;
}
ctx->nonPrevailingSyms.clear();
for (const DuplicateSymbol &d : ctx->duplicates)
reportDuplicate(*d.sym, d.file, d.section, d.value);
ctx->duplicates.clear();
// Return if there were name resolution errors.
if (errorCount())
return;
// We want to declare the linker script's symbols early,
// so that we can version them.
// They also might be exported if referenced by DSOs.
script->declareSymbols();
// Handle --exclude-libs. This is before scanVersionScript() due to a
// workaround for Android ndk: for a defined versioned symbol in an archive
// without a version node in the version script, Android does not expect a
// 'has undefined version' error in -shared --exclude-libs=ALL mode (PR36295).
// GNU ld errors in this case.
if (args.hasArg(OPT_exclude_libs))
excludeLibs(args);
// Create elfHeader early. We need a dummy section in
// addReservedSymbols to mark the created symbols as not absolute.
Out::elfHeader = make<OutputSection>("", 0, SHF_ALLOC);
// We need to create some reserved symbols such as _end. Create them.
if (!config->relocatable)
addReservedSymbols();
// Apply version scripts.
//
// For a relocatable output, version scripts don't make sense, and
// parsing a symbol version string (e.g. dropping "@ver1" from a symbol
// name "foo@ver1") rather do harm, so we don't call this if -r is given.
if (!config->relocatable) {
llvm::TimeTraceScope timeScope("Process symbol versions");
symtab->scanVersionScript();
}
// Skip the normal linked output if some LTO options are specified.
//
// For --thinlto-index-only, index file creation is performed in
// compileBitcodeFiles, so we are done afterwards. --plugin-opt=emit-llvm and
// --plugin-opt=emit-asm create output files in bitcode or assembly code,
// respectively. When only certain ThinLTO modules are specified for
// compilation, the intermediate object files are the expected output.
const bool skipLinkedOutput = config->thinLTOIndexOnly || config->emitLLVM ||
config->ltoEmitAsm ||
!config->thinLTOModulesToCompile.empty();
// Do link-time optimization if given files are LLVM bitcode files.
// This compiles bitcode files into real object files.
//
// With this the symbol table should be complete. After this, no new names
// except a few linker-synthesized ones will be added to the symbol table.
const size_t numObjsBeforeLTO = ctx->objectFiles.size();
invokeELFT(compileBitcodeFiles, skipLinkedOutput);
// Symbol resolution finished. Report backward reference problems,
// --print-archive-stats=, and --why-extract=.
reportBackrefs();
writeArchiveStats();
writeWhyExtract();
if (errorCount())
return;
// Bail out if normal linked output is skipped due to LTO.
if (skipLinkedOutput)
return;
// compileBitcodeFiles may have produced lto.tmp object files. After this, no
// more files will be added.
auto newObjectFiles = makeArrayRef(ctx->objectFiles).slice(numObjsBeforeLTO);
parallelForEach(newObjectFiles, initializeLocalSymbols);
parallelForEach(newObjectFiles, postParseObjectFile);
for (const DuplicateSymbol &d : ctx->duplicates)
reportDuplicate(*d.sym, d.file, d.section, d.value);
// Handle --exclude-libs again because lto.tmp may reference additional
// libcall symbols defined in an excluded archive. This may override
// versionId set by scanVersionScript().
if (args.hasArg(OPT_exclude_libs))
excludeLibs(args);
// Apply symbol renames for --wrap and combine foo@v1 and foo@@v1.
redirectSymbols(wrapped);
// Replace common symbols with regular symbols.
replaceCommonSymbols();
{
llvm::TimeTraceScope timeScope("Aggregate sections");
// Now that we have a complete list of input files, aggregate all input
// sections into one place. Beyond this point, no new files are added.
for (InputFile *f : ctx->objectFiles)
for (InputSectionBase *s : f->getSections())
if (s && s != &InputSection::discarded)
inputSections.push_back(s);
for (BinaryFile *f : ctx->binaryFiles)
for (InputSectionBase *s : f->getSections())
inputSections.push_back(cast<InputSection>(s));
}
{
llvm::TimeTraceScope timeScope("Strip sections");
if (ctx->hasSympart.load(std::memory_order_relaxed)) {
llvm::erase_if(inputSections, [](InputSectionBase *s) {
if (s->type != SHT_LLVM_SYMPART)
return false;
invokeELFT(readSymbolPartitionSection, s);
return true;
});
}
// We do not want to emit debug sections if --strip-all
// or --strip-debug are given.
if (config->strip != StripPolicy::None) {
llvm::erase_if(inputSections, [](InputSectionBase *s) {
if (isDebugSection(*s))
return true;
if (auto *isec = dyn_cast<InputSection>(s))
if (InputSectionBase *rel = isec->getRelocatedSection())
if (isDebugSection(*rel))
return true;
return false;
});
}
}
// Since we now have a complete set of input files, we can create
// a .d file to record build dependencies.
if (!config->dependencyFile.empty())
writeDependencyFile();
// Now that the number of partitions is fixed, save a pointer to the main
// partition.
mainPart = &partitions[0];
// Read .note.gnu.property sections from input object files which
// contain hints to tweak the linker's and loader's behavior.
config->andFeatures = getAndFeatures();
// The Target instance handles target-specific stuff, such as applying
// relocations or writing a PLT section. It also contains target-dependent
// values such as a default image base address.
target = getTarget();
config->eflags = target->calcEFlags();
// maxPageSize (sometimes called abi page size) is the maximum page size that
// the output can be run on. For example if the OS can use 4k or 64k page
// sizes then maxPageSize must be 64k for the output to be usable on both.
// All important alignment decisions must use this value.
config->maxPageSize = getMaxPageSize(args);
// commonPageSize is the most common page size that the output will be run on.
// For example if an OS can use 4k or 64k page sizes and 4k is more common
// than 64k then commonPageSize is set to 4k. commonPageSize can be used for
// optimizations such as DATA_SEGMENT_ALIGN in linker scripts. LLD's use of it
// is limited to writing trap instructions on the last executable segment.
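//
// Illustrative usage (assumption): an output meant to run on both 4k and 64k
// kernels is typically linked with `-z max-page-size=0x10000`, while
// `-z common-page-size=0x1000` keeps alignment padding for the common case
// small.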
config->commonPageSize = getCommonPageSize(args);
config->imageBase = getImageBase(args);
if (config->emachine == EM_ARM) {
// FIXME: These warnings can be removed when lld only uses these features
// when the input objects have been compiled with an architecture that
// supports them.
if (!config->armHasBlx)
warn("lld uses blx instruction, no object with architecture supporting "
"feature detected");
}
// This adds a .comment section containing a version string.
if (!config->relocatable)
inputSections.push_back(createCommentSection());
// Split SHF_MERGE and .eh_frame sections into pieces in preparation for garbage collection.
invokeELFT(splitSections);
// Garbage collection and removal of shared symbols from unused shared objects.
invokeELFT(markLive);
demoteSharedAndLazySymbols();
// Make copies of any input sections that need to be copied into each
// partition.
copySectionsIntoPartitions();
// Create synthesized sections such as .got and .plt. This is called before
// processSectionCommands() so that they can be placed by SECTIONS commands.
invokeELFT(createSyntheticSections);
// Some input sections that are used for exception handling need to be moved
// into synthetic sections. Do that now so that they aren't assigned to
// output sections in the usual way.
if (!config->relocatable)
combineEhSections();
{
llvm::TimeTraceScope timeScope("Assign sections");
// Create output sections described by SECTIONS commands.
script->processSectionCommands();
// Linker scripts control how input sections are assigned to output
// sections. Input sections that were not handled by scripts are called
// "orphans", and they are assigned to output sections by the default rule.
// Process that.
script->addOrphanSections();
}
{
llvm::TimeTraceScope timeScope("Merge/finalize input sections");
// Migrate InputSectionDescription::sectionBases to sections. This includes
// merging MergeInputSections into a single MergeSyntheticSection. From this
// point onwards InputSectionDescription::sections should be used instead of
// sectionBases.
for (SectionCommand *cmd : script->sectionCommands)
if (auto *osd = dyn_cast<OutputDesc>(cmd))
osd->osec.finalizeInputSections();
llvm::erase_if(inputSections, [](InputSectionBase *s) {
return isa<MergeInputSection>(s);
});
}
// Two input sections with different output sections should not be folded.
// ICF runs after processSectionCommands() so that we know the output sections.
if (config->icf != ICFLevel::None) {
invokeELFT(findKeepUniqueSections, args);
invokeELFT(doIcf);
}
// Read the call graph now that we know what was GCed or ICFed.
if (config->callGraphProfileSort) {
if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file))
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
readCallGraph(*buffer);
invokeELFT(readCallGraphsFromObjectFiles);
}
// Write the result to the file.
invokeELFT(writeResult);
}
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 927dc272b532..473809b05e9c 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1,1787 +1,1787 @@
//===- InputFiles.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
#include "Config.h"
#include "DWARF.h"
#include "Driver.h"
#include "InputSection.h"
#include "LinkerScript.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/DWARF.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/ARMAttributeParser.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/RISCVAttributeParser.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys;
using namespace llvm::sys::fs;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::elf;
bool InputFile::isInGroup;
uint32_t InputFile::nextGroupId;
std::unique_ptr<TarWriter> elf::tar;
// Returns "<internal>", "foo.a(bar.o)" or "baz.o".
std::string lld::toString(const InputFile *f) {
if (!f)
return "<internal>";
if (f->toStringCache.empty()) {
if (f->archiveName.empty())
f->toStringCache = f->getName();
else
(f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache);
}
return std::string(f->toStringCache);
}
static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) {
unsigned char size;
unsigned char endian;
std::tie(size, endian) = getElfArchType(mb.getBuffer());
auto report = [&](StringRef msg) {
StringRef filename = mb.getBufferIdentifier();
if (archiveName.empty())
fatal(filename + ": " + msg);
else
fatal(archiveName + "(" + filename + "): " + msg);
};
if (!mb.getBuffer().startswith(ElfMagic))
report("not an ELF file");
if (endian != ELFDATA2LSB && endian != ELFDATA2MSB)
report("corrupted ELF file: invalid data encoding");
if (size != ELFCLASS32 && size != ELFCLASS64)
report("corrupted ELF file: invalid file class");
size_t bufSize = mb.getBuffer().size();
if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) ||
(size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr)))
report("corrupted ELF file: file is too short");
if (size == ELFCLASS32)
return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
}
InputFile::InputFile(Kind k, MemoryBufferRef m)
: mb(m), groupId(nextGroupId), fileKind(k) {
// All files within the same --{start,end}-group get the same group ID.
// Otherwise, a new file will get a new group ID.
if (!isInGroup)
++nextGroupId;
}
Optional<MemoryBufferRef> elf::readFile(StringRef path) {
llvm::TimeTraceScope timeScope("Load input files", path);
// The --chroot option changes our virtual root directory.
// This is useful when you are dealing with files created by --reproduce.
if (!config->chroot.empty() && path.startswith("/"))
path = saver().save(config->chroot + path);
log(path);
config->dependencyFiles.insert(llvm::CachedHashString(path));
auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false,
/*RequiresNullTerminator=*/false);
if (auto ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
return None;
}
MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef();
ctx->memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership
if (tar)
tar->append(relativeToRoot(path), mbref.getBuffer());
return mbref;
}
// All input object files must be for the same architecture
// (e.g. it does not make sense to link x86 object files with
// MIPS object files.) This function checks for that error.
static bool isCompatible(InputFile *file) {
if (!file->isElf() && !isa<BitcodeFile>(file))
return true;
if (file->ekind == config->ekind && file->emachine == config->emachine) {
if (config->emachine != EM_MIPS)
return true;
if (isMipsN32Abi(file) == config->mipsN32Abi)
return true;
}
StringRef target =
!config->bfdname.empty() ? config->bfdname : config->emulation;
if (!target.empty()) {
error(toString(file) + " is incompatible with " + target);
return false;
}
InputFile *existing = nullptr;
if (!ctx->objectFiles.empty())
existing = ctx->objectFiles[0];
else if (!ctx->sharedFiles.empty())
existing = ctx->sharedFiles[0];
else if (!ctx->bitcodeFiles.empty())
existing = ctx->bitcodeFiles[0];
std::string with;
if (existing)
with = " with " + toString(existing);
error(toString(file) + " is incompatible" + with);
return false;
}
template <class ELFT> static void doParseFile(InputFile *file) {
if (!isCompatible(file))
return;
// Binary file
if (auto *f = dyn_cast<BinaryFile>(file)) {
ctx->binaryFiles.push_back(f);
f->parse();
return;
}
// Lazy object file
if (file->lazy) {
if (auto *f = dyn_cast<BitcodeFile>(file)) {
ctx->lazyBitcodeFiles.push_back(f);
f->parseLazy();
} else {
cast<ObjFile<ELFT>>(file)->parseLazy();
}
return;
}
if (config->trace)
message(toString(file));
// .so file
if (auto *f = dyn_cast<SharedFile>(file)) {
f->parse<ELFT>();
return;
}
// LLVM bitcode file
if (auto *f = dyn_cast<BitcodeFile>(file)) {
ctx->bitcodeFiles.push_back(f);
f->parse<ELFT>();
return;
}
// Regular object file
ctx->objectFiles.push_back(cast<ELFFileBase>(file));
cast<ObjFile<ELFT>>(file)->parse();
}
// Add symbols in File to the symbol table.
void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); }
// Concatenates arguments to construct a string representing an error location.
static std::string createFileLineMsg(StringRef path, unsigned line) {
std::string filename = std::string(path::filename(path));
std::string lineno = ":" + std::to_string(line);
if (filename == path)
return filename + lineno;
return filename + lineno + " (" + path.str() + lineno + ")";
}
template <class ELFT>
static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym,
InputSectionBase &sec, uint64_t offset) {
// In DWARF, functions and variables are stored in different places.
// First, look up a function for a given offset.
if (Optional<DILineInfo> info = file.getDILineInfo(&sec, offset))
return createFileLineMsg(info->FileName, info->Line);
// If it failed, look up again as a variable.
if (Optional<std::pair<std::string, unsigned>> fileLine =
file.getVariableLoc(sym.getName()))
return createFileLineMsg(fileLine->first, fileLine->second);
// File.sourceFile contains STT_FILE symbol, and that is a last resort.
return std::string(file.sourceFile);
}
std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec,
uint64_t offset) {
if (kind() != ObjKind)
return "";
switch (config->ekind) {
default:
llvm_unreachable("Invalid kind");
case ELF32LEKind:
return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), sym, sec, offset);
case ELF32BEKind:
return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), sym, sec, offset);
case ELF64LEKind:
return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), sym, sec, offset);
case ELF64BEKind:
return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), sym, sec, offset);
}
}
StringRef InputFile::getNameForScript() const {
if (archiveName.empty())
return getName();
if (nameForScriptCache.empty())
nameForScriptCache = (archiveName + Twine(':') + getName()).str();
return nameForScriptCache;
}
template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() {
llvm::call_once(initDwarf, [this]() {
dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
std::make_unique<LLDDwarfObj<ELFT>>(this), "",
[&](Error err) { warn(getName() + ": " + toString(std::move(err))); },
[&](Error warning) {
warn(getName() + ": " + toString(std::move(warning)));
}));
});
return dwarf.get();
}
// Returns the pair of file name and line number describing the location of a
// data object (variable, array, etc.) definition.
template <class ELFT>
Optional<std::pair<std::string, unsigned>>
ObjFile<ELFT>::getVariableLoc(StringRef name) {
return getDwarf()->getVariableLoc(name);
}
// Returns source line information for a given offset
// using DWARF debug info.
template <class ELFT>
Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s,
uint64_t offset) {
// Detect SectionIndex for specified section.
uint64_t sectionIndex = object::SectionedAddress::UndefSection;
ArrayRef<InputSectionBase *> sections = s->file->getSections();
for (uint64_t curIndex = 0; curIndex < sections.size(); ++curIndex) {
if (s == sections[curIndex]) {
sectionIndex = curIndex;
break;
}
}
return getDwarf()->getDILineInfo(offset, sectionIndex);
}
ELFFileBase::ELFFileBase(Kind k, MemoryBufferRef mb) : InputFile(k, mb) {
ekind = getELFKind(mb, "");
switch (ekind) {
case ELF32LEKind:
init<ELF32LE>();
break;
case ELF32BEKind:
init<ELF32BE>();
break;
case ELF64LEKind:
init<ELF64LE>();
break;
case ELF64BEKind:
init<ELF64BE>();
break;
default:
llvm_unreachable("getELFKind");
}
}
template <typename Elf_Shdr>
static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) {
for (const Elf_Shdr &sec : sections)
if (sec.sh_type == type)
return &sec;
return nullptr;
}
template <class ELFT> void ELFFileBase::init() {
using Elf_Shdr = typename ELFT::Shdr;
using Elf_Sym = typename ELFT::Sym;
// Initialize trivial attributes.
const ELFFile<ELFT> &obj = getObj<ELFT>();
emachine = obj.getHeader().e_machine;
osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI];
abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION];
ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this);
elfShdrs = sections.data();
numELFShdrs = sections.size();
// Find a symbol table.
bool isDSO =
(identify_magic(mb.getBuffer()) == file_magic::elf_shared_object);
const Elf_Shdr *symtabSec =
findSection(sections, isDSO ? SHT_DYNSYM : SHT_SYMTAB);
if (!symtabSec)
return;
// Initialize members corresponding to a symbol table.
firstGlobal = symtabSec->sh_info;
ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(symtabSec), this);
if (firstGlobal == 0 || firstGlobal > eSyms.size())
fatal(toString(this) + ": invalid sh_info in symbol table");
elfSyms = reinterpret_cast<const void *>(eSyms.data());
numELFSyms = uint32_t(eSyms.size());
stringTable = CHECK(obj.getStringTableForSymtab(*symtabSec, sections), this);
}
template <class ELFT>
uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const {
return CHECK(
this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable),
this);
}
template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
object::ELFFile<ELFT> obj = this->getObj();
// Read a section table. justSymbols is usually false.
if (this->justSymbols)
initializeJustSymbols();
else
initializeSections(ignoreComdats, obj);
// Read a symbol table.
initializeSymbols(obj);
}
// Sections with SHT_GROUP and comdat bits define comdat section groups.
// They are identified and deduplicated by group name. This function
// returns a group name.
template <class ELFT>
StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
const Elf_Shdr &sec) {
typename ELFT::SymRange symbols = this->getELFSyms<ELFT>();
if (sec.sh_info >= symbols.size())
fatal(toString(this) + ": invalid symbol index");
const typename ELFT::Sym &sym = symbols[sec.sh_info];
return CHECK(sym.getName(this->stringTable), this);
}
template <class ELFT>
bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) {
// On a regular link we don't merge sections if -O0 (default is -O1). This
// sometimes makes the linker significantly faster, although the output will
// be bigger.
//
// Doing the same for -r would create a problem as it would combine sections
// with different sh_entsize. One option would be to just copy every SHF_MERGE
// section as is to the output. While this would produce a valid ELF file with
// usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when
// they see two .debug_str. We could have separate logic for combining
// SHF_MERGE sections based both on their name and sh_entsize, but that seems
// to be more trouble than it is worth. Instead, we just use the regular (-O1)
// logic for -r.
if (config->optimize == 0 && !config->relocatable)
return false;
// A mergeable section with size 0 is useless because it doesn't have
// any data to merge. A mergeable string section with size 0 can be
// argued as invalid because it doesn't end with a null character.
// We'll avoid a mess by handling them as if they were non-mergeable.
if (sec.sh_size == 0)
return false;
// Check for sh_entsize. The ELF spec is not clear about the zero
// sh_entsize. It says that "the member [sh_entsize] contains 0 if
// the section does not hold a table of fixed-size entries". We know
// that Rust 1.13 produces a string mergeable section with a zero
// sh_entsize. Here we just accept it rather than being picky about it.
uint64_t entSize = sec.sh_entsize;
if (entSize == 0)
return false;
if (sec.sh_size % entSize)
fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" +
Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" +
Twine(entSize) + ")");
if (sec.sh_flags & SHF_WRITE)
fatal(toString(this) + ":(" + name +
"): writable SHF_MERGE section is not supported");
return true;
}
// This is for --just-symbols.
//
// --just-symbols is a very minor feature that allows you to link your
// output against another existing program, so that if you load both your
// program and the other program into memory, your output can refer to the
// other program's symbols.
//
// When the option is given, we link "just symbols". The section table is
// initialized with null pointers.
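//
// Illustrative usage (assumption about typical invocation):
//   ld.lld plugin.o --just-symbols=main-exe -o plugin.so
// lets plugin.so refer to symbols at the addresses they have in main-exe,
// without copying any of main-exe's sections.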
template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
sections.resize(numELFShdrs);
}
// An ELF object file may contain a `.deplibs` section. If it exists, the
// section contains a list of library specifiers such as `m` for libm. This
// function resolves a given name by finding the first matching library, checking
// the various ways that a library can be specified to LLD. This ELF extension
// is a form of autolinking and is called `dependent libraries`. It is currently
// unique to LLVM and lld.
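//
// Illustrative example (assumption: typical clang usage): a translation unit
// containing `#pragma comment(lib, "m")` gets a `.deplibs` entry "m", so
// linking that object pulls in libm much as an explicit -lm would.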
static void addDependentLibrary(StringRef specifier, const InputFile *f) {
if (!config->dependentLibraries)
return;
if (Optional<std::string> s = searchLibraryBaseName(specifier))
driver->addFile(saver().save(*s), /*withLOption=*/true);
else if (Optional<std::string> s = findFromSearchPaths(specifier))
driver->addFile(saver().save(*s), /*withLOption=*/true);
else if (fs::exists(specifier))
driver->addFile(specifier, /*withLOption=*/false);
else
error(toString(f) +
": unable to find library from dependent library specifier: " +
specifier);
}
// Record the membership of a section group so that in the garbage collection
// pass, section group members are kept or discarded as a unit.
template <class ELFT>
static void handleSectionGroup(ArrayRef<InputSectionBase *> sections,
ArrayRef<typename ELFT::Word> entries) {
bool hasAlloc = false;
for (uint32_t index : entries.slice(1)) {
if (index >= sections.size())
return;
if (InputSectionBase *s = sections[index])
if (s != &InputSection::discarded && s->flags & SHF_ALLOC)
hasAlloc = true;
}
// If any member has the SHF_ALLOC flag, the whole group is subject to garbage
// collection. See the comment in markLive(). This rule retains .debug_types
// and .rela.debug_types.
if (!hasAlloc)
return;
// Connect the members in a circular doubly-linked list via
// nextInSectionGroup.
InputSectionBase *head;
InputSectionBase *prev = nullptr;
for (uint32_t index : entries.slice(1)) {
InputSectionBase *s = sections[index];
if (!s || s == &InputSection::discarded)
continue;
if (prev)
prev->nextInSectionGroup = s;
else
head = s;
prev = s;
}
if (prev)
prev->nextInSectionGroup = head;
}
template <class ELFT>
void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
const llvm::object::ELFFile<ELFT> &obj) {
ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this);
uint64_t size = objSections.size();
this->sections.resize(size);
std::vector<ArrayRef<Elf_Word>> selectedGroups;
for (size_t i = 0; i != size; ++i) {
if (this->sections[i] == &InputSection::discarded)
continue;
const Elf_Shdr &sec = objSections[i];
// SHF_EXCLUDE'ed sections are discarded by the linker. However,
// if -r is given, we'll let the final link discard such sections.
// This is compatible with GNU.
if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) {
if (sec.sh_type == SHT_LLVM_CALL_GRAPH_PROFILE)
cgProfileSectionIndex = i;
if (sec.sh_type == SHT_LLVM_ADDRSIG) {
// We ignore the address-significance table if we know that the object
// file was created by objcopy or ld -r. This is because these tools
// will reorder the symbols in the symbol table, invalidating the data
// in the address-significance table, which refers to symbols by index.
if (sec.sh_link != 0)
this->addrsigSec = &sec;
else if (config->icf == ICFLevel::Safe)
warn(toString(this) +
": --icf=safe conservatively ignores "
"SHT_LLVM_ADDRSIG [index " +
Twine(i) +
"] with sh_link=0 "
"(likely created using objcopy or ld -r)");
}
this->sections[i] = &InputSection::discarded;
continue;
}
switch (sec.sh_type) {
case SHT_GROUP: {
// De-duplicate section groups by their signatures.
StringRef signature = getShtGroupSignature(objSections, sec);
this->sections[i] = &InputSection::discarded;
ArrayRef<Elf_Word> entries =
CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this);
if (entries.empty())
fatal(toString(this) + ": empty SHT_GROUP");
Elf_Word flag = entries[0];
if (flag && flag != GRP_COMDAT)
fatal(toString(this) + ": unsupported SHT_GROUP format");
bool keepGroup =
(flag & GRP_COMDAT) == 0 || ignoreComdats ||
symtab->comdatGroups.try_emplace(CachedHashStringRef(signature), this)
.second;
if (keepGroup) {
if (config->relocatable)
this->sections[i] = createInputSection(
i, sec, check(obj.getSectionName(sec, shstrtab)));
selectedGroups.push_back(entries);
continue;
}
// Otherwise, discard group members.
for (uint32_t secIndex : entries.slice(1)) {
if (secIndex >= size)
fatal(toString(this) +
": invalid section index in group: " + Twine(secIndex));
this->sections[secIndex] = &InputSection::discarded;
}
break;
}
case SHT_SYMTAB_SHNDX:
shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this);
break;
case SHT_SYMTAB:
case SHT_STRTAB:
case SHT_REL:
case SHT_RELA:
case SHT_NULL:
break;
case SHT_LLVM_SYMPART:
ctx->hasSympart.store(true, std::memory_order_relaxed);
LLVM_FALLTHROUGH;
default:
this->sections[i] =
createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab)));
}
}
// We have a second loop. It is used to:
// 1) handle SHF_LINK_ORDER sections.
// 2) create SHT_REL[A] sections. In some cases the section header index of a
// relocation section may be smaller than that of the relocated section. In
// such cases, the relocation section would attempt to reference a target
// section that has not yet been created. For simplicity, delay creation of
// relocation sections until now.
for (size_t i = 0; i != size; ++i) {
if (this->sections[i] == &InputSection::discarded)
continue;
const Elf_Shdr &sec = objSections[i];
if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA) {
// Find a relocation target section and associate this section with that.
// Target may have been discarded if it is in a different section group
// and the group is discarded, even though it's a violation of the spec.
// We handle that situation gracefully by discarding dangling relocation
// sections.
const uint32_t info = sec.sh_info;
InputSectionBase *s = getRelocTarget(i, sec, info);
if (!s)
continue;
// The ELF spec allows mergeable sections with relocations, but they are rare,
// and in practice it is hard to merge such sections by contents, because
// applying relocations at the end of linking changes section contents. So, we
// simply handle such sections as non-mergeable ones. Degrading like this
// is acceptable because section merging is optional.
if (auto *ms = dyn_cast<MergeInputSection>(s)) {
s = make<InputSection>(ms->file, ms->flags, ms->type, ms->alignment,
ms->data(), ms->name);
sections[info] = s;
}
if (s->relSecIdx != 0)
error(
toString(s) +
": multiple relocation sections to one section are not supported");
s->relSecIdx = i;
// Relocation sections are usually removed from the output, so return
// `nullptr` for the normal case. However, if -r or --emit-relocs is
// specified, we need to copy them to the output. (Some post link analysis
// tools specify --emit-relocs to obtain the information.)
if (config->copyRelocs) {
auto *isec = make<InputSection>(
*this, sec, check(obj.getSectionName(sec, shstrtab)));
// If the relocated section is discarded (due to /DISCARD/ or
// --gc-sections), the relocation section should be discarded as well.
s->dependentSections.push_back(isec);
sections[i] = isec;
}
continue;
}
// A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have
// the flag.
if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER))
continue;
InputSectionBase *linkSec = nullptr;
if (sec.sh_link < size)
linkSec = this->sections[sec.sh_link];
if (!linkSec)
fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link));
// A SHF_LINK_ORDER section is discarded if its linked-to section is
// discarded.
InputSection *isec = cast<InputSection>(this->sections[i]);
linkSec->dependentSections.push_back(isec);
if (!isa<InputSection>(linkSec))
error("a section " + isec->name +
" with SHF_LINK_ORDER should not refer a non-regular section: " +
toString(linkSec));
}
for (ArrayRef<Elf_Word> entries : selectedGroups)
handleSectionGroup<ELFT>(this->sections, entries);
}
// For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD
// flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how
// the input objects have been compiled.
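//
// Illustrative mapping (assumption, not asserted by this code): objects built
// with -mfloat-abi=soft or -mfloat-abi=softfp normally carry
// Tag_ABI_VFP_args = base AAPCS, while -mfloat-abi=hard objects carry
// Tag_ABI_VFP_args = VFP AAPCS; mixing the two is rejected below.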
static void updateARMVFPArgs(const ARMAttributeParser &attributes,
const InputFile *f) {
Optional<unsigned> attr =
attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args);
if (!attr)
// If an ABI tag isn't present then it is implicitly given the value of 0
// which maps to ARMBuildAttrs::BaseAAPCS. However, many assembler files,
// including some in glibc that don't use FP args (and should have value 3),
// don't have the attribute, so we do not consider an implicit value of 0
// as a clash.
return;
unsigned vfpArgs = *attr;
ARMVFPArgKind arg;
switch (vfpArgs) {
case ARMBuildAttrs::BaseAAPCS:
arg = ARMVFPArgKind::Base;
break;
case ARMBuildAttrs::HardFPAAPCS:
arg = ARMVFPArgKind::VFP;
break;
case ARMBuildAttrs::ToolChainFPPCS:
// Tool chain specific convention that conforms to neither AAPCS variant.
arg = ARMVFPArgKind::ToolChain;
break;
case ARMBuildAttrs::CompatibleFPAAPCS:
// Object compatible with all conventions.
return;
default:
error(toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine(vfpArgs));
return;
}
// Follow ld.bfd and error if there is a mix of calling conventions.
if (config->armVFPArgs != arg && config->armVFPArgs != ARMVFPArgKind::Default)
error(toString(f) + ": incompatible Tag_ABI_VFP_args");
else
config->armVFPArgs = arg;
}
// The ARM support in lld makes some use of instructions that are not available
// on all ARM architectures. Namely:
// - Use of the BLX instruction for interworking between ARM and Thumb state.
// - Use of the extended Thumb branch encoding in relocations.
// - Use of the MOVT/MOVW instructions in Thumb thunks.
// The ARM Attributes section contains information about the architecture chosen
// at compile time. We follow the convention that if at least one input object
// is compiled with an architecture that supports these features then lld is
// permitted to use them.
static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) {
Optional<unsigned> attr =
attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
if (!attr)
return;
auto arch = attr.value();
switch (arch) {
case ARMBuildAttrs::Pre_v4:
case ARMBuildAttrs::v4:
case ARMBuildAttrs::v4T:
// Architectures prior to v5 do not support the BLX instruction.
break;
case ARMBuildAttrs::v5T:
case ARMBuildAttrs::v5TE:
case ARMBuildAttrs::v5TEJ:
case ARMBuildAttrs::v6:
case ARMBuildAttrs::v6KZ:
case ARMBuildAttrs::v6K:
config->armHasBlx = true;
// Architectures used in pre-Cortex processors do not support
// the J1 = 1 J2 = 1 Thumb branch range extension, with the exception
// of architecture v6T2 (arm1156t2-s and arm1156t2f-s), which does.
break;
default:
// All other Architectures have BLX and extended branch encoding
config->armHasBlx = true;
config->armJ1J2BranchEncoding = true;
if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M)
// All Architectures used in Cortex processors with the exception
// of v6-M and v6S-M have the MOVT and MOVW instructions.
config->armHasMovtMovw = true;
break;
}
}
// If a source file is compiled with x86 hardware-assisted call flow control
// enabled, the generated object file contains feature flags indicating that
// fact. This function reads the feature flags and returns them.
//
// Essentially we want to read a single 32-bit value in this function, but this
// function is rather complicated because the value is buried deep inside a
// .note.gnu.property section.
//
// The section consists of one or more NOTE records. Each NOTE record consists
// of zero or more type-length-value fields. We want to find a field of a
// certain type. It seems a bit much just to store a 32-bit value; perhaps
// the ABI is unnecessarily complicated.
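//
// A rough layout sketch of the note, per the GNU property conventions (for
// illustration only):
//
//   Elf_Nhdr { n_namesz = 4, n_descsz, n_type = NT_GNU_PROPERTY_TYPE_0 }
//   "GNU\0"                                // note name
//   { pr_type, pr_datasz, data, padding }  // repeated; we look for the
//                                          // GNU_PROPERTY_*_FEATURE_1_AND type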
template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) {
using Elf_Nhdr = typename ELFT::Nhdr;
using Elf_Note = typename ELFT::Note;
uint32_t featuresSet = 0;
ArrayRef<uint8_t> data = sec.rawData;
auto reportFatal = [&](const uint8_t *place, const char *msg) {
fatal(toString(sec.file) + ":(" + sec.name + "+0x" +
Twine::utohexstr(place - sec.rawData.data()) + "): " + msg);
};
while (!data.empty()) {
// Read one NOTE record.
auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data());
if (data.size() < sizeof(Elf_Nhdr) || data.size() < nhdr->getSize())
reportFatal(data.data(), "data is too short");
Elf_Note note(*nhdr);
if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") {
data = data.slice(nhdr->getSize());
continue;
}
uint32_t featureAndType = config->emachine == EM_AARCH64
? GNU_PROPERTY_AARCH64_FEATURE_1_AND
: GNU_PROPERTY_X86_FEATURE_1_AND;
// Read a body of a NOTE record, which consists of type-length-value fields.
ArrayRef<uint8_t> desc = note.getDesc();
while (!desc.empty()) {
const uint8_t *place = desc.data();
if (desc.size() < 8)
reportFatal(place, "program property is too short");
uint32_t type = read32<ELFT::TargetEndianness>(desc.data());
uint32_t size = read32<ELFT::TargetEndianness>(desc.data() + 4);
desc = desc.slice(8);
if (desc.size() < size)
reportFatal(place, "program property is too short");
if (type == featureAndType) {
// We found a FEATURE_1_AND field. There may be more than one of these
// in a .note.gnu.property section; for a relocatable object we
// accumulate the bits set.
if (size < 4)
reportFatal(place, "FEATURE_1_AND entry is too short");
featuresSet |= read32<ELFT::TargetEndianness>(desc.data());
}
// Padding is present in the note descriptor, if necessary.
desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size));
}
// Go to next NOTE record to look for more FEATURE_1_AND descriptions.
data = data.slice(nhdr->getSize());
}
return featuresSet;
}
template <class ELFT>
InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx,
const Elf_Shdr &sec,
uint32_t info) {
if (info < this->sections.size()) {
InputSectionBase *target = this->sections[info];
// Strictly speaking, a relocation section must be included in the
// group of the section it relocates. However, LLVM 3.3 and earlier
// would fail to do so, so we gracefully handle that case.
if (target == &InputSection::discarded)
return nullptr;
if (target != nullptr)
return target;
}
error(toString(this) + Twine(": relocation section (index ") + Twine(idx) +
") has invalid sh_info (" + Twine(info) + ")");
return nullptr;
}
template <class ELFT>
InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
const Elf_Shdr &sec,
StringRef name) {
if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) {
ARMAttributeParser attributes;
ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(sec));
if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind
? support::little
: support::big)) {
auto *isec = make<InputSection>(*this, sec, name);
warn(toString(isec) + ": " + llvm::toString(std::move(e)));
} else {
updateSupportedARMFeatures(attributes);
updateARMVFPArgs(attributes, this);
// FIXME: Retain the first attribute section we see. The eglibc ARM
// dynamic loaders require the presence of an attribute section for dlopen
// to work. In a full implementation we would merge all attribute
// sections.
if (in.attributes == nullptr) {
in.attributes = std::make_unique<InputSection>(*this, sec, name);
return in.attributes.get();
}
return &InputSection::discarded;
}
}
if (sec.sh_type == SHT_RISCV_ATTRIBUTES && config->emachine == EM_RISCV) {
RISCVAttributeParser attributes;
ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(sec));
if (Error e = attributes.parse(contents, support::little)) {
auto *isec = make<InputSection>(*this, sec, name);
warn(toString(isec) + ": " + llvm::toString(std::move(e)));
} else {
// FIXME: Validate arch tag contains C if and only if EF_RISCV_RVC is
// present.
// FIXME: Retain the first attribute section we see. Tools such as
// llvm-objdump make use of the attribute section to determine which
// standard extensions to enable. In a full implementation we would merge
// all attribute sections.
if (in.attributes == nullptr) {
in.attributes = std::make_unique<InputSection>(*this, sec, name);
return in.attributes.get();
}
return &InputSection::discarded;
}
}
if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config->relocatable) {
ArrayRef<char> data =
CHECK(this->getObj().template getSectionContentsAsArray<char>(sec), this);
if (!data.empty() && data.back() != '\0') {
error(toString(this) +
": corrupted dependent libraries section (unterminated string): " +
name);
return &InputSection::discarded;
}
for (const char *d = data.begin(), *e = data.end(); d < e;) {
StringRef s(d);
addDependentLibrary(s, this);
d += s.size() + 1;
}
return &InputSection::discarded;
}
if (name.startswith(".n")) {
// The GNU linker uses the .note.GNU-stack section as a marker indicating
// that the code in the object file does not expect that the stack is
// executable (in terms of NX bit). If all input files have the marker,
// the GNU linker adds a PT_GNU_STACK segment to tell the loader to
// make the stack non-executable. Most object files have this section as
// of 2017.
//
// But making the stack non-executable is the norm today for security
// reasons. Failure to do so may result in a serious security issue.
// Therefore, we make LLD always add PT_GNU_STACK unless it is
// explicitly told to do otherwise (by -z execstack). Because the stack
// executable-ness is controlled solely by command line options,
// .note.GNU-stack sections are simply ignored.
if (name == ".note.GNU-stack")
return &InputSection::discarded;
// Object files that use processor features such as Intel Control-Flow
// Enforcement (CET) or AArch64 Branch Target Identification (BTI) use a
// .note.gnu.property section containing a bitfield of feature bits like the
// GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag.
//
// Since we merge bitmaps from multiple object files to create a new
// .note.gnu.property containing a single AND'ed bitmap, we discard an input
// file's .note.gnu.property section.
if (name == ".note.gnu.property") {
this->andFeatures = readAndFeatures<ELFT>(InputSection(*this, sec, name));
return &InputSection::discarded;
}
// Split stack is a feature to support a discontiguous stack,
// commonly used in the programming language Go. For the details,
// see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
// for split stack will include a .note.GNU-split-stack section.
if (name == ".note.GNU-split-stack") {
if (config->relocatable) {
error(
"cannot mix split-stack and non-split-stack in a relocatable link");
return &InputSection::discarded;
}
this->splitStack = true;
return &InputSection::discarded;
}
// An object file compiled for split stack, but where some of the
// functions were compiled with the no_split_stack attribute, will
// include a .note.GNU-no-split-stack section.
if (name == ".note.GNU-no-split-stack") {
this->someNoSplitStack = true;
return &InputSection::discarded;
}
// Strip existing .note.gnu.build-id sections so that the output won't have
// more than one build-id. This is not usually a problem because input
// object files normally don't have .build-id sections, but you can create
// such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard
// against it.
if (name == ".note.gnu.build-id")
return &InputSection::discarded;
}
// The linker merges EH (exception handling) frames and creates a
// .eh_frame_hdr section for runtime. So we handle them with a special
// class. For relocatable outputs, they are just passed through.
if (name == ".eh_frame" && !config->relocatable)
return make<EhInputSection>(*this, sec, name);
if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name))
return make<MergeInputSection>(*this, sec, name);
return make<InputSection>(*this, sec, name);
}
// Initialize this->Symbols. this->Symbols is a parallel array to
// its corresponding ELF symbol table.
template <class ELFT>
void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
SymbolTable &symtab = *elf::symtab;
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
symbols.resize(eSyms.size());
// Some entries have been filled by LazyObjFile.
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i)
if (!symbols[i])
symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this));
// Perform symbol resolution on non-local symbols.
SmallVector<unsigned, 32> undefineds;
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
const Elf_Sym &eSym = eSyms[i];
uint32_t secIdx = eSym.st_shndx;
if (secIdx == SHN_UNDEF) {
undefineds.push_back(i);
continue;
}
uint8_t binding = eSym.getBinding();
uint8_t stOther = eSym.st_other;
uint8_t type = eSym.getType();
uint64_t value = eSym.st_value;
uint64_t size = eSym.st_size;
Symbol *sym = symbols[i];
sym->isUsedInRegularObj = true;
if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) {
if (value == 0 || value >= UINT32_MAX)
fatal(toString(this) + ": common symbol '" + sym->getName() +
"' has invalid alignment: " + Twine(value));
hasCommonSyms = true;
sym->resolve(
CommonSymbol{this, StringRef(), binding, stOther, type, value, size});
continue;
}
// Handle global defined symbols. Defined::section will be set in postParse.
sym->resolve(Defined{this, StringRef(), binding, stOther, type, value, size,
nullptr});
}
// Undefined symbols (excluding those defined relative to non-prevailing
// sections) can trigger recursive extract. Process defined symbols first so
// that the relative order between a defined symbol and an undefined symbol
// does not change the symbol resolution behavior. In addition, a set of
// interconnected symbols will all be resolved to the same file, instead of
// being resolved to different files.
for (unsigned i : undefineds) {
const Elf_Sym &eSym = eSyms[i];
Symbol *sym = symbols[i];
sym->resolve(Undefined{this, StringRef(), eSym.getBinding(), eSym.st_other,
eSym.getType()});
sym->isUsedInRegularObj = true;
sym->referenced = true;
}
}
template <class ELFT> void ObjFile<ELFT>::initializeLocalSymbols() {
if (!firstGlobal)
return;
localSymStorage = std::make_unique<SymbolUnion[]>(firstGlobal);
SymbolUnion *locals = localSymStorage.get();
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
for (size_t i = 0, end = firstGlobal; i != end; ++i) {
const Elf_Sym &eSym = eSyms[i];
uint32_t secIdx = eSym.st_shndx;
if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
else if (secIdx >= SHN_LORESERVE)
secIdx = 0;
if (LLVM_UNLIKELY(secIdx >= sections.size()))
fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL))
error(toString(this) + ": non-local symbol (" + Twine(i) +
") found at index < .symtab's sh_info (" + Twine(end) + ")");
InputSectionBase *sec = sections[secIdx];
uint8_t type = eSym.getType();
if (type == STT_FILE)
sourceFile = CHECK(eSym.getName(stringTable), this);
if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name))
fatal(toString(this) + ": invalid symbol name offset");
StringRef name(stringTable.data() + eSym.st_name);
symbols[i] = reinterpret_cast<Symbol *>(locals + i);
if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded)
new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
/*discardedSecIdx=*/secIdx);
else
new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type,
eSym.st_value, eSym.st_size, sec);
symbols[i]->isUsedInRegularObj = true;
}
}
// Called after all ObjFile::parse is called for all ObjFiles. This checks
// duplicate symbols and may do symbol property merge in the future.
template <class ELFT> void ObjFile<ELFT>::postParse() {
static std::mutex mu;
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
const Elf_Sym &eSym = eSyms[i];
Symbol &sym = *symbols[i];
uint32_t secIdx = eSym.st_shndx;
uint8_t binding = eSym.getBinding();
if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK &&
binding != STB_GNU_UNIQUE))
errorOrWarn(toString(this) + ": symbol (" + Twine(i) +
") has invalid binding: " + Twine((int)binding));
// st_value of STT_TLS represents the assigned offset, not the actual
// address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can
// only be referenced by special TLS relocations. It is usually an error if
// an STT_TLS symbol is replaced by a non-STT_TLS symbol, or vice versa.
if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS &&
eSym.getType() != STT_NOTYPE)
errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " +
toString(sym.file) + "\n>>> in " + toString(this));
// Handle non-COMMON defined symbols below. !sym.file allows a symbol
// assignment to redefine a symbol without an error.
if (!sym.file || !sym.isDefined() || secIdx == SHN_UNDEF ||
secIdx == SHN_COMMON)
continue;
if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
else if (secIdx >= SHN_LORESERVE)
secIdx = 0;
if (LLVM_UNLIKELY(secIdx >= sections.size()))
fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
InputSectionBase *sec = sections[secIdx];
if (sec == &InputSection::discarded) {
if (sym.traced) {
printTraceSymbol(Undefined{this, sym.getName(), sym.binding,
sym.stOther, sym.type, secIdx},
sym.getName());
}
if (sym.file == this) {
std::lock_guard<std::mutex> lock(mu);
ctx->nonPrevailingSyms.emplace_back(&sym, secIdx);
}
continue;
}
if (sym.file == this) {
cast<Defined>(sym).section = sec;
continue;
}
- if (binding == STB_WEAK)
+ if (sym.binding == STB_WEAK || binding == STB_WEAK)
continue;
std::lock_guard<std::mutex> lock(mu);
ctx->duplicates.push_back({&sym, this, sec, eSym.st_value});
}
}
// The handling of tentative definitions (COMMON symbols) in archives is murky.
// A tentative definition will be promoted to a global definition if there are
// no non-tentative definitions to dominate it. When we hold a tentative
// definition to a symbol and are inspecting archive members for inclusion
// there are 2 ways we can proceed:
//
// 1) Consider the tentative definition a 'real' definition (i.e. promotion from
// tentative to real definition has already happened) and not inspect
// archive members for Global/Weak definitions to replace the tentative
// definition. An archive member would only be included if it satisfies some
// other undefined symbol. This is the behavior Gold uses.
//
// 2) Consider the tentative definition as still undefined (i.e. the promotion to
// a real definition happens only after all symbol resolution is done).
// The linker searches archive members for STB_GLOBAL definitions to
// replace the tentative definition with. This is the behavior used by
// GNU ld.
//
// The second behavior is inherited from SysVR4, which based it on the FORTRAN
// COMMON BLOCK model. This behavior is needed for proper initialization in old
// (pre F90) FORTRAN code that is packaged into an archive.
//
// The following functions search archive members for definitions to replace
// tentative definitions (implementing behavior 2).
static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName,
StringRef archiveName) {
IRSymtabFile symtabFile = check(readIRSymtab(mb));
for (const irsymtab::Reader::SymbolRef &sym :
symtabFile.TheReader.symbols()) {
if (sym.isGlobal() && sym.getName() == symName)
return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon();
}
return false;
}
template <class ELFT>
static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
StringRef archiveName) {
ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(mb, archiveName);
StringRef stringtable = obj->getStringTable();
for (auto sym : obj->template getGlobalELFSyms<ELFT>()) {
Expected<StringRef> name = sym.getName(stringtable);
if (name && name.get() == symName)
return sym.isDefined() && sym.getBinding() == STB_GLOBAL &&
!sym.isCommon();
}
return false;
}
static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
StringRef archiveName) {
switch (getELFKind(mb, archiveName)) {
case ELF32LEKind:
return isNonCommonDef<ELF32LE>(mb, symName, archiveName);
case ELF32BEKind:
return isNonCommonDef<ELF32BE>(mb, symName, archiveName);
case ELF64LEKind:
return isNonCommonDef<ELF64LE>(mb, symName, archiveName);
case ELF64BEKind:
return isNonCommonDef<ELF64BE>(mb, symName, archiveName);
default:
llvm_unreachable("getELFKind");
}
}
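// A minimal usage sketch (illustrative, not part of the original diff): under
// behavior 2, when the symbol table holds a tentative (COMMON) definition of
// "foo" and an archive member provides a strong STB_GLOBAL definition, the
// member is extracted so the strong definition replaces the tentative one.
// The names `member` and `extract` below are hypothetical stand-ins for the
// real archive-walking code:
//
//   if (sym->isCommon() && member->shouldExtractForCommon("foo"))
//     member->extract(); // strong definition replaces the COMMON symbol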
unsigned SharedFile::vernauxNum;
// Parse the version definitions in the object file if present, and return a
// vector whose nth element contains a pointer to the Elf_Verdef for version
// identifier n. Version identifiers that are not definitions map to nullptr.
template <typename ELFT>
static SmallVector<const void *, 0>
parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) {
if (!sec)
return {};
// Build the Verdefs array by following the chain of Elf_Verdef objects
// from the start of the .gnu.version_d section.
SmallVector<const void *, 0> verdefs;
const uint8_t *verdef = base + sec->sh_offset;
for (unsigned i = 0, e = sec->sh_info; i != e; ++i) {
auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef);
verdef += curVerdef->vd_next;
unsigned verdefIndex = curVerdef->vd_ndx;
if (verdefIndex >= verdefs.size())
verdefs.resize(verdefIndex + 1);
verdefs[verdefIndex] = curVerdef;
}
return verdefs;
}
// Parse SHT_GNU_verneed to properly set the name of a versioned undefined
// symbol. We detect fatal issues which would cause vulnerabilities, but do not
// implement sophisticated error checking like in llvm-readobj because the value
// of such diagnostics is low.
template <typename ELFT>
std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj,
const typename ELFT::Shdr *sec) {
if (!sec)
return {};
std::vector<uint32_t> verneeds;
ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(*sec), this);
const uint8_t *verneedBuf = data.begin();
for (unsigned i = 0; i != sec->sh_info; ++i) {
if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end())
fatal(toString(this) + " has an invalid Verneed");
auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf);
const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux;
for (unsigned j = 0; j != vn->vn_cnt; ++j) {
if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end())
fatal(toString(this) + " has an invalid Vernaux");
auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf);
if (aux->vna_name >= this->stringTable.size())
fatal(toString(this) + " has a Vernaux with an invalid vna_name");
uint16_t version = aux->vna_other & VERSYM_VERSION;
if (version >= verneeds.size())
verneeds.resize(version + 1);
verneeds[version] = aux->vna_name;
vernauxBuf += aux->vna_next;
}
verneedBuf += vn->vn_next;
}
return verneeds;
}
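// Worked example (illustrative): a Vernaux whose vna_other field is 0x8003
// has VERSYM_HIDDEN (0x8000) set and a version index of 3 (0x8003 &
// VERSYM_VERSION), so verneeds[3] receives that entry's vna_name offset.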
// We do not usually care about alignments of data in shared object
// files because the loader takes care of it. However, if we promote a
// DSO symbol to point to .bss due to copy relocation, we need to keep
// the original alignment requirements. We infer it in this function.
template <typename ELFT>
static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections,
const typename ELFT::Sym &sym) {
uint64_t ret = UINT64_MAX;
if (sym.st_value)
ret = 1ULL << countTrailingZeros((uint64_t)sym.st_value);
if (0 < sym.st_shndx && sym.st_shndx < sections.size())
ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign);
return (ret > UINT32_MAX) ? 0 : ret;
}
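// Worked example (illustrative): for st_value == 0x1008,
// countTrailingZeros(0x1008) == 3 yields a provisional alignment of
// 1 << 3 == 8; if the containing section has sh_addralign == 4, the
// result is min(8, 4) == 4.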
// Fully parse the shared object file.
//
// This function parses symbol versions. If a DSO has version information,
// the file has a ".gnu.version_d" section which contains symbol version
// definitions. Each symbol is associated with one version through a table in
// the ".gnu.version" section. That table is a parallel array to the symbol
// table, and each table entry contains an index into ".gnu.version_d".
//
// The special index 0 is reserved for VER_NDX_LOCAL and 1 is for
// VER_NDX_GLOBAL. There's no table entry for these special versions in
// ".gnu.version_d".
//
// The file format for symbol versioning is perhaps a bit more complicated
// than necessary, but you can easily understand the code if you wrap your
// head around the data structure described above.
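// Worked example (illustrative): if the ".gnu.version" entry for a defined
// symbol "foo" holds index 3 and verdefs[3] names version "V2", the symbol
// is also added under the versioned name "foo@V2" so that undefined
// references carrying an explicit version can bind to it.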
template <class ELFT> void SharedFile::parse() {
using Elf_Dyn = typename ELFT::Dyn;
using Elf_Shdr = typename ELFT::Shdr;
using Elf_Sym = typename ELFT::Sym;
using Elf_Verdef = typename ELFT::Verdef;
using Elf_Versym = typename ELFT::Versym;
ArrayRef<Elf_Dyn> dynamicTags;
const ELFFile<ELFT> obj = this->getObj<ELFT>();
ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>();
const Elf_Shdr *versymSec = nullptr;
const Elf_Shdr *verdefSec = nullptr;
const Elf_Shdr *verneedSec = nullptr;
// Search for .dynamic, .gnu.version, .gnu.version_d and .gnu.version_r.
for (const Elf_Shdr &sec : sections) {
switch (sec.sh_type) {
default:
continue;
case SHT_DYNAMIC:
dynamicTags =
CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this);
break;
case SHT_GNU_versym:
versymSec = &sec;
break;
case SHT_GNU_verdef:
verdefSec = &sec;
break;
case SHT_GNU_verneed:
verneedSec = &sec;
break;
}
}
if (versymSec && numELFSyms == 0) {
error("SHT_GNU_versym should be associated with symbol table");
return;
}
// Record DT_NEEDED entries and search for a DT_SONAME tag to initialize
// this->soName.
for (const Elf_Dyn &dyn : dynamicTags) {
if (dyn.d_tag == DT_NEEDED) {
uint64_t val = dyn.getVal();
if (val >= this->stringTable.size())
fatal(toString(this) + ": invalid DT_NEEDED entry");
dtNeeded.push_back(this->stringTable.data() + val);
} else if (dyn.d_tag == DT_SONAME) {
uint64_t val = dyn.getVal();
if (val >= this->stringTable.size())
fatal(toString(this) + ": invalid DT_SONAME entry");
soName = this->stringTable.data() + val;
}
}
// DSOs are uniquified not by filename but by soname.
DenseMap<CachedHashStringRef, SharedFile *>::iterator it;
bool wasInserted;
std::tie(it, wasInserted) =
symtab->soNames.try_emplace(CachedHashStringRef(soName), this);
// If a DSO appears more than once on the command line with and without
// --as-needed, --no-as-needed takes precedence over --as-needed because a
// user can add an extra DSO with --no-as-needed to force it to be added to
// the dependency list.
it->second->isNeeded |= isNeeded;
if (!wasInserted)
return;
ctx->sharedFiles.push_back(this);
verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec);
std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec);
// Parse ".gnu.version" section which is a parallel array for the symbol
// table. If a given file doesn't have a ".gnu.version" section, we use
// VER_NDX_GLOBAL.
size_t size = numELFSyms - firstGlobal;
std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL);
if (versymSec) {
ArrayRef<Elf_Versym> versym =
CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec),
this)
.slice(firstGlobal);
for (size_t i = 0; i < size; ++i)
versyms[i] = versym[i].vs_index;
}
// System libraries can have a lot of symbols with versions. Using a
// fixed buffer for computing the versioned name (foo@ver) can save a
// lot of allocations.
SmallString<0> versionedNameBuffer;
// Add symbols to the symbol table.
SymbolTable &symtab = *elf::symtab;
ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>();
for (size_t i = 0, e = syms.size(); i != e; ++i) {
const Elf_Sym &sym = syms[i];
// The ELF spec requires that all local symbols precede weak or global
// symbols in each symbol table, and that the index of the first non-local
// symbol be stored in sh_info. If a local symbol appears after some
// non-local symbol, that's a violation of the spec.
StringRef name = CHECK(sym.getName(stringTable), this);
if (sym.getBinding() == STB_LOCAL) {
warn("found local symbol '" + name +
"' in global part of symbol table in file " + toString(this));
continue;
}
uint16_t idx = versyms[i] & ~VERSYM_HIDDEN;
if (sym.isUndefined()) {
// For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but
// as of binutils 2.34, GNU ld produces VER_NDX_LOCAL.
if (idx != VER_NDX_LOCAL && idx != VER_NDX_GLOBAL) {
if (idx >= verneeds.size()) {
error("corrupt input file: version need index " + Twine(idx) +
" for symbol " + name + " is out of bounds\n>>> defined in " +
toString(this));
continue;
}
StringRef verName = stringTable.data() + verneeds[idx];
versionedNameBuffer.clear();
name = saver().save(
(name + "@" + verName).toStringRef(versionedNameBuffer));
}
Symbol *s = symtab.addSymbol(
Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()});
s->exportDynamic = true;
if (s->isUndefined() && sym.getBinding() != STB_WEAK &&
config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore)
requiredSymbols.push_back(s);
continue;
}
// The MIPS BFD linker puts the _gp_disp symbol into DSO files and
// incorrectly assigns VER_NDX_LOCAL to this global symbol. Here is a
// workaround for that bug.
if (config->emachine == EM_MIPS && idx == VER_NDX_LOCAL &&
name == "_gp_disp")
continue;
uint32_t alignment = getAlignment<ELFT>(sections, sym);
if (!(versyms[i] & VERSYM_HIDDEN)) {
auto *s = symtab.addSymbol(
SharedSymbol{*this, name, sym.getBinding(), sym.st_other,
sym.getType(), sym.st_value, sym.st_size, alignment});
if (s->file == this)
s->verdefIndex = idx;
}
// Also add the symbol with the versioned name to handle undefined symbols
// with explicit versions.
if (idx == VER_NDX_GLOBAL)
continue;
if (idx >= verdefs.size() || idx == VER_NDX_LOCAL) {
error("corrupt input file: version definition index " + Twine(idx) +
" for symbol " + name + " is out of bounds\n>>> defined in " +
toString(this));
continue;
}
StringRef verName =
stringTable.data() +
reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name;
versionedNameBuffer.clear();
name = (name + "@" + verName).toStringRef(versionedNameBuffer);
auto *s = symtab.addSymbol(
SharedSymbol{*this, saver().save(name), sym.getBinding(), sym.st_other,
sym.getType(), sym.st_value, sym.st_size, alignment});
if (s->file == this)
s->verdefIndex = idx;
}
}
static ELFKind getBitcodeELFKind(const Triple &t) {
if (t.isLittleEndian())
return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind;
return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind;
}
static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) {
switch (t.getArch()) {
case Triple::aarch64:
case Triple::aarch64_be:
return EM_AARCH64;
case Triple::amdgcn:
case Triple::r600:
return EM_AMDGPU;
case Triple::arm:
case Triple::thumb:
return EM_ARM;
case Triple::avr:
return EM_AVR;
case Triple::hexagon:
return EM_HEXAGON;
case Triple::mips:
case Triple::mipsel:
case Triple::mips64:
case Triple::mips64el:
return EM_MIPS;
case Triple::msp430:
return EM_MSP430;
case Triple::ppc:
case Triple::ppcle:
return EM_PPC;
case Triple::ppc64:
case Triple::ppc64le:
return EM_PPC64;
case Triple::riscv32:
case Triple::riscv64:
return EM_RISCV;
case Triple::x86:
return t.isOSIAMCU() ? EM_IAMCU : EM_386;
case Triple::x86_64:
return EM_X86_64;
default:
error(path + ": could not infer e_machine from bitcode target triple " +
t.str());
return EM_NONE;
}
}
static uint8_t getOsAbi(const Triple &t) {
switch (t.getOS()) {
case Triple::AMDHSA:
return ELF::ELFOSABI_AMDGPU_HSA;
case Triple::AMDPAL:
return ELF::ELFOSABI_AMDGPU_PAL;
case Triple::Mesa3D:
return ELF::ELFOSABI_AMDGPU_MESA3D;
default:
return ELF::ELFOSABI_NONE;
}
}
BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive, bool lazy)
: InputFile(BitcodeKind, mb) {
this->archiveName = archiveName;
this->lazy = lazy;
std::string path = mb.getBufferIdentifier().str();
if (config->thinLTOIndexOnly)
path = replaceThinLTOSuffix(mb.getBufferIdentifier());
// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
// name. If two archives define two members with the same name, this
// causes a collision which results in only one of the objects being taken
// into consideration at LTO time (which very likely causes undefined
// symbols later in the link stage). So we append the file offset to make
// the filename unique.
StringRef name = archiveName.empty()
? saver().save(path)
: saver().save(archiveName + "(" + path::filename(path) +
" at " + utostr(offsetInArchive) + ")");
MemoryBufferRef mbref(mb.getBuffer(), name);
obj = CHECK(lto::InputFile::create(mbref), this);
Triple t(obj->getTargetTriple());
ekind = getBitcodeELFKind(t);
emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), t);
osabi = getOsAbi(t);
}
static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
switch (gvVisibility) {
case GlobalValue::DefaultVisibility:
return STV_DEFAULT;
case GlobalValue::HiddenVisibility:
return STV_HIDDEN;
case GlobalValue::ProtectedVisibility:
return STV_PROTECTED;
}
llvm_unreachable("unknown visibility");
}
template <class ELFT>
static void
createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
const lto::InputFile::Symbol &objSym, BitcodeFile &f) {
uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL;
uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
uint8_t visibility = mapVisibility(objSym.getVisibility());
if (!sym)
sym = symtab->insert(saver().save(objSym.getName()));
int c = objSym.getComdatIndex();
if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) {
Undefined newSym(&f, StringRef(), binding, visibility, type);
sym->resolve(newSym);
sym->referenced = true;
return;
}
if (objSym.isCommon()) {
sym->resolve(CommonSymbol{&f, StringRef(), binding, visibility, STT_OBJECT,
objSym.getCommonAlignment(),
objSym.getCommonSize()});
} else {
Defined newSym(&f, StringRef(), binding, visibility, type, 0, 0, nullptr);
if (objSym.canBeOmittedFromSymbolTable())
newSym.exportDynamic = false;
sym->resolve(newSym);
}
}
template <class ELFT> void BitcodeFile::parse() {
for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) {
keptComdats.push_back(
s.second == Comdat::NoDeduplicate ||
symtab->comdatGroups.try_emplace(CachedHashStringRef(s.first), this)
.second);
}
symbols.resize(obj->symbols().size());
// Process defined symbols first. See the comment in
// ObjFile<ELFT>::initializeSymbols.
for (auto it : llvm::enumerate(obj->symbols()))
if (!it.value().isUndefined()) {
Symbol *&sym = symbols[it.index()];
createBitcodeSymbol<ELFT>(sym, keptComdats, it.value(), *this);
}
for (auto it : llvm::enumerate(obj->symbols()))
if (it.value().isUndefined()) {
Symbol *&sym = symbols[it.index()];
createBitcodeSymbol<ELFT>(sym, keptComdats, it.value(), *this);
}
for (auto l : obj->getDependentLibraries())
addDependentLibrary(l, this);
}
void BitcodeFile::parseLazy() {
SymbolTable &symtab = *elf::symtab;
symbols.resize(obj->symbols().size());
for (auto it : llvm::enumerate(obj->symbols()))
if (!it.value().isUndefined()) {
auto *sym = symtab.insert(saver().save(it.value().getName()));
sym->resolve(LazyObject{*this});
symbols[it.index()] = sym;
}
}
void BitcodeFile::postParse() {
for (auto it : llvm::enumerate(obj->symbols())) {
const Symbol &sym = *symbols[it.index()];
const auto &objSym = it.value();
if (sym.file == this || !sym.isDefined() || objSym.isUndefined() ||
objSym.isCommon() || objSym.isWeak())
continue;
int c = objSym.getComdatIndex();
if (c != -1 && !keptComdats[c])
continue;
reportDuplicate(sym, this, nullptr, 0);
}
}
void BinaryFile::parse() {
ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer());
auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
8, data, ".data");
sections.push_back(section);
// For each input file foo that is embedded into the result as a binary
// blob, we define _binary_foo_{start,end,size} symbols, so that
// user programs can access blobs by name. Non-alphanumeric
// characters in a filename are replaced with underscore.
std::string s = "_binary_" + mb.getBufferIdentifier().str();
for (size_t i = 0; i < s.size(); ++i)
if (!isAlnum(s[i]))
s[i] = '_';
llvm::StringSaver &saver = lld::saver();
symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_start"),
STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0,
0, section});
symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_end"),
STB_GLOBAL, STV_DEFAULT, STT_OBJECT,
data.size(), 0, section});
symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_size"),
STB_GLOBAL, STV_DEFAULT, STT_OBJECT,
data.size(), 0, nullptr});
}
ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName,
bool lazy) {
ELFFileBase *f;
switch (getELFKind(mb, archiveName)) {
case ELF32LEKind:
f = make<ObjFile<ELF32LE>>(mb, archiveName);
break;
case ELF32BEKind:
f = make<ObjFile<ELF32BE>>(mb, archiveName);
break;
case ELF64LEKind:
f = make<ObjFile<ELF64LE>>(mb, archiveName);
break;
case ELF64BEKind:
f = make<ObjFile<ELF64BE>>(mb, archiveName);
break;
default:
llvm_unreachable("getELFKind");
}
f->lazy = lazy;
return f;
}
template <class ELFT> void ObjFile<ELFT>::parseLazy() {
const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>();
SymbolTable &symtab = *elf::symtab;
symbols.resize(eSyms.size());
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i)
if (eSyms[i].st_shndx != SHN_UNDEF)
symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this));
// Replace existing symbols with LazyObject symbols.
//
// resolve() may trigger this->extract() if an existing symbol is an undefined
// symbol. If that happens, this function has served its purpose, and we can
// exit from the loop early.
for (Symbol *sym : makeArrayRef(symbols).slice(firstGlobal))
if (sym) {
sym->resolve(LazyObject{*this});
if (!lazy)
return;
}
}
bool InputFile::shouldExtractForCommon(StringRef name) {
if (isa<BitcodeFile>(this))
return isBitcodeNonCommonDef(mb, name, archiveName);
return isNonCommonDef(mb, name, archiveName);
}
std::string elf::replaceThinLTOSuffix(StringRef path) {
StringRef suffix = config->thinLTOObjectSuffixReplace.first;
StringRef repl = config->thinLTOObjectSuffixReplace.second;
if (path.consume_back(suffix))
return (path + repl).str();
return std::string(path);
}
template void BitcodeFile::parse<ELF32LE>();
template void BitcodeFile::parse<ELF32BE>();
template void BitcodeFile::parse<ELF64LE>();
template void BitcodeFile::parse<ELF64BE>();
template class elf::ObjFile<ELF32LE>;
template class elf::ObjFile<ELF32BE>;
template class elf::ObjFile<ELF64LE>;
template class elf::ObjFile<ELF64BE>;
template void SharedFile::parse<ELF32LE>();
template void SharedFile::parse<ELF32BE>();
template void SharedFile::parse<ELF64LE>();
template void SharedFile::parse<ELF64BE>();
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index ca6cbdfbb8bb..8f267251b7c0 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -1,708 +1,733 @@
//===- UnwindInfoSection.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "UnwindInfoSection.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputSection.h"
#include "OutputSection.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Parallel.h"
#include <numeric>
using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::macho;
#define COMMON_ENCODINGS_MAX 127
#define COMPACT_ENCODINGS_MAX 256
#define SECOND_LEVEL_PAGE_BYTES 4096
#define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t))
#define REGULAR_SECOND_LEVEL_ENTRIES_MAX \
((SECOND_LEVEL_PAGE_BYTES - \
sizeof(unwind_info_regular_second_level_page_header)) / \
sizeof(unwind_info_regular_second_level_entry))
#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX \
((SECOND_LEVEL_PAGE_BYTES - \
sizeof(unwind_info_compressed_second_level_page_header)) / \
sizeof(uint32_t))
#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
#define COMPRESSED_ENTRY_FUNC_OFFSET_MASK \
UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
// Compact Unwind format is a Mach-O evolution of DWARF Unwind that
// optimizes space and exception-time lookup. Most DWARF unwind
// entries can be replaced with Compact Unwind entries, but the ones
// that cannot are retained in DWARF form.
//
// This comment will address macro-level organization of the pre-link
// and post-link compact unwind tables. For micro-level organization
// pertaining to the bitfield layout of the 32-bit compact unwind
// entries, see libunwind/include/mach-o/compact_unwind_encoding.h
//
// Important clarifying factoids:
//
// * __LD,__compact_unwind is the compact unwind format for compiler
// output and linker input. It is never a final output. It could be
// an intermediate output with the `-r` option which retains relocs.
//
// * __TEXT,__unwind_info is the compact unwind format for final
// linker output. It is never an input.
//
// * __TEXT,__eh_frame is the DWARF format for both linker input and output.
//
// * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd
// level) by ascending address, and the pages are referenced by an
// index (1st level) in the section header.
//
// * Following the headers in __TEXT,__unwind_info, the bulk of the
// section contains a vector of compact unwind entries
// `{functionOffset, encoding}` sorted by ascending `functionOffset`.
// Adjacent entries with the same encoding can be folded to great
// advantage, achieving a 3-order-of-magnitude reduction in the
// number of entries.
//
// * The __TEXT,__unwind_info format can accommodate up to 127 unique
// encodings for the space-efficient compressed format. In practice,
// fewer than a dozen unique encodings are used by C++ programs of
// all sizes. When a second-level page saturates its local encoding
// table, we fall back to the regular non-compressed page format, so
// overflowing the 127-encodings limit is handled gracefully.
//
// Refer to the definition of unwind_info_section_header in
// compact_unwind_encoding.h for an overview of the format we are encoding
// here.
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
// TODO(gkm): how do we align the 2nd-level pages?
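// Illustrative sketch (assumed addresses and encodings, not from the
// original diff): the entry vector described above might look like
//   { {0x0000, encA}, {0x0040, encA}, {0x0090, encB} }
// and the two adjacent encA entries fold into a single {0x0000, encA}
// entry covering both functions.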
// The offsets of various fields in the on-disk representation of each compact
// unwind entry.
struct CompactUnwindOffsets {
uint32_t functionAddress;
uint32_t functionLength;
uint32_t encoding;
uint32_t personality;
uint32_t lsda;
CompactUnwindOffsets(size_t wordSize) {
if (wordSize == 8)
init<uint64_t>();
else {
assert(wordSize == 4);
init<uint32_t>();
}
}
private:
template <class Ptr> void init() {
functionAddress = offsetof(Layout<Ptr>, functionAddress);
functionLength = offsetof(Layout<Ptr>, functionLength);
encoding = offsetof(Layout<Ptr>, encoding);
personality = offsetof(Layout<Ptr>, personality);
lsda = offsetof(Layout<Ptr>, lsda);
}
template <class Ptr> struct Layout {
Ptr functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
Ptr personality;
Ptr lsda;
};
};
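// Worked example (illustrative): with wordSize == 8, Layout<uint64_t> yields
// offsets functionAddress = 0, functionLength = 8, encoding = 12,
// personality = 16 and lsda = 24, for a 32-byte on-disk entry; with
// wordSize == 4 the entry shrinks to 20 bytes.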
// LLD's internal representation of a compact unwind entry.
struct CompactUnwindEntry {
uint64_t functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
Symbol *personality;
InputSection *lsda;
};
using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
struct SecondLevelPage {
uint32_t kind;
size_t entryIndex;
size_t entryCount;
size_t byteCount;
std::vector<compact_unwind_encoding_t> localEncodings;
EncodingMap localEncodingIndexes;
};
// UnwindInfoSectionImpl allows us to avoid cluttering our header file with a
// lengthy definition of UnwindInfoSection.
class UnwindInfoSectionImpl final : public UnwindInfoSection {
public:
UnwindInfoSectionImpl() : cuOffsets(target->wordSize) {}
uint64_t getSize() const override { return unwindInfoSize; }
- void prepareRelocations() override;
+ void prepare() override;
void finalize() override;
void writeTo(uint8_t *buf) const override;
private:
void prepareRelocations(ConcatInputSection *);
void relocateCompactUnwind(std::vector<CompactUnwindEntry> &);
void encodePersonalities();
+ Symbol *canonicalizePersonality(Symbol *);
uint64_t unwindInfoSize = 0;
std::vector<decltype(symbols)::value_type> symbolsVec;
CompactUnwindOffsets cuOffsets;
std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
EncodingMap commonEncodingIndexes;
// The entries here will be in the same order as their originating symbols
// in symbolsVec.
std::vector<CompactUnwindEntry> cuEntries;
// Indices into the cuEntries vector.
std::vector<size_t> cuIndices;
std::vector<Symbol *> personalities;
SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
personalityTable;
// Indices into cuEntries for CUEs with a non-null LSDA.
std::vector<size_t> entriesWithLsda;
// Map of cuEntries index to an index within the LSDA array.
DenseMap<size_t, uint32_t> lsdaIndex;
std::vector<SecondLevelPage> secondLevelPages;
uint64_t level2PagesOffset = 0;
};
UnwindInfoSection::UnwindInfoSection()
: SyntheticSection(segment_names::text, section_names::unwindInfo) {
align = 4;
}
// Record function symbols that may need entries emitted in __unwind_info, which
// stores unwind data for address ranges.
//
// Note that if several adjacent functions have the same unwind encoding and
// personality function and no LSDA, they share one unwind entry. For this to
// work, functions without unwind info need explicit "no unwind info" unwind
// entries -- otherwise the unwinder would assume they share the unwind info
// of the closest preceding function that has it. Thus, we add
// function symbols for each unique address regardless of whether they have
// associated unwind info.
void UnwindInfoSection::addSymbol(const Defined *d) {
if (d->unwindEntry)
allEntriesAreOmitted = false;
// We don't yet know the final output address of this symbol, but we know
// that it is uniquely determined by the combination of isec and value, so
// we use that as the key here.
auto p = symbols.insert({{d->isec, d->value}, d});
// If we have multiple symbols at the same address, only one of them can have
// an associated unwind entry.
if (!p.second && d->unwindEntry) {
assert(!p.first->second->unwindEntry);
p.first->second = d;
}
}
-void UnwindInfoSectionImpl::prepareRelocations() {
+void UnwindInfoSectionImpl::prepare() {
// This iteration needs to be deterministic, since prepareRelocations may add
// entries to the GOT. Hence the use of a MapVector for
// UnwindInfoSection::symbols.
for (const Defined *d : make_second_range(symbols))
- if (d->unwindEntry &&
- d->unwindEntry->getName() == section_names::compactUnwind)
- prepareRelocations(d->unwindEntry);
+ if (d->unwindEntry) {
+ if (d->unwindEntry->getName() == section_names::compactUnwind) {
+ prepareRelocations(d->unwindEntry);
+ } else {
+ // We don't have to add entries to the GOT here because FDEs have
+ // explicit GOT relocations, so Writer::scanRelocations() will add those
+ // GOT entries. However, we still need to canonicalize the personality
+ // pointers (like prepareRelocations() does for CU entries) in order
+ // to avoid overflowing the 3-personality limit.
+ FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
+ fde.personality = canonicalizePersonality(fde.personality);
+ }
+ }
}
// Compact unwind relocations have different semantics, so we handle them in a
// separate code path from regular relocations. First, we do not wish to add
// rebase opcodes for __LD,__compact_unwind, because that section doesn't
// actually end up in the final binary. Second, personality pointers always
// reside in the GOT and must be treated specially.
void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) {
assert(!isec->shouldOmitFromOutput() &&
"__compact_unwind section should not be omitted");
// FIXME: Make this skip relocations for CompactUnwindEntries that
// point to dead-stripped functions. That might save some amount of
// work. But since there are usually just a few personality functions
// that are referenced from many places, and at least some of them are
// likely live, it wouldn't reduce the number of GOT entries.
for (size_t i = 0; i < isec->relocs.size(); ++i) {
Reloc &r = isec->relocs[i];
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
// Functions and LSDA entries always reside in the same object file as the
// compact unwind entries that reference them, and thus appear as section
// relocs. There is no need to prepare them. We only prepare relocs for
// personality functions.
if (r.offset != cuOffsets.personality)
continue;
if (auto *s = r.referent.dyn_cast<Symbol *>()) {
// Personality functions are nearly always system-defined (e.g.,
// ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an
// application provides its own personality function, it might be
// referenced by an extern Defined symbol reloc, or a local section reloc.
if (auto *defined = dyn_cast<Defined>(s)) {
// XXX(vyng) This is a special case for handling duplicate personality
// symbols. Note that LD64's behavior is a bit different and it is
// inconsistent with how symbol resolution usually works.
//
// So we've decided not to follow it. Instead, simply pick the symbol
// with the same name from the symbol table to replace the local one.
//
// (See discussions/alternatives already considered on D107533)
if (!defined->isExternal())
if (Symbol *sym = symtab->find(defined->getName()))
if (!sym->isLazy())
r.referent = s = sym;
}
if (auto *undefined = dyn_cast<Undefined>(s)) {
treatUndefinedSymbol(*undefined, isec, r.offset);
// treatUndefinedSymbol() can replace s with a DylibSymbol; re-check.
if (isa<Undefined>(s))
continue;
}
+ // Similar to canonicalizePersonality(), but we also register a GOT entry.
if (auto *defined = dyn_cast<Defined>(s)) {
// Check if we have created a synthetic symbol at the same address.
Symbol *&personality =
personalityTable[{defined->isec, defined->value}];
if (personality == nullptr) {
personality = defined;
in.got->addEntry(defined);
} else if (personality != defined) {
r.referent = personality;
}
continue;
}
+
assert(isa<DylibSymbol>(s));
in.got->addEntry(s);
continue;
}
if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
assert(!isCoalescedWeak(referentIsec));
// Personality functions can be referenced via section relocations
// if they live in the same object file. Create placeholder synthetic
// symbols for them in the GOT.
Symbol *&s = personalityTable[{referentIsec, r.addend}];
if (s == nullptr) {
// This runs after dead stripping, so the noDeadStrip argument does not
// matter.
s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec,
r.addend, /*size=*/0, /*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false,
/*includeInSymtab=*/true,
/*isThumb=*/false, /*isReferencedDynamically=*/false,
/*noDeadStrip=*/false);
s->used = true;
in.got->addEntry(s);
}
r.referent = s;
r.addend = 0;
}
}
}
+Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) {
+ if (auto *defined = dyn_cast_or_null<Defined>(personality)) {
+ // Check if we have created a synthetic symbol at the same address.
+ Symbol *&synth = personalityTable[{defined->isec, defined->value}];
+ if (synth == nullptr)
+ synth = defined;
+ else if (synth != defined)
+ return synth;
+ }
+ return personality;
+}
+
// We need to apply the relocations to the pre-link compact unwind section
// before converting it to post-link form. There should only be absolute
// relocations here: since we are not emitting the pre-link CU section, there
// is no source address to make a relative location meaningful.
void UnwindInfoSectionImpl::relocateCompactUnwind(
std::vector<CompactUnwindEntry> &cuEntries) {
parallelFor(0, symbolsVec.size(), [&](size_t i) {
CompactUnwindEntry &cu = cuEntries[i];
const Defined *d = symbolsVec[i].second;
cu.functionAddress = d->getVA();
if (!d->unwindEntry)
return;
// If we have DWARF unwind info, create a CU entry that points to it.
if (d->unwindEntry->getName() == section_names::ehFrame) {
cu.encoding = target->modeDwarfEncoding | d->unwindEntry->outSecOff;
const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
cu.functionLength = fde.funcLength;
cu.personality = fde.personality;
cu.lsda = fde.lsda;
return;
}
assert(d->unwindEntry->getName() == section_names::compactUnwind);
auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) -
target->wordSize;
cu.functionLength =
support::endian::read32le(buf + cuOffsets.functionLength);
cu.encoding = support::endian::read32le(buf + cuOffsets.encoding);
for (const Reloc &r : d->unwindEntry->relocs) {
if (r.offset == cuOffsets.personality) {
cu.personality = r.referent.get<Symbol *>();
} else if (r.offset == cuOffsets.lsda) {
if (auto *referentSym = r.referent.dyn_cast<Symbol *>())
cu.lsda = cast<Defined>(referentSym)->isec;
else
cu.lsda = r.referent.get<InputSection *>();
}
}
});
}
// There should only be a handful of unique personality pointers, so we can
// encode them as 2-bit indices into a small array.
void UnwindInfoSectionImpl::encodePersonalities() {
for (size_t idx : cuIndices) {
CompactUnwindEntry &cu = cuEntries[idx];
if (cu.personality == nullptr)
continue;
// Linear search is fast enough for a small array.
auto it = find(personalities, cu.personality);
uint32_t personalityIndex; // 1-based index
if (it != personalities.end()) {
personalityIndex = std::distance(personalities.begin(), it) + 1;
} else {
personalities.push_back(cu.personality);
personalityIndex = personalities.size();
}
cu.encoding |=
personalityIndex << countTrailingZeros(
static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK));
}
if (personalities.size() > 3)
error("too many personalities (" + Twine(personalities.size()) +
") for compact unwind to encode");
}
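// Worked example (illustrative): UNWIND_PERSONALITY_MASK is 0x30000000, so
// countTrailingZeros(0x30000000) == 28 and a 1-based personalityIndex of 2
// is recorded as (2 << 28) == 0x20000000 OR'd into the entry's encoding,
// which is why only 3 personalities fit.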
static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
// From compact_unwind_encoding.h:
// UNWIND_X86_64_MODE_STACK_IND:
// A "frameless" (RBP not used as frame pointer) function large constant
// stack size. This case is like the previous, except the stack size is too
// large to encode in the compact unwind encoding. Instead it requires that
// the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact
// encoding contains the offset to the nnnnnnnn value in the function in
// UNWIND_X86_64_FRAMELESS_STACK_SIZE.
// Since this means the unwinder has to look at the `subq` in the function
// of the unwind info's unwind address, two functions that have identical
// unwind info can't be folded if it's using this encoding since both
// entries need unique addresses.
static_assert(static_cast<uint32_t>(UNWIND_X86_64_MODE_MASK) ==
static_cast<uint32_t>(UNWIND_X86_MODE_MASK),
"");
static_assert(static_cast<uint32_t>(UNWIND_X86_64_MODE_STACK_IND) ==
static_cast<uint32_t>(UNWIND_X86_MODE_STACK_IND),
"");
if ((target->cpuType == CPU_TYPE_X86_64 || target->cpuType == CPU_TYPE_X86) &&
(encoding & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_STACK_IND) {
// FIXME: Consider passing in the two function addresses and getting
// their two stack sizes off the `subq` and only returning false if they're
// actually different.
return false;
}
return true;
}
// Scan the __LD,__compact_unwind entries and compute the space needs of
// __TEXT,__unwind_info and __TEXT,__eh_frame.
void UnwindInfoSectionImpl::finalize() {
if (symbols.empty())
return;
// At this point, the address space for __TEXT,__text has been
// assigned, so we can relocate the __LD,__compact_unwind entries
// into a temporary buffer. Relocation is necessary in order to sort
// the CU entries by function address. Sorting is necessary so that
// we can fold adjacent CU entries with identical encoding+personality
// and without any LSDA. Folding is necessary because it reduces the
// number of CU entries by as much as 3 orders of magnitude!
cuEntries.resize(symbols.size());
// The "map" part of the symbols MapVector was only needed for deduplication
// in addSymbol(). Now that we are done adding, move the contents to a plain
// std::vector for indexed access.
symbolsVec = symbols.takeVector();
relocateCompactUnwind(cuEntries);
// Rather than sort & fold the 32-byte entries directly, we create a
// vector of indices to entries and sort & fold that instead.
cuIndices.resize(cuEntries.size());
std::iota(cuIndices.begin(), cuIndices.end(), 0);
llvm::sort(cuIndices, [&](size_t a, size_t b) {
return cuEntries[a].functionAddress < cuEntries[b].functionAddress;
});
// Fold adjacent entries with matching encoding+personality and without LSDA
// We use three iterators on the same cuIndices to fold in-situ:
// (1) `foldBegin` is the first of a potential sequence of matching entries
// (2) `foldEnd` is the first non-matching entry after `foldBegin`.
// The semi-open interval [ foldBegin .. foldEnd ) contains a range of
// entries that can be folded into a single entry and written to ...
// (3) `foldWrite`
auto foldWrite = cuIndices.begin();
for (auto foldBegin = cuIndices.begin(); foldBegin < cuIndices.end();) {
auto foldEnd = foldBegin;
// Common LSDA encodings (e.g. for C++ and Objective-C) contain offsets from
// a base address. The base address is normally not contained directly in
// the LSDA, and in that case, the personality function treats the starting
// address of the function (which is computed by the unwinder) as the base
// address and interprets the LSDA accordingly. The unwinder computes the
// starting address of a function as the address associated with its CU
// entry. For this reason, we cannot fold adjacent entries if they have an
// LSDA, because folding would make the unwinder compute the wrong starting
// address for the functions with the folded entries, which in turn would
// cause the personality function to misinterpret the LSDA for those
// functions. In the very rare case where the base address is encoded
// directly in the LSDA, two functions at different addresses would
// necessarily have different LSDAs, so their CU entries would not have been
// folded anyway.
while (++foldEnd < cuIndices.end() &&
cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding &&
!cuEntries[*foldBegin].lsda && !cuEntries[*foldEnd].lsda &&
// If we've gotten to this point, we don't have an LSDA, which should
// also imply that we don't have a personality function, since in all
// likelihood a personality function needs the LSDA to do anything
// useful. It can be technically valid to have a personality function
// and no LSDA though (e.g. the C++ personality __gxx_personality_v0
// is just a no-op without LSDA), so we still check for personality
// function equivalence to handle that case.
cuEntries[*foldBegin].personality ==
cuEntries[*foldEnd].personality &&
canFoldEncoding(cuEntries[*foldEnd].encoding))
;
*foldWrite++ = *foldBegin;
foldBegin = foldEnd;
}
cuIndices.erase(foldWrite, cuIndices.end());
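// Worked example (illustrative): given sorted entries
//   [f1, encA] [f2, encA] [f3, encB] [f4, encA]
// with no LSDAs or personalities, the fold loop keeps f1, f3 and f4: f2
// folds into f1's entry, but f4 cannot fold because f3 intervenes with a
// different encoding.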
encodePersonalities();
// Count frequencies of the folded encodings
EncodingMap encodingFrequencies;
for (size_t idx : cuIndices)
encodingFrequencies[cuEntries[idx].encoding]++;
// Make a vector of encodings, sorted by descending frequency
for (const auto &frequency : encodingFrequencies)
commonEncodings.emplace_back(frequency);
llvm::sort(commonEncodings,
[](const std::pair<compact_unwind_encoding_t, size_t> &a,
const std::pair<compact_unwind_encoding_t, size_t> &b) {
if (a.second == b.second)
// When frequencies match, secondarily sort on encoding
// to maintain parity with validate-unwind-info.py
return a.first > b.first;
return a.second > b.second;
});
// Truncate the vector to 127 elements.
// Common encoding indexes are limited to 0..126, while encoding
// indexes 127..255 are local to each second-level page.
if (commonEncodings.size() > COMMON_ENCODINGS_MAX)
commonEncodings.resize(COMMON_ENCODINGS_MAX);
// Create a map from encoding to common-encoding-table index
for (size_t i = 0; i < commonEncodings.size(); i++)
commonEncodingIndexes[commonEncodings[i].first] = i;
// Split folded encodings into pages, where each page is limited by ...
// (a) 4 KiB capacity
// (b) 24-bit difference between first & final function address
// (c) 8-bit compact-encoding-table index,
// for which 0..126 references the global common-encodings table,
// and 127..255 references a local per-second-level-page table.
// First we try the compact format and determine how many entries fit.
// If more entries fit in the regular format, we use that.
for (size_t i = 0; i < cuIndices.size();) {
size_t idx = cuIndices[i];
secondLevelPages.emplace_back();
SecondLevelPage &page = secondLevelPages.back();
page.entryIndex = i;
uint64_t functionAddressMax =
cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
size_t n = commonEncodings.size();
size_t wordsRemaining =
SECOND_LEVEL_PAGE_WORDS -
sizeof(unwind_info_compressed_second_level_page_header) /
sizeof(uint32_t);
while (wordsRemaining >= 1 && i < cuIndices.size()) {
idx = cuIndices[i];
const CompactUnwindEntry *cuPtr = &cuEntries[idx];
if (cuPtr->functionAddress >= functionAddressMax) {
break;
} else if (commonEncodingIndexes.count(cuPtr->encoding) ||
page.localEncodingIndexes.count(cuPtr->encoding)) {
i++;
wordsRemaining--;
} else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) {
page.localEncodings.emplace_back(cuPtr->encoding);
page.localEncodingIndexes[cuPtr->encoding] = n++;
i++;
wordsRemaining -= 2;
} else {
break;
}
}
page.entryCount = i - page.entryIndex;
// If this is not the final page, see if it's possible to fit more
// entries by using the regular format. This can happen when there
// are many unique encodings, and we saturated the local
// encoding table early.
if (i < cuIndices.size() &&
page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) {
page.kind = UNWIND_SECOND_LEVEL_REGULAR;
page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX,
cuIndices.size() - page.entryIndex);
i = page.entryIndex + page.entryCount;
} else {
page.kind = UNWIND_SECOND_LEVEL_COMPRESSED;
}
}
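// Worked example (illustrative): COMPRESSED_ENTRY_FUNC_OFFSET_BITS is 24,
// so a compressed page only covers functions whose offsets from the page's
// first function fit below COMPRESSED_ENTRY_FUNC_OFFSET_MASK (2^24 - 1
// bytes, roughly 16 MiB); an entry past that bound forces a new
// second-level page.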
for (size_t idx : cuIndices) {
lsdaIndex[idx] = entriesWithLsda.size();
if (cuEntries[idx].lsda)
entriesWithLsda.push_back(idx);
}
// compute size of __TEXT,__unwind_info section
level2PagesOffset = sizeof(unwind_info_section_header) +
commonEncodings.size() * sizeof(uint32_t) +
personalities.size() * sizeof(uint32_t) +
// The extra second-level-page entry is for the sentinel
(secondLevelPages.size() + 1) *
sizeof(unwind_info_section_header_index_entry) +
entriesWithLsda.size() *
sizeof(unwind_info_section_header_lsda_index_entry);
unwindInfoSize =
level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES;
}
// All inputs are relocated and output addresses are known, so write!
void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const {
assert(!cuIndices.empty() && "call only if there is unwind info");
// section header
auto *uip = reinterpret_cast<unwind_info_section_header *>(buf);
uip->version = 1;
uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header);
uip->commonEncodingsArrayCount = commonEncodings.size();
uip->personalityArraySectionOffset =
uip->commonEncodingsArraySectionOffset +
(uip->commonEncodingsArrayCount * sizeof(uint32_t));
uip->personalityArrayCount = personalities.size();
uip->indexSectionOffset = uip->personalityArraySectionOffset +
(uip->personalityArrayCount * sizeof(uint32_t));
uip->indexCount = secondLevelPages.size() + 1;
// Common encodings
auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]);
for (const auto &encoding : commonEncodings)
*i32p++ = encoding.first;
// Personalities
for (const Symbol *personality : personalities)
*i32p++ = personality->getGotVA() - in.header->addr;
// Level-1 index
uint32_t lsdaOffset =
uip->indexSectionOffset +
uip->indexCount * sizeof(unwind_info_section_header_index_entry);
uint64_t l2PagesOffset = level2PagesOffset;
auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
for (const SecondLevelPage &page : secondLevelPages) {
size_t idx = cuIndices[page.entryIndex];
iep->functionOffset = cuEntries[idx].functionAddress - in.header->addr;
iep->secondLevelPagesSectionOffset = l2PagesOffset;
iep->lsdaIndexArraySectionOffset =
lsdaOffset + lsdaIndex.lookup(idx) *
sizeof(unwind_info_section_header_lsda_index_entry);
iep++;
l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
}
// Level-1 sentinel
const CompactUnwindEntry &cuEnd = cuEntries[cuIndices.back()];
iep->functionOffset =
cuEnd.functionAddress - in.header->addr + cuEnd.functionLength;
iep->secondLevelPagesSectionOffset = 0;
iep->lsdaIndexArraySectionOffset =
lsdaOffset + entriesWithLsda.size() *
sizeof(unwind_info_section_header_lsda_index_entry);
iep++;
// LSDAs
auto *lep =
reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
for (size_t idx : entriesWithLsda) {
const CompactUnwindEntry &cu = cuEntries[idx];
lep->lsdaOffset = cu.lsda->getVA(/*off=*/0) - in.header->addr;
lep->functionOffset = cu.functionAddress - in.header->addr;
lep++;
}
// Level-2 pages
auto *pp = reinterpret_cast<uint32_t *>(lep);
for (const SecondLevelPage &page : secondLevelPages) {
if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) {
uintptr_t functionAddressBase =
cuEntries[cuIndices[page.entryIndex]].functionAddress;
auto *p2p =
reinterpret_cast<unwind_info_compressed_second_level_page_header *>(
pp);
p2p->kind = page.kind;
p2p->entryPageOffset =
sizeof(unwind_info_compressed_second_level_page_header);
p2p->entryCount = page.entryCount;
p2p->encodingsPageOffset =
p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
p2p->encodingsCount = page.localEncodings.size();
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
const CompactUnwindEntry &cue =
cuEntries[cuIndices[page.entryIndex + i]];
auto it = commonEncodingIndexes.find(cue.encoding);
if (it == commonEncodingIndexes.end())
it = page.localEncodingIndexes.find(cue.encoding);
*ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
(cue.functionAddress - functionAddressBase);
}
if (!page.localEncodings.empty())
memcpy(ep, page.localEncodings.data(),
page.localEncodings.size() * sizeof(uint32_t));
} else {
auto *p2p =
reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp);
p2p->kind = page.kind;
p2p->entryPageOffset =
sizeof(unwind_info_regular_second_level_page_header);
p2p->entryCount = page.entryCount;
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
const CompactUnwindEntry &cue =
cuEntries[cuIndices[page.entryIndex + i]];
*ep++ = cue.functionAddress;
*ep++ = cue.encoding;
}
}
pp += SECOND_LEVEL_PAGE_WORDS;
}
}
UnwindInfoSection *macho::makeUnwindInfoSection() {
return make<UnwindInfoSectionImpl>();
}
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index c6b334731c75..f2bc3213a127 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -1,43 +1,43 @@
//===- UnwindInfoSection.h ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLD_MACHO_UNWIND_INFO_H
#define LLD_MACHO_UNWIND_INFO_H
#include "ConcatOutputSection.h"
#include "SyntheticSections.h"
#include "llvm/ADT/MapVector.h"
#include "mach-o/compact_unwind_encoding.h"
namespace lld {
namespace macho {
class UnwindInfoSection : public SyntheticSection {
public:
// If all functions are free of unwind info, we can omit the unwind info
// section entirely.
bool isNeeded() const override { return !allEntriesAreOmitted; }
void addSymbol(const Defined *);
- virtual void prepareRelocations() = 0;
+ virtual void prepare() = 0;
protected:
UnwindInfoSection();
llvm::MapVector<std::pair<const InputSection *, uint64_t /*Defined::value*/>,
const Defined *>
symbols;
bool allEntriesAreOmitted = true;
};
UnwindInfoSection *makeUnwindInfoSection();
} // namespace macho
} // namespace lld
#endif
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 3c44a60f4be2..ce9672dd0b4f 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1,1245 +1,1245 @@
//===- Writer.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Writer.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "MapFile.h"
#include "OutputSection.h"
#include "OutputSegment.h"
#include "SectionPriorities.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "UnwindInfoSection.h"
#include "llvm/Support/Parallel.h"
#include "lld/Common/Arrays.h"
#include "lld/Common/CommonLinkerContext.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"
#include <algorithm>
using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::sys;
using namespace lld;
using namespace lld::macho;
namespace {
class LCUuid;
class Writer {
public:
Writer() : buffer(errorHandler().outputBuffer) {}
void treatSpecialUndefineds();
void scanRelocations();
void scanSymbols();
template <class LP> void createOutputSections();
template <class LP> void createLoadCommands();
void finalizeAddresses();
void finalizeLinkEditSegment();
void assignAddresses(OutputSegment *);
void openFile();
void writeSections();
void writeUuid();
void writeCodeSignature();
void writeOutputFile();
template <class LP> void run();
ThreadPool threadPool;
std::unique_ptr<FileOutputBuffer> &buffer;
uint64_t addr = 0;
uint64_t fileOff = 0;
MachHeaderSection *header = nullptr;
StringTableSection *stringTableSection = nullptr;
SymtabSection *symtabSection = nullptr;
IndirectSymtabSection *indirectSymtabSection = nullptr;
CodeSignatureSection *codeSignatureSection = nullptr;
DataInCodeSection *dataInCodeSection = nullptr;
FunctionStartsSection *functionStartsSection = nullptr;
LCUuid *uuidCommand = nullptr;
OutputSegment *linkEditSegment = nullptr;
};
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
class LCDyldInfo final : public LoadCommand {
public:
LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection,
WeakBindingSection *weakBindingSection,
LazyBindingSection *lazyBindingSection,
ExportSection *exportSection)
: rebaseSection(rebaseSection), bindingSection(bindingSection),
weakBindingSection(weakBindingSection),
lazyBindingSection(lazyBindingSection), exportSection(exportSection) {}
uint32_t getSize() const override { return sizeof(dyld_info_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<dyld_info_command *>(buf);
c->cmd = LC_DYLD_INFO_ONLY;
c->cmdsize = getSize();
if (rebaseSection->isNeeded()) {
c->rebase_off = rebaseSection->fileOff;
c->rebase_size = rebaseSection->getFileSize();
}
if (bindingSection->isNeeded()) {
c->bind_off = bindingSection->fileOff;
c->bind_size = bindingSection->getFileSize();
}
if (weakBindingSection->isNeeded()) {
c->weak_bind_off = weakBindingSection->fileOff;
c->weak_bind_size = weakBindingSection->getFileSize();
}
if (lazyBindingSection->isNeeded()) {
c->lazy_bind_off = lazyBindingSection->fileOff;
c->lazy_bind_size = lazyBindingSection->getFileSize();
}
if (exportSection->isNeeded()) {
c->export_off = exportSection->fileOff;
c->export_size = exportSection->getFileSize();
}
}
RebaseSection *rebaseSection;
BindingSection *bindingSection;
WeakBindingSection *weakBindingSection;
LazyBindingSection *lazyBindingSection;
ExportSection *exportSection;
};
class LCSubFramework final : public LoadCommand {
public:
LCSubFramework(StringRef umbrella) : umbrella(umbrella) {}
uint32_t getSize() const override {
return alignTo(sizeof(sub_framework_command) + umbrella.size() + 1,
target->wordSize);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<sub_framework_command *>(buf);
buf += sizeof(sub_framework_command);
c->cmd = LC_SUB_FRAMEWORK;
c->cmdsize = getSize();
c->umbrella = sizeof(sub_framework_command);
memcpy(buf, umbrella.data(), umbrella.size());
buf[umbrella.size()] = '\0';
}
private:
const StringRef umbrella;
};
class LCFunctionStarts final : public LoadCommand {
public:
explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection)
: functionStartsSection(functionStartsSection) {}
uint32_t getSize() const override { return sizeof(linkedit_data_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<linkedit_data_command *>(buf);
c->cmd = LC_FUNCTION_STARTS;
c->cmdsize = getSize();
c->dataoff = functionStartsSection->fileOff;
c->datasize = functionStartsSection->getFileSize();
}
private:
FunctionStartsSection *functionStartsSection;
};
class LCDataInCode final : public LoadCommand {
public:
explicit LCDataInCode(DataInCodeSection *dataInCodeSection)
: dataInCodeSection(dataInCodeSection) {}
uint32_t getSize() const override { return sizeof(linkedit_data_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<linkedit_data_command *>(buf);
c->cmd = LC_DATA_IN_CODE;
c->cmdsize = getSize();
c->dataoff = dataInCodeSection->fileOff;
c->datasize = dataInCodeSection->getFileSize();
}
private:
DataInCodeSection *dataInCodeSection;
};
class LCDysymtab final : public LoadCommand {
public:
LCDysymtab(SymtabSection *symtabSection,
IndirectSymtabSection *indirectSymtabSection)
: symtabSection(symtabSection),
indirectSymtabSection(indirectSymtabSection) {}
uint32_t getSize() const override { return sizeof(dysymtab_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<dysymtab_command *>(buf);
c->cmd = LC_DYSYMTAB;
c->cmdsize = getSize();
c->ilocalsym = 0;
c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols();
c->nextdefsym = symtabSection->getNumExternalSymbols();
c->iundefsym = c->iextdefsym + c->nextdefsym;
c->nundefsym = symtabSection->getNumUndefinedSymbols();
c->indirectsymoff = indirectSymtabSection->fileOff;
c->nindirectsyms = indirectSymtabSection->getNumSymbols();
}
SymtabSection *symtabSection;
IndirectSymtabSection *indirectSymtabSection;
};
template <class LP> class LCSegment final : public LoadCommand {
public:
LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {}
uint32_t getSize() const override {
return sizeof(typename LP::segment_command) +
seg->numNonHiddenSections() * sizeof(typename LP::section);
}
void writeTo(uint8_t *buf) const override {
using SegmentCommand = typename LP::segment_command;
using SectionHeader = typename LP::section;
auto *c = reinterpret_cast<SegmentCommand *>(buf);
buf += sizeof(SegmentCommand);
c->cmd = LP::segmentLCType;
c->cmdsize = getSize();
memcpy(c->segname, name.data(), name.size());
c->fileoff = seg->fileOff;
c->maxprot = seg->maxProt;
c->initprot = seg->initProt;
c->vmaddr = seg->addr;
c->vmsize = seg->vmSize;
c->filesize = seg->fileSize;
c->nsects = seg->numNonHiddenSections();
for (const OutputSection *osec : seg->getSections()) {
if (osec->isHidden())
continue;
auto *sectHdr = reinterpret_cast<SectionHeader *>(buf);
buf += sizeof(SectionHeader);
memcpy(sectHdr->sectname, osec->name.data(), osec->name.size());
memcpy(sectHdr->segname, name.data(), name.size());
sectHdr->addr = osec->addr;
sectHdr->offset = osec->fileOff;
sectHdr->align = Log2_32(osec->align);
sectHdr->flags = osec->flags;
sectHdr->size = osec->getSize();
sectHdr->reserved1 = osec->reserved1;
sectHdr->reserved2 = osec->reserved2;
}
}
private:
StringRef name;
OutputSegment *seg;
};
class LCMain final : public LoadCommand {
uint32_t getSize() const override {
return sizeof(structs::entry_point_command);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<structs::entry_point_command *>(buf);
c->cmd = LC_MAIN;
c->cmdsize = getSize();
if (config->entry->isInStubs())
c->entryoff =
in.stubs->fileOff + config->entry->stubsIndex * target->stubSize;
else
c->entryoff = config->entry->getVA() - in.header->addr;
c->stacksize = 0;
}
};
class LCSymtab final : public LoadCommand {
public:
LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection)
: symtabSection(symtabSection), stringTableSection(stringTableSection) {}
uint32_t getSize() const override { return sizeof(symtab_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<symtab_command *>(buf);
c->cmd = LC_SYMTAB;
c->cmdsize = getSize();
c->symoff = symtabSection->fileOff;
c->nsyms = symtabSection->getNumSymbols();
c->stroff = stringTableSection->fileOff;
c->strsize = stringTableSection->getFileSize();
}
SymtabSection *symtabSection = nullptr;
StringTableSection *stringTableSection = nullptr;
};
// There are several dylib load commands that share the same structure:
// * LC_LOAD_DYLIB
// * LC_ID_DYLIB
// * LC_REEXPORT_DYLIB
class LCDylib final : public LoadCommand {
public:
LCDylib(LoadCommandType type, StringRef path,
uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0)
: type(type), path(path), compatibilityVersion(compatibilityVersion),
currentVersion(currentVersion) {
instanceCount++;
}
uint32_t getSize() const override {
return alignTo(sizeof(dylib_command) + path.size() + 1, 8);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<dylib_command *>(buf);
buf += sizeof(dylib_command);
c->cmd = type;
c->cmdsize = getSize();
c->dylib.name = sizeof(dylib_command);
c->dylib.timestamp = 0;
c->dylib.compatibility_version = compatibilityVersion;
c->dylib.current_version = currentVersion;
memcpy(buf, path.data(), path.size());
buf[path.size()] = '\0';
}
static uint32_t getInstanceCount() { return instanceCount; }
static void resetInstanceCount() { instanceCount = 0; }
private:
LoadCommandType type;
StringRef path;
uint32_t compatibilityVersion;
uint32_t currentVersion;
static uint32_t instanceCount;
};
uint32_t LCDylib::instanceCount = 0;
class LCLoadDylinker final : public LoadCommand {
public:
uint32_t getSize() const override {
return alignTo(sizeof(dylinker_command) + path.size() + 1, 8);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<dylinker_command *>(buf);
buf += sizeof(dylinker_command);
c->cmd = LC_LOAD_DYLINKER;
c->cmdsize = getSize();
c->name = sizeof(dylinker_command);
memcpy(buf, path.data(), path.size());
buf[path.size()] = '\0';
}
private:
// Recent versions of Darwin won't run any binary that has dyld at a
// different location.
const StringRef path = "/usr/lib/dyld";
};
class LCRPath final : public LoadCommand {
public:
explicit LCRPath(StringRef path) : path(path) {}
uint32_t getSize() const override {
return alignTo(sizeof(rpath_command) + path.size() + 1, target->wordSize);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<rpath_command *>(buf);
buf += sizeof(rpath_command);
c->cmd = LC_RPATH;
c->cmdsize = getSize();
c->path = sizeof(rpath_command);
memcpy(buf, path.data(), path.size());
buf[path.size()] = '\0';
}
private:
StringRef path;
};
class LCMinVersion final : public LoadCommand {
public:
explicit LCMinVersion(const PlatformInfo &platformInfo)
: platformInfo(platformInfo) {}
uint32_t getSize() const override { return sizeof(version_min_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<version_min_command *>(buf);
switch (platformInfo.target.Platform) {
case PLATFORM_MACOS:
c->cmd = LC_VERSION_MIN_MACOSX;
break;
case PLATFORM_IOS:
case PLATFORM_IOSSIMULATOR:
c->cmd = LC_VERSION_MIN_IPHONEOS;
break;
case PLATFORM_TVOS:
case PLATFORM_TVOSSIMULATOR:
c->cmd = LC_VERSION_MIN_TVOS;
break;
case PLATFORM_WATCHOS:
case PLATFORM_WATCHOSSIMULATOR:
c->cmd = LC_VERSION_MIN_WATCHOS;
break;
default:
llvm_unreachable("invalid platform");
break;
}
c->cmdsize = getSize();
c->version = encodeVersion(platformInfo.minimum);
c->sdk = encodeVersion(platformInfo.sdk);
}
private:
const PlatformInfo &platformInfo;
};
class LCBuildVersion final : public LoadCommand {
public:
explicit LCBuildVersion(const PlatformInfo &platformInfo)
: platformInfo(platformInfo) {}
const int ntools = 1;
uint32_t getSize() const override {
return sizeof(build_version_command) + ntools * sizeof(build_tool_version);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<build_version_command *>(buf);
c->cmd = LC_BUILD_VERSION;
c->cmdsize = getSize();
c->platform = static_cast<uint32_t>(platformInfo.target.Platform);
c->minos = encodeVersion(platformInfo.minimum);
c->sdk = encodeVersion(platformInfo.sdk);
c->ntools = ntools;
auto *t = reinterpret_cast<build_tool_version *>(&c[1]);
t->tool = TOOL_LD;
t->version = encodeVersion(VersionTuple(
LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH));
}
private:
const PlatformInfo &platformInfo;
};
// Stores a unique identifier for the output file based on an MD5 hash of its
// contents. In order to hash the contents, we must first write them, but
// LC_UUID itself must be part of the written contents in order for all the
// offsets to be calculated correctly. We resolve this circular paradox by
// first writing an LC_UUID with an all-zero UUID, then updating the UUID with
// its real value later.
class LCUuid final : public LoadCommand {
public:
uint32_t getSize() const override { return sizeof(uuid_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<uuid_command *>(buf);
c->cmd = LC_UUID;
c->cmdsize = getSize();
uuidBuf = c->uuid;
}
void writeUuid(uint64_t digest) const {
// xxhash only gives us 8 bytes, so put some fixed data in the other half.
static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size");
memcpy(uuidBuf, "LLD\xa1UU1D", 8);
memcpy(uuidBuf + 8, &digest, 8);
// RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in
// byte 8. Byte 6 is already fine due to the fixed data we put in. We don't
// want to lose bits of the digest in byte 8, so swap that with a byte of
// fixed data that happens to have the right bits set.
std::swap(uuidBuf[3], uuidBuf[8]);
// Claim that this is an MD5-based hash. It isn't, but this signals that
// this is not a time-based and not a random hash. MD5 seems like the least
// bad lie we can put here.
assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3");
assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2");
}
mutable uint8_t *uuidBuf;
};
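// Illustrative call order for the two-phase write described above (a sketch,
// not part of this change; hashContents() is a hypothetical stand-in):
//   uuidCommand->writeTo(buf);        // emits LC_UUID with an all-zero uuid
//                                     // and remembers its location in uuidBuf
//   uint64_t digest = hashContents(); // hash the now-complete file contents
//   uuidCommand->writeUuid(digest);   // patch the real value in place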
template <class LP> class LCEncryptionInfo final : public LoadCommand {
public:
uint32_t getSize() const override {
return sizeof(typename LP::encryption_info_command);
}
void writeTo(uint8_t *buf) const override {
using EncryptionInfo = typename LP::encryption_info_command;
auto *c = reinterpret_cast<EncryptionInfo *>(buf);
buf += sizeof(EncryptionInfo);
c->cmd = LP::encryptionInfoLCType;
c->cmdsize = getSize();
c->cryptoff = in.header->getSize();
auto it = find_if(outputSegments, [](const OutputSegment *seg) {
return seg->name == segment_names::text;
});
assert(it != outputSegments.end());
c->cryptsize = (*it)->fileSize - c->cryptoff;
}
};
class LCCodeSignature final : public LoadCommand {
public:
LCCodeSignature(CodeSignatureSection *section) : section(section) {}
uint32_t getSize() const override { return sizeof(linkedit_data_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<linkedit_data_command *>(buf);
c->cmd = LC_CODE_SIGNATURE;
c->cmdsize = getSize();
c->dataoff = static_cast<uint32_t>(section->fileOff);
c->datasize = section->getSize();
}
CodeSignatureSection *section;
};
} // namespace
void Writer::treatSpecialUndefineds() {
if (config->entry)
if (auto *undefined = dyn_cast<Undefined>(config->entry))
treatUndefinedSymbol(*undefined, "the entry point");
// FIXME: This prints symbols that are undefined both in input files and
// via -u flag twice.
for (const Symbol *sym : config->explicitUndefineds) {
if (const auto *undefined = dyn_cast<Undefined>(sym))
treatUndefinedSymbol(*undefined, "-u");
}
// Literal exported-symbol names must be defined, but glob
// patterns need not match.
for (const CachedHashStringRef &cachedName :
config->exportedSymbols.literals) {
if (const Symbol *sym = symtab->find(cachedName))
if (const auto *undefined = dyn_cast<Undefined>(sym))
treatUndefinedSymbol(*undefined, "-exported_symbol(s_list)");
}
}
// Add stubs and bindings where necessary (e.g. if the symbol is a
// DylibSymbol.)
static void prepareBranchTarget(Symbol *sym) {
if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
if (in.stubs->addEntry(dysym)) {
if (sym->isWeakDef()) {
in.binding->addEntry(dysym, in.lazyPointers->isec,
sym->stubsIndex * target->wordSize);
in.weakBinding->addEntry(sym, in.lazyPointers->isec,
sym->stubsIndex * target->wordSize);
} else {
in.lazyBinding->addEntry(dysym);
}
}
} else if (auto *defined = dyn_cast<Defined>(sym)) {
if (defined->isExternalWeakDef()) {
if (in.stubs->addEntry(sym)) {
in.rebase->addEntry(in.lazyPointers->isec,
sym->stubsIndex * target->wordSize);
in.weakBinding->addEntry(sym, in.lazyPointers->isec,
sym->stubsIndex * target->wordSize);
}
} else if (defined->interposable) {
if (in.stubs->addEntry(sym))
in.lazyBinding->addEntry(sym);
}
} else {
llvm_unreachable("invalid branch target symbol type");
}
}
// Can a symbol's address only be resolved at runtime?

static bool needsBinding(const Symbol *sym) {
if (isa<DylibSymbol>(sym))
return true;
if (const auto *defined = dyn_cast<Defined>(sym))
return defined->isExternalWeakDef() || defined->interposable;
return false;
}
static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
const lld::macho::Reloc &r) {
assert(sym->isLive());
const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
prepareBranchTarget(sym);
} else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
if (relocAttrs.hasAttr(RelocAttrBits::POINTER) || needsBinding(sym))
in.got->addEntry(sym);
} else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
if (needsBinding(sym))
in.tlvPointers->addEntry(sym);
} else if (relocAttrs.hasAttr(RelocAttrBits::UNSIGNED)) {
// References from thread-local variable sections are treated as offsets
// relative to the start of the referent section, and therefore have no
// need of rebase opcodes.
if (!(isThreadLocalVariables(isec->getFlags()) && isa<Defined>(sym)))
addNonLazyBindingEntries(sym, isec, r.offset, r.addend);
}
}
void Writer::scanRelocations() {
TimeTraceScope timeScope("Scan relocations");
// This can't use a for-each loop: It calls treatUndefinedSymbol(), which can
// add to inputSections, which invalidates inputSections's iterators.
for (size_t i = 0; i < inputSections.size(); ++i) {
ConcatInputSection *isec = inputSections[i];
if (isec->shouldOmitFromOutput())
continue;
for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
lld::macho::Reloc &r = *it;
if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) {
// Skip over the following UNSIGNED relocation -- it's just there as the
// minuend, and doesn't have the usual UNSIGNED semantics. We don't want
// to emit rebase opcodes for it.
it++;
continue;
}
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
if (auto *undefined = dyn_cast<Undefined>(sym))
treatUndefinedSymbol(*undefined, isec, r.offset);
// treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check.
if (!isa<Undefined>(sym) && validateSymbolRelocation(sym, isec, r))
prepareSymbolRelocation(sym, isec, r);
} else {
// Canonicalize the referent so that later accesses in Writer won't
// have to worry about it. Perhaps we should do this for Defined::isec
// too...
auto *referentIsec = r.referent.get<InputSection *>();
r.referent = referentIsec->canonical();
if (!r.pcrel)
in.rebase->addEntry(isec, r.offset);
}
}
}
- in.unwindInfo->prepareRelocations();
+ in.unwindInfo->prepare();
}
void Writer::scanSymbols() {
TimeTraceScope timeScope("Scan symbols");
for (Symbol *sym : symtab->getSymbols()) {
if (auto *defined = dyn_cast<Defined>(sym)) {
if (!defined->isLive())
continue;
defined->canonicalize();
if (defined->overridesWeakDef)
in.weakBinding->addNonWeakDefinition(defined);
if (!defined->isAbsolute() && isCodeSection(defined->isec))
in.unwindInfo->addSymbol(defined);
} else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
// This branch intentionally doesn't check isLive().
if (dysym->isDynamicLookup())
continue;
dysym->getFile()->refState =
std::max(dysym->getFile()->refState, dysym->getRefState());
}
}
for (const InputFile *file : inputFiles) {
if (auto *objFile = dyn_cast<ObjFile>(file))
for (Symbol *sym : objFile->symbols) {
if (auto *defined = dyn_cast_or_null<Defined>(sym)) {
if (!defined->isLive())
continue;
defined->canonicalize();
if (!defined->isExternal() && !defined->isAbsolute() &&
isCodeSection(defined->isec))
in.unwindInfo->addSymbol(defined);
}
}
}
}
// TODO: ld64 enforces the old load commands in a few other cases.
static bool useLCBuildVersion(const PlatformInfo &platformInfo) {
static const std::vector<std::pair<PlatformType, VersionTuple>> minVersion = {
{PLATFORM_MACOS, VersionTuple(10, 14)},
{PLATFORM_IOS, VersionTuple(12, 0)},
{PLATFORM_IOSSIMULATOR, VersionTuple(13, 0)},
{PLATFORM_TVOS, VersionTuple(12, 0)},
{PLATFORM_TVOSSIMULATOR, VersionTuple(13, 0)},
{PLATFORM_WATCHOS, VersionTuple(5, 0)},
{PLATFORM_WATCHOSSIMULATOR, VersionTuple(6, 0)}};
auto it = llvm::find_if(minVersion, [&](const auto &p) {
return p.first == platformInfo.target.Platform;
});
return it == minVersion.end() ? true : platformInfo.minimum >= it->second;
}
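// Illustrative outcomes (assumed deployment targets, not from this change): a
// macOS 10.13 target is below the 10.14 threshold in the table above, so this
// returns false and an LC_VERSION_MIN_MACOSX command is emitted; a macOS 11.0
// target returns true and gets LC_BUILD_VERSION. Platforms absent from the
// table (it == minVersion.end()) default to LC_BUILD_VERSION.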
template <class LP> void Writer::createLoadCommands() {
uint8_t segIndex = 0;
for (OutputSegment *seg : outputSegments) {
in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg));
seg->index = segIndex++;
}
in.header->addLoadCommand(make<LCDyldInfo>(
in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports));
in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
in.header->addLoadCommand(
make<LCDysymtab>(symtabSection, indirectSymtabSection));
if (!config->umbrella.empty())
in.header->addLoadCommand(make<LCSubFramework>(config->umbrella));
if (config->emitEncryptionInfo)
in.header->addLoadCommand(make<LCEncryptionInfo<LP>>());
for (StringRef path : config->runtimePaths)
in.header->addLoadCommand(make<LCRPath>(path));
switch (config->outputType) {
case MH_EXECUTE:
in.header->addLoadCommand(make<LCLoadDylinker>());
break;
case MH_DYLIB:
in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName,
config->dylibCompatibilityVersion,
config->dylibCurrentVersion));
break;
case MH_BUNDLE:
break;
default:
llvm_unreachable("unhandled output file type");
}
uuidCommand = make<LCUuid>();
in.header->addLoadCommand(uuidCommand);
if (useLCBuildVersion(config->platformInfo))
in.header->addLoadCommand(make<LCBuildVersion>(config->platformInfo));
else
in.header->addLoadCommand(make<LCMinVersion>(config->platformInfo));
if (config->secondaryPlatformInfo) {
in.header->addLoadCommand(
make<LCBuildVersion>(*config->secondaryPlatformInfo));
}
// This is down here to match ld64's load command order.
if (config->outputType == MH_EXECUTE)
in.header->addLoadCommand(make<LCMain>());
// See OutputFile::buildDylibOrdinalMapping for the corresponding library
// ordinal computation code in ld64.
int64_t dylibOrdinal = 1;
DenseMap<StringRef, int64_t> ordinalForInstallName;
std::vector<DylibFile *> dylibFiles;
for (InputFile *file : inputFiles) {
if (auto *dylibFile = dyn_cast<DylibFile>(file))
dylibFiles.push_back(dylibFile);
}
for (size_t i = 0; i < dylibFiles.size(); ++i)
dylibFiles.insert(dylibFiles.end(), dylibFiles[i]->extraDylibs.begin(),
dylibFiles[i]->extraDylibs.end());
for (DylibFile *dylibFile : dylibFiles) {
if (dylibFile->isBundleLoader) {
dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
// Shortcut since bundle-loader does not re-export the symbols.
dylibFile->reexport = false;
continue;
}
// Don't emit load commands for a dylib that is not referenced if:
// - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER --
// if it's on the linker command line, it's explicit)
// - or it's marked MH_DEAD_STRIPPABLE_DYLIB
// - or the flag -dead_strip_dylibs is used
// FIXME: `isReferenced()` is currently computed before dead code
// stripping, so references from dead code keep a dylib alive. This
// matches ld64, but it's something we should do better.
if (!dylibFile->isReferenced() && !dylibFile->forceNeeded &&
(!dylibFile->isExplicitlyLinked() || dylibFile->deadStrippable ||
config->deadStripDylibs))
continue;
// Several DylibFiles can have the same installName. Only emit a single
// load command for that installName and give all these DylibFiles the
// same ordinal.
// This can happen in several cases:
// - a new framework could change its installName to an older
// framework name via an $ld$ symbol depending on platform_version
// - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd;
// Foo.framework/Foo.tbd is usually a symlink to
// Foo.framework/Versions/Current/Foo.tbd, where
// Foo.framework/Versions/Current is usually a symlink to
// Foo.framework/Versions/A)
// - a framework can be linked both explicitly on the linker
// command line and implicitly as a reexport from a different
// framework. The re-export will usually point to the tbd file
// in Foo.framework/Versions/A/Foo.tbd, while the explicit link will
// usually find Foo.framework/Foo.tbd. These are usually symlinks,
// but in a --reproduce archive they will be identical but distinct
// files.
// In the first case, *semantically distinct* DylibFiles will have the
// same installName.
int64_t &ordinal = ordinalForInstallName[dylibFile->installName];
if (ordinal) {
dylibFile->ordinal = ordinal;
continue;
}
ordinal = dylibFile->ordinal = dylibOrdinal++;
LoadCommandType lcType =
dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak
? LC_LOAD_WEAK_DYLIB
: LC_LOAD_DYLIB;
in.header->addLoadCommand(make<LCDylib>(lcType, dylibFile->installName,
dylibFile->compatibilityVersion,
dylibFile->currentVersion));
if (dylibFile->reexport)
in.header->addLoadCommand(
make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->installName));
}
if (functionStartsSection)
in.header->addLoadCommand(make<LCFunctionStarts>(functionStartsSection));
if (dataInCodeSection)
in.header->addLoadCommand(make<LCDataInCode>(dataInCodeSection));
if (codeSignatureSection)
in.header->addLoadCommand(make<LCCodeSignature>(codeSignatureSection));
const uint32_t MACOS_MAXPATHLEN = 1024;
config->headerPad = std::max(
config->headerPad, (config->headerPadMaxInstallNames
? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN
: 0));
}
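// Worked example for the headerPad computation above (hypothetical numbers):
// with -headerpad_max_install_names and 3 LCDylib commands emitted, headerPad
// becomes max(headerPad, 3 * 1024) = 3072 bytes, reserving room for tools like
// install_name_tool to later grow each dylib path up to MACOS_MAXPATHLEN.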
// Sorting can only happen once all outputs have been collected. Here we sort
// segments, output sections within each segment, and input sections within
// each output section.
static void sortSegmentsAndSections() {
TimeTraceScope timeScope("Sort segments and sections");
sortOutputSegments();
DenseMap<const InputSection *, size_t> isecPriorities =
priorityBuilder.buildInputSectionPriorities();
uint32_t sectionIndex = 0;
for (OutputSegment *seg : outputSegments) {
seg->sortOutputSections();
// References from thread-local variable sections are treated as offsets
// relative to the start of the thread-local data memory area, which
// is initialized via copying all the TLV data sections (which are all
// contiguous). If later data sections require a greater alignment than
// earlier ones, the offsets of data within those sections won't be
// guaranteed to be aligned unless we normalize alignments. We therefore use
// the largest alignment for all TLV data sections.
uint32_t tlvAlign = 0;
for (const OutputSection *osec : seg->getSections())
if (isThreadLocalData(osec->flags) && osec->align > tlvAlign)
tlvAlign = osec->align;
for (OutputSection *osec : seg->getSections()) {
// Now that the output sections are sorted, assign the final
// output section indices.
if (!osec->isHidden())
osec->index = ++sectionIndex;
if (isThreadLocalData(osec->flags)) {
if (!firstTLVDataSection)
firstTLVDataSection = osec;
osec->align = tlvAlign;
}
if (!isecPriorities.empty()) {
if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
llvm::stable_sort(merged->inputs,
[&](InputSection *a, InputSection *b) {
return isecPriorities[a] > isecPriorities[b];
});
}
}
}
}
}
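// Illustrative TLV case (assumed alignments): if __thread_data has align 8 and
// a later __thread_bss has align 16, both are normalized to 16 above so that
// offsets computed relative to the start of the contiguous TLV area remain
// correctly aligned.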
template <class LP> void Writer::createOutputSections() {
TimeTraceScope timeScope("Create output sections");
// First, create hidden sections
stringTableSection = make<StringTableSection>();
symtabSection = makeSymtabSection<LP>(*stringTableSection);
indirectSymtabSection = make<IndirectSymtabSection>();
if (config->adhocCodesign)
codeSignatureSection = make<CodeSignatureSection>();
if (config->emitDataInCodeInfo)
dataInCodeSection = make<DataInCodeSection>();
if (config->emitFunctionStarts)
functionStartsSection = make<FunctionStartsSection>();
if (config->emitBitcodeBundle)
make<BitcodeBundleSection>();
switch (config->outputType) {
case MH_EXECUTE:
make<PageZeroSection>();
break;
case MH_DYLIB:
case MH_BUNDLE:
break;
default:
llvm_unreachable("unhandled output file type");
}
// Then add input sections to output sections.
for (ConcatInputSection *isec : inputSections) {
if (isec->shouldOmitFromOutput())
continue;
ConcatOutputSection *osec = cast<ConcatOutputSection>(isec->parent);
osec->addInput(isec);
osec->inputOrder =
std::min(osec->inputOrder, static_cast<int>(isec->outSecOff));
}
// Once all the inputs are added, we can finalize the output section
// properties and create the corresponding output segments.
for (const auto &it : concatOutputSections) {
StringRef segname = it.first.first;
ConcatOutputSection *osec = it.second;
assert(segname != segment_names::ld);
if (osec->isNeeded()) {
// See comment in ObjFile::splitEhFrames()
if (osec->name == section_names::ehFrame &&
segname == segment_names::text)
osec->align = target->wordSize;
getOrCreateOutputSegment(segname)->addOutputSection(osec);
}
}
for (SyntheticSection *ssec : syntheticSections) {
auto it = concatOutputSections.find({ssec->segname, ssec->name});
// We add all LinkEdit sections here because we don't know if they are
// needed until their finalizeContents() methods get called later. While
// this means that we add some redundant sections to __LINKEDIT, there is
// no redundancy in the output, as we do not emit section headers for
// any LinkEdit sections.
if (ssec->isNeeded() || ssec->segname == segment_names::linkEdit) {
if (it == concatOutputSections.end()) {
getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec);
} else {
fatal("section from " +
toString(it->second->firstSection()->getFile()) +
" conflicts with synthetic section " + ssec->segname + "," +
ssec->name);
}
}
}
// dyld requires __LINKEDIT segment to always exist (even if empty).
linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit);
}
void Writer::finalizeAddresses() {
TimeTraceScope timeScope("Finalize addresses");
uint64_t pageSize = target->getPageSize();
// We could parallelize this loop, but local benchmarking indicates it is
// faster to do it all in the main thread.
for (OutputSegment *seg : outputSegments) {
if (seg == linkEditSegment)
continue;
for (OutputSection *osec : seg->getSections()) {
if (!osec->isNeeded())
continue;
// Other kinds of OutputSections have already been finalized.
if (auto concatOsec = dyn_cast<ConcatOutputSection>(osec))
concatOsec->finalizeContents();
}
}
// Ensure that segments (and the sections they contain) are allocated
// addresses in ascending order, which dyld requires.
//
// Note that at this point, __LINKEDIT sections are empty, but we need to
// determine addresses of other segments/sections before generating its
// contents.
for (OutputSegment *seg : outputSegments) {
if (seg == linkEditSegment)
continue;
seg->addr = addr;
assignAddresses(seg);
// codesign / libstuff checks for segment ordering by verifying that
// `fileOff + fileSize == next segment fileOff`. So we call alignTo() before
// (instead of after) computing fileSize to ensure that the segments are
// contiguous. We handle addr / vmSize similarly for the same reason.
fileOff = alignTo(fileOff, pageSize);
addr = alignTo(addr, pageSize);
seg->vmSize = addr - seg->addr;
seg->fileSize = fileOff - seg->fileOff;
seg->assignAddressesToStartEndSymbols();
}
}
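// Illustrative arithmetic for the alignment above (assumed values): with a
// 16 KiB page size, a segment whose sections end at fileOff 0x5c30 is padded
// so that the next segment starts at alignTo(0x5c30, 0x4000) = 0x8000,
// preserving the fileOff + fileSize == next fileOff invariant that codesign
// checks.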
void Writer::finalizeLinkEditSegment() {
TimeTraceScope timeScope("Finalize __LINKEDIT segment");
// Fill __LINKEDIT contents.
std::vector<LinkEditSection *> linkEditSections{
in.rebase,
in.binding,
in.weakBinding,
in.lazyBinding,
in.exports,
symtabSection,
indirectSymtabSection,
dataInCodeSection,
functionStartsSection,
};
SmallVector<std::shared_future<void>> threadFutures;
threadFutures.reserve(linkEditSections.size());
for (LinkEditSection *osec : linkEditSections)
if (osec)
threadFutures.emplace_back(threadPool.async(
[](LinkEditSection *osec) { osec->finalizeContents(); }, osec));
for (std::shared_future<void> &future : threadFutures)
future.wait();
// Now that __LINKEDIT is filled out, do a proper calculation of its
// addresses and offsets.
linkEditSegment->addr = addr;
assignAddresses(linkEditSegment);
// No need to page-align fileOff / addr here since this is the last segment.
linkEditSegment->vmSize = addr - linkEditSegment->addr;
linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff;
}
void Writer::assignAddresses(OutputSegment *seg) {
seg->fileOff = fileOff;
for (OutputSection *osec : seg->getSections()) {
if (!osec->isNeeded())
continue;
addr = alignTo(addr, osec->align);
fileOff = alignTo(fileOff, osec->align);
osec->addr = addr;
osec->fileOff = isZeroFill(osec->flags) ? 0 : fileOff;
osec->finalize();
osec->assignAddressesToStartEndSymbols();
addr += osec->getSize();
fileOff += osec->getFileSize();
}
}
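// Illustrative walk-through (assumed values): a section with align = 16
// reached at addr 0x100003f0a is placed at alignTo(0x100003f0a, 16) =
// 0x100003f10; a zero-fill section (e.g. __bss) still advances addr by its
// size but gets fileOff = 0 and contributes nothing to the file offset.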
void Writer::openFile() {
Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =
FileOutputBuffer::create(config->outputFile, fileOff,
FileOutputBuffer::F_executable);
if (!bufferOrErr)
fatal("failed to open " + config->outputFile + ": " +
llvm::toString(bufferOrErr.takeError()));
buffer = std::move(*bufferOrErr);
in.bufferStart = buffer->getBufferStart();
}
void Writer::writeSections() {
uint8_t *buf = buffer->getBufferStart();
std::vector<const OutputSection *> osecs;
for (const OutputSegment *seg : outputSegments)
append_range(osecs, seg->getSections());
parallelForEach(osecs.begin(), osecs.end(), [&](const OutputSection *osec) {
osec->writeTo(buf + osec->fileOff);
});
}
// In order to utilize multiple cores, we first split the buffer into chunks,
// compute a hash for each chunk, and then compute a hash of those hash
// values.
void Writer::writeUuid() {
TimeTraceScope timeScope("Computing UUID");
ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()};
unsigned chunkCount = parallel::strategy.compute_thread_count() * 10;
// Round-up integer division
size_t chunkSize = (data.size() + chunkCount - 1) / chunkCount;
std::vector<ArrayRef<uint8_t>> chunks = split(data, chunkSize);
// Leave one slot for filename
std::vector<uint64_t> hashes(chunks.size() + 1);
SmallVector<std::shared_future<void>> threadFutures;
threadFutures.reserve(chunks.size());
for (size_t i = 0; i < chunks.size(); ++i)
threadFutures.emplace_back(threadPool.async(
[&](size_t j) { hashes[j] = xxHash64(chunks[j]); }, i));
for (std::shared_future<void> &future : threadFutures)
future.wait();
// Append the output filename so that identical binaries with different names
// don't get the same UUID.
hashes[chunks.size()] = xxHash64(sys::path::filename(config->finalOutput));
uint64_t digest = xxHash64({reinterpret_cast<uint8_t *>(hashes.data()),
hashes.size() * sizeof(uint64_t)});
uuidCommand->writeUuid(digest);
}
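// Illustrative sizing (assumed values): with 8 worker threads, chunkCount is
// 80, so a 100 MiB output is split into ~1.25 MiB chunks hashed in parallel;
// the 80 chunk hashes plus the filename hash are then hashed once more to
// produce the final 64-bit digest.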
void Writer::writeCodeSignature() {
if (codeSignatureSection) {
TimeTraceScope timeScope("Write code signature");
codeSignatureSection->writeHashes(buffer->getBufferStart());
}
}
void Writer::writeOutputFile() {
TimeTraceScope timeScope("Write output file");
openFile();
reportPendingUndefinedSymbols();
if (errorCount())
return;
writeSections();
writeUuid();
writeCodeSignature();
if (auto e = buffer->commit())
error("failed to write to the output file: " + toString(std::move(e)));
}
template <class LP> void Writer::run() {
treatSpecialUndefineds();
if (config->entry && !isa<Undefined>(config->entry))
prepareBranchTarget(config->entry);
// Canonicalization of all pointers to InputSections should be handled by
// these two scan* methods. I.e. from this point onward, for all live
// InputSections, we should have `isec->canonical() == isec`.
scanSymbols();
scanRelocations();
// Do not proceed if there was an undefined symbol.
reportPendingUndefinedSymbols();
if (errorCount())
return;
if (in.stubHelper->isNeeded())
in.stubHelper->setup();
if (in.objCImageInfo->isNeeded())
in.objCImageInfo->finalizeContents();
// At this point, we should know exactly which output sections are needed,
// courtesy of scanSymbols() and scanRelocations().
createOutputSections<LP>();
// After this point, we create no new segments; HOWEVER, we might
// yet create branch-range extension thunks for architectures whose
// hardware call instructions have limited range, e.g., ARM(64).
// The thunks are created as InputSections interspersed among
// the ordinary __TEXT,__text InputSections.
sortSegmentsAndSections();
createLoadCommands<LP>();
finalizeAddresses();
threadPool.async([&] {
if (LLVM_ENABLE_THREADS && config->timeTraceEnabled)
timeTraceProfilerInitialize(config->timeTraceGranularity, "writeMapFile");
writeMapFile();
if (LLVM_ENABLE_THREADS && config->timeTraceEnabled)
timeTraceProfilerFinishThread();
});
finalizeLinkEditSegment();
writeOutputFile();
}
template <class LP> void macho::writeResult() { Writer().run<LP>(); }
void macho::resetWriter() { LCDylib::resetInstanceCount(); }
void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
if (config->dedupLiterals)
in.cStringSection = make<DeduplicatedCStringSection>();
else
in.cStringSection = make<CStringSection>();
in.wordLiteralSection =
config->dedupLiterals ? make<WordLiteralSection>() : nullptr;
in.rebase = make<RebaseSection>();
in.binding = make<BindingSection>();
in.weakBinding = make<WeakBindingSection>();
in.lazyBinding = make<LazyBindingSection>();
in.exports = make<ExportSection>();
in.got = make<GotSection>();
in.tlvPointers = make<TlvPointerSection>();
in.lazyPointers = make<LazyPointerSection>();
in.stubs = make<StubsSection>();
in.stubHelper = make<StubHelperSection>();
in.unwindInfo = makeUnwindInfoSection();
in.objCImageInfo = make<ObjCImageInfoSection>();
// This section contains space for just a single word, and will be used by
// dyld to cache an address to the image loader it uses.
uint8_t *arr = bAlloc().Allocate<uint8_t>(target->wordSize);
memset(arr, 0, target->wordSize);
in.imageLoaderCache = makeSyntheticInputSection(
segment_names::data, section_names::data, S_REGULAR,
ArrayRef<uint8_t>{arr, target->wordSize},
/*align=*/target->wordSize);
// References from dyld are not visible to us, so ensure this section is
// always treated as live.
in.imageLoaderCache->live = true;
}
OutputSection *macho::firstTLVDataSection = nullptr;
template void macho::writeResult<LP64>();
template void macho::writeResult<ILP32>();
diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig
index c9a6d0f06056..021c7683d170 100644
--- a/lldb/bindings/interfaces.swig
+++ b/lldb/bindings/interfaces.swig
@@ -1,84 +1,81 @@
/* Various liblldb typedefs that SWIG needs to know about. */
#define __extension__ /* Undefine GCC keyword to make Swig happy when processing glibc's stdint.h. */
-/* The ISO C99 standard specifies that in C++ implementations limit macros such
- as INT32_MAX should only be defined if __STDC_LIMIT_MACROS is. */
-#define __STDC_LIMIT_MACROS
%include "stdint.i"
%include "lldb/lldb-defines.h"
%include "lldb/lldb-enumerations.h"
%include "lldb/lldb-forward.h"
%include "lldb/lldb-types.h"
/* Forward declaration of SB classes. */
%include "lldb/API/SBDefines.h"
/* Python interface files with docstrings. */
%include "./interface/SBAddress.i"
%include "./interface/SBAttachInfo.i"
%include "./interface/SBBlock.i"
%include "./interface/SBBreakpoint.i"
%include "./interface/SBBreakpointLocation.i"
%include "./interface/SBBreakpointName.i"
%include "./interface/SBBroadcaster.i"
%include "./interface/SBCommandInterpreter.i"
%include "./interface/SBCommandInterpreterRunOptions.i"
%include "./interface/SBCommandReturnObject.i"
%include "./interface/SBCommunication.i"
%include "./interface/SBCompileUnit.i"
%include "./interface/SBData.i"
%include "./interface/SBDebugger.i"
%include "./interface/SBDeclaration.i"
%include "./interface/SBError.i"
%include "./interface/SBEnvironment.i"
%include "./interface/SBEvent.i"
%include "./interface/SBExecutionContext.i"
%include "./interface/SBExpressionOptions.i"
%include "./interface/SBFile.i"
%include "./interface/SBFileSpec.i"
%include "./interface/SBFileSpecList.i"
%include "./interface/SBFrame.i"
%include "./interface/SBFunction.i"
%include "./interface/SBHostOS.i"
%include "./interface/SBInstruction.i"
%include "./interface/SBInstructionList.i"
%include "./interface/SBLanguageRuntime.i"
%include "./interface/SBLaunchInfo.i"
%include "./interface/SBLineEntry.i"
%include "./interface/SBListener.i"
%include "./interface/SBMemoryRegionInfo.i"
%include "./interface/SBMemoryRegionInfoList.i"
%include "./interface/SBModule.i"
%include "./interface/SBModuleSpec.i"
%include "./interface/SBPlatform.i"
%include "./interface/SBProcess.i"
%include "./interface/SBProcessInfo.i"
%include "./interface/SBQueue.i"
%include "./interface/SBQueueItem.i"
%include "./interface/SBReproducer.i"
%include "./interface/SBSection.i"
%include "./interface/SBSourceManager.i"
%include "./interface/SBStream.i"
%include "./interface/SBStringList.i"
%include "./interface/SBStructuredData.i"
%include "./interface/SBSymbol.i"
%include "./interface/SBSymbolContext.i"
%include "./interface/SBSymbolContextList.i"
%include "./interface/SBTarget.i"
%include "./interface/SBThread.i"
%include "./interface/SBThreadCollection.i"
%include "./interface/SBThreadPlan.i"
%include "./interface/SBTrace.i"
%include "./interface/SBType.i"
%include "./interface/SBTypeCategory.i"
%include "./interface/SBTypeEnumMember.i"
%include "./interface/SBTypeFilter.i"
%include "./interface/SBTypeFormat.i"
%include "./interface/SBTypeNameSpecifier.i"
%include "./interface/SBTypeSummary.i"
%include "./interface/SBTypeSynthetic.i"
%include "./interface/SBUnixSignals.i"
%include "./interface/SBValue.i"
%include "./interface/SBValueList.i"
%include "./interface/SBVariablesOptions.i"
%include "./interface/SBWatchpoint.i"
diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig
index bf3de66b91bf..d45431c771ca 100644
--- a/lldb/bindings/python/python-typemaps.swig
+++ b/lldb/bindings/python/python-typemaps.swig
@@ -1,517 +1,517 @@
/* Typemap definitions, to allow SWIG to properly handle 'char**' data types. */
%inline %{
#include "../bindings/python/python-typemaps.h"
%}
%typemap(in) char ** {
/* Check if it is a list */
if (PythonList::Check($input)) {
PythonList list(PyRefType::Borrowed, $input);
int size = list.GetSize();
int i = 0;
$1 = (char **)malloc((size + 1) * sizeof(char *));
for (i = 0; i < size; i++) {
PythonString py_str = list.GetItemAtIndex(i).AsType<PythonString>();
if (!py_str.IsAllocated()) {
PyErr_SetString(PyExc_TypeError, "list must contain strings");
free($1);
return nullptr;
}
$1[i] = const_cast<char *>(py_str.GetString().data());
}
$1[i] = 0;
} else if ($input == Py_None) {
$1 = NULL;
} else {
PyErr_SetString(PyExc_TypeError, "not a list");
return NULL;
}
}
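// Illustrative usage from Python (a hypothetical wrapped API, values assumed):
// any parameter matched by this typemap accepts a list of strings or None:
//   info = lldb.SBLaunchInfo(["a.out", "--flag"])  # marshalled to a char**
//   info = lldb.SBLaunchInfo(None)                 # passed through as NULL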
%typemap(typecheck) char ** {
/* Check if it is a list */
$1 = 1;
if (PythonList::Check($input)) {
PythonList list(PyRefType::Borrowed, $input);
int size = list.GetSize();
int i = 0;
for (i = 0; i < size; i++) {
PythonString s = list.GetItemAtIndex(i).AsType<PythonString>();
if (!s.IsAllocated()) {
$1 = 0;
}
}
} else {
$1 = (($input == Py_None) ? 1 : 0);
}
}
%typemap(freearg) char** {
free((char *) $1);
}
%typemap(out) char** {
int len;
int i;
len = 0;
while ($1[len])
len++;
PythonList list(len);
for (i = 0; i < len; i++)
list.SetItemAtIndex(i, PythonString($1[i]));
$result = list.release();
}
%typemap(in) lldb::tid_t {
PythonObject obj = Retain<PythonObject>($input);
lldb::tid_t value = unwrapOrSetPythonException(As<unsigned long long>(obj));
if (PyErr_Occurred())
return nullptr;
$1 = value;
}
%typemap(in) lldb::StateType {
PythonObject obj = Retain<PythonObject>($input);
unsigned long long state_type_value =
unwrapOrSetPythonException(As<unsigned long long>(obj));
if (PyErr_Occurred())
return nullptr;
if (state_type_value > lldb::StateType::kLastStateType) {
PyErr_SetString(PyExc_ValueError, "Not a valid StateType value");
return nullptr;
}
$1 = static_cast<lldb::StateType>(state_type_value);
}
/* Typemap definitions to allow SWIG to properly handle char buffer. */
// typemap for a char buffer
%typemap(in) (char *dst, size_t dst_len) {
if (!PyInt_Check($input)) {
PyErr_SetString(PyExc_ValueError, "Expecting an integer");
return NULL;
}
$2 = PyInt_AsLong($input);
if ($2 <= 0) {
PyErr_SetString(PyExc_ValueError, "Positive integer expected");
return NULL;
}
$1 = (char *)malloc($2);
}
// SBProcess::ReadCStringFromMemory() uses a void*, but needs to be treated
// as char data instead of byte data.
%typemap(in) (void *char_buf, size_t size) = (char *dst, size_t dst_len);
// Return the char buffer, discarding any previous return result.
%typemap(argout) (char *dst, size_t dst_len) {
Py_XDECREF($result); /* Blow away any previous result */
if (result == 0) {
PythonString string("");
$result = string.release();
Py_INCREF($result);
} else {
llvm::StringRef ref(static_cast<const char *>($1), result);
PythonString string(ref);
$result = string.release();
}
free($1);
}
// SBProcess::ReadCStringFromMemory() uses a void*, but needs to be treated
// as char data instead of byte data.
%typemap(argout) (void *char_buf, size_t size) = (char *dst, size_t dst_len);
// typemap for handling an snprintf-like API such as SBThread::GetStopDescription.
%typemap(in) (char *dst_or_null, size_t dst_len) {
if (!PyInt_Check($input)) {
PyErr_SetString(PyExc_ValueError, "Expecting an integer");
return NULL;
}
$2 = PyInt_AsLong($input);
if ($2 <= 0) {
PyErr_SetString(PyExc_ValueError, "Positive integer expected");
return NULL;
}
$1 = (char *)malloc($2);
}
%typemap(argout) (char *dst_or_null, size_t dst_len) {
Py_XDECREF($result); /* Blow away any previous result */
llvm::StringRef ref($1);
PythonString string(ref);
$result = string.release();
free($1);
}
// typemap for an outgoing buffer
// See also SBEvent::SBEvent(uint32_t event, const char *cstr, uint32_t cstr_len).
// Ditto for SBProcess::PutSTDIN(const char *src, size_t src_len).
%typemap(in) (const char *cstr, uint32_t cstr_len),
(const char *src, size_t src_len) {
if (PythonString::Check($input)) {
PythonString str(PyRefType::Borrowed, $input);
$1 = (char *)str.GetString().data();
$2 = str.GetSize();
} else if (PythonByteArray::Check($input)) {
PythonByteArray bytearray(PyRefType::Borrowed, $input);
$1 = (char *)bytearray.GetBytes().data();
$2 = bytearray.GetSize();
} else if (PythonBytes::Check($input)) {
PythonBytes bytes(PyRefType::Borrowed, $input);
$1 = (char *)bytes.GetBytes().data();
$2 = bytes.GetSize();
} else {
PyErr_SetString(PyExc_ValueError, "Expecting a string");
return NULL;
}
}
// For SBProcess::WriteMemory, SBTarget::GetInstructions and SBDebugger::DispatchInput.
%typemap(in) (const void *buf, size_t size),
(const void *data, size_t data_len) {
if (PythonString::Check($input)) {
PythonString str(PyRefType::Borrowed, $input);
$1 = (void *)str.GetString().data();
$2 = str.GetSize();
} else if (PythonByteArray::Check($input)) {
PythonByteArray bytearray(PyRefType::Borrowed, $input);
$1 = (void *)bytearray.GetBytes().data();
$2 = bytearray.GetSize();
} else if (PythonBytes::Check($input)) {
PythonBytes bytes(PyRefType::Borrowed, $input);
$1 = (void *)bytes.GetBytes().data();
$2 = bytes.GetSize();
} else {
PyErr_SetString(PyExc_ValueError, "Expecting a buffer");
return NULL;
}
}
// typemap for an incoming buffer
// See also SBProcess::ReadMemory.
%typemap(in) (void *buf, size_t size) {
if (PyInt_Check($input)) {
$2 = PyInt_AsLong($input);
} else if (PyLong_Check($input)) {
$2 = PyLong_AsLong($input);
} else {
PyErr_SetString(PyExc_ValueError, "Expecting an integer or long object");
return NULL;
}
if ($2 <= 0) {
PyErr_SetString(PyExc_ValueError, "Positive integer expected");
return NULL;
}
$1 = (void *)malloc($2);
}
// Return the buffer, discarding any previous return result.
// See also SBProcess::ReadMemory.
%typemap(argout) (void *buf, size_t size) {
Py_XDECREF($result); /* Blow away any previous result */
if (result == 0) {
$result = Py_None;
Py_INCREF($result);
} else {
PythonBytes bytes(static_cast<const uint8_t *>($1), result);
$result = bytes.release();
}
free($1);
}
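// Illustrative usage from Python (addresses and sizes assumed): with the two
// typemaps above, SBProcess::ReadMemory takes just a size and hands back the
// bytes that were read:
//   error = lldb.SBError()
//   content = process.ReadMemory(0x100000000, 16, error)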
%{
namespace {
template <class T>
T PyLongAsT(PyObject *obj) {
static_assert(true, "unsupported type");
}
template <> uint64_t PyLongAsT<uint64_t>(PyObject *obj) {
return static_cast<uint64_t>(PyLong_AsUnsignedLongLong(obj));
}
template <> uint32_t PyLongAsT<uint32_t>(PyObject *obj) {
return static_cast<uint32_t>(PyLong_AsUnsignedLong(obj));
}
template <> int64_t PyLongAsT<int64_t>(PyObject *obj) {
return static_cast<int64_t>(PyLong_AsLongLong(obj));
}
template <> int32_t PyLongAsT<int32_t>(PyObject *obj) {
return static_cast<int32_t>(PyLong_AsLong(obj));
}
template <class T> bool SetNumberFromPyObject(T &number, PyObject *obj) {
if (PyInt_Check(obj))
number = static_cast<T>(PyInt_AsLong(obj));
else if (PyLong_Check(obj))
number = PyLongAsT<T>(obj);
else
return false;
return true;
}
template <> bool SetNumberFromPyObject<double>(double &number, PyObject *obj) {
if (PyFloat_Check(obj)) {
number = PyFloat_AsDouble(obj);
return true;
}
return false;
}
} // namespace
%}
// these typemaps allow Python users to pass list objects
// and have them turn into C++ arrays (this is useful, for instance
// when creating SBData objects from lists of numbers)
%typemap(in) (uint64_t* array, size_t array_len),
(uint32_t* array, size_t array_len),
(int64_t* array, size_t array_len),
(int32_t* array, size_t array_len),
(double* array, size_t array_len) {
/* Check if it is a list */
if (PyList_Check($input)) {
int size = PyList_Size($input);
int i = 0;
$2 = size;
$1 = ($1_type)malloc(size * sizeof($*1_type));
for (i = 0; i < size; i++) {
PyObject *o = PyList_GetItem($input, i);
if (!SetNumberFromPyObject($1[i], o)) {
PyErr_SetString(PyExc_TypeError, "list must contain numbers");
free($1);
return NULL;
}
if (PyErr_Occurred()) {
free($1);
return NULL;
}
}
} else if ($input == Py_None) {
$1 = NULL;
$2 = 0;
} else {
PyErr_SetString(PyExc_TypeError, "not a list");
return NULL;
}
}
%typemap(freearg) (uint64_t* array, size_t array_len),
(uint32_t* array, size_t array_len),
(int64_t* array, size_t array_len),
(int32_t* array, size_t array_len),
(double* array, size_t array_len) {
free($1);
}
// these typemaps convert SBModule::GetVersion() from requiring a memory buffer
// to the more Pythonic style where a list is returned and no previous
// allocation is necessary - this will break if more than 50 versions are ever
// returned
%typemap(typecheck) (uint32_t *versions, uint32_t num_versions) {
$1 = ($input == Py_None ? 1 : 0);
}
%typemap(in, numinputs=0) (uint32_t *versions) {
$1 = (uint32_t *)malloc(sizeof(uint32_t) * 50);
}
%typemap(in, numinputs=0) (uint32_t num_versions) {
$1 = 50;
}
%typemap(argout) (uint32_t *versions, uint32_t num_versions) {
uint32_t count = result;
if (count >= $2)
count = $2;
PyObject *list = PyList_New(count);
for (uint32_t j = 0; j < count; j++) {
PyObject *item = PyInt_FromLong($1[j]);
int ok = PyList_SetItem(list, j, item);
if (ok != 0) {
$result = Py_None;
break;
}
}
$result = list;
}
%typemap(freearg) (uint32_t *versions) {
free($1);
}
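// Illustrative usage from Python: with the typemaps above, the buffer and
// count parameters vanish from the signature and a list comes back, e.g.
//   versions = module.GetVersion()   # e.g. [1200, 2, 0]; at most 50 entries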
// For Log::LogOutputCallback
%typemap(in) (lldb::LogOutputCallback log_callback, void *baton) {
if (!($input == Py_None ||
PyCallable_Check(reinterpret_cast<PyObject *>($input)))) {
PyErr_SetString(PyExc_TypeError, "Need a callable object or None!");
return NULL;
}
// FIXME (filcab): We can't currently check if our callback is already
// LLDBSwigPythonCallPythonLogOutputCallback (to DECREF the previous
// baton) nor can we just remove all traces of a callback, if we want to
// revert to a file logging mechanism.
// Don't lose the callback reference
Py_INCREF($input);
$1 = LLDBSwigPythonCallPythonLogOutputCallback;
$2 = $input;
}
%typemap(typecheck) (lldb::LogOutputCallback log_callback, void *baton) {
$1 = $input == Py_None;
$1 = $1 || PyCallable_Check(reinterpret_cast<PyObject *>($input));
}
%typemap(in) lldb::FileSP {
PythonFile py_file(PyRefType::Borrowed, $input);
if (!py_file) {
PyErr_SetString(PyExc_TypeError, "not a file");
return nullptr;
}
auto sp = unwrapOrSetPythonException(py_file.ConvertToFile());
if (!sp)
return nullptr;
$1 = sp;
}
%typemap(in) lldb::FileSP FORCE_IO_METHODS {
PythonFile py_file(PyRefType::Borrowed, $input);
if (!py_file) {
PyErr_SetString(PyExc_TypeError, "not a file");
return nullptr;
}
auto sp = unwrapOrSetPythonException(
py_file.ConvertToFileForcingUseOfScriptingIOMethods());
if (!sp)
return nullptr;
$1 = sp;
}
%typemap(in) lldb::FileSP BORROWED {
PythonFile py_file(PyRefType::Borrowed, $input);
if (!py_file) {
PyErr_SetString(PyExc_TypeError, "not a file");
return nullptr;
}
auto sp =
unwrapOrSetPythonException(py_file.ConvertToFile(/*borrowed=*/true));
if (!sp)
return nullptr;
$1 = sp;
}
%typemap(in) lldb::FileSP BORROWED_FORCE_IO_METHODS {
PythonFile py_file(PyRefType::Borrowed, $input);
if (!py_file) {
PyErr_SetString(PyExc_TypeError, "not a file");
return nullptr;
}
auto sp = unwrapOrSetPythonException(
py_file.ConvertToFileForcingUseOfScriptingIOMethods(/*borrowed=*/true));
if (!sp)
return nullptr;
$1 = sp;
}
%typecheck(SWIG_TYPECHECK_POINTER) lldb::FileSP {
if (PythonFile::Check($input)) {
$1 = 1;
} else {
PyErr_Clear();
$1 = 0;
}
}
%typemap(out) lldb::FileSP {
$result = nullptr;
- lldb::FileSP &sp = $1;
+ const lldb::FileSP &sp = $1;
if (sp) {
PythonFile pyfile = unwrapOrSetPythonException(PythonFile::FromFile(*sp));
if (!pyfile.IsValid())
return nullptr;
$result = pyfile.release();
}
if (!$result) {
$result = Py_None;
Py_INCREF(Py_None);
}
}
%typemap(in) (const char* string, int len) {
if ($input == Py_None) {
$1 = NULL;
$2 = 0;
} else if (PythonString::Check($input)) {
PythonString py_str(PyRefType::Borrowed, $input);
llvm::StringRef str = py_str.GetString();
$1 = const_cast<char *>(str.data());
$2 = str.size();
// In Python 2, if $input is a PyUnicode object then this
// will trigger a Unicode -> String conversion, in which
// case the `PythonString` will now own the PyString. Thus
// if it goes out of scope, the data will be deleted. The
// only way to avoid this is to leak the Python object in
// that case. Note that if there was no conversion, then
// releasing the string will not leak anything, since we
// created this as a borrowed reference.
py_str.release();
} else {
PyErr_SetString(PyExc_TypeError, "not a string-like object");
return NULL;
}
}
// These two pybuffer macros are copied out of swig/Lib/python/pybuffer.i,
// and fixed so they will not crash if PyObject_GetBuffer fails.
// https://github.com/swig/swig/issues/1640
//
// I've also moved the call to PyBuffer_Release to the end of the SWIG wrapper,
// doing it right away is not legal according to the python buffer protocol.
%define %pybuffer_mutable_binary(TYPEMAP, SIZE)
%typemap(in) (TYPEMAP, SIZE) (Py_buffer_RAII view) {
int res;
Py_ssize_t size = 0;
void *buf = 0;
res = PyObject_GetBuffer($input, &view.buffer, PyBUF_WRITABLE);
if (res < 0) {
PyErr_Clear();
%argument_fail(res, "(TYPEMAP, SIZE)", $symname, $argnum);
}
size = view.buffer.len;
buf = view.buffer.buf;
$1 = ($1_ltype)buf;
$2 = ($2_ltype)(size / sizeof($*1_type));
}
%enddef
%define %pybuffer_binary(TYPEMAP, SIZE)
%typemap(in) (TYPEMAP, SIZE) (Py_buffer_RAII view) {
int res;
Py_ssize_t size = 0;
const void *buf = 0;
res = PyObject_GetBuffer($input, &view.buffer, PyBUF_CONTIG_RO);
if (res < 0) {
PyErr_Clear();
%argument_fail(res, "(TYPEMAP, SIZE)", $symname, $argnum);
}
size = view.buffer.len;
buf = view.buffer.buf;
$1 = ($1_ltype)buf;
$2 = ($2_ltype)(size / sizeof($*1_type));
}
%enddef
%pybuffer_binary(const uint8_t *buf, size_t num_bytes);
%pybuffer_mutable_binary(uint8_t *buf, size_t num_bytes);
diff --git a/lldb/include/lldb/API/SBType.h b/lldb/include/lldb/API/SBType.h
index 244d328b51f4..aa45aeeec476 100644
--- a/lldb/include/lldb/API/SBType.h
+++ b/lldb/include/lldb/API/SBType.h
@@ -1,271 +1,273 @@
//===-- SBType.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_API_SBTYPE_H
#define LLDB_API_SBTYPE_H
#include "lldb/API/SBDefines.h"
namespace lldb {
class SBTypeList;
class LLDB_API SBTypeMember {
public:
SBTypeMember();
SBTypeMember(const lldb::SBTypeMember &rhs);
~SBTypeMember();
lldb::SBTypeMember &operator=(const lldb::SBTypeMember &rhs);
explicit operator bool() const;
bool IsValid() const;
const char *GetName();
lldb::SBType GetType();
uint64_t GetOffsetInBytes();
uint64_t GetOffsetInBits();
bool IsBitfield();
uint32_t GetBitfieldSizeInBits();
bool GetDescription(lldb::SBStream &description,
lldb::DescriptionLevel description_level);
protected:
friend class SBType;
void reset(lldb_private::TypeMemberImpl *);
lldb_private::TypeMemberImpl &ref();
const lldb_private::TypeMemberImpl &ref() const;
std::unique_ptr<lldb_private::TypeMemberImpl> m_opaque_up;
};
class SBTypeMemberFunction {
public:
SBTypeMemberFunction();
SBTypeMemberFunction(const lldb::SBTypeMemberFunction &rhs);
~SBTypeMemberFunction();
lldb::SBTypeMemberFunction &operator=(const lldb::SBTypeMemberFunction &rhs);
explicit operator bool() const;
bool IsValid() const;
const char *GetName();
const char *GetDemangledName();
const char *GetMangledName();
lldb::SBType GetType();
lldb::SBType GetReturnType();
uint32_t GetNumberOfArguments();
lldb::SBType GetArgumentTypeAtIndex(uint32_t);
lldb::MemberFunctionKind GetKind();
bool GetDescription(lldb::SBStream &description,
lldb::DescriptionLevel description_level);
protected:
friend class SBType;
void reset(lldb_private::TypeMemberFunctionImpl *);
lldb_private::TypeMemberFunctionImpl &ref();
const lldb_private::TypeMemberFunctionImpl &ref() const;
lldb::TypeMemberFunctionImplSP m_opaque_sp;
};
class SBType {
public:
SBType();
SBType(const lldb::SBType &rhs);
~SBType();
explicit operator bool() const;
bool IsValid() const;
uint64_t GetByteSize();
bool IsPointerType();
bool IsReferenceType();
bool IsFunctionType();
bool IsPolymorphicClass();
bool IsArrayType();
bool IsVectorType();
bool IsTypedefType();
bool IsAnonymousType();
bool IsScopedEnumerationType();
bool IsAggregateType();
lldb::SBType GetPointerType();
lldb::SBType GetPointeeType();
lldb::SBType GetReferenceType();
lldb::SBType GetTypedefedType();
lldb::SBType GetDereferencedType();
lldb::SBType GetUnqualifiedType();
lldb::SBType GetArrayElementType();
lldb::SBType GetArrayType(uint64_t size);
lldb::SBType GetVectorElementType();
lldb::SBType GetCanonicalType();
lldb::SBType GetEnumerationIntegerType();
// Get the "lldb::BasicType" enumeration for a type. If a type is not a basic
// type eBasicTypeInvalid will be returned
lldb::BasicType GetBasicType();
// The call below is confusing and should really be renamed to "CreateBasicType".
lldb::SBType GetBasicType(lldb::BasicType type);
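// Illustrative usage (hypothetical): given an existing SBType t,
//   lldb::SBType int_t = t.GetBasicType(lldb::eBasicTypeInt);
// retrieves the "int" type from the same type system as t.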
uint32_t GetNumberOfFields();
uint32_t GetNumberOfDirectBaseClasses();
uint32_t GetNumberOfVirtualBaseClasses();
lldb::SBTypeMember GetFieldAtIndex(uint32_t idx);
lldb::SBTypeMember GetDirectBaseClassAtIndex(uint32_t idx);
lldb::SBTypeMember GetVirtualBaseClassAtIndex(uint32_t idx);
lldb::SBTypeEnumMemberList GetEnumMembers();
uint32_t GetNumberOfTemplateArguments();
lldb::SBType GetTemplateArgumentType(uint32_t idx);
+ /// Return the TemplateArgumentKind of the template argument at index idx.
+ /// Variadic argument packs are automatically expanded.
lldb::TemplateArgumentKind GetTemplateArgumentKind(uint32_t idx);
lldb::SBType GetFunctionReturnType();
lldb::SBTypeList GetFunctionArgumentTypes();
uint32_t GetNumberOfMemberFunctions();
lldb::SBTypeMemberFunction GetMemberFunctionAtIndex(uint32_t idx);
lldb::SBModule GetModule();
const char *GetName();
const char *GetDisplayTypeName();
lldb::TypeClass GetTypeClass();
bool IsTypeComplete();
uint32_t GetTypeFlags();
bool GetDescription(lldb::SBStream &description,
lldb::DescriptionLevel description_level);
lldb::SBType &operator=(const lldb::SBType &rhs);
bool operator==(lldb::SBType &rhs);
bool operator!=(lldb::SBType &rhs);
protected:
lldb_private::TypeImpl &ref();
const lldb_private::TypeImpl &ref() const;
lldb::TypeImplSP GetSP();
void SetSP(const lldb::TypeImplSP &type_impl_sp);
lldb::TypeImplSP m_opaque_sp;
friend class SBFunction;
friend class SBModule;
friend class SBTarget;
friend class SBTypeEnumMember;
friend class SBTypeEnumMemberList;
friend class SBTypeNameSpecifier;
friend class SBTypeMember;
friend class SBTypeMemberFunction;
friend class SBTypeList;
friend class SBValue;
SBType(const lldb_private::CompilerType &);
SBType(const lldb::TypeSP &);
SBType(const lldb::TypeImplSP &);
};
class SBTypeList {
public:
SBTypeList();
SBTypeList(const lldb::SBTypeList &rhs);
~SBTypeList();
lldb::SBTypeList &operator=(const lldb::SBTypeList &rhs);
explicit operator bool() const;
bool IsValid();
void Append(lldb::SBType type);
lldb::SBType GetTypeAtIndex(uint32_t index);
uint32_t GetSize();
private:
std::unique_ptr<lldb_private::TypeListImpl> m_opaque_up;
friend class SBModule;
friend class SBCompileUnit;
};
} // namespace lldb
#endif // LLDB_API_SBTYPE_H
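// A minimal sketch (not part of the patch), showing how a client might walk
// the template arguments of an SBType through the API declared above. Per the
// new doc comment, variadic packs are expanded automatically, so a type such
// as std::tuple<int, float> reports two arguments. DumpTemplateArguments and
// the tuple example are illustrative assumptions, not code from this change.
#include <cstdio>
#include "lldb/API/SBType.h"
static void DumpTemplateArguments(lldb::SBType type) {
  const uint32_t n = type.GetNumberOfTemplateArguments();
  for (uint32_t i = 0; i < n; ++i) {
    // The kind tells us whether this argument is a type, an integral value,
    // or something else; only type arguments have a printable SBType here.
    if (type.GetTemplateArgumentKind(i) == lldb::eTemplateArgumentKindType)
      printf("arg %u: %s\n", i, type.GetTemplateArgumentType(i).GetName());
  }
}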
diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h
index 0ad05a27570e..aefd19d0a859 100644
--- a/lldb/include/lldb/Symbol/CompilerType.h
+++ b/lldb/include/lldb/Symbol/CompilerType.h
@@ -1,422 +1,436 @@
//===-- CompilerType.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_SYMBOL_COMPILERTYPE_H
#define LLDB_SYMBOL_COMPILERTYPE_H
#include <functional>
#include <string>
#include <vector>
#include "lldb/lldb-private.h"
#include "llvm/ADT/APSInt.h"
namespace lldb_private {
class DataExtractor;
/// Generic representation of a type in a programming language.
///
/// This class serves as an abstraction for a type inside one of the TypeSystems
/// implemented by the language plugins. It does not have any actual logic in it
/// but only stores an opaque pointer and a pointer to the TypeSystem that
/// gives meaning to this opaque pointer. All methods of this class should call
/// their respective method in the TypeSystem interface and pass the opaque
/// pointer along.
///
/// \see lldb_private::TypeSystem
class CompilerType {
public:
/// Creates a CompilerType with the given TypeSystem and opaque compiler type.
///
/// This constructor should only be called from the respective TypeSystem
/// implementation.
///
/// \see lldb_private::TypeSystemClang::GetType(clang::QualType)
CompilerType(TypeSystem *type_system, lldb::opaque_compiler_type_t type)
: m_type(type), m_type_system(type_system) {
assert(Verify() && "verification failed");
}
CompilerType(const CompilerType &rhs)
: m_type(rhs.m_type), m_type_system(rhs.m_type_system) {}
CompilerType() = default;
/// Operators.
/// \{
const CompilerType &operator=(const CompilerType &rhs) {
m_type = rhs.m_type;
m_type_system = rhs.m_type_system;
return *this;
}
bool operator<(const CompilerType &rhs) const {
if (m_type_system == rhs.m_type_system)
return m_type < rhs.m_type;
return m_type_system < rhs.m_type_system;
}
/// \}
/// Tests.
/// \{
explicit operator bool() const {
return m_type != nullptr && m_type_system != nullptr;
}
bool IsValid() const { return m_type != nullptr && m_type_system != nullptr; }
bool IsArrayType(CompilerType *element_type = nullptr,
uint64_t *size = nullptr,
bool *is_incomplete = nullptr) const;
bool IsVectorType(CompilerType *element_type = nullptr,
uint64_t *size = nullptr) const;
bool IsArrayOfScalarType() const;
bool IsAggregateType() const;
bool IsAnonymousType() const;
bool IsScopedEnumerationType() const;
bool IsBeingDefined() const;
bool IsCharType() const;
bool IsCompleteType() const;
bool IsConst() const;
bool IsCStringType(uint32_t &length) const;
bool IsDefined() const;
bool IsFloatingPointType(uint32_t &count, bool &is_complex) const;
bool IsFunctionType() const;
uint32_t IsHomogeneousAggregate(CompilerType *base_type_ptr) const;
size_t GetNumberOfFunctionArguments() const;
CompilerType GetFunctionArgumentAtIndex(const size_t index) const;
bool IsVariadicFunctionType() const;
bool IsFunctionPointerType() const;
bool
IsBlockPointerType(CompilerType *function_pointer_type_ptr = nullptr) const;
bool IsIntegerType(bool &is_signed) const;
bool IsEnumerationType(bool &is_signed) const;
bool IsIntegerOrEnumerationType(bool &is_signed) const;
bool IsPolymorphicClass() const;
/// \param target_type Can pass nullptr.
bool IsPossibleDynamicType(CompilerType *target_type, bool check_cplusplus,
bool check_objc) const;
bool IsPointerToScalarType() const;
bool IsRuntimeGeneratedType() const;
bool IsPointerType(CompilerType *pointee_type = nullptr) const;
bool IsPointerOrReferenceType(CompilerType *pointee_type = nullptr) const;
bool IsReferenceType(CompilerType *pointee_type = nullptr,
bool *is_rvalue = nullptr) const;
bool ShouldTreatScalarValueAsAddress() const;
bool IsScalarType() const;
bool IsTypedefType() const;
bool IsVoidType() const;
/// \}
/// Type Completion.
/// \{
bool GetCompleteType() const;
/// \}
/// AST related queries.
/// \{
size_t GetPointerByteSize() const;
/// \}
/// Accessors.
/// \{
TypeSystem *GetTypeSystem() const { return m_type_system; }
ConstString GetTypeName() const;
ConstString GetDisplayTypeName() const;
uint32_t
GetTypeInfo(CompilerType *pointee_or_element_compiler_type = nullptr) const;
lldb::LanguageType GetMinimumLanguage();
lldb::opaque_compiler_type_t GetOpaqueQualType() const { return m_type; }
lldb::TypeClass GetTypeClass() const;
void SetCompilerType(TypeSystem *type_system,
lldb::opaque_compiler_type_t type);
unsigned GetTypeQualifiers() const;
/// \}
/// Creating related types.
/// \{
CompilerType GetArrayElementType(ExecutionContextScope *exe_scope) const;
CompilerType GetArrayType(uint64_t size) const;
CompilerType GetCanonicalType() const;
CompilerType GetFullyUnqualifiedType() const;
CompilerType GetEnumerationIntegerType() const;
/// Returns -1 if this isn't a function or if the function doesn't
/// have a prototype. Returns a value >= 0 if there is a prototype.
int GetFunctionArgumentCount() const;
CompilerType GetFunctionArgumentTypeAtIndex(size_t idx) const;
CompilerType GetFunctionReturnType() const;
size_t GetNumMemberFunctions() const;
TypeMemberFunctionImpl GetMemberFunctionAtIndex(size_t idx);
/// If this type is a reference to a type (L value or R value reference),
/// return a new type with the reference removed, else return the current type
/// itself.
CompilerType GetNonReferenceType() const;
/// If this type is a pointer type, return the type that the pointer points
/// to, else return an invalid type.
CompilerType GetPointeeType() const;
/// Return a new CompilerType that is a pointer to this type
CompilerType GetPointerType() const;
/// Return a new CompilerType that is an L value reference to this type if this
/// type is valid and the type system supports L value references, else return
/// an invalid type.
CompilerType GetLValueReferenceType() const;
/// Return a new CompilerType that is an R value reference to this type if this
/// type is valid and the type system supports R value references, else return
/// an invalid type.
CompilerType GetRValueReferenceType() const;
/// Return a new CompilerType that adds a const modifier to this type if this
/// type is valid and the type system supports const modifiers, else return an
/// invalid type.
CompilerType AddConstModifier() const;
/// Return a new CompilerType that adds a volatile modifier to this type if
/// this type is valid and the type system supports volatile modifiers, else
/// return an invalid type.
CompilerType AddVolatileModifier() const;
/// Return a new CompilerType that is the atomic type of this type. If this
/// type is not valid or the type system doesn't support atomic types, this
/// returns an invalid type.
CompilerType GetAtomicType() const;
/// Return a new CompilerType that adds a restrict modifier to this type if
/// this type is valid and the type system supports restrict modifiers, else
/// return an invalid type.
CompilerType AddRestrictModifier() const;
/// Create a typedef to this type using "name" as the name of the typedef this
/// type is valid and the type system supports typedefs, else return an
/// invalid type.
/// \param payload The typesystem-specific \p lldb::Type payload.
CompilerType CreateTypedef(const char *name,
const CompilerDeclContext &decl_ctx,
uint32_t payload) const;
/// If the current object represents a typedef type, get the underlying type
CompilerType GetTypedefedType() const;
/// Create related types using the current type's AST
CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) const;
/// \}
/// Exploring the type.
/// \{
struct IntegralTemplateArgument;
/// Return the size of the type in bytes.
llvm::Optional<uint64_t> GetByteSize(ExecutionContextScope *exe_scope) const;
/// Return the size of the type in bits.
llvm::Optional<uint64_t> GetBitSize(ExecutionContextScope *exe_scope) const;
lldb::Encoding GetEncoding(uint64_t &count) const;
lldb::Format GetFormat() const;
llvm::Optional<size_t>
GetTypeBitAlign(ExecutionContextScope *exe_scope) const;
uint32_t GetNumChildren(bool omit_empty_base_classes,
const ExecutionContext *exe_ctx) const;
lldb::BasicType GetBasicTypeEnumeration() const;
static lldb::BasicType GetBasicTypeEnumeration(ConstString name);
/// If this type is an enumeration, iterate through all of its enumerators
/// using a callback. If the callback returns true, keep iterating, else abort
/// the iteration.
void ForEachEnumerator(
std::function<bool(const CompilerType &integer_type, ConstString name,
const llvm::APSInt &value)> const &callback) const;
uint32_t GetNumFields() const;
CompilerType GetFieldAtIndex(size_t idx, std::string &name,
uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) const;
uint32_t GetNumDirectBaseClasses() const;
uint32_t GetNumVirtualBaseClasses() const;
CompilerType GetDirectBaseClassAtIndex(size_t idx,
uint32_t *bit_offset_ptr) const;
CompilerType GetVirtualBaseClassAtIndex(size_t idx,
uint32_t *bit_offset_ptr) const;
uint32_t GetIndexOfFieldWithName(const char *name,
CompilerType *field_compiler_type = nullptr,
uint64_t *bit_offset_ptr = nullptr,
uint32_t *bitfield_bit_size_ptr = nullptr,
bool *is_bitfield_ptr = nullptr) const;
CompilerType GetChildCompilerTypeAtIndex(
ExecutionContext *exe_ctx, size_t idx, bool transparent_pointers,
bool omit_empty_base_classes, bool ignore_array_bounds,
std::string &child_name, uint32_t &child_byte_size,
int32_t &child_byte_offset, uint32_t &child_bitfield_bit_size,
uint32_t &child_bitfield_bit_offset, bool &child_is_base_class,
bool &child_is_deref_of_parent, ValueObject *valobj,
uint64_t &language_flags) const;
/// Lookup a child given a name. This function will match base class names and
/// member names in "clang_type" only, not descendants.
uint32_t GetIndexOfChildWithName(const char *name,
bool omit_empty_base_classes) const;
/// Lookup a child member given a name. This function will match member names
/// only and will descend into "clang_type" children in search for the first
/// member in this class, or any base class that matches "name".
/// TODO: Return all matches for a given name by returning a
/// vector<vector<uint32_t>>
/// so we catch all names that match a given child name, not just the first.
size_t
GetIndexOfChildMemberWithName(const char *name, bool omit_empty_base_classes,
std::vector<uint32_t> &child_indexes) const;
- size_t GetNumTemplateArguments() const;
-
- lldb::TemplateArgumentKind GetTemplateArgumentKind(size_t idx) const;
- CompilerType GetTypeTemplateArgument(size_t idx) const;
+ /// Return the number of template arguments the type has.
+ /// If expand_pack is true, then variadic argument packs are automatically
+ /// expanded to their supplied arguments. If it is false, an argument pack
+ /// will only count as 1 argument.
+ size_t GetNumTemplateArguments(bool expand_pack = false) const;
+
+ /// Return the TemplateArgumentKind of the template argument at index idx.
+ /// If expand_pack is true, then variadic argument packs are automatically
+ /// expanded to their supplied arguments. With expand_pack set to false, an
+ /// argument pack will count as 1 argument and the returned kind will be Pack.
+ lldb::TemplateArgumentKind
+ GetTemplateArgumentKind(size_t idx, bool expand_pack = false) const;
+ CompilerType GetTypeTemplateArgument(size_t idx,
+ bool expand_pack = false) const;
/// Returns the value of the template argument and its type.
+ /// If expand_pack is true, then variadic argument packs are automatically
+ /// expanded to their supplied arguments. With expand_pack set to false, an
+ /// argument pack will count as 1 argument and it is invalid to call this
+ /// method on the pack argument.
llvm::Optional<IntegralTemplateArgument>
- GetIntegralTemplateArgument(size_t idx) const;
+ GetIntegralTemplateArgument(size_t idx, bool expand_pack = false) const;
CompilerType GetTypeForFormatters() const;
LazyBool ShouldPrintAsOneLiner(ValueObject *valobj) const;
bool IsMeaninglessWithoutDynamicResolution() const;
/// \}
/// Dumping types.
/// \{
#ifndef NDEBUG
/// Convenience LLVM-style dump method for use in the debugger only.
/// Don't call this function from actual code.
LLVM_DUMP_METHOD void dump() const;
#endif
void DumpValue(ExecutionContext *exe_ctx, Stream *s, lldb::Format format,
const DataExtractor &data, lldb::offset_t data_offset,
size_t data_byte_size, uint32_t bitfield_bit_size,
uint32_t bitfield_bit_offset, bool show_types,
bool show_summary, bool verbose, uint32_t depth);
bool DumpTypeValue(Stream *s, lldb::Format format, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size,
uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset,
ExecutionContextScope *exe_scope);
void DumpSummary(ExecutionContext *exe_ctx, Stream *s,
const DataExtractor &data, lldb::offset_t data_offset,
size_t data_byte_size);
/// Dump to stdout.
void DumpTypeDescription(lldb::DescriptionLevel level =
lldb::eDescriptionLevelFull) const;
/// Print a description of the type to a stream. The exact implementation
/// varies, but the expectation is that eDescriptionLevelFull returns a
/// source-like representation of the type, whereas eDescriptionLevelVerbose
/// does a dump of the underlying AST if applicable.
void DumpTypeDescription(Stream *s, lldb::DescriptionLevel level =
lldb::eDescriptionLevelFull) const;
/// \}
bool GetValueAsScalar(const DataExtractor &data, lldb::offset_t data_offset,
size_t data_byte_size, Scalar &value,
ExecutionContextScope *exe_scope) const;
void Clear() {
m_type = nullptr;
m_type_system = nullptr;
}
private:
#ifndef NDEBUG
/// If the type is valid, ask the TypeSystem to verify the integrity
/// of the type to catch CompilerTypes that mix and match invalid
/// TypeSystem/Opaque type pairs.
bool Verify() const;
#endif
lldb::opaque_compiler_type_t m_type = nullptr;
TypeSystem *m_type_system = nullptr;
};
bool operator==(const CompilerType &lhs, const CompilerType &rhs);
bool operator!=(const CompilerType &lhs, const CompilerType &rhs);
struct CompilerType::IntegralTemplateArgument {
llvm::APSInt value;
CompilerType type;
};
} // namespace lldb_private
#endif // LLDB_SYMBOL_COMPILERTYPE_H
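// A minimal sketch (not part of the patch) of the expand_pack semantics
// documented above. The CompilerType value is assumed to describe a variadic
// instantiation such as Foo<int, char, bool>; the function name is
// illustrative only.
#include "lldb/Symbol/CompilerType.h"
static void CompareArgumentCounts(const lldb_private::CompilerType &type) {
  // With expand_pack=false a parameter pack counts as a single argument of
  // kind Pack; with expand_pack=true each element of the pack is counted
  // individually, so for variadic instantiations expanded >= collapsed.
  const size_t collapsed = type.GetNumTemplateArguments(/*expand_pack=*/false);
  const size_t expanded = type.GetNumTemplateArguments(/*expand_pack=*/true);
  (void)collapsed;
  (void)expanded;
}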
diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h
index be5783596897..769449a4933b 100644
--- a/lldb/include/lldb/Symbol/TypeSystem.h
+++ b/lldb/include/lldb/Symbol/TypeSystem.h
@@ -1,553 +1,557 @@
//===-- TypeSystem.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_SYMBOL_TYPESYSTEM_H
#define LLDB_SYMBOL_TYPESYSTEM_H
#include <functional>
#include <map>
#include <mutex>
#include <string>
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "lldb/Core/PluginInterface.h"
#include "lldb/Expression/Expression.h"
#include "lldb/Symbol/CompilerDecl.h"
#include "lldb/Symbol/CompilerDeclContext.h"
#include "lldb/lldb-private.h"
class DWARFDIE;
class DWARFASTParser;
class PDBASTParser;
namespace lldb_private {
/// A SmallBitVector that represents a set of source languages (\p
/// lldb::LanguageType). Each lldb::LanguageType is represented by
/// the bit with the position of its enumerator. The largest
/// LanguageType is < 64, so this is space-efficient and on 64-bit
/// architectures a LanguageSet can be completely stack-allocated.
struct LanguageSet {
llvm::SmallBitVector bitvector;
LanguageSet();
/// If the set contains a single language only, return it.
llvm::Optional<lldb::LanguageType> GetSingularLanguage();
void Insert(lldb::LanguageType language);
bool Empty() const;
size_t Size() const;
bool operator[](unsigned i) const;
};
/// Interface for representing a type system.
///
/// Implemented by language plugins to define the type system for a given
/// language.
///
/// This interface extensively uses opaque pointers to prevent generic LLDB
/// code from depending on language plugins. The type and semantics of
/// these opaque pointers are defined by the TypeSystem implementation inside
/// the respective language plugin. Opaque pointers from one TypeSystem
/// instance should never be passed to a different TypeSystem instance (even
/// when the language plugin for both TypeSystem instances is the same).
///
/// Most of the functions in this class should not be called directly but only
/// called by their respective counterparts in CompilerType, CompilerDecl and
/// CompilerDeclContext.
///
/// \see lldb_private::CompilerType
/// \see lldb_private::CompilerDecl
/// \see lldb_private::CompilerDeclContext
class TypeSystem : public PluginInterface {
public:
// Constructors and Destructors
~TypeSystem() override;
// LLVM RTTI support
virtual bool isA(const void *ClassID) const = 0;
static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language,
Module *module);
static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language,
Target *target);
// Free up any resources associated with this TypeSystem. Done before
// removing all the TypeSystems from the TypeSystemMap.
virtual void Finalize() {}
virtual DWARFASTParser *GetDWARFParser() { return nullptr; }
virtual PDBASTParser *GetPDBParser() { return nullptr; }
virtual SymbolFile *GetSymbolFile() const { return m_sym_file; }
virtual void SetSymbolFile(SymbolFile *sym_file) { m_sym_file = sym_file; }
// CompilerDecl functions
virtual ConstString DeclGetName(void *opaque_decl) = 0;
virtual ConstString DeclGetMangledName(void *opaque_decl);
virtual CompilerDeclContext DeclGetDeclContext(void *opaque_decl);
virtual CompilerType DeclGetFunctionReturnType(void *opaque_decl);
virtual size_t DeclGetFunctionNumArguments(void *opaque_decl);
virtual CompilerType DeclGetFunctionArgumentType(void *opaque_decl,
size_t arg_idx);
virtual CompilerType GetTypeForDecl(void *opaque_decl) = 0;
// CompilerDeclContext functions
virtual std::vector<CompilerDecl>
DeclContextFindDeclByName(void *opaque_decl_ctx, ConstString name,
const bool ignore_imported_decls);
virtual ConstString DeclContextGetName(void *opaque_decl_ctx) = 0;
virtual ConstString
DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) = 0;
virtual bool DeclContextIsClassMethod(
void *opaque_decl_ctx, lldb::LanguageType *language_ptr,
bool *is_instance_method_ptr, ConstString *language_object_name_ptr) = 0;
virtual bool DeclContextIsContainedInLookup(void *opaque_decl_ctx,
void *other_opaque_decl_ctx) = 0;
// Tests
#ifndef NDEBUG
/// Verify the integrity of the type to catch CompilerTypes that mix
/// and match invalid TypeSystem/Opaque type pairs.
virtual bool Verify(lldb::opaque_compiler_type_t type) = 0;
#endif
virtual bool IsArrayType(lldb::opaque_compiler_type_t type,
CompilerType *element_type, uint64_t *size,
bool *is_incomplete) = 0;
virtual bool IsAggregateType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsAnonymousType(lldb::opaque_compiler_type_t type);
virtual bool IsCharType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsCompleteType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsDefined(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsFloatingPointType(lldb::opaque_compiler_type_t type,
uint32_t &count, bool &is_complex) = 0;
virtual bool IsFunctionType(lldb::opaque_compiler_type_t type) = 0;
virtual size_t
GetNumberOfFunctionArguments(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type,
const size_t index) = 0;
virtual bool IsFunctionPointerType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsBlockPointerType(lldb::opaque_compiler_type_t type,
CompilerType *function_pointer_type_ptr) = 0;
virtual bool IsIntegerType(lldb::opaque_compiler_type_t type,
bool &is_signed) = 0;
virtual bool IsEnumerationType(lldb::opaque_compiler_type_t type,
bool &is_signed) {
is_signed = false;
return false;
}
virtual bool IsScopedEnumerationType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsPossibleDynamicType(lldb::opaque_compiler_type_t type,
CompilerType *target_type, // Can pass NULL
bool check_cplusplus, bool check_objc) = 0;
virtual bool IsPointerType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type) = 0;
virtual bool IsScalarType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsVoidType(lldb::opaque_compiler_type_t type) = 0;
virtual bool CanPassInRegisters(const CompilerType &type) = 0;
// TypeSystems can support more than one language
virtual bool SupportsLanguage(lldb::LanguageType language) = 0;
// Type Completion
virtual bool GetCompleteType(lldb::opaque_compiler_type_t type) = 0;
// AST related queries
virtual uint32_t GetPointerByteSize() = 0;
// Accessors
virtual ConstString GetTypeName(lldb::opaque_compiler_type_t type) = 0;
virtual ConstString GetDisplayTypeName(lldb::opaque_compiler_type_t type) = 0;
virtual uint32_t
GetTypeInfo(lldb::opaque_compiler_type_t type,
CompilerType *pointee_or_element_compiler_type) = 0;
virtual lldb::LanguageType
GetMinimumLanguage(lldb::opaque_compiler_type_t type) = 0;
virtual lldb::TypeClass GetTypeClass(lldb::opaque_compiler_type_t type) = 0;
// Creating related types
virtual CompilerType
GetArrayElementType(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) = 0;
virtual CompilerType GetArrayType(lldb::opaque_compiler_type_t type,
uint64_t size);
virtual CompilerType GetCanonicalType(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) = 0;
// Returns -1 if this isn't a function or if the function doesn't have a
// prototype. Returns a value >= 0 if there is a prototype.
virtual int GetFunctionArgumentCount(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetFunctionArgumentTypeAtIndex(lldb::opaque_compiler_type_t type,
size_t idx) = 0;
virtual CompilerType
GetFunctionReturnType(lldb::opaque_compiler_type_t type) = 0;
virtual size_t GetNumMemberFunctions(lldb::opaque_compiler_type_t type) = 0;
virtual TypeMemberFunctionImpl
GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type, size_t idx) = 0;
virtual CompilerType GetPointeeType(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType GetPointerType(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetLValueReferenceType(lldb::opaque_compiler_type_t type);
virtual CompilerType
GetRValueReferenceType(lldb::opaque_compiler_type_t type);
virtual CompilerType GetAtomicType(lldb::opaque_compiler_type_t type);
virtual CompilerType AddConstModifier(lldb::opaque_compiler_type_t type);
virtual CompilerType AddVolatileModifier(lldb::opaque_compiler_type_t type);
virtual CompilerType AddRestrictModifier(lldb::opaque_compiler_type_t type);
/// \param opaque_payload The m_payload field of Type, which may
/// carry TypeSystem-specific extra information.
virtual CompilerType CreateTypedef(lldb::opaque_compiler_type_t type,
const char *name,
const CompilerDeclContext &decl_ctx,
uint32_t opaque_payload);
// Exploring the type
virtual const llvm::fltSemantics &GetFloatTypeSemantics(size_t byte_size) = 0;
virtual llvm::Optional<uint64_t>
GetBitSize(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) = 0;
virtual lldb::Encoding GetEncoding(lldb::opaque_compiler_type_t type,
uint64_t &count) = 0;
virtual lldb::Format GetFormat(lldb::opaque_compiler_type_t type) = 0;
virtual uint32_t GetNumChildren(lldb::opaque_compiler_type_t type,
bool omit_empty_base_classes,
const ExecutionContext *exe_ctx) = 0;
virtual CompilerType GetBuiltinTypeByName(ConstString name);
virtual lldb::BasicType
GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) = 0;
virtual void ForEachEnumerator(
lldb::opaque_compiler_type_t type,
std::function<bool(const CompilerType &integer_type,
ConstString name,
const llvm::APSInt &value)> const &callback) {}
virtual uint32_t GetNumFields(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType GetFieldAtIndex(lldb::opaque_compiler_type_t type,
size_t idx, std::string &name,
uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) = 0;
virtual uint32_t
GetNumDirectBaseClasses(lldb::opaque_compiler_type_t type) = 0;
virtual uint32_t
GetNumVirtualBaseClasses(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetDirectBaseClassAtIndex(lldb::opaque_compiler_type_t type, size_t idx,
uint32_t *bit_offset_ptr) = 0;
virtual CompilerType
GetVirtualBaseClassAtIndex(lldb::opaque_compiler_type_t type, size_t idx,
uint32_t *bit_offset_ptr) = 0;
virtual CompilerType GetChildCompilerTypeAtIndex(
lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, size_t idx,
bool transparent_pointers, bool omit_empty_base_classes,
bool ignore_array_bounds, std::string &child_name,
uint32_t &child_byte_size, int32_t &child_byte_offset,
uint32_t &child_bitfield_bit_size, uint32_t &child_bitfield_bit_offset,
bool &child_is_base_class, bool &child_is_deref_of_parent,
ValueObject *valobj, uint64_t &language_flags) = 0;
// Lookup a child given a name. This function will match base class names and
// member names in "clang_type" only, not descendants.
virtual uint32_t GetIndexOfChildWithName(lldb::opaque_compiler_type_t type,
const char *name,
bool omit_empty_base_classes) = 0;
// Lookup a child member given a name. This function will match member names
// only and will descend into "clang_type" children in search for the first
// member in this class, or any base class that matches "name".
// TODO: Return all matches for a given name by returning a
// vector<vector<uint32_t>>
// so we catch all names that match a given child name, not just the first.
virtual size_t
GetIndexOfChildMemberWithName(lldb::opaque_compiler_type_t type,
const char *name, bool omit_empty_base_classes,
std::vector<uint32_t> &child_indexes) = 0;
- virtual size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type);
+ virtual size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+ bool expand_pack);
virtual lldb::TemplateArgumentKind
- GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx);
- virtual CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type,
- size_t idx);
+ GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx,
+ bool expand_pack);
+ virtual CompilerType
+ GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx,
+ bool expand_pack);
virtual llvm::Optional<CompilerType::IntegralTemplateArgument>
- GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx);
+ GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx,
+ bool expand_pack);
// Dumping types
#ifndef NDEBUG
/// Convenience LLVM-style dump method for use in the debugger only.
LLVM_DUMP_METHOD virtual void
dump(lldb::opaque_compiler_type_t type) const = 0;
#endif
virtual void DumpValue(lldb::opaque_compiler_type_t type,
ExecutionContext *exe_ctx, Stream *s,
lldb::Format format, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size,
uint32_t bitfield_bit_size,
uint32_t bitfield_bit_offset, bool show_types,
bool show_summary, bool verbose, uint32_t depth) = 0;
virtual bool DumpTypeValue(lldb::opaque_compiler_type_t type, Stream *s,
lldb::Format format, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size,
uint32_t bitfield_bit_size,
uint32_t bitfield_bit_offset,
ExecutionContextScope *exe_scope) = 0;
/// Dump the type to stdout.
virtual void DumpTypeDescription(
lldb::opaque_compiler_type_t type,
lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) = 0;
/// Print a description of the type to a stream. The exact implementation
/// varies, but the expectation is that eDescriptionLevelFull returns a
/// source-like representation of the type, whereas eDescriptionLevelVerbose
/// does a dump of the underlying AST if applicable.
virtual void DumpTypeDescription(
lldb::opaque_compiler_type_t type, Stream *s,
lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) = 0;
/// Dump a textual representation of the internal TypeSystem state to the
/// given stream.
///
/// This should not modify the state of the TypeSystem if possible.
virtual void Dump(llvm::raw_ostream &output) = 0;
// TODO: These methods appear unused. Should they be removed?
virtual bool IsRuntimeGeneratedType(lldb::opaque_compiler_type_t type) = 0;
virtual void DumpSummary(lldb::opaque_compiler_type_t type,
ExecutionContext *exe_ctx, Stream *s,
const DataExtractor &data,
lldb::offset_t data_offset,
size_t data_byte_size) = 0;
// TODO: Determine if these methods should move to TypeSystemClang.
virtual bool IsPointerOrReferenceType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type) = 0;
virtual unsigned GetTypeQualifiers(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsCStringType(lldb::opaque_compiler_type_t type,
uint32_t &length) = 0;
virtual llvm::Optional<size_t>
GetTypeBitAlign(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) = 0;
virtual CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) = 0;
virtual CompilerType
GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding,
size_t bit_size) = 0;
virtual bool IsBeingDefined(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsConst(lldb::opaque_compiler_type_t type) = 0;
virtual uint32_t IsHomogeneousAggregate(lldb::opaque_compiler_type_t type,
CompilerType *base_type_ptr) = 0;
virtual bool IsPolymorphicClass(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsTypedefType(lldb::opaque_compiler_type_t type) = 0;
// If the current object represents a typedef type, get the underlying type
virtual CompilerType GetTypedefedType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsVectorType(lldb::opaque_compiler_type_t type,
CompilerType *element_type, uint64_t *size) = 0;
virtual CompilerType
GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetNonReferenceType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsReferenceType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type, bool *is_rvalue) = 0;
virtual bool
ShouldTreatScalarValueAsAddress(lldb::opaque_compiler_type_t type) {
return IsPointerOrReferenceType(type, nullptr);
}
virtual UserExpression *
GetUserExpression(llvm::StringRef expr, llvm::StringRef prefix,
lldb::LanguageType language,
Expression::ResultType desired_type,
const EvaluateExpressionOptions &options,
ValueObject *ctx_obj) {
return nullptr;
}
virtual FunctionCaller *GetFunctionCaller(const CompilerType &return_type,
const Address &function_address,
const ValueList &arg_value_list,
const char *name) {
return nullptr;
}
virtual std::unique_ptr<UtilityFunction>
CreateUtilityFunction(std::string text, std::string name);
virtual PersistentExpressionState *GetPersistentExpressionState() {
return nullptr;
}
virtual CompilerType GetTypeForFormatters(void *type);
virtual LazyBool ShouldPrintAsOneLiner(void *type, ValueObject *valobj);
  // Type systems can have types that are placeholder types, which are meant to
  // indicate the presence of a type, but offer no actual information about
  // said types, and leave the burden of actually figuring type information out
  // to dynamic type resolution. For instance, a language with a generics
  // system can use placeholder types to indicate "type argument goes here",
  // without promising uniqueness of the placeholder, nor attaching any
  // actually identifiable information to said placeholder. This API allows type
  // systems to tell LLDB when such a type has been encountered. In response,
  // the debugger can react by not using this type as a cache entry in any
  // type-specific way. For instance, LLDB will currently not cache any
  // formatters that are discovered on such a type as attributable to the
  // meaningless type itself, instead preferring to use the dynamic type.
protected:
SymbolFile *m_sym_file = nullptr;
};
class TypeSystemMap {
public:
TypeSystemMap();
~TypeSystemMap();
// Clear calls Finalize on all the TypeSystems managed by this map, and then
// empties the map.
void Clear();
// Iterate through all of the type systems that are created. Return true from
// callback to keep iterating, false to stop iterating.
void ForEach(std::function<bool(TypeSystem *)> const &callback);
llvm::Expected<TypeSystem &>
GetTypeSystemForLanguage(lldb::LanguageType language, Module *module,
bool can_create);
llvm::Expected<TypeSystem &>
GetTypeSystemForLanguage(lldb::LanguageType language, Target *target,
bool can_create);
protected:
typedef std::map<lldb::LanguageType, lldb::TypeSystemSP> collection;
mutable std::mutex m_mutex; ///< A mutex to keep this object happy in
///< multi-threaded environments.
collection m_map;
bool m_clear_in_progress = false;
private:
typedef llvm::function_ref<lldb::TypeSystemSP()> CreateCallback;
/// Finds the type system for the given language. If no type system could be
/// found for a language and a CreateCallback was provided, the value returned
/// by the callback will be treated as the TypeSystem for the language.
///
/// \param language The language for which the type system should be found.
/// \param create_callback A callback that will be called if no previously
/// created TypeSystem that fits the given language
/// could be found. Can be omitted if a non-existent
/// type system should be treated as an error instead.
/// \return The found type system or an error.
llvm::Expected<TypeSystem &> GetTypeSystemForLanguage(
lldb::LanguageType language,
llvm::Optional<CreateCallback> create_callback = llvm::None);
};
} // namespace lldb_private
#endif // LLDB_SYMBOL_TYPESYSTEM_H
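// A minimal sketch (not part of the patch) of the LanguageSet helper declared
// above: a stack-allocated bitvector keyed by lldb::LanguageType. The function
// name is illustrative only.
#include "lldb/Symbol/TypeSystem.h"
static bool DescribesCFamily(lldb_private::LanguageSet &set) {
  set.Insert(lldb::eLanguageTypeC99);
  set.Insert(lldb::eLanguageTypeC_plus_plus);
  // operator[] tests whether the bit for the given language is set.
  return !set.Empty() && set[lldb::eLanguageTypeC99];
}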
diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp
index 533930c0544b..adc60a084367 100644
--- a/lldb/source/API/SBType.cpp
+++ b/lldb/source/API/SBType.cpp
@@ -1,893 +1,897 @@
//===-- SBType.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/API/SBType.h"
#include "lldb/API/SBDefines.h"
#include "lldb/API/SBModule.h"
#include "lldb/API/SBStream.h"
#include "lldb/API/SBTypeEnumMember.h"
#include "lldb/Core/Mangled.h"
#include "lldb/Symbol/CompilerType.h"
#include "lldb/Symbol/Type.h"
#include "lldb/Symbol/TypeSystem.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/Instrumentation.h"
#include "lldb/Utility/Stream.h"
#include "llvm/ADT/APSInt.h"
#include <memory>
using namespace lldb;
using namespace lldb_private;
SBType::SBType() { LLDB_INSTRUMENT_VA(this); }
SBType::SBType(const CompilerType &type)
: m_opaque_sp(new TypeImpl(
CompilerType(type.GetTypeSystem(), type.GetOpaqueQualType()))) {}
SBType::SBType(const lldb::TypeSP &type_sp)
: m_opaque_sp(new TypeImpl(type_sp)) {}
SBType::SBType(const lldb::TypeImplSP &type_impl_sp)
: m_opaque_sp(type_impl_sp) {}
SBType::SBType(const SBType &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs) {
m_opaque_sp = rhs.m_opaque_sp;
}
}
// SBType::SBType (TypeImpl* impl) :
// m_opaque_up(impl)
//{}
//
bool SBType::operator==(SBType &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (!IsValid())
return !rhs.IsValid();
if (!rhs.IsValid())
return false;
return *m_opaque_sp.get() == *rhs.m_opaque_sp.get();
}
bool SBType::operator!=(SBType &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (!IsValid())
return rhs.IsValid();
if (!rhs.IsValid())
return true;
return *m_opaque_sp.get() != *rhs.m_opaque_sp.get();
}
lldb::TypeImplSP SBType::GetSP() { return m_opaque_sp; }
void SBType::SetSP(const lldb::TypeImplSP &type_impl_sp) {
m_opaque_sp = type_impl_sp;
}
SBType &SBType::operator=(const SBType &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs) {
m_opaque_sp = rhs.m_opaque_sp;
}
return *this;
}
SBType::~SBType() = default;
TypeImpl &SBType::ref() {
if (m_opaque_sp.get() == nullptr)
m_opaque_sp = std::make_shared<TypeImpl>();
return *m_opaque_sp;
}
const TypeImpl &SBType::ref() const {
// "const SBAddress &addr" should already have checked "addr.IsValid()" prior
// to calling this function. In case you didn't we will assert and die to let
// you know.
assert(m_opaque_sp.get());
return *m_opaque_sp;
}
bool SBType::IsValid() const {
LLDB_INSTRUMENT_VA(this);
return this->operator bool();
}
SBType::operator bool() const {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp.get() == nullptr)
return false;
return m_opaque_sp->IsValid();
}
uint64_t SBType::GetByteSize() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
if (llvm::Optional<uint64_t> size =
m_opaque_sp->GetCompilerType(false).GetByteSize(nullptr))
return *size;
return 0;
}
bool SBType::IsPointerType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsPointerType();
}
bool SBType::IsArrayType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsArrayType(nullptr, nullptr,
nullptr);
}
bool SBType::IsVectorType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsVectorType(nullptr, nullptr);
}
bool SBType::IsReferenceType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsReferenceType();
}
SBType SBType::GetPointerType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetPointerType())));
}
SBType SBType::GetPointeeType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetPointeeType())));
}
SBType SBType::GetReferenceType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetReferenceType())));
}
SBType SBType::GetTypedefedType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetTypedefedType())));
}
SBType SBType::GetDereferencedType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetDereferencedType())));
}
SBType SBType::GetArrayElementType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(
m_opaque_sp->GetCompilerType(true).GetArrayElementType(nullptr))));
}
SBType SBType::GetArrayType(uint64_t size) {
LLDB_INSTRUMENT_VA(this, size);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(
new TypeImpl(m_opaque_sp->GetCompilerType(true).GetArrayType(size))));
}
SBType SBType::GetVectorElementType() {
LLDB_INSTRUMENT_VA(this);
SBType type_sb;
if (IsValid()) {
CompilerType vector_element_type;
if (m_opaque_sp->GetCompilerType(true).IsVectorType(&vector_element_type,
nullptr))
type_sb.SetSP(TypeImplSP(new TypeImpl(vector_element_type)));
}
return type_sb;
}
bool SBType::IsFunctionType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsFunctionType();
}
bool SBType::IsPolymorphicClass() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsPolymorphicClass();
}
bool SBType::IsTypedefType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsTypedefType();
}
bool SBType::IsAnonymousType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsAnonymousType();
}
bool SBType::IsScopedEnumerationType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsScopedEnumerationType();
}
bool SBType::IsAggregateType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsAggregateType();
}
lldb::SBType SBType::GetFunctionReturnType() {
LLDB_INSTRUMENT_VA(this);
if (IsValid()) {
CompilerType return_type(
m_opaque_sp->GetCompilerType(true).GetFunctionReturnType());
if (return_type.IsValid())
return SBType(return_type);
}
return lldb::SBType();
}
lldb::SBTypeList SBType::GetFunctionArgumentTypes() {
LLDB_INSTRUMENT_VA(this);
SBTypeList sb_type_list;
if (IsValid()) {
CompilerType func_type(m_opaque_sp->GetCompilerType(true));
size_t count = func_type.GetNumberOfFunctionArguments();
for (size_t i = 0; i < count; i++) {
sb_type_list.Append(SBType(func_type.GetFunctionArgumentAtIndex(i)));
}
}
return sb_type_list;
}
uint32_t SBType::GetNumberOfMemberFunctions() {
LLDB_INSTRUMENT_VA(this);
if (IsValid()) {
return m_opaque_sp->GetCompilerType(true).GetNumMemberFunctions();
}
return 0;
}
lldb::SBTypeMemberFunction SBType::GetMemberFunctionAtIndex(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
SBTypeMemberFunction sb_func_type;
if (IsValid())
sb_func_type.reset(new TypeMemberFunctionImpl(
m_opaque_sp->GetCompilerType(true).GetMemberFunctionAtIndex(idx)));
return sb_func_type;
}
lldb::SBType SBType::GetUnqualifiedType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetUnqualifiedType())));
}
lldb::SBType SBType::GetCanonicalType() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetCanonicalType())));
return SBType();
}
SBType SBType::GetEnumerationIntegerType() {
LLDB_INSTRUMENT_VA(this);
if (IsValid()) {
return SBType(
m_opaque_sp->GetCompilerType(true).GetEnumerationIntegerType());
}
return SBType();
}
lldb::BasicType SBType::GetBasicType() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return m_opaque_sp->GetCompilerType(false).GetBasicTypeEnumeration();
return eBasicTypeInvalid;
}
SBType SBType::GetBasicType(lldb::BasicType basic_type) {
LLDB_INSTRUMENT_VA(this, basic_type);
if (IsValid() && m_opaque_sp->IsValid())
return SBType(
m_opaque_sp->GetTypeSystem(false)->GetBasicTypeFromAST(basic_type));
return SBType();
}
uint32_t SBType::GetNumberOfDirectBaseClasses() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return m_opaque_sp->GetCompilerType(true).GetNumDirectBaseClasses();
return 0;
}
uint32_t SBType::GetNumberOfVirtualBaseClasses() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return m_opaque_sp->GetCompilerType(true).GetNumVirtualBaseClasses();
return 0;
}
uint32_t SBType::GetNumberOfFields() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return m_opaque_sp->GetCompilerType(true).GetNumFields();
return 0;
}
bool SBType::GetDescription(SBStream &description,
lldb::DescriptionLevel description_level) {
LLDB_INSTRUMENT_VA(this, description, description_level);
Stream &strm = description.ref();
if (m_opaque_sp) {
m_opaque_sp->GetDescription(strm, description_level);
} else
strm.PutCString("No value");
return true;
}
SBTypeMember SBType::GetDirectBaseClassAtIndex(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
SBTypeMember sb_type_member;
if (IsValid()) {
uint32_t bit_offset = 0;
CompilerType base_class_type =
m_opaque_sp->GetCompilerType(true).GetDirectBaseClassAtIndex(
idx, &bit_offset);
if (base_class_type.IsValid())
sb_type_member.reset(new TypeMemberImpl(
TypeImplSP(new TypeImpl(base_class_type)), bit_offset));
}
return sb_type_member;
}
SBTypeMember SBType::GetVirtualBaseClassAtIndex(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
SBTypeMember sb_type_member;
if (IsValid()) {
uint32_t bit_offset = 0;
CompilerType base_class_type =
m_opaque_sp->GetCompilerType(true).GetVirtualBaseClassAtIndex(
idx, &bit_offset);
if (base_class_type.IsValid())
sb_type_member.reset(new TypeMemberImpl(
TypeImplSP(new TypeImpl(base_class_type)), bit_offset));
}
return sb_type_member;
}
SBTypeEnumMemberList SBType::GetEnumMembers() {
LLDB_INSTRUMENT_VA(this);
SBTypeEnumMemberList sb_enum_member_list;
if (IsValid()) {
CompilerType this_type(m_opaque_sp->GetCompilerType(true));
if (this_type.IsValid()) {
this_type.ForEachEnumerator([&sb_enum_member_list](
const CompilerType &integer_type,
ConstString name,
const llvm::APSInt &value) -> bool {
SBTypeEnumMember enum_member(
lldb::TypeEnumMemberImplSP(new TypeEnumMemberImpl(
lldb::TypeImplSP(new TypeImpl(integer_type)), name, value)));
sb_enum_member_list.Append(enum_member);
return true; // Keep iterating
});
}
}
return sb_enum_member_list;
}
SBTypeMember SBType::GetFieldAtIndex(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
SBTypeMember sb_type_member;
if (IsValid()) {
CompilerType this_type(m_opaque_sp->GetCompilerType(false));
if (this_type.IsValid()) {
uint64_t bit_offset = 0;
uint32_t bitfield_bit_size = 0;
bool is_bitfield = false;
std::string name_sstr;
CompilerType field_type(this_type.GetFieldAtIndex(
idx, name_sstr, &bit_offset, &bitfield_bit_size, &is_bitfield));
if (field_type.IsValid()) {
ConstString name;
if (!name_sstr.empty())
name.SetCString(name_sstr.c_str());
sb_type_member.reset(
new TypeMemberImpl(TypeImplSP(new TypeImpl(field_type)), bit_offset,
name, bitfield_bit_size, is_bitfield));
}
}
}
return sb_type_member;
}
bool SBType::IsTypeComplete() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(false).IsCompleteType();
}
uint32_t SBType::GetTypeFlags() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return 0;
return m_opaque_sp->GetCompilerType(true).GetTypeInfo();
}
lldb::SBModule SBType::GetModule() {
LLDB_INSTRUMENT_VA(this);
lldb::SBModule sb_module;
if (!IsValid())
return sb_module;
sb_module.SetSP(m_opaque_sp->GetModule());
return sb_module;
}
const char *SBType::GetName() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return "";
return m_opaque_sp->GetName().GetCString();
}
const char *SBType::GetDisplayTypeName() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return "";
return m_opaque_sp->GetDisplayTypeName().GetCString();
}
lldb::TypeClass SBType::GetTypeClass() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return m_opaque_sp->GetCompilerType(true).GetTypeClass();
return lldb::eTypeClassInvalid;
}
uint32_t SBType::GetNumberOfTemplateArguments() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
- return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments();
+ return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments(
+ /*expand_pack=*/true);
return 0;
}
lldb::SBType SBType::GetTemplateArgumentType(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
if (!IsValid())
return SBType();
CompilerType type;
+ const bool expand_pack = true;
switch(GetTemplateArgumentKind(idx)) {
case eTemplateArgumentKindType:
- type = m_opaque_sp->GetCompilerType(false).GetTypeTemplateArgument(idx);
+ type = m_opaque_sp->GetCompilerType(false).GetTypeTemplateArgument(
+ idx, expand_pack);
break;
case eTemplateArgumentKindIntegral:
type = m_opaque_sp->GetCompilerType(false)
- .GetIntegralTemplateArgument(idx)
+ .GetIntegralTemplateArgument(idx, expand_pack)
->type;
break;
default:
break;
}
if (type.IsValid())
return SBType(type);
return SBType();
}
lldb::TemplateArgumentKind SBType::GetTemplateArgumentKind(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
if (IsValid())
- return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind(idx);
+ return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind(
+ idx, /*expand_pack=*/true);
return eTemplateArgumentKindNull;
}
SBTypeList::SBTypeList() : m_opaque_up(new TypeListImpl()) {
LLDB_INSTRUMENT_VA(this);
}
SBTypeList::SBTypeList(const SBTypeList &rhs)
: m_opaque_up(new TypeListImpl()) {
LLDB_INSTRUMENT_VA(this, rhs);
for (uint32_t i = 0, rhs_size = const_cast<SBTypeList &>(rhs).GetSize();
i < rhs_size; i++)
Append(const_cast<SBTypeList &>(rhs).GetTypeAtIndex(i));
}
bool SBTypeList::IsValid() {
LLDB_INSTRUMENT_VA(this);
return this->operator bool();
}
SBTypeList::operator bool() const {
LLDB_INSTRUMENT_VA(this);
return (m_opaque_up != nullptr);
}
SBTypeList &SBTypeList::operator=(const SBTypeList &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs) {
m_opaque_up = std::make_unique<TypeListImpl>();
for (uint32_t i = 0, rhs_size = const_cast<SBTypeList &>(rhs).GetSize();
i < rhs_size; i++)
Append(const_cast<SBTypeList &>(rhs).GetTypeAtIndex(i));
}
return *this;
}
void SBTypeList::Append(SBType type) {
LLDB_INSTRUMENT_VA(this, type);
if (type.IsValid())
m_opaque_up->Append(type.m_opaque_sp);
}
SBType SBTypeList::GetTypeAtIndex(uint32_t index) {
LLDB_INSTRUMENT_VA(this, index);
if (m_opaque_up)
return SBType(m_opaque_up->GetTypeAtIndex(index));
return SBType();
}
uint32_t SBTypeList::GetSize() {
LLDB_INSTRUMENT_VA(this);
return m_opaque_up->GetSize();
}
SBTypeList::~SBTypeList() = default;
SBTypeMember::SBTypeMember() { LLDB_INSTRUMENT_VA(this); }
SBTypeMember::~SBTypeMember() = default;
SBTypeMember::SBTypeMember(const SBTypeMember &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs) {
if (rhs.IsValid())
m_opaque_up = std::make_unique<TypeMemberImpl>(rhs.ref());
}
}
lldb::SBTypeMember &SBTypeMember::operator=(const lldb::SBTypeMember &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs) {
if (rhs.IsValid())
m_opaque_up = std::make_unique<TypeMemberImpl>(rhs.ref());
}
return *this;
}
bool SBTypeMember::IsValid() const {
LLDB_INSTRUMENT_VA(this);
return this->operator bool();
}
SBTypeMember::operator bool() const {
LLDB_INSTRUMENT_VA(this);
return m_opaque_up.get();
}
const char *SBTypeMember::GetName() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_up)
return m_opaque_up->GetName().GetCString();
return nullptr;
}
SBType SBTypeMember::GetType() {
LLDB_INSTRUMENT_VA(this);
SBType sb_type;
if (m_opaque_up) {
sb_type.SetSP(m_opaque_up->GetTypeImpl());
}
return sb_type;
}
uint64_t SBTypeMember::GetOffsetInBytes() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_up)
return m_opaque_up->GetBitOffset() / 8u;
return 0;
}
uint64_t SBTypeMember::GetOffsetInBits() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_up)
return m_opaque_up->GetBitOffset();
return 0;
}
bool SBTypeMember::IsBitfield() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_up)
return m_opaque_up->GetIsBitfield();
return false;
}
uint32_t SBTypeMember::GetBitfieldSizeInBits() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_up)
return m_opaque_up->GetBitfieldBitSize();
return 0;
}
bool SBTypeMember::GetDescription(lldb::SBStream &description,
lldb::DescriptionLevel description_level) {
LLDB_INSTRUMENT_VA(this, description, description_level);
Stream &strm = description.ref();
if (m_opaque_up) {
const uint32_t bit_offset = m_opaque_up->GetBitOffset();
const uint32_t byte_offset = bit_offset / 8u;
const uint32_t byte_bit_offset = bit_offset % 8u;
const char *name = m_opaque_up->GetName().GetCString();
if (byte_bit_offset)
strm.Printf("+%u + %u bits: (", byte_offset, byte_bit_offset);
else
strm.Printf("+%u: (", byte_offset);
TypeImplSP type_impl_sp(m_opaque_up->GetTypeImpl());
if (type_impl_sp)
type_impl_sp->GetDescription(strm, description_level);
strm.Printf(") %s", name);
if (m_opaque_up->GetIsBitfield()) {
const uint32_t bitfield_bit_size = m_opaque_up->GetBitfieldBitSize();
strm.Printf(" : %u", bitfield_bit_size);
}
} else {
strm.PutCString("No value");
}
return true;
}
void SBTypeMember::reset(TypeMemberImpl *type_member_impl) {
m_opaque_up.reset(type_member_impl);
}
TypeMemberImpl &SBTypeMember::ref() {
if (m_opaque_up == nullptr)
m_opaque_up = std::make_unique<TypeMemberImpl>();
return *m_opaque_up;
}
const TypeMemberImpl &SBTypeMember::ref() const { return *m_opaque_up; }
SBTypeMemberFunction::SBTypeMemberFunction() { LLDB_INSTRUMENT_VA(this); }
SBTypeMemberFunction::~SBTypeMemberFunction() = default;
SBTypeMemberFunction::SBTypeMemberFunction(const SBTypeMemberFunction &rhs)
: m_opaque_sp(rhs.m_opaque_sp) {
LLDB_INSTRUMENT_VA(this, rhs);
}
lldb::SBTypeMemberFunction &SBTypeMemberFunction::
operator=(const lldb::SBTypeMemberFunction &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs)
m_opaque_sp = rhs.m_opaque_sp;
return *this;
}
bool SBTypeMemberFunction::IsValid() const {
LLDB_INSTRUMENT_VA(this);
return this->operator bool();
}
SBTypeMemberFunction::operator bool() const {
LLDB_INSTRUMENT_VA(this);
return m_opaque_sp.get();
}
const char *SBTypeMemberFunction::GetName() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp)
return m_opaque_sp->GetName().GetCString();
return nullptr;
}
const char *SBTypeMemberFunction::GetDemangledName() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp) {
ConstString mangled_str = m_opaque_sp->GetMangledName();
if (mangled_str) {
Mangled mangled(mangled_str);
return mangled.GetDemangledName().GetCString();
}
}
return nullptr;
}
const char *SBTypeMemberFunction::GetMangledName() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp)
return m_opaque_sp->GetMangledName().GetCString();
return nullptr;
}
SBType SBTypeMemberFunction::GetType() {
LLDB_INSTRUMENT_VA(this);
SBType sb_type;
if (m_opaque_sp) {
sb_type.SetSP(lldb::TypeImplSP(new TypeImpl(m_opaque_sp->GetType())));
}
return sb_type;
}
lldb::SBType SBTypeMemberFunction::GetReturnType() {
LLDB_INSTRUMENT_VA(this);
SBType sb_type;
if (m_opaque_sp) {
sb_type.SetSP(lldb::TypeImplSP(new TypeImpl(m_opaque_sp->GetReturnType())));
}
return sb_type;
}
uint32_t SBTypeMemberFunction::GetNumberOfArguments() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp)
return m_opaque_sp->GetNumArguments();
return 0;
}
lldb::SBType SBTypeMemberFunction::GetArgumentTypeAtIndex(uint32_t i) {
LLDB_INSTRUMENT_VA(this, i);
SBType sb_type;
if (m_opaque_sp) {
sb_type.SetSP(
lldb::TypeImplSP(new TypeImpl(m_opaque_sp->GetArgumentAtIndex(i))));
}
return sb_type;
}
lldb::MemberFunctionKind SBTypeMemberFunction::GetKind() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp)
return m_opaque_sp->GetKind();
return lldb::eMemberFunctionKindUnknown;
}
bool SBTypeMemberFunction::GetDescription(
lldb::SBStream &description, lldb::DescriptionLevel description_level) {
LLDB_INSTRUMENT_VA(this, description, description_level);
Stream &strm = description.ref();
if (m_opaque_sp)
return m_opaque_sp->GetDescription(strm);
return false;
}
void SBTypeMemberFunction::reset(TypeMemberFunctionImpl *type_member_impl) {
m_opaque_sp.reset(type_member_impl);
}
TypeMemberFunctionImpl &SBTypeMemberFunction::ref() {
if (!m_opaque_sp)
m_opaque_sp = std::make_shared<TypeMemberFunctionImpl>();
return *m_opaque_sp.get();
}
const TypeMemberFunctionImpl &SBTypeMemberFunction::ref() const {
return *m_opaque_sp.get();
}
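// A usage sketch (not part of the patch): per the operator== definition above,
// two invalid SBType instances compare equal, since the left-hand side returns
// !rhs.IsValid() when it is itself invalid. The function name is illustrative.
#include "lldb/API/SBType.h"
static bool InvalidTypesCompareEqual() {
  lldb::SBType a, b; // default-constructed, hence invalid
  return a == b;     // true: both sides are invalid
}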
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index c6eb693bba6b..a1ebe5830bb9 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -1,9928 +1,9971 @@
//===-- TypeSystemClang.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "TypeSystemClang.h"
#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
#include <mutex>
#include <string>
#include <vector>
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTImporter.h"
#include "clang/AST/Attr.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/Type.h"
#include "clang/AST/VTableBuilder.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/LangStandard.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Frontend/FrontendOptions.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Sema/Sema.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Threading.h"
#include "Plugins/ExpressionParser/Clang/ClangASTImporter.h"
#include "Plugins/ExpressionParser/Clang/ClangASTMetadata.h"
#include "Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.h"
#include "Plugins/ExpressionParser/Clang/ClangFunctionCaller.h"
#include "Plugins/ExpressionParser/Clang/ClangPersistentVariables.h"
#include "Plugins/ExpressionParser/Clang/ClangUserExpression.h"
#include "Plugins/ExpressionParser/Clang/ClangUtil.h"
#include "Plugins/ExpressionParser/Clang/ClangUtilityFunction.h"
#include "lldb/Core/DumpDataExtractor.h"
#include "lldb/Core/Module.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Core/StreamFile.h"
#include "lldb/Core/ThreadSafeDenseMap.h"
#include "lldb/Core/UniqueCStringMap.h"
#include "lldb/Symbol/ObjectFile.h"
#include "lldb/Symbol/SymbolFile.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Language.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/ArchSpec.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/Flags.h"
#include "lldb/Utility/LLDBAssert.h"
#include "lldb/Utility/LLDBLog.h"
#include "lldb/Utility/RegularExpression.h"
#include "lldb/Utility/Scalar.h"
#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
#include "Plugins/SymbolFile/DWARF/DWARFASTParserClang.h"
#include "Plugins/SymbolFile/PDB/PDBASTParser.h"
#include <cstdio>
#include <mutex>
using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::dwarf;
using namespace clang;
using llvm::StringSwitch;
LLDB_PLUGIN_DEFINE(TypeSystemClang)
namespace {
static void VerifyDecl(clang::Decl *decl) {
assert(decl && "VerifyDecl called with nullptr?");
#ifndef NDEBUG
// We don't care about the actual access value here but only want to trigger
// that Clang calls its internal Decl::AccessDeclContextCheck validation.
decl->getAccess();
#endif
}
static inline bool
TypeSystemClangSupportsLanguage(lldb::LanguageType language) {
return language == eLanguageTypeUnknown || // Clang is the default type system
lldb_private::Language::LanguageIsC(language) ||
lldb_private::Language::LanguageIsCPlusPlus(language) ||
lldb_private::Language::LanguageIsObjC(language) ||
lldb_private::Language::LanguageIsPascal(language) ||
// Use Clang for Rust until there is a proper language plugin for it
language == eLanguageTypeRust ||
language == eLanguageTypeExtRenderScript ||
// Use Clang for D until there is a proper language plugin for it
language == eLanguageTypeD ||
// Open Dylan compiler debug info is designed to be Clang-compatible
language == eLanguageTypeDylan;
}
// Checks whether m1 is an overload of m2 (as opposed to an override). This is
// called by addOverridesForMethod to distinguish overrides (which share a
// vtable entry) from overloads (which require distinct entries).
bool isOverload(clang::CXXMethodDecl *m1, clang::CXXMethodDecl *m2) {
// FIXME: This should detect covariant return types, but currently doesn't.
lldbassert(&m1->getASTContext() == &m2->getASTContext() &&
"Methods should have the same AST context");
clang::ASTContext &context = m1->getASTContext();
const auto *m1Type = llvm::cast<clang::FunctionProtoType>(
context.getCanonicalType(m1->getType()));
const auto *m2Type = llvm::cast<clang::FunctionProtoType>(
context.getCanonicalType(m2->getType()));
auto compareArgTypes = [&context](const clang::QualType &m1p,
const clang::QualType &m2p) {
return context.hasSameType(m1p.getUnqualifiedType(),
m2p.getUnqualifiedType());
};
// FIXME: In C++14 and later, we can just pass m2Type->param_type_end()
// as a fourth parameter to std::equal().
return (m1->getNumParams() != m2->getNumParams()) ||
!std::equal(m1Type->param_type_begin(), m1Type->param_type_end(),
m2Type->param_type_begin(), compareArgTypes);
}
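// For illustration:
//   struct Base { virtual void f(int); };
//   struct Derived : Base {
//     void f(int);    // same parameter types: isOverload(...) is false (an override)
//     void f(double); // different parameter types: isOverload(...) is true (an overload)
//   };
// Only the override shares Base::f's vtable entry; the overload gets its own.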
// If decl is a virtual method, walk the base classes looking for methods that
// decl overrides. This table of overridden methods is used by IRGen to
// determine the vtable layout for decl's parent class.
void addOverridesForMethod(clang::CXXMethodDecl *decl) {
if (!decl->isVirtual())
return;
clang::CXXBasePaths paths;
llvm::SmallVector<clang::NamedDecl *, 4> decls;
auto find_overridden_methods =
[&decls, decl](const clang::CXXBaseSpecifier *specifier,
clang::CXXBasePath &path) {
if (auto *base_record = llvm::dyn_cast<clang::CXXRecordDecl>(
specifier->getType()->castAs<clang::RecordType>()->getDecl())) {
clang::DeclarationName name = decl->getDeclName();
// If this is a destructor, check whether the base class destructor is
// virtual.
if (name.getNameKind() == clang::DeclarationName::CXXDestructorName)
if (auto *baseDtorDecl = base_record->getDestructor()) {
if (baseDtorDecl->isVirtual()) {
decls.push_back(baseDtorDecl);
return true;
} else
return false;
}
// Otherwise, search for name in the base class.
for (path.Decls = base_record->lookup(name).begin();
path.Decls != path.Decls.end(); ++path.Decls) {
if (auto *method_decl =
llvm::dyn_cast<clang::CXXMethodDecl>(*path.Decls))
if (method_decl->isVirtual() && !isOverload(decl, method_decl)) {
decls.push_back(method_decl);
return true;
}
}
}
return false;
};
if (decl->getParent()->lookupInBases(find_overridden_methods, paths)) {
for (auto *overridden_decl : decls)
decl->addOverriddenMethod(
llvm::cast<clang::CXXMethodDecl>(overridden_decl));
}
}
}
static lldb::addr_t GetVTableAddress(Process &process,
VTableContextBase &vtable_ctx,
ValueObject &valobj,
const ASTRecordLayout &record_layout) {
// Retrieve type info
CompilerType pointee_type;
CompilerType this_type(valobj.GetCompilerType());
uint32_t type_info = this_type.GetTypeInfo(&pointee_type);
if (!type_info)
return LLDB_INVALID_ADDRESS;
// Check if it's a pointer or reference
bool ptr_or_ref = false;
if (type_info & (eTypeIsPointer | eTypeIsReference)) {
ptr_or_ref = true;
type_info = pointee_type.GetTypeInfo();
}
// We process only C++ classes
const uint32_t cpp_class = eTypeIsClass | eTypeIsCPlusPlus;
if ((type_info & cpp_class) != cpp_class)
return LLDB_INVALID_ADDRESS;
// Calculate offset to VTable pointer
lldb::offset_t vbtable_ptr_offset =
vtable_ctx.isMicrosoft() ? record_layout.getVBPtrOffset().getQuantity()
: 0;
if (ptr_or_ref) {
// We have a pointer / ref to the object, so read the
// vtable pointer from process memory
if (valobj.GetAddressTypeOfChildren() != eAddressTypeLoad)
return LLDB_INVALID_ADDRESS;
auto vbtable_ptr_addr = valobj.GetValueAsUnsigned(LLDB_INVALID_ADDRESS);
if (vbtable_ptr_addr == LLDB_INVALID_ADDRESS)
return LLDB_INVALID_ADDRESS;
vbtable_ptr_addr += vbtable_ptr_offset;
Status err;
return process.ReadPointerFromMemory(vbtable_ptr_addr, err);
}
// We have an object already read from process memory,
// so just extract VTable pointer from it
DataExtractor data;
Status err;
auto size = valobj.GetData(data, err);
if (err.Fail() || vbtable_ptr_offset + data.GetAddressByteSize() > size)
return LLDB_INVALID_ADDRESS;
return data.GetAddress(&vbtable_ptr_offset);
}
static int64_t ReadVBaseOffsetFromVTable(Process &process,
VTableContextBase &vtable_ctx,
lldb::addr_t vtable_ptr,
const CXXRecordDecl *cxx_record_decl,
const CXXRecordDecl *base_class_decl) {
if (vtable_ctx.isMicrosoft()) {
clang::MicrosoftVTableContext &msoft_vtable_ctx =
static_cast<clang::MicrosoftVTableContext &>(vtable_ctx);
// Get the index into the virtual base table. The index is measured in
// 32-bit (uint32_t) entries from vbtable_ptr
const unsigned vbtable_index =
msoft_vtable_ctx.getVBTableIndex(cxx_record_decl, base_class_decl);
const lldb::addr_t base_offset_addr = vtable_ptr + vbtable_index * 4;
Status err;
return process.ReadSignedIntegerFromMemory(base_offset_addr, 4, INT64_MAX,
err);
}
clang::ItaniumVTableContext &itanium_vtable_ctx =
static_cast<clang::ItaniumVTableContext &>(vtable_ctx);
clang::CharUnits base_offset_offset =
itanium_vtable_ctx.getVirtualBaseOffsetOffset(cxx_record_decl,
base_class_decl);
const lldb::addr_t base_offset_addr =
vtable_ptr + base_offset_offset.getQuantity();
const uint32_t base_offset_size = process.GetAddressByteSize();
Status err;
return process.ReadSignedIntegerFromMemory(base_offset_addr, base_offset_size,
INT64_MAX, err);
}
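// For illustration of the two ABIs handled above: in the Microsoft ABI the
// vbtable is an array of 32-bit signed offsets, so the entry is read from
// vtable_ptr + vbtable_index * 4; in the Itanium ABI virtual base offsets
// live in pointer-sized slots at (typically negative) offsets from the
// vtable address point, hence the read of GetAddressByteSize() bytes.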
static bool GetVBaseBitOffset(VTableContextBase &vtable_ctx,
ValueObject &valobj,
const ASTRecordLayout &record_layout,
const CXXRecordDecl *cxx_record_decl,
const CXXRecordDecl *base_class_decl,
int32_t &bit_offset) {
ExecutionContext exe_ctx(valobj.GetExecutionContextRef());
Process *process = exe_ctx.GetProcessPtr();
if (!process)
return false;
lldb::addr_t vtable_ptr =
GetVTableAddress(*process, vtable_ctx, valobj, record_layout);
if (vtable_ptr == LLDB_INVALID_ADDRESS)
return false;
auto base_offset = ReadVBaseOffsetFromVTable(
*process, vtable_ctx, vtable_ptr, cxx_record_decl, base_class_decl);
if (base_offset == INT64_MAX)
return false;
bit_offset = base_offset * 8;
return true;
}
typedef lldb_private::ThreadSafeDenseMap<clang::ASTContext *, TypeSystemClang *>
ClangASTMap;
static ClangASTMap &GetASTMap() {
static ClangASTMap *g_map_ptr = nullptr;
static llvm::once_flag g_once_flag;
llvm::call_once(g_once_flag, []() {
g_map_ptr = new ClangASTMap(); // leaked on purpose to avoid spins
});
return *g_map_ptr;
}
TypePayloadClang::TypePayloadClang(OptionalClangModuleID owning_module,
bool is_complete_objc_class)
: m_payload(owning_module.GetValue()) {
SetIsCompleteObjCClass(is_complete_objc_class);
}
void TypePayloadClang::SetOwningModule(OptionalClangModuleID id) {
assert(id.GetValue() < ObjCClassBit);
bool is_complete = IsCompleteObjCClass();
m_payload = id.GetValue();
SetIsCompleteObjCClass(is_complete);
}
static void SetMemberOwningModule(clang::Decl *member,
const clang::Decl *parent) {
if (!member || !parent)
return;
OptionalClangModuleID id(parent->getOwningModuleID());
if (!id.HasValue())
return;
member->setFromASTFile();
member->setOwningModuleID(id.GetValue());
member->setModuleOwnershipKind(clang::Decl::ModuleOwnershipKind::Visible);
if (llvm::isa<clang::NamedDecl>(member))
if (auto *dc = llvm::dyn_cast<clang::DeclContext>(parent)) {
dc->setHasExternalVisibleStorage(true);
// This triggers ExternalASTSource::FindExternalVisibleDeclsByName() to be
// called when searching for members.
dc->setHasExternalLexicalStorage(true);
}
}
char TypeSystemClang::ID;
bool TypeSystemClang::IsOperator(llvm::StringRef name,
clang::OverloadedOperatorKind &op_kind) {
// All operators have to start with "operator".
if (!name.consume_front("operator"))
return false;
// Remember if there was a space after "operator". This is necessary to
// check for collisions with strangely named functions like "operatorint()".
bool space_after_operator = name.consume_front(" ");
op_kind = StringSwitch<clang::OverloadedOperatorKind>(name)
.Case("+", clang::OO_Plus)
.Case("+=", clang::OO_PlusEqual)
.Case("++", clang::OO_PlusPlus)
.Case("-", clang::OO_Minus)
.Case("-=", clang::OO_MinusEqual)
.Case("--", clang::OO_MinusMinus)
.Case("->", clang::OO_Arrow)
.Case("->*", clang::OO_ArrowStar)
.Case("*", clang::OO_Star)
.Case("*=", clang::OO_StarEqual)
.Case("/", clang::OO_Slash)
.Case("/=", clang::OO_SlashEqual)
.Case("%", clang::OO_Percent)
.Case("%=", clang::OO_PercentEqual)
.Case("^", clang::OO_Caret)
.Case("^=", clang::OO_CaretEqual)
.Case("&", clang::OO_Amp)
.Case("&=", clang::OO_AmpEqual)
.Case("&&", clang::OO_AmpAmp)
.Case("|", clang::OO_Pipe)
.Case("|=", clang::OO_PipeEqual)
.Case("||", clang::OO_PipePipe)
.Case("~", clang::OO_Tilde)
.Case("!", clang::OO_Exclaim)
.Case("!=", clang::OO_ExclaimEqual)
.Case("=", clang::OO_Equal)
.Case("==", clang::OO_EqualEqual)
.Case("<", clang::OO_Less)
.Case("<<", clang::OO_LessLess)
.Case("<<=", clang::OO_LessLessEqual)
.Case("<=", clang::OO_LessEqual)
.Case(">", clang::OO_Greater)
.Case(">>", clang::OO_GreaterGreater)
.Case(">>=", clang::OO_GreaterGreaterEqual)
.Case(">=", clang::OO_GreaterEqual)
.Case("()", clang::OO_Call)
.Case("[]", clang::OO_Subscript)
.Case(",", clang::OO_Comma)
.Default(clang::NUM_OVERLOADED_OPERATORS);
// We found a fitting operator, so we can exit now.
if (op_kind != clang::NUM_OVERLOADED_OPERATORS)
return true;
// After the "operator " or "operator" part is something unknown. This means
// it's either one of the named operators (new/delete), a conversion operator
// (e.g. operator bool) or a function which name starts with "operator"
// (e.g. void operatorbool).
// If it's a function that starts with operator it can't have a space after
// "operator" because identifiers can't contain spaces.
// E.g. "operator int" (conversion operator)
// vs. "operatorint" (function with colliding name).
if (!space_after_operator)
return false; // not an operator.
// Now the operator is either one of the named operators or a conversion
// operator.
op_kind = StringSwitch<clang::OverloadedOperatorKind>(name)
.Case("new", clang::OO_New)
.Case("new[]", clang::OO_Array_New)
.Case("delete", clang::OO_Delete)
.Case("delete[]", clang::OO_Array_Delete)
// conversion operators hit this case.
.Default(clang::NUM_OVERLOADED_OPERATORS);
return true;
}
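// For illustration of the return convention:
//   IsOperator("operator+=", k)    -> true, k == OO_PlusEqual
//   IsOperator("operator bool", k) -> true, k == NUM_OVERLOADED_OPERATORS
//     (a conversion operator, which has no overloaded-operator kind)
//   IsOperator("operatorint", k)   -> false (an ordinary function whose name
//     merely starts with "operator"; no space, so it cannot be an operator)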
clang::AccessSpecifier
TypeSystemClang::ConvertAccessTypeToAccessSpecifier(AccessType access) {
switch (access) {
default:
break;
case eAccessNone:
return AS_none;
case eAccessPublic:
return AS_public;
case eAccessPrivate:
return AS_private;
case eAccessProtected:
return AS_protected;
}
return AS_none;
}
static void ParseLangArgs(LangOptions &Opts, InputKind IK, const char *triple) {
// FIXME: Cleanup per-file based stuff.
// Set some properties which depend solely on the input kind; it would be
// nice to move these to the language standard, and have the driver resolve
// the input kind + language standard.
if (IK.getLanguage() == clang::Language::Asm) {
Opts.AsmPreprocessor = 1;
} else if (IK.isObjectiveC()) {
Opts.ObjC = 1;
}
LangStandard::Kind LangStd = LangStandard::lang_unspecified;
if (LangStd == LangStandard::lang_unspecified) {
// Based on the base language, pick one.
switch (IK.getLanguage()) {
case clang::Language::Unknown:
case clang::Language::LLVM_IR:
case clang::Language::RenderScript:
llvm_unreachable("Invalid input kind!");
case clang::Language::OpenCL:
LangStd = LangStandard::lang_opencl10;
break;
case clang::Language::OpenCLCXX:
LangStd = LangStandard::lang_openclcpp10;
break;
case clang::Language::CUDA:
LangStd = LangStandard::lang_cuda;
break;
case clang::Language::Asm:
case clang::Language::C:
case clang::Language::ObjC:
LangStd = LangStandard::lang_gnu99;
break;
case clang::Language::CXX:
case clang::Language::ObjCXX:
LangStd = LangStandard::lang_gnucxx98;
break;
case clang::Language::HIP:
LangStd = LangStandard::lang_hip;
break;
case clang::Language::HLSL:
LangStd = LangStandard::lang_hlsl;
break;
}
}
const LangStandard &Std = LangStandard::getLangStandardForKind(LangStd);
Opts.LineComment = Std.hasLineComments();
Opts.C99 = Std.isC99();
Opts.CPlusPlus = Std.isCPlusPlus();
Opts.CPlusPlus11 = Std.isCPlusPlus11();
Opts.Digraphs = Std.hasDigraphs();
Opts.GNUMode = Std.isGNUMode();
Opts.GNUInline = !Std.isC99();
Opts.HexFloats = Std.hasHexFloats();
Opts.WChar = true;
// OpenCL has some additional defaults.
if (LangStd == LangStandard::lang_opencl10) {
Opts.OpenCL = 1;
Opts.AltiVec = 1;
Opts.CXXOperatorNames = 1;
Opts.setLaxVectorConversions(LangOptions::LaxVectorConversionKind::All);
}
// OpenCL and C++ both have bool, true, false keywords.
Opts.Bool = Opts.OpenCL || Opts.CPlusPlus;
Opts.setValueVisibilityMode(DefaultVisibility);
// Mimicking gcc's behavior, trigraphs are only enabled if -trigraphs is
// specified, or -std is set to a conforming mode.
Opts.Trigraphs = !Opts.GNUMode;
Opts.CharIsSigned = ArchSpec(triple).CharIsSignedByDefault();
Opts.OptimizeSize = 0;
// FIXME: Eliminate this dependency.
// unsigned Opt =
// Args.hasArg(OPT_Os) ? 2 : getLastArgIntValue(Args, OPT_O, 0, Diags);
// Opts.Optimize = Opt != 0;
unsigned Opt = 0;
// This is the __NO_INLINE__ define, which just depends on things like the
// optimization level and -fno-inline, not actually whether the backend has
// inlining enabled.
//
// FIXME: This is affected by other options (-fno-inline).
Opts.NoInlineDefine = !Opt;
// This is needed to allocate the extra space for the owning module
// on each decl.
Opts.ModulesLocalVisibility = 1;
}
TypeSystemClang::TypeSystemClang(llvm::StringRef name,
llvm::Triple target_triple) {
m_display_name = name.str();
if (!target_triple.str().empty())
SetTargetTriple(target_triple.str());
// The caller didn't pass an ASTContext so create a new one for this
// TypeSystemClang.
CreateASTContext();
}
TypeSystemClang::TypeSystemClang(llvm::StringRef name,
ASTContext &existing_ctxt) {
m_display_name = name.str();
SetTargetTriple(existing_ctxt.getTargetInfo().getTriple().str());
m_ast_up.reset(&existing_ctxt);
GetASTMap().Insert(&existing_ctxt, this);
}
// Destructor
TypeSystemClang::~TypeSystemClang() { Finalize(); }
lldb::TypeSystemSP TypeSystemClang::CreateInstance(lldb::LanguageType language,
lldb_private::Module *module,
Target *target) {
if (!TypeSystemClangSupportsLanguage(language))
return lldb::TypeSystemSP();
ArchSpec arch;
if (module)
arch = module->GetArchitecture();
else if (target)
arch = target->GetArchitecture();
if (!arch.IsValid())
return lldb::TypeSystemSP();
llvm::Triple triple = arch.GetTriple();
// LLVM wants this to be set to iOS or MacOSX; if we're working on
// a bare-boards type image, change the triple for llvm's benefit.
if (triple.getVendor() == llvm::Triple::Apple &&
triple.getOS() == llvm::Triple::UnknownOS) {
if (triple.getArch() == llvm::Triple::arm ||
triple.getArch() == llvm::Triple::aarch64 ||
triple.getArch() == llvm::Triple::aarch64_32 ||
triple.getArch() == llvm::Triple::thumb) {
triple.setOS(llvm::Triple::IOS);
} else {
triple.setOS(llvm::Triple::MacOSX);
}
}
if (module) {
std::string ast_name =
"ASTContext for '" + module->GetFileSpec().GetPath() + "'";
return std::make_shared<TypeSystemClang>(ast_name, triple);
} else if (target && target->IsValid())
return std::make_shared<ScratchTypeSystemClang>(*target, triple);
return lldb::TypeSystemSP();
}
LanguageSet TypeSystemClang::GetSupportedLanguagesForTypes() {
LanguageSet languages;
languages.Insert(lldb::eLanguageTypeC89);
languages.Insert(lldb::eLanguageTypeC);
languages.Insert(lldb::eLanguageTypeC11);
languages.Insert(lldb::eLanguageTypeC_plus_plus);
languages.Insert(lldb::eLanguageTypeC99);
languages.Insert(lldb::eLanguageTypeObjC);
languages.Insert(lldb::eLanguageTypeObjC_plus_plus);
languages.Insert(lldb::eLanguageTypeC_plus_plus_03);
languages.Insert(lldb::eLanguageTypeC_plus_plus_11);
languages.Insert(lldb::eLanguageTypeC11);
languages.Insert(lldb::eLanguageTypeC_plus_plus_14);
return languages;
}
LanguageSet TypeSystemClang::GetSupportedLanguagesForExpressions() {
LanguageSet languages;
languages.Insert(lldb::eLanguageTypeC_plus_plus);
languages.Insert(lldb::eLanguageTypeObjC_plus_plus);
languages.Insert(lldb::eLanguageTypeC_plus_plus_03);
languages.Insert(lldb::eLanguageTypeC_plus_plus_11);
languages.Insert(lldb::eLanguageTypeC_plus_plus_14);
return languages;
}
void TypeSystemClang::Initialize() {
PluginManager::RegisterPlugin(
GetPluginNameStatic(), "clang base AST context plug-in", CreateInstance,
GetSupportedLanguagesForTypes(), GetSupportedLanguagesForExpressions());
}
void TypeSystemClang::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
void TypeSystemClang::Finalize() {
assert(m_ast_up);
GetASTMap().Erase(m_ast_up.get());
if (!m_ast_owned)
m_ast_up.release();
m_builtins_up.reset();
m_selector_table_up.reset();
m_identifier_table_up.reset();
m_target_info_up.reset();
m_target_options_rp.reset();
m_diagnostics_engine_up.reset();
m_source_manager_up.reset();
m_language_options_up.reset();
}
void TypeSystemClang::setSema(Sema *s) {
// Ensure that the new sema actually belongs to our ASTContext.
assert(s == nullptr || &s->getASTContext() == m_ast_up.get());
m_sema = s;
}
const char *TypeSystemClang::GetTargetTriple() {
return m_target_triple.c_str();
}
void TypeSystemClang::SetTargetTriple(llvm::StringRef target_triple) {
m_target_triple = target_triple.str();
}
void TypeSystemClang::SetExternalSource(
llvm::IntrusiveRefCntPtr<ExternalASTSource> &ast_source_up) {
ASTContext &ast = getASTContext();
ast.getTranslationUnitDecl()->setHasExternalLexicalStorage(true);
ast.setExternalSource(ast_source_up);
}
ASTContext &TypeSystemClang::getASTContext() {
assert(m_ast_up);
return *m_ast_up;
}
class NullDiagnosticConsumer : public DiagnosticConsumer {
public:
NullDiagnosticConsumer() { m_log = GetLog(LLDBLog::Expressions); }
void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
const clang::Diagnostic &info) override {
if (m_log) {
llvm::SmallVector<char, 32> diag_str(10);
info.FormatDiagnostic(diag_str);
diag_str.push_back('\0');
LLDB_LOGF(m_log, "Compiler diagnostic: %s\n", diag_str.data());
}
}
DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const {
return new NullDiagnosticConsumer();
}
private:
Log *m_log;
};
void TypeSystemClang::CreateASTContext() {
assert(!m_ast_up);
m_ast_owned = true;
m_language_options_up = std::make_unique<LangOptions>();
ParseLangArgs(*m_language_options_up, clang::Language::ObjCXX,
GetTargetTriple());
m_identifier_table_up =
std::make_unique<IdentifierTable>(*m_language_options_up, nullptr);
m_builtins_up = std::make_unique<Builtin::Context>();
m_selector_table_up = std::make_unique<SelectorTable>();
clang::FileSystemOptions file_system_options;
m_file_manager_up = std::make_unique<clang::FileManager>(
file_system_options, FileSystem::Instance().GetVirtualFileSystem());
llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_id_sp(new DiagnosticIDs());
m_diagnostics_engine_up =
std::make_unique<DiagnosticsEngine>(diag_id_sp, new DiagnosticOptions());
m_source_manager_up = std::make_unique<clang::SourceManager>(
*m_diagnostics_engine_up, *m_file_manager_up);
m_ast_up = std::make_unique<ASTContext>(
*m_language_options_up, *m_source_manager_up, *m_identifier_table_up,
*m_selector_table_up, *m_builtins_up, TU_Complete);
m_diagnostic_consumer_up = std::make_unique<NullDiagnosticConsumer>();
m_ast_up->getDiagnostics().setClient(m_diagnostic_consumer_up.get(), false);
// This can be NULL if we don't know anything about the architecture or if
// the target for an architecture isn't enabled in the llvm/clang that we
// built
TargetInfo *target_info = getTargetInfo();
if (target_info)
m_ast_up->InitBuiltinTypes(*target_info);
GetASTMap().Insert(m_ast_up.get(), this);
llvm::IntrusiveRefCntPtr<clang::ExternalASTSource> ast_source_up(
new ClangExternalASTSourceCallbacks(*this));
SetExternalSource(ast_source_up);
}
TypeSystemClang *TypeSystemClang::GetASTContext(clang::ASTContext *ast) {
TypeSystemClang *clang_ast = GetASTMap().Lookup(ast);
return clang_ast;
}
clang::MangleContext *TypeSystemClang::getMangleContext() {
if (m_mangle_ctx_up == nullptr)
m_mangle_ctx_up.reset(getASTContext().createMangleContext());
return m_mangle_ctx_up.get();
}
std::shared_ptr<clang::TargetOptions> &TypeSystemClang::getTargetOptions() {
if (m_target_options_rp == nullptr && !m_target_triple.empty()) {
m_target_options_rp = std::make_shared<clang::TargetOptions>();
m_target_options_rp->Triple = m_target_triple;
}
return m_target_options_rp;
}
TargetInfo *TypeSystemClang::getTargetInfo() {
// target_triple should be something like "x86_64-apple-macosx"
if (m_target_info_up == nullptr && !m_target_triple.empty())
m_target_info_up.reset(TargetInfo::CreateTargetInfo(
getASTContext().getDiagnostics(), getTargetOptions()));
return m_target_info_up.get();
}
#pragma mark Basic Types
static inline bool QualTypeMatchesBitSize(const uint64_t bit_size,
ASTContext &ast, QualType qual_type) {
uint64_t qual_type_bit_size = ast.getTypeSize(qual_type);
return qual_type_bit_size == bit_size;
}
CompilerType
TypeSystemClang::GetBuiltinTypeForEncodingAndBitSize(Encoding encoding,
size_t bit_size) {
ASTContext &ast = getASTContext();
switch (encoding) {
case eEncodingInvalid:
if (QualTypeMatchesBitSize(bit_size, ast, ast.VoidPtrTy))
return GetType(ast.VoidPtrTy);
break;
case eEncodingUint:
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedIntTy))
return GetType(ast.UnsignedIntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongTy))
return GetType(ast.UnsignedLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongLongTy))
return GetType(ast.UnsignedLongLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedInt128Ty))
return GetType(ast.UnsignedInt128Ty);
break;
case eEncodingSint:
if (QualTypeMatchesBitSize(bit_size, ast, ast.SignedCharTy))
return GetType(ast.SignedCharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.ShortTy))
return GetType(ast.ShortTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.IntTy))
return GetType(ast.IntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongTy))
return GetType(ast.LongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongLongTy))
return GetType(ast.LongLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.Int128Ty))
return GetType(ast.Int128Ty);
break;
case eEncodingIEEE754:
if (QualTypeMatchesBitSize(bit_size, ast, ast.FloatTy))
return GetType(ast.FloatTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.DoubleTy))
return GetType(ast.DoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongDoubleTy))
return GetType(ast.LongDoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.HalfTy))
return GetType(ast.HalfTy);
break;
case eEncodingVector:
// Sanity check that bit_size is a multiple of 8.
if (bit_size && !(bit_size & 0x7u))
return GetType(ast.getExtVectorType(ast.UnsignedCharTy, bit_size / 8));
break;
}
return CompilerType();
}
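// For illustration: on a typical target where int is 32 bits wide,
// GetBuiltinTypeForEncodingAndBitSize(eEncodingSint, 32) tries the signed
// candidates in order (signed char, short, int, ...) and returns the first
// one whose bit size matches, i.e. the CompilerType for plain int.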
lldb::BasicType
TypeSystemClang::GetBasicTypeEnumeration(ConstString name) {
if (name) {
typedef UniqueCStringMap<lldb::BasicType> TypeNameToBasicTypeMap;
static TypeNameToBasicTypeMap g_type_map;
static llvm::once_flag g_once_flag;
llvm::call_once(g_once_flag, []() {
// "void"
g_type_map.Append(ConstString("void"), eBasicTypeVoid);
// "char"
g_type_map.Append(ConstString("char"), eBasicTypeChar);
g_type_map.Append(ConstString("signed char"), eBasicTypeSignedChar);
g_type_map.Append(ConstString("unsigned char"), eBasicTypeUnsignedChar);
g_type_map.Append(ConstString("wchar_t"), eBasicTypeWChar);
g_type_map.Append(ConstString("signed wchar_t"), eBasicTypeSignedWChar);
g_type_map.Append(ConstString("unsigned wchar_t"),
eBasicTypeUnsignedWChar);
// "short"
g_type_map.Append(ConstString("short"), eBasicTypeShort);
g_type_map.Append(ConstString("short int"), eBasicTypeShort);
g_type_map.Append(ConstString("unsigned short"), eBasicTypeUnsignedShort);
g_type_map.Append(ConstString("unsigned short int"),
eBasicTypeUnsignedShort);
// "int"
g_type_map.Append(ConstString("int"), eBasicTypeInt);
g_type_map.Append(ConstString("signed int"), eBasicTypeInt);
g_type_map.Append(ConstString("unsigned int"), eBasicTypeUnsignedInt);
g_type_map.Append(ConstString("unsigned"), eBasicTypeUnsignedInt);
// "long"
g_type_map.Append(ConstString("long"), eBasicTypeLong);
g_type_map.Append(ConstString("long int"), eBasicTypeLong);
g_type_map.Append(ConstString("unsigned long"), eBasicTypeUnsignedLong);
g_type_map.Append(ConstString("unsigned long int"),
eBasicTypeUnsignedLong);
// "long long"
g_type_map.Append(ConstString("long long"), eBasicTypeLongLong);
g_type_map.Append(ConstString("long long int"), eBasicTypeLongLong);
g_type_map.Append(ConstString("unsigned long long"),
eBasicTypeUnsignedLongLong);
g_type_map.Append(ConstString("unsigned long long int"),
eBasicTypeUnsignedLongLong);
// "int128"
g_type_map.Append(ConstString("__int128_t"), eBasicTypeInt128);
g_type_map.Append(ConstString("__uint128_t"), eBasicTypeUnsignedInt128);
// Miscellaneous
g_type_map.Append(ConstString("bool"), eBasicTypeBool);
g_type_map.Append(ConstString("float"), eBasicTypeFloat);
g_type_map.Append(ConstString("double"), eBasicTypeDouble);
g_type_map.Append(ConstString("long double"), eBasicTypeLongDouble);
g_type_map.Append(ConstString("id"), eBasicTypeObjCID);
g_type_map.Append(ConstString("SEL"), eBasicTypeObjCSel);
g_type_map.Append(ConstString("nullptr"), eBasicTypeNullPtr);
g_type_map.Sort();
});
return g_type_map.Find(name, eBasicTypeInvalid);
}
return eBasicTypeInvalid;
}
uint32_t TypeSystemClang::GetPointerByteSize() {
if (m_pointer_byte_size == 0)
if (auto size = GetBasicType(lldb::eBasicTypeVoid)
.GetPointerType()
.GetByteSize(nullptr))
m_pointer_byte_size = *size;
return m_pointer_byte_size;
}
CompilerType TypeSystemClang::GetBasicType(lldb::BasicType basic_type) {
clang::ASTContext &ast = getASTContext();
lldb::opaque_compiler_type_t clang_type =
GetOpaqueCompilerType(&ast, basic_type);
if (clang_type)
return CompilerType(this, clang_type);
return CompilerType();
}
CompilerType TypeSystemClang::GetBuiltinTypeForDWARFEncodingAndBitSize(
llvm::StringRef type_name, uint32_t dw_ate, uint32_t bit_size) {
ASTContext &ast = getASTContext();
switch (dw_ate) {
default:
break;
case DW_ATE_address:
if (QualTypeMatchesBitSize(bit_size, ast, ast.VoidPtrTy))
return GetType(ast.VoidPtrTy);
break;
case DW_ATE_boolean:
if (QualTypeMatchesBitSize(bit_size, ast, ast.BoolTy))
return GetType(ast.BoolTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedIntTy))
return GetType(ast.UnsignedIntTy);
break;
case DW_ATE_lo_user:
// This has been seen to mean DW_AT_complex_integer
if (type_name.contains("complex")) {
CompilerType complex_int_clang_type =
GetBuiltinTypeForDWARFEncodingAndBitSize("int", DW_ATE_signed,
bit_size / 2);
return GetType(
ast.getComplexType(ClangUtil::GetQualType(complex_int_clang_type)));
}
break;
case DW_ATE_complex_float: {
CanQualType FloatComplexTy = ast.getComplexType(ast.FloatTy);
if (QualTypeMatchesBitSize(bit_size, ast, FloatComplexTy))
return GetType(FloatComplexTy);
CanQualType DoubleComplexTy = ast.getComplexType(ast.DoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, DoubleComplexTy))
return GetType(DoubleComplexTy);
CanQualType LongDoubleComplexTy = ast.getComplexType(ast.LongDoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, LongDoubleComplexTy))
return GetType(LongDoubleComplexTy);
CompilerType complex_float_clang_type =
GetBuiltinTypeForDWARFEncodingAndBitSize("float", DW_ATE_float,
bit_size / 2);
return GetType(
ast.getComplexType(ClangUtil::GetQualType(complex_float_clang_type)));
}
case DW_ATE_float:
if (type_name == "float" &&
QualTypeMatchesBitSize(bit_size, ast, ast.FloatTy))
return GetType(ast.FloatTy);
if (type_name == "double" &&
QualTypeMatchesBitSize(bit_size, ast, ast.DoubleTy))
return GetType(ast.DoubleTy);
if (type_name == "long double" &&
QualTypeMatchesBitSize(bit_size, ast, ast.LongDoubleTy))
return GetType(ast.LongDoubleTy);
// Fall back to not requiring a name match
if (QualTypeMatchesBitSize(bit_size, ast, ast.FloatTy))
return GetType(ast.FloatTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.DoubleTy))
return GetType(ast.DoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongDoubleTy))
return GetType(ast.LongDoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.HalfTy))
return GetType(ast.HalfTy);
break;
case DW_ATE_signed:
if (!type_name.empty()) {
if (type_name == "wchar_t" &&
QualTypeMatchesBitSize(bit_size, ast, ast.WCharTy) &&
(getTargetInfo() &&
TargetInfo::isTypeSigned(getTargetInfo()->getWCharType())))
return GetType(ast.WCharTy);
if (type_name == "void" &&
QualTypeMatchesBitSize(bit_size, ast, ast.VoidTy))
return GetType(ast.VoidTy);
if (type_name.contains("long long") &&
QualTypeMatchesBitSize(bit_size, ast, ast.LongLongTy))
return GetType(ast.LongLongTy);
if (type_name.contains("long") &&
QualTypeMatchesBitSize(bit_size, ast, ast.LongTy))
return GetType(ast.LongTy);
if (type_name.contains("short") &&
QualTypeMatchesBitSize(bit_size, ast, ast.ShortTy))
return GetType(ast.ShortTy);
if (type_name.contains("char")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.CharTy))
return GetType(ast.CharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.SignedCharTy))
return GetType(ast.SignedCharTy);
}
if (type_name.contains("int")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.IntTy))
return GetType(ast.IntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.Int128Ty))
return GetType(ast.Int128Ty);
}
}
// We weren't able to match up a type name, just search by size
if (QualTypeMatchesBitSize(bit_size, ast, ast.CharTy))
return GetType(ast.CharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.ShortTy))
return GetType(ast.ShortTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.IntTy))
return GetType(ast.IntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongTy))
return GetType(ast.LongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongLongTy))
return GetType(ast.LongLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.Int128Ty))
return GetType(ast.Int128Ty);
break;
case DW_ATE_signed_char:
if (ast.getLangOpts().CharIsSigned && type_name == "char") {
if (QualTypeMatchesBitSize(bit_size, ast, ast.CharTy))
return GetType(ast.CharTy);
}
if (QualTypeMatchesBitSize(bit_size, ast, ast.SignedCharTy))
return GetType(ast.SignedCharTy);
break;
case DW_ATE_unsigned:
if (!type_name.empty()) {
if (type_name == "wchar_t") {
if (QualTypeMatchesBitSize(bit_size, ast, ast.WCharTy)) {
if (!(getTargetInfo() &&
TargetInfo::isTypeSigned(getTargetInfo()->getWCharType())))
return GetType(ast.WCharTy);
}
}
if (type_name.contains("long long")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongLongTy))
return GetType(ast.UnsignedLongLongTy);
} else if (type_name.contains("long")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongTy))
return GetType(ast.UnsignedLongTy);
} else if (type_name.contains("short")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
} else if (type_name.contains("char")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
} else if (type_name.contains("int")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedIntTy))
return GetType(ast.UnsignedIntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedInt128Ty))
return GetType(ast.UnsignedInt128Ty);
}
}
// We weren't able to match up a type name, just search by size
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedIntTy))
return GetType(ast.UnsignedIntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongTy))
return GetType(ast.UnsignedLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongLongTy))
return GetType(ast.UnsignedLongLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedInt128Ty))
return GetType(ast.UnsignedInt128Ty);
break;
case DW_ATE_unsigned_char:
if (!ast.getLangOpts().CharIsSigned && type_name == "char") {
if (QualTypeMatchesBitSize(bit_size, ast, ast.CharTy))
return GetType(ast.CharTy);
}
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
break;
case DW_ATE_imaginary_float:
break;
case DW_ATE_UTF:
switch (bit_size) {
case 8:
return GetType(ast.Char8Ty);
case 16:
return GetType(ast.Char16Ty);
case 32:
return GetType(ast.Char32Ty);
default:
if (!type_name.empty()) {
if (type_name == "char16_t")
return GetType(ast.Char16Ty);
if (type_name == "char32_t")
return GetType(ast.Char32Ty);
if (type_name == "char8_t")
return GetType(ast.Char8Ty);
}
}
break;
}
Log *log = GetLog(LLDBLog::Types);
LLDB_LOG(log,
"error: need to add support for DW_TAG_base_type '{0}' "
"encoded with DW_ATE = {1:x}, bit_size = {2}",
type_name, dw_ate, bit_size);
return CompilerType();
}
CompilerType TypeSystemClang::GetCStringType(bool is_const) {
ASTContext &ast = getASTContext();
QualType char_type(ast.CharTy);
if (is_const)
char_type.addConst();
return GetType(ast.getPointerType(char_type));
}
bool TypeSystemClang::AreTypesSame(CompilerType type1, CompilerType type2,
bool ignore_qualifiers) {
TypeSystemClang *ast =
llvm::dyn_cast_or_null<TypeSystemClang>(type1.GetTypeSystem());
if (!ast || ast != type2.GetTypeSystem())
return false;
if (type1.GetOpaqueQualType() == type2.GetOpaqueQualType())
return true;
QualType type1_qual = ClangUtil::GetQualType(type1);
QualType type2_qual = ClangUtil::GetQualType(type2);
if (ignore_qualifiers) {
type1_qual = type1_qual.getUnqualifiedType();
type2_qual = type2_qual.getUnqualifiedType();
}
return ast->getASTContext().hasSameType(type1_qual, type2_qual);
}
CompilerType TypeSystemClang::GetTypeForDecl(void *opaque_decl) {
if (!opaque_decl)
return CompilerType();
clang::Decl *decl = static_cast<clang::Decl *>(opaque_decl);
if (auto *named_decl = llvm::dyn_cast<clang::NamedDecl>(decl))
return GetTypeForDecl(named_decl);
return CompilerType();
}
CompilerDeclContext TypeSystemClang::CreateDeclContext(DeclContext *ctx) {
// Check that the DeclContext actually belongs to this ASTContext.
assert(&ctx->getParentASTContext() == &getASTContext());
return CompilerDeclContext(this, ctx);
}
CompilerType TypeSystemClang::GetTypeForDecl(clang::NamedDecl *decl) {
if (clang::ObjCInterfaceDecl *interface_decl =
llvm::dyn_cast<clang::ObjCInterfaceDecl>(decl))
return GetTypeForDecl(interface_decl);
if (clang::TagDecl *tag_decl = llvm::dyn_cast<clang::TagDecl>(decl))
return GetTypeForDecl(tag_decl);
return CompilerType();
}
CompilerType TypeSystemClang::GetTypeForDecl(TagDecl *decl) {
return GetType(getASTContext().getTagDeclType(decl));
}
CompilerType TypeSystemClang::GetTypeForDecl(ObjCInterfaceDecl *decl) {
return GetType(getASTContext().getObjCInterfaceType(decl));
}
#pragma mark Structure, Unions, Classes
void TypeSystemClang::SetOwningModule(clang::Decl *decl,
OptionalClangModuleID owning_module) {
if (!decl || !owning_module.HasValue())
return;
decl->setFromASTFile();
decl->setOwningModuleID(owning_module.GetValue());
decl->setModuleOwnershipKind(clang::Decl::ModuleOwnershipKind::Visible);
}
OptionalClangModuleID
TypeSystemClang::GetOrCreateClangModule(llvm::StringRef name,
OptionalClangModuleID parent,
bool is_framework, bool is_explicit) {
// Get the external AST source which holds the modules.
auto *ast_source = llvm::dyn_cast_or_null<ClangExternalASTSourceCallbacks>(
getASTContext().getExternalSource());
assert(ast_source && "external ast source was lost");
if (!ast_source)
return {};
// Lazily initialize the module map.
if (!m_header_search_up) {
auto HSOpts = std::make_shared<clang::HeaderSearchOptions>();
m_header_search_up = std::make_unique<clang::HeaderSearch>(
HSOpts, *m_source_manager_up, *m_diagnostics_engine_up,
*m_language_options_up, m_target_info_up.get());
m_module_map_up = std::make_unique<clang::ModuleMap>(
*m_source_manager_up, *m_diagnostics_engine_up, *m_language_options_up,
m_target_info_up.get(), *m_header_search_up);
}
// Get or create the module context.
bool created;
clang::Module *module;
auto parent_desc = ast_source->getSourceDescriptor(parent.GetValue());
std::tie(module, created) = m_module_map_up->findOrCreateModule(
name, parent_desc ? parent_desc->getModuleOrNull() : nullptr,
is_framework, is_explicit);
if (!created)
return ast_source->GetIDForModule(module);
return ast_source->RegisterModule(module);
}
CompilerType TypeSystemClang::CreateRecordType(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
AccessType access_type, llvm::StringRef name, int kind,
LanguageType language, ClangASTMetadata *metadata, bool exports_symbols) {
ASTContext &ast = getASTContext();
if (decl_ctx == nullptr)
decl_ctx = ast.getTranslationUnitDecl();
if (language == eLanguageTypeObjC ||
language == eLanguageTypeObjC_plus_plus) {
bool isForwardDecl = true;
bool isInternal = false;
return CreateObjCClass(name, decl_ctx, owning_module, isForwardDecl,
isInternal, metadata);
}
// NOTE: Eventually CXXRecordDecl will be merged back into RecordDecl and
// we will need to update this code. I was told to currently always use the
// CXXRecordDecl class since we often don't know from debug information if
// something is a struct or a class, so we default to always using the more
// complete definition just in case.
bool has_name = !name.empty();
CXXRecordDecl *decl = CXXRecordDecl::CreateDeserialized(ast, 0);
decl->setTagKind(static_cast<TagDecl::TagKind>(kind));
decl->setDeclContext(decl_ctx);
if (has_name)
decl->setDeclName(&ast.Idents.get(name));
SetOwningModule(decl, owning_module);
if (!has_name) {
// In C++ a lambda is also represented as an unnamed class. This is
// different from an *anonymous class* that the user wrote:
//
// struct A {
// // anonymous class (GNU/MSVC extension)
// struct {
// int x;
// };
// // unnamed class within a class
// struct {
// int y;
// } B;
// };
//
// void f() {
// unnamed class outside of a class
// struct {
// int z;
// } C;
// }
//
// Anonymous classes are a GNU/MSVC extension that clang supports. The
// extension requires the anonymous class to be embedded within a class,
// so the heuristic below verifies this condition.
if (isa<CXXRecordDecl>(decl_ctx) && exports_symbols)
decl->setAnonymousStructOrUnion(true);
}
if (metadata)
SetMetadata(decl, *metadata);
if (access_type != eAccessNone)
decl->setAccess(ConvertAccessTypeToAccessSpecifier(access_type));
if (decl_ctx)
decl_ctx->addDecl(decl);
return GetType(ast.getTagDeclType(decl));
}
namespace {
/// Returns true iff the given TemplateArgument should be represented as an
/// NonTypeTemplateParmDecl in the AST.
bool IsValueParam(const clang::TemplateArgument &argument) {
return argument.getKind() == TemplateArgument::Integral;
}
void AddAccessSpecifierDecl(clang::CXXRecordDecl *cxx_record_decl,
ASTContext &ct,
clang::AccessSpecifier previous_access,
clang::AccessSpecifier access_specifier) {
if (!cxx_record_decl->isClass() && !cxx_record_decl->isStruct())
return;
if (previous_access != access_specifier) {
// For struct, don't add AS_public if it's the first AccessSpecDecl.
// For class, don't add AS_private if it's the first AccessSpecDecl.
if ((cxx_record_decl->isStruct() &&
previous_access == clang::AccessSpecifier::AS_none &&
access_specifier == clang::AccessSpecifier::AS_public) ||
(cxx_record_decl->isClass() &&
previous_access == clang::AccessSpecifier::AS_none &&
access_specifier == clang::AccessSpecifier::AS_private)) {
return;
}
cxx_record_decl->addDecl(
AccessSpecDecl::Create(ct, access_specifier, cxx_record_decl,
SourceLocation(), SourceLocation()));
}
}
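// For illustration: in "struct S { public: int a; private: int b; };" the
// leading "public:" matches the struct default (previous_access is AS_none),
// so no AccessSpecDecl is added for it; the subsequent "private:" differs
// from AS_public and therefore does get an AccessSpecDecl.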
} // namespace
static TemplateParameterList *CreateTemplateParameterList(
ASTContext &ast,
const TypeSystemClang::TemplateParameterInfos &template_param_infos,
llvm::SmallVector<NamedDecl *, 8> &template_param_decls) {
const bool parameter_pack = false;
const bool is_typename = false;
const unsigned depth = 0;
const size_t num_template_params = template_param_infos.args.size();
DeclContext *const decl_context =
ast.getTranslationUnitDecl(); // Is this the right decl context?
for (size_t i = 0; i < num_template_params; ++i) {
const char *name = template_param_infos.names[i];
IdentifierInfo *identifier_info = nullptr;
if (name && name[0])
identifier_info = &ast.Idents.get(name);
if (IsValueParam(template_param_infos.args[i])) {
QualType template_param_type =
template_param_infos.args[i].getIntegralType();
template_param_decls.push_back(NonTypeTemplateParmDecl::Create(
ast, decl_context, SourceLocation(), SourceLocation(), depth, i,
identifier_info, template_param_type, parameter_pack,
ast.getTrivialTypeSourceInfo(template_param_type)));
} else {
template_param_decls.push_back(TemplateTypeParmDecl::Create(
ast, decl_context, SourceLocation(), SourceLocation(), depth, i,
identifier_info, is_typename, parameter_pack));
}
}
if (template_param_infos.packed_args) {
IdentifierInfo *identifier_info = nullptr;
if (template_param_infos.pack_name && template_param_infos.pack_name[0])
identifier_info = &ast.Idents.get(template_param_infos.pack_name);
const bool parameter_pack_true = true;
if (!template_param_infos.packed_args->args.empty() &&
IsValueParam(template_param_infos.packed_args->args[0])) {
QualType template_param_type =
template_param_infos.packed_args->args[0].getIntegralType();
template_param_decls.push_back(NonTypeTemplateParmDecl::Create(
ast, decl_context, SourceLocation(), SourceLocation(), depth,
num_template_params, identifier_info, template_param_type,
parameter_pack_true,
ast.getTrivialTypeSourceInfo(template_param_type)));
} else {
template_param_decls.push_back(TemplateTypeParmDecl::Create(
ast, decl_context, SourceLocation(), SourceLocation(), depth,
num_template_params, identifier_info, is_typename,
parameter_pack_true));
}
}
clang::Expr *const requires_clause = nullptr; // TODO: Concepts
TemplateParameterList *template_param_list = TemplateParameterList::Create(
ast, SourceLocation(), SourceLocation(), template_param_decls,
SourceLocation(), requires_clause);
return template_param_list;
}
clang::FunctionTemplateDecl *TypeSystemClang::CreateFunctionTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
clang::FunctionDecl *func_decl,
const TemplateParameterInfos &template_param_infos) {
// Create a function template node.
ASTContext &ast = getASTContext();
llvm::SmallVector<NamedDecl *, 8> template_param_decls;
TemplateParameterList *template_param_list = CreateTemplateParameterList(
ast, template_param_infos, template_param_decls);
FunctionTemplateDecl *func_tmpl_decl =
FunctionTemplateDecl::CreateDeserialized(ast, 0);
func_tmpl_decl->setDeclContext(decl_ctx);
func_tmpl_decl->setLocation(func_decl->getLocation());
func_tmpl_decl->setDeclName(func_decl->getDeclName());
func_tmpl_decl->init(func_decl, template_param_list);
SetOwningModule(func_tmpl_decl, owning_module);
for (size_t i = 0, template_param_decl_count = template_param_decls.size();
i < template_param_decl_count; ++i) {
// TODO: verify which decl context we should put template_param_decls into.
template_param_decls[i]->setDeclContext(func_decl);
}
// Function templates inside a record need to have an access specifier.
// It doesn't matter what access specifier we give the template, as LLDB
// allows accessing everything inside a record anyway.
if (decl_ctx->isRecord())
func_tmpl_decl->setAccess(clang::AccessSpecifier::AS_public);
return func_tmpl_decl;
}
void TypeSystemClang::CreateFunctionTemplateSpecializationInfo(
FunctionDecl *func_decl, clang::FunctionTemplateDecl *func_tmpl_decl,
const TemplateParameterInfos &infos) {
TemplateArgumentList *template_args_ptr =
TemplateArgumentList::CreateCopy(func_decl->getASTContext(), infos.args);
func_decl->setFunctionTemplateSpecialization(func_tmpl_decl,
template_args_ptr, nullptr);
}
/// Returns true if the given template parameter can represent the given value.
/// For example, `typename T` can represent `int` but not integral values such
/// as `int I = 3`.
static bool TemplateParameterAllowsValue(NamedDecl *param,
const TemplateArgument &value) {
if (llvm::isa<TemplateTypeParmDecl>(param)) {
// Compare the argument kind, i.e. ensure that <typename> != <int>.
if (value.getKind() != TemplateArgument::Type)
return false;
} else if (auto *type_param =
llvm::dyn_cast<NonTypeTemplateParmDecl>(param)) {
// Compare the argument kind, i.e. ensure that <typename> != <int>.
if (!IsValueParam(value))
return false;
// Compare the integral type, i.e. ensure that <int> != <char>.
if (type_param->getType() != value.getIntegralType())
return false;
} else {
// There is no way to create other parameter decls at the moment, so we
// can't reach this case during normal LLDB usage. Log that this happened
// and assert.
Log *log = GetLog(LLDBLog::Expressions);
LLDB_LOG(log,
"Don't know how to compare template parameter to passed"
" value. Decl kind of parameter is: {0}",
param->getDeclKindName());
lldbassert(false && "Can't compare this TemplateParmDecl subclass");
// In release builds just fall back to marking the parameter as not
// accepting the value so that we don't try to fit an instantiation to a
// template that doesn't fit. E.g., avoid that `S<1>` is being connected to
// `template<typename T> struct S;`.
return false;
}
return true;
}
/// Returns true if the given class template declaration could produce an
/// instantiation with the specified values.
/// For example, `<typename T>` allows the arguments `float`, but not for
/// example `bool, float` or `3` (as an integer parameter value).
static bool ClassTemplateAllowsToInstantiationArgs(
ClassTemplateDecl *class_template_decl,
const TypeSystemClang::TemplateParameterInfos &instantiation_values) {
TemplateParameterList &params = *class_template_decl->getTemplateParameters();
// Save some work by iterating only once over the found parameters and
// calculating the information related to parameter packs.
// Contains the first pack parameter (or none if there are none).
llvm::Optional<NamedDecl *> pack_parameter;
// Contains the number of non-pack parameters.
size_t non_pack_params = params.size();
for (size_t i = 0; i < params.size(); ++i) {
NamedDecl *param = params.getParam(i);
if (param->isParameterPack()) {
pack_parameter = param;
non_pack_params = i;
break;
}
}
// The found template needs to have compatible non-pack template arguments.
// E.g., ensure that <typename, typename> != <typename>.
// The pack parameters are compared later.
if (non_pack_params != instantiation_values.args.size())
return false;
// Ensure that <typename...> != <typename>.
if (pack_parameter.has_value() != instantiation_values.hasParameterPack())
return false;
// Compare the first pack parameter that was found with the first pack
// parameter value. The special case of having an empty parameter pack value
// always fits a pack parameter.
// E.g., ensure that <int...> != <typename...>.
if (pack_parameter && !instantiation_values.packed_args->args.empty() &&
!TemplateParameterAllowsValue(
*pack_parameter, instantiation_values.packed_args->args.front()))
return false;
// Compare all the non-pack parameters now.
// E.g., ensure that <int> != <long>.
for (const auto pair : llvm::zip_first(instantiation_values.args, params)) {
const TemplateArgument &passed_arg = std::get<0>(pair);
NamedDecl *found_param = std::get<1>(pair);
if (!TemplateParameterAllowsValue(found_param, passed_arg))
return false;
}
return true;
}
ClassTemplateDecl *TypeSystemClang::CreateClassTemplateDecl(
DeclContext *decl_ctx, OptionalClangModuleID owning_module,
lldb::AccessType access_type, llvm::StringRef class_name, int kind,
const TemplateParameterInfos &template_param_infos) {
ASTContext &ast = getASTContext();
ClassTemplateDecl *class_template_decl = nullptr;
if (decl_ctx == nullptr)
decl_ctx = ast.getTranslationUnitDecl();
IdentifierInfo &identifier_info = ast.Idents.get(class_name);
DeclarationName decl_name(&identifier_info);
// Search the AST for an existing ClassTemplateDecl that could be reused.
clang::DeclContext::lookup_result result = decl_ctx->lookup(decl_name);
for (NamedDecl *decl : result) {
class_template_decl = dyn_cast<clang::ClassTemplateDecl>(decl);
if (!class_template_decl)
continue;
// The class template has to be able to represent the instantiation
// values we received. Without this we might end up putting an instantiation
// with arguments such as <int, int> into a template such as:
// template<typename T> struct S;
// Connecting the instantiation to an incompatible template could cause
// problems later on.
if (!ClassTemplateAllowsToInstantiationArgs(class_template_decl,
template_param_infos))
continue;
return class_template_decl;
}
llvm::SmallVector<NamedDecl *, 8> template_param_decls;
TemplateParameterList *template_param_list = CreateTemplateParameterList(
ast, template_param_infos, template_param_decls);
CXXRecordDecl *template_cxx_decl = CXXRecordDecl::CreateDeserialized(ast, 0);
template_cxx_decl->setTagKind(static_cast<TagDecl::TagKind>(kind));
// What decl context do we use here? TU? The actual decl context?
template_cxx_decl->setDeclContext(decl_ctx);
template_cxx_decl->setDeclName(decl_name);
SetOwningModule(template_cxx_decl, owning_module);
for (size_t i = 0, template_param_decl_count = template_param_decls.size();
i < template_param_decl_count; ++i) {
template_param_decls[i]->setDeclContext(template_cxx_decl);
}
// With templated classes, we say that a class is templated with
// specializations, but that the bare class has no functions.
// template_cxx_decl->startDefinition();
// template_cxx_decl->completeDefinition();
class_template_decl = ClassTemplateDecl::CreateDeserialized(ast, 0);
// What decl context do we use here? TU? The actual decl context?
class_template_decl->setDeclContext(decl_ctx);
class_template_decl->setDeclName(decl_name);
class_template_decl->init(template_cxx_decl, template_param_list);
template_cxx_decl->setDescribedClassTemplate(class_template_decl);
SetOwningModule(class_template_decl, owning_module);
if (access_type != eAccessNone)
class_template_decl->setAccess(
ConvertAccessTypeToAccessSpecifier(access_type));
decl_ctx->addDecl(class_template_decl);
VerifyDecl(class_template_decl);
return class_template_decl;
}
TemplateTemplateParmDecl *
TypeSystemClang::CreateTemplateTemplateParmDecl(const char *template_name) {
ASTContext &ast = getASTContext();
auto *decl_ctx = ast.getTranslationUnitDecl();
IdentifierInfo &identifier_info = ast.Idents.get(template_name);
llvm::SmallVector<NamedDecl *, 8> template_param_decls;
TypeSystemClang::TemplateParameterInfos template_param_infos;
TemplateParameterList *template_param_list = CreateTemplateParameterList(
ast, template_param_infos, template_param_decls);
// LLDB needs to create those decls only to be able to display a
// type that includes a template template argument. Only the name matters for
// this purpose, so we use dummy values for the other characteristics of the
// type.
return TemplateTemplateParmDecl::Create(
ast, decl_ctx, SourceLocation(),
/*Depth*/ 0, /*Position*/ 0,
/*IsParameterPack*/ false, &identifier_info, template_param_list);
}
ClassTemplateSpecializationDecl *
TypeSystemClang::CreateClassTemplateSpecializationDecl(
DeclContext *decl_ctx, OptionalClangModuleID owning_module,
ClassTemplateDecl *class_template_decl, int kind,
const TemplateParameterInfos &template_param_infos) {
ASTContext &ast = getASTContext();
llvm::SmallVector<clang::TemplateArgument, 2> args(
template_param_infos.args.size() +
(template_param_infos.packed_args ? 1 : 0));
std::copy(template_param_infos.args.begin(), template_param_infos.args.end(),
args.begin());
if (template_param_infos.packed_args) {
args[args.size() - 1] = TemplateArgument::CreatePackCopy(
ast, template_param_infos.packed_args->args);
}
ClassTemplateSpecializationDecl *class_template_specialization_decl =
ClassTemplateSpecializationDecl::CreateDeserialized(ast, 0);
class_template_specialization_decl->setTagKind(
static_cast<TagDecl::TagKind>(kind));
class_template_specialization_decl->setDeclContext(decl_ctx);
class_template_specialization_decl->setInstantiationOf(class_template_decl);
class_template_specialization_decl->setTemplateArgs(
TemplateArgumentList::CreateCopy(ast, args));
ast.getTypeDeclType(class_template_specialization_decl, nullptr);
class_template_specialization_decl->setDeclName(
class_template_decl->getDeclName());
SetOwningModule(class_template_specialization_decl, owning_module);
decl_ctx->addDecl(class_template_specialization_decl);
class_template_specialization_decl->setSpecializationKind(
TSK_ExplicitSpecialization);
return class_template_specialization_decl;
}
CompilerType TypeSystemClang::CreateClassTemplateSpecializationType(
ClassTemplateSpecializationDecl *class_template_specialization_decl) {
if (class_template_specialization_decl) {
ASTContext &ast = getASTContext();
return GetType(ast.getTagDeclType(class_template_specialization_decl));
}
return CompilerType();
}
static inline bool check_op_param(bool is_method,
clang::OverloadedOperatorKind op_kind,
bool unary, bool binary,
uint32_t num_params) {
// Special-case the call operator (OO_Call) since it can take any number of
// operands
if (op_kind == OO_Call)
return true;
// The parameter count doesn't include "this"
if (is_method)
++num_params;
if (num_params == 1)
return unary;
if (num_params == 2)
return binary;
return false;
}
bool TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
bool is_method, clang::OverloadedOperatorKind op_kind,
uint32_t num_params) {
switch (op_kind) {
default:
break;
// C++ standard allows any number of arguments to new/delete
case OO_New:
case OO_Array_New:
case OO_Delete:
case OO_Array_Delete:
return true;
}
#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
case OO_##Name: \
return check_op_param(is_method, op_kind, Unary, Binary, num_params);
switch (op_kind) {
#include "clang/Basic/OperatorKinds.def"
default:
break;
}
return false;
}
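// A quick illustration of the check above (editor's sketch, not part of the
// original source; results follow directly from check_op_param, with operator
// arities taken from clang's OperatorKinds.def):
//
//   // operator+ can be unary or binary, so both of these pass:
//   TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
//       /*is_method=*/false, clang::OO_Plus, /*num_params=*/2); // true
//   TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
//       /*is_method=*/true, clang::OO_Plus, /*num_params=*/1);  // true ("this" counts)
//   // Three explicit parameters never match a unary/binary operator:
//   TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
//       /*is_method=*/false, clang::OO_Plus, /*num_params=*/3); // false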
clang::AccessSpecifier
TypeSystemClang::UnifyAccessSpecifiers(clang::AccessSpecifier lhs,
clang::AccessSpecifier rhs) {
// Make the access equal to the stricter of the field's access and the nested
// field's access.
if (lhs == AS_none || rhs == AS_none)
return AS_none;
if (lhs == AS_private || rhs == AS_private)
return AS_private;
if (lhs == AS_protected || rhs == AS_protected)
return AS_protected;
return AS_public;
}
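// Illustrative results (these follow directly from the ordering above):
//   UnifyAccessSpecifiers(AS_public, AS_private)   == AS_private
//   UnifyAccessSpecifiers(AS_protected, AS_public) == AS_protected
//   UnifyAccessSpecifiers(AS_none, AS_public)      == AS_none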
bool TypeSystemClang::FieldIsBitfield(FieldDecl *field,
uint32_t &bitfield_bit_size) {
ASTContext &ast = getASTContext();
if (field == nullptr)
return false;
if (field->isBitField()) {
Expr *bit_width_expr = field->getBitWidth();
if (bit_width_expr) {
if (Optional<llvm::APSInt> bit_width_apsint =
bit_width_expr->getIntegerConstantExpr(ast)) {
bitfield_bit_size = bit_width_apsint->getLimitedValue(UINT32_MAX);
return true;
}
}
}
return false;
}
bool TypeSystemClang::RecordHasFields(const RecordDecl *record_decl) {
if (record_decl == nullptr)
return false;
if (!record_decl->field_empty())
return true;
// No fields; let's check whether this is a CXX record and, if so, whether any
// of its base classes have fields.
const CXXRecordDecl *cxx_record_decl = dyn_cast<CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
CXXRecordDecl::base_class_const_iterator base_class, base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
const CXXRecordDecl *base_class_decl = cast<CXXRecordDecl>(
base_class->getType()->getAs<RecordType>()->getDecl());
if (RecordHasFields(base_class_decl))
return true;
}
}
return false;
}
#pragma mark Objective-C Classes
CompilerType TypeSystemClang::CreateObjCClass(
llvm::StringRef name, clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module, bool isForwardDecl, bool isInternal,
ClangASTMetadata *metadata) {
ASTContext &ast = getASTContext();
assert(!name.empty());
if (!decl_ctx)
decl_ctx = ast.getTranslationUnitDecl();
ObjCInterfaceDecl *decl = ObjCInterfaceDecl::CreateDeserialized(ast, 0);
decl->setDeclContext(decl_ctx);
decl->setDeclName(&ast.Idents.get(name));
/*isForwardDecl,*/
decl->setImplicit(isInternal);
SetOwningModule(decl, owning_module);
if (metadata)
SetMetadata(decl, *metadata);
return GetType(ast.getObjCInterfaceType(decl));
}
static inline bool BaseSpecifierIsEmpty(const CXXBaseSpecifier *b) {
return !TypeSystemClang::RecordHasFields(b->getType()->getAsCXXRecordDecl());
}
uint32_t
TypeSystemClang::GetNumBaseClasses(const CXXRecordDecl *cxx_record_decl,
bool omit_empty_base_classes) {
uint32_t num_bases = 0;
if (cxx_record_decl) {
if (omit_empty_base_classes) {
CXXRecordDecl::base_class_const_iterator base_class, base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
// Skip empty base classes
if (BaseSpecifierIsEmpty(base_class))
continue;
++num_bases;
}
} else
num_bases = cxx_record_decl->getNumBases();
}
return num_bases;
}
#pragma mark Namespace Declarations
NamespaceDecl *TypeSystemClang::GetUniqueNamespaceDeclaration(
const char *name, clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module, bool is_inline) {
NamespaceDecl *namespace_decl = nullptr;
ASTContext &ast = getASTContext();
TranslationUnitDecl *translation_unit_decl = ast.getTranslationUnitDecl();
if (!decl_ctx)
decl_ctx = translation_unit_decl;
if (name) {
IdentifierInfo &identifier_info = ast.Idents.get(name);
DeclarationName decl_name(&identifier_info);
clang::DeclContext::lookup_result result = decl_ctx->lookup(decl_name);
for (NamedDecl *decl : result) {
namespace_decl = dyn_cast<clang::NamespaceDecl>(decl);
if (namespace_decl)
return namespace_decl;
}
namespace_decl =
NamespaceDecl::Create(ast, decl_ctx, is_inline, SourceLocation(),
SourceLocation(), &identifier_info, nullptr);
decl_ctx->addDecl(namespace_decl);
} else {
if (decl_ctx == translation_unit_decl) {
namespace_decl = translation_unit_decl->getAnonymousNamespace();
if (namespace_decl)
return namespace_decl;
namespace_decl =
NamespaceDecl::Create(ast, decl_ctx, false, SourceLocation(),
SourceLocation(), nullptr, nullptr);
translation_unit_decl->setAnonymousNamespace(namespace_decl);
translation_unit_decl->addDecl(namespace_decl);
assert(namespace_decl == translation_unit_decl->getAnonymousNamespace());
} else {
NamespaceDecl *parent_namespace_decl = cast<NamespaceDecl>(decl_ctx);
if (parent_namespace_decl) {
namespace_decl = parent_namespace_decl->getAnonymousNamespace();
if (namespace_decl)
return namespace_decl;
namespace_decl =
NamespaceDecl::Create(ast, decl_ctx, false, SourceLocation(),
SourceLocation(), nullptr, nullptr);
parent_namespace_decl->setAnonymousNamespace(namespace_decl);
parent_namespace_decl->addDecl(namespace_decl);
assert(namespace_decl ==
parent_namespace_decl->getAnonymousNamespace());
} else {
assert(false && "GetUniqueNamespaceDeclaration called with no name and "
"no namespace as decl_ctx");
}
}
}
// Note: namespaces can span multiple modules, so perhaps this isn't a good
// idea.
SetOwningModule(namespace_decl, owning_module);
VerifyDecl(namespace_decl);
return namespace_decl;
}
clang::BlockDecl *
TypeSystemClang::CreateBlockDeclaration(clang::DeclContext *ctx,
OptionalClangModuleID owning_module) {
if (ctx) {
clang::BlockDecl *decl =
clang::BlockDecl::CreateDeserialized(getASTContext(), 0);
decl->setDeclContext(ctx);
ctx->addDecl(decl);
SetOwningModule(decl, owning_module);
return decl;
}
return nullptr;
}
clang::DeclContext *FindLCABetweenDecls(clang::DeclContext *left,
clang::DeclContext *right,
clang::DeclContext *root) {
if (root == nullptr)
return nullptr;
std::set<clang::DeclContext *> path_left;
for (clang::DeclContext *d = left; d != nullptr; d = d->getParent())
path_left.insert(d);
for (clang::DeclContext *d = right; d != nullptr; d = d->getParent())
if (path_left.find(d) != path_left.end())
return d;
return nullptr;
}
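// Sketch of the walk above (decl names hypothetical): for decl contexts nested
// as TU -> A -> B and TU -> A -> C, the first loop records {B, A, TU} in
// path_left, and the upward walk from C first hits A, so:
//
//   clang::DeclContext *lca = FindLCABetweenDecls(b_ctx, c_ctx, tu); // == A
//
// Note that `root` is only used as a null check; the walk itself always runs
// to the top of the context chain.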
clang::UsingDirectiveDecl *TypeSystemClang::CreateUsingDirectiveDeclaration(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
clang::NamespaceDecl *ns_decl) {
if (decl_ctx && ns_decl) {
auto *translation_unit = getASTContext().getTranslationUnitDecl();
clang::UsingDirectiveDecl *using_decl = clang::UsingDirectiveDecl::Create(
getASTContext(), decl_ctx, clang::SourceLocation(),
clang::SourceLocation(), clang::NestedNameSpecifierLoc(),
clang::SourceLocation(), ns_decl,
FindLCABetweenDecls(decl_ctx, ns_decl,
translation_unit));
decl_ctx->addDecl(using_decl);
SetOwningModule(using_decl, owning_module);
return using_decl;
}
return nullptr;
}
clang::UsingDecl *
TypeSystemClang::CreateUsingDeclaration(clang::DeclContext *current_decl_ctx,
OptionalClangModuleID owning_module,
clang::NamedDecl *target) {
if (current_decl_ctx && target) {
clang::UsingDecl *using_decl = clang::UsingDecl::Create(
getASTContext(), current_decl_ctx, clang::SourceLocation(),
clang::NestedNameSpecifierLoc(), clang::DeclarationNameInfo(), false);
SetOwningModule(using_decl, owning_module);
clang::UsingShadowDecl *shadow_decl = clang::UsingShadowDecl::Create(
getASTContext(), current_decl_ctx, clang::SourceLocation(),
target->getDeclName(), using_decl, target);
SetOwningModule(shadow_decl, owning_module);
using_decl->addShadowDecl(shadow_decl);
current_decl_ctx->addDecl(using_decl);
return using_decl;
}
return nullptr;
}
clang::VarDecl *TypeSystemClang::CreateVariableDeclaration(
clang::DeclContext *decl_context, OptionalClangModuleID owning_module,
const char *name, clang::QualType type) {
if (decl_context) {
clang::VarDecl *var_decl =
clang::VarDecl::CreateDeserialized(getASTContext(), 0);
var_decl->setDeclContext(decl_context);
if (name && name[0])
var_decl->setDeclName(&getASTContext().Idents.getOwn(name));
var_decl->setType(type);
SetOwningModule(var_decl, owning_module);
var_decl->setAccess(clang::AS_public);
decl_context->addDecl(var_decl);
return var_decl;
}
return nullptr;
}
lldb::opaque_compiler_type_t
TypeSystemClang::GetOpaqueCompilerType(clang::ASTContext *ast,
lldb::BasicType basic_type) {
switch (basic_type) {
case eBasicTypeVoid:
return ast->VoidTy.getAsOpaquePtr();
case eBasicTypeChar:
return ast->CharTy.getAsOpaquePtr();
case eBasicTypeSignedChar:
return ast->SignedCharTy.getAsOpaquePtr();
case eBasicTypeUnsignedChar:
return ast->UnsignedCharTy.getAsOpaquePtr();
case eBasicTypeWChar:
return ast->getWCharType().getAsOpaquePtr();
case eBasicTypeSignedWChar:
return ast->getSignedWCharType().getAsOpaquePtr();
case eBasicTypeUnsignedWChar:
return ast->getUnsignedWCharType().getAsOpaquePtr();
case eBasicTypeChar8:
return ast->Char8Ty.getAsOpaquePtr();
case eBasicTypeChar16:
return ast->Char16Ty.getAsOpaquePtr();
case eBasicTypeChar32:
return ast->Char32Ty.getAsOpaquePtr();
case eBasicTypeShort:
return ast->ShortTy.getAsOpaquePtr();
case eBasicTypeUnsignedShort:
return ast->UnsignedShortTy.getAsOpaquePtr();
case eBasicTypeInt:
return ast->IntTy.getAsOpaquePtr();
case eBasicTypeUnsignedInt:
return ast->UnsignedIntTy.getAsOpaquePtr();
case eBasicTypeLong:
return ast->LongTy.getAsOpaquePtr();
case eBasicTypeUnsignedLong:
return ast->UnsignedLongTy.getAsOpaquePtr();
case eBasicTypeLongLong:
return ast->LongLongTy.getAsOpaquePtr();
case eBasicTypeUnsignedLongLong:
return ast->UnsignedLongLongTy.getAsOpaquePtr();
case eBasicTypeInt128:
return ast->Int128Ty.getAsOpaquePtr();
case eBasicTypeUnsignedInt128:
return ast->UnsignedInt128Ty.getAsOpaquePtr();
case eBasicTypeBool:
return ast->BoolTy.getAsOpaquePtr();
case eBasicTypeHalf:
return ast->HalfTy.getAsOpaquePtr();
case eBasicTypeFloat:
return ast->FloatTy.getAsOpaquePtr();
case eBasicTypeDouble:
return ast->DoubleTy.getAsOpaquePtr();
case eBasicTypeLongDouble:
return ast->LongDoubleTy.getAsOpaquePtr();
case eBasicTypeFloatComplex:
return ast->getComplexType(ast->FloatTy).getAsOpaquePtr();
case eBasicTypeDoubleComplex:
return ast->getComplexType(ast->DoubleTy).getAsOpaquePtr();
case eBasicTypeLongDoubleComplex:
return ast->getComplexType(ast->LongDoubleTy).getAsOpaquePtr();
case eBasicTypeObjCID:
return ast->getObjCIdType().getAsOpaquePtr();
case eBasicTypeObjCClass:
return ast->getObjCClassType().getAsOpaquePtr();
case eBasicTypeObjCSel:
return ast->getObjCSelType().getAsOpaquePtr();
case eBasicTypeNullPtr:
return ast->NullPtrTy.getAsOpaquePtr();
default:
return nullptr;
}
}
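// Illustration (editor's sketch, assuming a valid clang::ASTContext `ast`):
// the mapping is a direct table lookup, e.g.
//
//   GetOpaqueCompilerType(&ast, lldb::eBasicTypeFloat)
//       == ast.FloatTy.getAsOpaquePtr()
//
// and any unhandled lldb::BasicType value falls through to nullptr.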
#pragma mark Function Types
clang::DeclarationName
TypeSystemClang::GetDeclarationName(llvm::StringRef name,
const CompilerType &function_clang_type) {
clang::OverloadedOperatorKind op_kind = clang::NUM_OVERLOADED_OPERATORS;
if (!IsOperator(name, op_kind) || op_kind == clang::NUM_OVERLOADED_OPERATORS)
return DeclarationName(&getASTContext().Idents.get(
name)); // Not operator, but a regular function.
// Check the number of operator parameters. We have sometimes seen bad DWARF
// that doesn't correctly describe operators, and if we try to create a method
// from it and add it to the class, clang will assert and crash, so we need to
// make sure things are acceptable.
clang::QualType method_qual_type(ClangUtil::GetQualType(function_clang_type));
const clang::FunctionProtoType *function_type =
llvm::dyn_cast<clang::FunctionProtoType>(method_qual_type.getTypePtr());
if (function_type == nullptr)
return clang::DeclarationName();
const bool is_method = false;
const unsigned int num_params = function_type->getNumParams();
if (!TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
is_method, op_kind, num_params))
return clang::DeclarationName();
return getASTContext().DeclarationNames.getCXXOperatorName(op_kind);
}
PrintingPolicy TypeSystemClang::GetTypePrintingPolicy() {
clang::PrintingPolicy printing_policy(getASTContext().getPrintingPolicy());
printing_policy.SuppressTagKeyword = true;
// Inline namespaces are important for some type formatters (e.g., libc++
// and libstdc++ are differentiated by their inline namespaces).
printing_policy.SuppressInlineNamespace = false;
printing_policy.SuppressUnwrittenScope = false;
// Default arguments are also always important for type formatters. Otherwise
// we would need to always specify two type names for the setups where we do
// know the default arguments and where we don't know default arguments.
//
// For example, without this we would need to have formatters for both:
// std::basic_string<char>
// and
// std::basic_string<char, std::char_traits<char>, std::allocator<char> >
// to support setups where LLDB was able to reconstruct default arguments
// (and we then would have suppressed them from the type name) and also setups
// where LLDB wasn't able to reconstruct the default arguments.
printing_policy.SuppressDefaultTemplateArgs = false;
return printing_policy;
}
std::string TypeSystemClang::GetTypeNameForDecl(const NamedDecl *named_decl) {
clang::PrintingPolicy printing_policy = GetTypePrintingPolicy();
std::string result;
llvm::raw_string_ostream os(result);
named_decl->printQualifiedName(os, printing_policy);
return result;
}
FunctionDecl *TypeSystemClang::CreateFunctionDeclaration(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
llvm::StringRef name, const CompilerType &function_clang_type,
clang::StorageClass storage, bool is_inline) {
FunctionDecl *func_decl = nullptr;
ASTContext &ast = getASTContext();
if (!decl_ctx)
decl_ctx = ast.getTranslationUnitDecl();
const bool hasWrittenPrototype = true;
const bool isConstexprSpecified = false;
clang::DeclarationName declarationName =
GetDeclarationName(name, function_clang_type);
func_decl = FunctionDecl::CreateDeserialized(ast, 0);
func_decl->setDeclContext(decl_ctx);
func_decl->setDeclName(declarationName);
func_decl->setType(ClangUtil::GetQualType(function_clang_type));
func_decl->setStorageClass(storage);
func_decl->setInlineSpecified(is_inline);
func_decl->setHasWrittenPrototype(hasWrittenPrototype);
func_decl->setConstexprKind(isConstexprSpecified
? ConstexprSpecKind::Constexpr
: ConstexprSpecKind::Unspecified);
SetOwningModule(func_decl, owning_module);
decl_ctx->addDecl(func_decl);
VerifyDecl(func_decl);
return func_decl;
}
CompilerType
TypeSystemClang::CreateFunctionType(const CompilerType &result_type,
const CompilerType *args, unsigned num_args,
bool is_variadic, unsigned type_quals,
clang::CallingConv cc) {
if (!result_type || !ClangUtil::IsClangType(result_type))
return CompilerType(); // invalid return type
std::vector<QualType> qual_type_args;
if (num_args > 0 && args == nullptr)
return CompilerType(); // invalid argument array passed in
// Verify that all arguments are valid and the right type
for (unsigned i = 0; i < num_args; ++i) {
if (args[i]) {
// Make sure we have a clang type in args[i] and not a type from another
// language whose name might match
const bool is_clang_type = ClangUtil::IsClangType(args[i]);
lldbassert(is_clang_type);
if (is_clang_type)
qual_type_args.push_back(ClangUtil::GetQualType(args[i]));
else
return CompilerType(); // invalid argument type (must be a clang type)
} else
return CompilerType(); // invalid argument type (empty)
}
// TODO: Detect calling convention in DWARF?
FunctionProtoType::ExtProtoInfo proto_info;
proto_info.ExtInfo = cc;
proto_info.Variadic = is_variadic;
proto_info.ExceptionSpec = EST_None;
proto_info.TypeQuals = clang::Qualifiers::fromFastMask(type_quals);
proto_info.RefQualifier = RQ_None;
return GetType(getASTContext().getFunctionType(
ClangUtil::GetQualType(result_type), qual_type_args, proto_info));
}
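// Usage sketch (editor's illustration; `ts` is a TypeSystemClang instance and
// `int_type` a CompilerType wrapping "int", both assumed):
//
//   CompilerType args[] = {int_type, int_type};
//   CompilerType fn = ts.CreateFunctionType(int_type, args, /*num_args=*/2,
//                                           /*is_variadic=*/false,
//                                           /*type_quals=*/0, clang::CC_C);
//   // fn now wraps the prototype "int (int, int)".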
ParmVarDecl *TypeSystemClang::CreateParameterDeclaration(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
const char *name, const CompilerType &param_type, int storage,
bool add_decl) {
ASTContext &ast = getASTContext();
auto *decl = ParmVarDecl::CreateDeserialized(ast, 0);
decl->setDeclContext(decl_ctx);
if (name && name[0])
decl->setDeclName(&ast.Idents.get(name));
decl->setType(ClangUtil::GetQualType(param_type));
decl->setStorageClass(static_cast<clang::StorageClass>(storage));
SetOwningModule(decl, owning_module);
if (add_decl)
decl_ctx->addDecl(decl);
return decl;
}
void TypeSystemClang::SetFunctionParameters(
FunctionDecl *function_decl, llvm::ArrayRef<ParmVarDecl *> params) {
if (function_decl)
function_decl->setParams(params);
}
CompilerType
TypeSystemClang::CreateBlockPointerType(const CompilerType &function_type) {
QualType block_type = m_ast_up->getBlockPointerType(
clang::QualType::getFromOpaquePtr(function_type.GetOpaqueQualType()));
return GetType(block_type);
}
#pragma mark Array Types
CompilerType TypeSystemClang::CreateArrayType(const CompilerType &element_type,
size_t element_count,
bool is_vector) {
if (element_type.IsValid()) {
ASTContext &ast = getASTContext();
if (is_vector) {
return GetType(ast.getExtVectorType(ClangUtil::GetQualType(element_type),
element_count));
} else {
llvm::APInt ap_element_count(64, element_count);
if (element_count == 0) {
return GetType(ast.getIncompleteArrayType(
ClangUtil::GetQualType(element_type), clang::ArrayType::Normal, 0));
} else {
return GetType(ast.getConstantArrayType(
ClangUtil::GetQualType(element_type), ap_element_count, nullptr,
clang::ArrayType::Normal, 0));
}
}
}
return CompilerType();
}
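// Usage sketch (hypothetical `ts` and `float_type`, as in the sketch after
// CreateFunctionType):
//
//   ts.CreateArrayType(float_type, 4, /*is_vector=*/false); // float[4]
//   ts.CreateArrayType(float_type, 0, /*is_vector=*/false); // float[] (incomplete)
//   ts.CreateArrayType(float_type, 4, /*is_vector=*/true);  // 4-element ext vector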
CompilerType TypeSystemClang::CreateStructForIdentifier(
ConstString type_name,
const std::initializer_list<std::pair<const char *, CompilerType>>
&type_fields,
bool packed) {
CompilerType type;
if (!type_name.IsEmpty() &&
(type = GetTypeForIdentifier<clang::CXXRecordDecl>(type_name))
.IsValid()) {
lldbassert(0 && "Trying to create a type for an existing name");
return type;
}
type = CreateRecordType(nullptr, OptionalClangModuleID(), lldb::eAccessPublic,
type_name.GetCString(), clang::TTK_Struct,
lldb::eLanguageTypeC);
StartTagDeclarationDefinition(type);
for (const auto &field : type_fields)
AddFieldToRecordType(type, field.first, field.second, lldb::eAccessPublic,
0);
if (packed)
SetIsPacked(type);
CompleteTagDeclarationDefinition(type);
return type;
}
CompilerType TypeSystemClang::GetOrCreateStructForIdentifier(
ConstString type_name,
const std::initializer_list<std::pair<const char *, CompilerType>>
&type_fields,
bool packed) {
CompilerType type;
if ((type = GetTypeForIdentifier<clang::CXXRecordDecl>(type_name)).IsValid())
return type;
return CreateStructForIdentifier(type_name, type_fields, packed);
}
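// Usage sketch (hypothetical names) showing how a synthetic helper struct can
// be built from field name/type pairs:
//
//   CompilerType pair = ts.GetOrCreateStructForIdentifier(
//       ConstString("$__lldb_pair"),
//       {{"first", int_type}, {"second", int_type}},
//       /*packed=*/false);
//
// A second call with the same name returns the cached type instead of hitting
// the lldbassert in CreateStructForIdentifier.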
#pragma mark Enumeration Types
CompilerType TypeSystemClang::CreateEnumerationType(
llvm::StringRef name, clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module, const Declaration &decl,
const CompilerType &integer_clang_type, bool is_scoped) {
// TODO: Do something intelligent with the Declaration object passed in
// like maybe filling in the SourceLocation with it...
ASTContext &ast = getASTContext();
// TODO: ask about these...
// const bool IsFixed = false;
EnumDecl *enum_decl = EnumDecl::CreateDeserialized(ast, 0);
enum_decl->setDeclContext(decl_ctx);
if (!name.empty())
enum_decl->setDeclName(&ast.Idents.get(name));
enum_decl->setScoped(is_scoped);
enum_decl->setScopedUsingClassTag(is_scoped);
enum_decl->setFixed(false);
SetOwningModule(enum_decl, owning_module);
if (decl_ctx)
decl_ctx->addDecl(enum_decl);
// TODO: check if we should be setting the promotion type too?
enum_decl->setIntegerType(ClangUtil::GetQualType(integer_clang_type));
enum_decl->setAccess(AS_public); // TODO respect what's in the debug info
return GetType(ast.getTagDeclType(enum_decl));
}
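// Usage sketch (hypothetical `ts`, `decl_ctx` and `uint_type`): creating a
// scoped enum backed by "unsigned int" looks roughly like
//
//   CompilerType color = ts.CreateEnumerationType(
//       "Color", decl_ctx, OptionalClangModuleID(), Declaration(),
//       uint_type, /*is_scoped=*/true);
//
// which corresponds to "enum class Color : unsigned int" (modulo the
// fixed-underlying-type flag, which is left false above).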
CompilerType TypeSystemClang::GetIntTypeFromBitSize(size_t bit_size,
bool is_signed) {
clang::ASTContext &ast = getASTContext();
if (is_signed) {
if (bit_size == ast.getTypeSize(ast.SignedCharTy))
return GetType(ast.SignedCharTy);
if (bit_size == ast.getTypeSize(ast.ShortTy))
return GetType(ast.ShortTy);
if (bit_size == ast.getTypeSize(ast.IntTy))
return GetType(ast.IntTy);
if (bit_size == ast.getTypeSize(ast.LongTy))
return GetType(ast.LongTy);
if (bit_size == ast.getTypeSize(ast.LongLongTy))
return GetType(ast.LongLongTy);
if (bit_size == ast.getTypeSize(ast.Int128Ty))
return GetType(ast.Int128Ty);
} else {
if (bit_size == ast.getTypeSize(ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
if (bit_size == ast.getTypeSize(ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
if (bit_size == ast.getTypeSize(ast.UnsignedIntTy))
return GetType(ast.UnsignedIntTy);
if (bit_size == ast.getTypeSize(ast.UnsignedLongTy))
return GetType(ast.UnsignedLongTy);
if (bit_size == ast.getTypeSize(ast.UnsignedLongLongTy))
return GetType(ast.UnsignedLongLongTy);
if (bit_size == ast.getTypeSize(ast.UnsignedInt128Ty))
return GetType(ast.UnsignedInt128Ty);
}
return CompilerType();
}
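// Illustrative results on a typical LP64 target (sizes come from the
// ASTContext, so the answers are target-dependent):
//   GetIntTypeFromBitSize(32, /*is_signed=*/true)  -> int
//   GetIntTypeFromBitSize(64, /*is_signed=*/false) -> unsigned long
//                                                     (checked before long long)
//   GetIntTypeFromBitSize(24, /*is_signed=*/true)  -> invalid CompilerType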
CompilerType TypeSystemClang::GetPointerSizedIntType(bool is_signed) {
return GetIntTypeFromBitSize(
getASTContext().getTypeSize(getASTContext().VoidPtrTy), is_signed);
}
void TypeSystemClang::DumpDeclContextHiearchy(clang::DeclContext *decl_ctx) {
if (decl_ctx) {
DumpDeclContextHiearchy(decl_ctx->getParent());
clang::NamedDecl *named_decl = llvm::dyn_cast<clang::NamedDecl>(decl_ctx);
if (named_decl) {
printf("%20s: %s\n", decl_ctx->getDeclKindName(),
named_decl->getDeclName().getAsString().c_str());
} else {
printf("%20s\n", decl_ctx->getDeclKindName());
}
}
}
void TypeSystemClang::DumpDeclHiearchy(clang::Decl *decl) {
if (decl == nullptr)
return;
DumpDeclContextHiearchy(decl->getDeclContext());
clang::RecordDecl *record_decl = llvm::dyn_cast<clang::RecordDecl>(decl);
if (record_decl) {
printf("%20s: %s%s\n", decl->getDeclKindName(),
record_decl->getDeclName().getAsString().c_str(),
record_decl->isInjectedClassName() ? " (injected class name)" : "");
} else {
clang::NamedDecl *named_decl = llvm::dyn_cast<clang::NamedDecl>(decl);
if (named_decl) {
printf("%20s: %s\n", decl->getDeclKindName(),
named_decl->getDeclName().getAsString().c_str());
} else {
printf("%20s\n", decl->getDeclKindName());
}
}
}
bool TypeSystemClang::DeclsAreEquivalent(clang::Decl *lhs_decl,
clang::Decl *rhs_decl) {
if (lhs_decl && rhs_decl) {
// Make sure the decl kinds match first
const clang::Decl::Kind lhs_decl_kind = lhs_decl->getKind();
const clang::Decl::Kind rhs_decl_kind = rhs_decl->getKind();
if (lhs_decl_kind == rhs_decl_kind) {
// Now check that the decl contexts' kinds are all equivalent before we
// have to check any names of the decl contexts...
clang::DeclContext *lhs_decl_ctx = lhs_decl->getDeclContext();
clang::DeclContext *rhs_decl_ctx = rhs_decl->getDeclContext();
if (lhs_decl_ctx && rhs_decl_ctx) {
while (true) {
if (lhs_decl_ctx && rhs_decl_ctx) {
const clang::Decl::Kind lhs_decl_ctx_kind =
lhs_decl_ctx->getDeclKind();
const clang::Decl::Kind rhs_decl_ctx_kind =
rhs_decl_ctx->getDeclKind();
if (lhs_decl_ctx_kind == rhs_decl_ctx_kind) {
lhs_decl_ctx = lhs_decl_ctx->getParent();
rhs_decl_ctx = rhs_decl_ctx->getParent();
if (lhs_decl_ctx == nullptr && rhs_decl_ctx == nullptr)
break;
} else
return false;
} else
return false;
}
// Now make sure the name of the decls match
clang::NamedDecl *lhs_named_decl =
llvm::dyn_cast<clang::NamedDecl>(lhs_decl);
clang::NamedDecl *rhs_named_decl =
llvm::dyn_cast<clang::NamedDecl>(rhs_decl);
if (lhs_named_decl && rhs_named_decl) {
clang::DeclarationName lhs_decl_name = lhs_named_decl->getDeclName();
clang::DeclarationName rhs_decl_name = rhs_named_decl->getDeclName();
if (lhs_decl_name.getNameKind() == rhs_decl_name.getNameKind()) {
if (lhs_decl_name.getAsString() != rhs_decl_name.getAsString())
return false;
} else
return false;
} else
return false;
// We know that the decl context kinds all match, so now we need to
// make sure the names match as well
lhs_decl_ctx = lhs_decl->getDeclContext();
rhs_decl_ctx = rhs_decl->getDeclContext();
while (true) {
switch (lhs_decl_ctx->getDeclKind()) {
case clang::Decl::TranslationUnit:
// We don't care about the translation unit names
return true;
default: {
clang::NamedDecl *lhs_named_decl =
llvm::dyn_cast<clang::NamedDecl>(lhs_decl_ctx);
clang::NamedDecl *rhs_named_decl =
llvm::dyn_cast<clang::NamedDecl>(rhs_decl_ctx);
if (lhs_named_decl && rhs_named_decl) {
clang::DeclarationName lhs_decl_name =
lhs_named_decl->getDeclName();
clang::DeclarationName rhs_decl_name =
rhs_named_decl->getDeclName();
if (lhs_decl_name.getNameKind() == rhs_decl_name.getNameKind()) {
if (lhs_decl_name.getAsString() != rhs_decl_name.getAsString())
return false;
} else
return false;
} else
return false;
} break;
}
lhs_decl_ctx = lhs_decl_ctx->getParent();
rhs_decl_ctx = rhs_decl_ctx->getParent();
}
}
}
}
return false;
}
bool TypeSystemClang::GetCompleteDecl(clang::ASTContext *ast,
clang::Decl *decl) {
if (!decl)
return false;
ExternalASTSource *ast_source = ast->getExternalSource();
if (!ast_source)
return false;
if (clang::TagDecl *tag_decl = llvm::dyn_cast<clang::TagDecl>(decl)) {
if (tag_decl->isCompleteDefinition())
return true;
if (!tag_decl->hasExternalLexicalStorage())
return false;
ast_source->CompleteType(tag_decl);
return !tag_decl->getTypeForDecl()->isIncompleteType();
} else if (clang::ObjCInterfaceDecl *objc_interface_decl =
llvm::dyn_cast<clang::ObjCInterfaceDecl>(decl)) {
if (objc_interface_decl->getDefinition())
return true;
if (!objc_interface_decl->hasExternalLexicalStorage())
return false;
ast_source->CompleteType(objc_interface_decl);
return !objc_interface_decl->getTypeForDecl()->isIncompleteType();
} else {
return false;
}
}
void TypeSystemClang::SetMetadataAsUserID(const clang::Decl *decl,
user_id_t user_id) {
ClangASTMetadata meta_data;
meta_data.SetUserID(user_id);
SetMetadata(decl, meta_data);
}
void TypeSystemClang::SetMetadataAsUserID(const clang::Type *type,
user_id_t user_id) {
ClangASTMetadata meta_data;
meta_data.SetUserID(user_id);
SetMetadata(type, meta_data);
}
void TypeSystemClang::SetMetadata(const clang::Decl *object,
ClangASTMetadata &metadata) {
m_decl_metadata[object] = metadata;
}
void TypeSystemClang::SetMetadata(const clang::Type *object,
ClangASTMetadata &metadata) {
m_type_metadata[object] = metadata;
}
ClangASTMetadata *TypeSystemClang::GetMetadata(const clang::Decl *object) {
auto It = m_decl_metadata.find(object);
if (It != m_decl_metadata.end())
return &It->second;
return nullptr;
}
ClangASTMetadata *TypeSystemClang::GetMetadata(const clang::Type *object) {
auto It = m_type_metadata.find(object);
if (It != m_type_metadata.end())
return &It->second;
return nullptr;
}
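// Usage sketch (hypothetical `ts` and `decl`; GetUserID is assumed to be the
// getter counterpart of the SetUserID call used above): the metadata maps tie
// LLDB bookkeeping to clang AST nodes, e.g.
//
//   ts.SetMetadataAsUserID(decl, /*user_id=*/0x1234);
//   if (ClangASTMetadata *md = ts.GetMetadata(decl))
//     user_id_t uid = md->GetUserID(); // 0x1234
//
// GetMetadata returns nullptr for nodes that never had metadata attached.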
void TypeSystemClang::SetCXXRecordDeclAccess(const clang::CXXRecordDecl *object,
clang::AccessSpecifier access) {
if (access == clang::AccessSpecifier::AS_none)
m_cxx_record_decl_access.erase(object);
else
m_cxx_record_decl_access[object] = access;
}
clang::AccessSpecifier
TypeSystemClang::GetCXXRecordDeclAccess(const clang::CXXRecordDecl *object) {
auto It = m_cxx_record_decl_access.find(object);
if (It != m_cxx_record_decl_access.end())
return It->second;
return clang::AccessSpecifier::AS_none;
}
clang::DeclContext *
TypeSystemClang::GetDeclContextForType(const CompilerType &type) {
return GetDeclContextForType(ClangUtil::GetQualType(type));
}
/// Aggressively desugar the provided type, skipping past various kinds of
/// syntactic sugar and other constructs one typically wants to ignore.
/// The \p mask argument allows one to skip certain kinds of simplifications,
/// when one wishes to handle a certain kind of type directly.
static QualType
RemoveWrappingTypes(QualType type, ArrayRef<clang::Type::TypeClass> mask = {}) {
while (true) {
if (find(mask, type->getTypeClass()) != mask.end())
return type;
switch (type->getTypeClass()) {
// This is not fully correct as _Atomic is more than sugar, but it is
// sufficient for the purposes we care about.
case clang::Type::Atomic:
type = cast<clang::AtomicType>(type)->getValueType();
break;
case clang::Type::Auto:
case clang::Type::Decltype:
case clang::Type::Elaborated:
case clang::Type::Paren:
case clang::Type::SubstTemplateTypeParm:
case clang::Type::TemplateSpecialization:
case clang::Type::Typedef:
case clang::Type::TypeOf:
case clang::Type::TypeOfExpr:
case clang::Type::Using:
type = type->getLocallyUnqualifiedSingleStepDesugaredType();
break;
default:
return type;
}
}
}
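// Example of the desugaring above (hypothetical type): given
//   typedef unsigned long size_type;
// a QualType for "size_type" is peeled through the Typedef (and any
// Elaborated/Paren nodes) down to the builtin "unsigned long", while
//   RemoveWrappingTypes(t, {clang::Type::Typedef})
// stops as soon as it reaches the Typedef node and returns "size_type" itself.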
clang::DeclContext *
TypeSystemClang::GetDeclContextForType(clang::QualType type) {
if (type.isNull())
return nullptr;
clang::QualType qual_type = RemoveWrappingTypes(type.getCanonicalType());
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::ObjCInterface:
return llvm::cast<clang::ObjCObjectType>(qual_type.getTypePtr())
->getInterface();
case clang::Type::ObjCObjectPointer:
return GetDeclContextForType(
llvm::cast<clang::ObjCObjectPointerType>(qual_type.getTypePtr())
->getPointeeType());
case clang::Type::Record:
return llvm::cast<clang::RecordType>(qual_type)->getDecl();
case clang::Type::Enum:
return llvm::cast<clang::EnumType>(qual_type)->getDecl();
default:
break;
}
// No DeclContext in this type...
return nullptr;
}
static bool GetCompleteQualType(clang::ASTContext *ast,
clang::QualType qual_type,
bool allow_completion = true) {
qual_type = RemoveWrappingTypes(qual_type);
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::ConstantArray:
case clang::Type::IncompleteArray:
case clang::Type::VariableArray: {
const clang::ArrayType *array_type =
llvm::dyn_cast<clang::ArrayType>(qual_type.getTypePtr());
if (array_type)
return GetCompleteQualType(ast, array_type->getElementType(),
allow_completion);
} break;
case clang::Type::Record: {
clang::CXXRecordDecl *cxx_record_decl = qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
if (cxx_record_decl->hasExternalLexicalStorage()) {
const bool is_complete = cxx_record_decl->isCompleteDefinition();
const bool fields_loaded =
cxx_record_decl->hasLoadedFieldsFromExternalStorage();
if (is_complete && fields_loaded)
return true;
if (!allow_completion)
return false;
// Call the field_begin() accessor to force it to use the external
// source to load the fields...
clang::ExternalASTSource *external_ast_source =
ast->getExternalSource();
if (external_ast_source) {
external_ast_source->CompleteType(cxx_record_decl);
if (cxx_record_decl->isCompleteDefinition()) {
cxx_record_decl->field_begin();
cxx_record_decl->setHasLoadedFieldsFromExternalStorage(true);
}
}
}
}
const clang::TagType *tag_type =
llvm::cast<clang::TagType>(qual_type.getTypePtr());
return !tag_type->isIncompleteType();
} break;
case clang::Type::Enum: {
const clang::TagType *tag_type =
llvm::dyn_cast<clang::TagType>(qual_type.getTypePtr());
if (tag_type) {
clang::TagDecl *tag_decl = tag_type->getDecl();
if (tag_decl) {
if (tag_decl->getDefinition())
return true;
if (!allow_completion)
return false;
if (tag_decl->hasExternalLexicalStorage()) {
if (ast) {
clang::ExternalASTSource *external_ast_source =
ast->getExternalSource();
if (external_ast_source) {
external_ast_source->CompleteType(tag_decl);
return !tag_type->isIncompleteType();
}
}
}
return false;
}
}
} break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface: {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
// We currently can't complete Objective-C types through the newly added
// ASTContext because it only supports TagDecl objects right now...
if (class_interface_decl) {
if (class_interface_decl->getDefinition())
return true;
if (!allow_completion)
return false;
if (class_interface_decl->hasExternalLexicalStorage()) {
if (ast) {
clang::ExternalASTSource *external_ast_source =
ast->getExternalSource();
if (external_ast_source) {
external_ast_source->CompleteType(class_interface_decl);
return !objc_class_type->isIncompleteType();
}
}
}
return false;
}
}
} break;
case clang::Type::Attributed:
return GetCompleteQualType(
ast, llvm::cast<clang::AttributedType>(qual_type)->getModifiedType(),
allow_completion);
default:
break;
}
return true;
}
static clang::ObjCIvarDecl::AccessControl
ConvertAccessTypeToObjCIvarAccessControl(AccessType access) {
switch (access) {
case eAccessNone:
return clang::ObjCIvarDecl::None;
case eAccessPublic:
return clang::ObjCIvarDecl::Public;
case eAccessPrivate:
return clang::ObjCIvarDecl::Private;
case eAccessProtected:
return clang::ObjCIvarDecl::Protected;
case eAccessPackage:
return clang::ObjCIvarDecl::Package;
}
return clang::ObjCIvarDecl::None;
}
// Tests
#ifndef NDEBUG
bool TypeSystemClang::Verify(lldb::opaque_compiler_type_t type) {
return !type || llvm::isa<clang::Type>(GetQualType(type).getTypePtr());
}
#endif
bool TypeSystemClang::IsAggregateType(lldb::opaque_compiler_type_t type) {
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::IncompleteArray:
case clang::Type::VariableArray:
case clang::Type::ConstantArray:
case clang::Type::ExtVector:
case clang::Type::Vector:
case clang::Type::Record:
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
return true;
default:
break;
}
// The clang type is not an aggregate.
return false;
}
bool TypeSystemClang::IsAnonymousType(lldb::opaque_compiler_type_t type) {
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record: {
if (const clang::RecordType *record_type =
llvm::dyn_cast_or_null<clang::RecordType>(
qual_type.getTypePtrOrNull())) {
if (const clang::RecordDecl *record_decl = record_type->getDecl()) {
return record_decl->isAnonymousStructOrUnion();
}
}
break;
}
default:
break;
}
// The clang type is not an anonymous struct or union.
return false;
}
bool TypeSystemClang::IsArrayType(lldb::opaque_compiler_type_t type,
CompilerType *element_type_ptr,
uint64_t *size, bool *is_incomplete) {
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
default:
break;
case clang::Type::ConstantArray:
if (element_type_ptr)
element_type_ptr->SetCompilerType(
this, llvm::cast<clang::ConstantArrayType>(qual_type)
->getElementType()
.getAsOpaquePtr());
if (size)
*size = llvm::cast<clang::ConstantArrayType>(qual_type)
->getSize()
.getLimitedValue(ULLONG_MAX);
if (is_incomplete)
*is_incomplete = false;
return true;
case clang::Type::IncompleteArray:
if (element_type_ptr)
element_type_ptr->SetCompilerType(
this, llvm::cast<clang::IncompleteArrayType>(qual_type)
->getElementType()
.getAsOpaquePtr());
if (size)
*size = 0;
if (is_incomplete)
*is_incomplete = true;
return true;
case clang::Type::VariableArray:
if (element_type_ptr)
element_type_ptr->SetCompilerType(
this, llvm::cast<clang::VariableArrayType>(qual_type)
->getElementType()
.getAsOpaquePtr());
if (size)
*size = 0;
if (is_incomplete)
*is_incomplete = false;
return true;
case clang::Type::DependentSizedArray:
if (element_type_ptr)
element_type_ptr->SetCompilerType(
this, llvm::cast<clang::DependentSizedArrayType>(qual_type)
->getElementType()
.getAsOpaquePtr());
if (size)
*size = 0;
if (is_incomplete)
*is_incomplete = false;
return true;
}
if (element_type_ptr)
element_type_ptr->Clear();
if (size)
*size = 0;
if (is_incomplete)
*is_incomplete = false;
return false;
}
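// Illustrative outcomes (hypothetical `ts`; `arr10` wraps "int[10]" and
// `arr_inc` wraps "int[]"):
//
//   CompilerType elem; uint64_t size; bool incomplete;
//   ts.IsArrayType(arr10.GetOpaqueQualType(), &elem, &size, &incomplete);
//   // true; elem == int, size == 10, incomplete == false
//   ts.IsArrayType(arr_inc.GetOpaqueQualType(), &elem, &size, &incomplete);
//   // true; size == 0, incomplete == true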
bool TypeSystemClang::IsVectorType(lldb::opaque_compiler_type_t type,
CompilerType *element_type, uint64_t *size) {
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Vector: {
const clang::VectorType *vector_type =
qual_type->getAs<clang::VectorType>();
if (vector_type) {
if (size)
*size = vector_type->getNumElements();
if (element_type)
*element_type = GetType(vector_type->getElementType());
}
return true;
} break;
case clang::Type::ExtVector: {
const clang::ExtVectorType *ext_vector_type =
qual_type->getAs<clang::ExtVectorType>();
if (ext_vector_type) {
if (size)
*size = ext_vector_type->getNumElements();
if (element_type)
*element_type =
CompilerType(this, ext_vector_type->getElementType().getAsOpaquePtr());
}
return true;
}
default:
break;
}
return false;
}
bool TypeSystemClang::IsRuntimeGeneratedType(
lldb::opaque_compiler_type_t type) {
clang::DeclContext *decl_ctx = GetDeclContextForType(GetQualType(type));
if (!decl_ctx)
return false;
if (!llvm::isa<clang::ObjCInterfaceDecl>(decl_ctx))
return false;
clang::ObjCInterfaceDecl *result_iface_decl =
llvm::dyn_cast<clang::ObjCInterfaceDecl>(decl_ctx);
ClangASTMetadata *ast_metadata = GetMetadata(result_iface_decl);
if (!ast_metadata)
return false;
return (ast_metadata->GetISAPtr() != 0);
}
bool TypeSystemClang::IsCharType(lldb::opaque_compiler_type_t type) {
return GetQualType(type).getUnqualifiedType()->isCharType();
}
bool TypeSystemClang::IsCompleteType(lldb::opaque_compiler_type_t type) {
// If the type hasn't been lazily completed yet, complete it now so that we
// can give the caller an accurate answer whether the type actually has a
// definition. Without completing the type now we would just tell the user
// the current (internal) completeness state of the type and most users don't
// care (or even know) about this behavior.
const bool allow_completion = true;
return GetCompleteQualType(&getASTContext(), GetQualType(type),
allow_completion);
}
bool TypeSystemClang::IsConst(lldb::opaque_compiler_type_t type) {
return GetQualType(type).isConstQualified();
}
bool TypeSystemClang::IsCStringType(lldb::opaque_compiler_type_t type,
uint32_t &length) {
CompilerType pointee_or_element_clang_type;
length = 0;
Flags type_flags(GetTypeInfo(type, &pointee_or_element_clang_type));
if (!pointee_or_element_clang_type.IsValid())
return false;
if (type_flags.AnySet(eTypeIsArray | eTypeIsPointer)) {
if (pointee_or_element_clang_type.IsCharType()) {
if (type_flags.Test(eTypeIsArray)) {
// We know the size of the array and it could be a C string since it is
// an array of characters
length = llvm::cast<clang::ConstantArrayType>(
GetCanonicalQualType(type).getTypePtr())
->getSize()
.getLimitedValue();
}
return true;
}
}
return false;
}
bool TypeSystemClang::IsFunctionType(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
if (qual_type->isFunctionType()) {
return true;
}
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
default:
break;
case clang::Type::LValueReference:
case clang::Type::RValueReference: {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(qual_type.getTypePtr());
if (reference_type)
return IsFunctionType(
reference_type->getPointeeType().getAsOpaquePtr());
} break;
}
}
return false;
}
// Used to detect "Homogeneous Floating-point Aggregates"
uint32_t
TypeSystemClang::IsHomogeneousAggregate(lldb::opaque_compiler_type_t type,
CompilerType *base_type_ptr) {
if (!type)
return 0;
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
if (cxx_record_decl->getNumBases() || cxx_record_decl->isDynamicClass())
return 0;
}
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
if (record_type) {
const clang::RecordDecl *record_decl = record_type->getDecl();
if (record_decl) {
// We are looking for a structure that contains only floating point
// types
clang::RecordDecl::field_iterator field_pos,
field_end = record_decl->field_end();
uint32_t num_fields = 0;
bool is_hva = false;
bool is_hfa = false;
clang::QualType base_qual_type;
uint64_t base_bitwidth = 0;
for (field_pos = record_decl->field_begin(); field_pos != field_end;
++field_pos) {
clang::QualType field_qual_type = field_pos->getType();
// Measure the field's own type here, not the enclosing record.
uint64_t field_bitwidth =
getASTContext().getTypeSize(field_qual_type);
if (field_qual_type->isFloatingType()) {
if (field_qual_type->isComplexType())
return 0;
else {
if (num_fields == 0)
base_qual_type = field_qual_type;
else {
if (is_hva)
return 0;
is_hfa = true;
if (field_qual_type.getTypePtr() !=
base_qual_type.getTypePtr())
return 0;
}
}
} else if (field_qual_type->isVectorType() ||
field_qual_type->isExtVectorType()) {
if (num_fields == 0) {
base_qual_type = field_qual_type;
base_bitwidth = field_bitwidth;
} else {
if (is_hfa)
return 0;
is_hva = true;
if (base_bitwidth != field_bitwidth)
return 0;
if (field_qual_type.getTypePtr() != base_qual_type.getTypePtr())
return 0;
}
} else
return 0;
++num_fields;
}
if (base_type_ptr)
*base_type_ptr = CompilerType(this, base_qual_type.getAsOpaquePtr());
return num_fields;
}
}
}
break;
default:
break;
}
return 0;
}
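// Illustrative classification (follows the field walk above):
//
//   struct Vec3 { float x, y, z; };     // HFA: returns 3, base type "float"
//   struct Mixed { float x; int y; };   // non-floating member: returns 0
//   struct Derived : Vec3 { };          // has a base class: returns 0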
size_t TypeSystemClang::GetNumberOfFunctionArguments(
lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::FunctionProtoType *func =
llvm::dyn_cast<clang::FunctionProtoType>(qual_type.getTypePtr());
if (func)
return func->getNumParams();
}
return 0;
}
CompilerType
TypeSystemClang::GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type,
const size_t index) {
if (type) {
clang::QualType qual_type(GetQualType(type));
const clang::FunctionProtoType *func =
llvm::dyn_cast<clang::FunctionProtoType>(qual_type.getTypePtr());
if (func) {
if (index < func->getNumParams())
return CompilerType(this, func->getParamType(index).getAsOpaquePtr());
}
}
return CompilerType();
}
bool TypeSystemClang::IsFunctionPointerType(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
if (qual_type->isFunctionPointerType())
return true;
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
default:
break;
case clang::Type::LValueReference:
case clang::Type::RValueReference: {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(qual_type.getTypePtr());
if (reference_type)
return IsFunctionPointerType(
reference_type->getPointeeType().getAsOpaquePtr());
} break;
}
}
return false;
}
bool TypeSystemClang::IsBlockPointerType(
lldb::opaque_compiler_type_t type,
CompilerType *function_pointer_type_ptr) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
if (qual_type->isBlockPointerType()) {
if (function_pointer_type_ptr) {
const clang::BlockPointerType *block_pointer_type =
qual_type->castAs<clang::BlockPointerType>();
QualType pointee_type = block_pointer_type->getPointeeType();
QualType function_pointer_type = m_ast_up->getPointerType(pointee_type);
*function_pointer_type_ptr =
CompilerType(this, function_pointer_type.getAsOpaquePtr());
}
return true;
}
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
default:
break;
case clang::Type::LValueReference:
case clang::Type::RValueReference: {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(qual_type.getTypePtr());
if (reference_type)
return IsBlockPointerType(
reference_type->getPointeeType().getAsOpaquePtr(),
function_pointer_type_ptr);
} break;
}
}
return false;
}
bool TypeSystemClang::IsIntegerType(lldb::opaque_compiler_type_t type,
bool &is_signed) {
if (!type)
return false;
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::BuiltinType *builtin_type =
llvm::dyn_cast<clang::BuiltinType>(qual_type->getCanonicalTypeInternal());
if (builtin_type) {
if (builtin_type->isInteger()) {
is_signed = builtin_type->isSignedInteger();
return true;
}
}
return false;
}
bool TypeSystemClang::IsEnumerationType(lldb::opaque_compiler_type_t type,
bool &is_signed) {
if (type) {
const clang::EnumType *enum_type = llvm::dyn_cast<clang::EnumType>(
GetCanonicalQualType(type)->getCanonicalTypeInternal());
if (enum_type) {
IsIntegerType(enum_type->getDecl()->getIntegerType().getAsOpaquePtr(),
is_signed);
return true;
}
}
return false;
}
bool TypeSystemClang::IsScopedEnumerationType(
lldb::opaque_compiler_type_t type) {
if (type) {
const clang::EnumType *enum_type = llvm::dyn_cast<clang::EnumType>(
GetCanonicalQualType(type)->getCanonicalTypeInternal());
if (enum_type) {
return enum_type->isScopedEnumeralType();
}
}
return false;
}
bool TypeSystemClang::IsPointerType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
default:
break;
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
return true;
}
return false;
case clang::Type::ObjCObjectPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::ObjCObjectPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::BlockPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::BlockPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::Pointer:
if (pointee_type)
pointee_type->SetCompilerType(this,
llvm::cast<clang::PointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::MemberPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::MemberPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
default:
break;
}
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool TypeSystemClang::IsPointerOrReferenceType(
lldb::opaque_compiler_type_t type, CompilerType *pointee_type) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
default:
break;
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
return true;
}
return false;
case clang::Type::ObjCObjectPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::ObjCObjectPointerType>(qual_type)
->getPointeeType().getAsOpaquePtr());
return true;
case clang::Type::BlockPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::BlockPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::Pointer:
if (pointee_type)
pointee_type->SetCompilerType(this,
llvm::cast<clang::PointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::MemberPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::MemberPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::LValueReference:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::LValueReferenceType>(qual_type)
->desugar()
.getAsOpaquePtr());
return true;
case clang::Type::RValueReference:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::RValueReferenceType>(qual_type)
->desugar()
.getAsOpaquePtr());
return true;
default:
break;
}
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool TypeSystemClang::IsReferenceType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type,
bool *is_rvalue) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::LValueReference:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::LValueReferenceType>(qual_type)
->desugar()
.getAsOpaquePtr());
if (is_rvalue)
*is_rvalue = false;
return true;
case clang::Type::RValueReference:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::RValueReferenceType>(qual_type)
->desugar()
.getAsOpaquePtr());
if (is_rvalue)
*is_rvalue = true;
return true;
default:
break;
}
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool TypeSystemClang::IsFloatingPointType(lldb::opaque_compiler_type_t type,
uint32_t &count, bool &is_complex) {
if (type) {
clang::QualType qual_type(GetCanonicalQualType(type));
if (const clang::BuiltinType *BT = llvm::dyn_cast<clang::BuiltinType>(
qual_type->getCanonicalTypeInternal())) {
clang::BuiltinType::Kind kind = BT->getKind();
if (kind >= clang::BuiltinType::Float &&
kind <= clang::BuiltinType::LongDouble) {
count = 1;
is_complex = false;
return true;
}
} else if (const clang::ComplexType *CT =
llvm::dyn_cast<clang::ComplexType>(
qual_type->getCanonicalTypeInternal())) {
if (IsFloatingPointType(CT->getElementType().getAsOpaquePtr(), count,
is_complex)) {
count = 2;
is_complex = true;
return true;
}
} else if (const clang::VectorType *VT = llvm::dyn_cast<clang::VectorType>(
qual_type->getCanonicalTypeInternal())) {
if (IsFloatingPointType(VT->getElementType().getAsOpaquePtr(), count,
is_complex)) {
count = VT->getNumElements();
is_complex = false;
return true;
}
}
}
count = 0;
is_complex = false;
return false;
}
bool TypeSystemClang::IsDefined(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
clang::QualType qual_type(GetQualType(type));
const clang::TagType *tag_type =
llvm::dyn_cast<clang::TagType>(qual_type.getTypePtr());
if (tag_type) {
clang::TagDecl *tag_decl = tag_type->getDecl();
if (tag_decl)
return tag_decl->isCompleteDefinition();
return false;
} else {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl)
return class_interface_decl->getDefinition() != nullptr;
return false;
}
}
return true;
}
bool TypeSystemClang::IsObjCClassType(const CompilerType &type) {
if (ClangUtil::IsClangType(type)) {
clang::QualType qual_type(ClangUtil::GetCanonicalQualType(type));
const clang::ObjCObjectPointerType *obj_pointer_type =
llvm::dyn_cast<clang::ObjCObjectPointerType>(qual_type);
if (obj_pointer_type)
return obj_pointer_type->isObjCClassType();
}
return false;
}
bool TypeSystemClang::IsObjCObjectOrInterfaceType(const CompilerType &type) {
if (ClangUtil::IsClangType(type))
return ClangUtil::GetCanonicalQualType(type)->isObjCObjectOrInterfaceType();
return false;
}
bool TypeSystemClang::IsClassType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
return (type_class == clang::Type::Record);
}
bool TypeSystemClang::IsEnumType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
return (type_class == clang::Type::Enum);
}
bool TypeSystemClang::IsPolymorphicClass(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
if (record_decl) {
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl)
return cxx_record_decl->isPolymorphic();
}
}
break;
default:
break;
}
}
return false;
}
bool TypeSystemClang::IsPossibleDynamicType(lldb::opaque_compiler_type_t type,
CompilerType *dynamic_pointee_type,
bool check_cplusplus,
bool check_objc) {
clang::QualType pointee_qual_type;
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
bool success = false;
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Builtin:
if (check_objc &&
llvm::cast<clang::BuiltinType>(qual_type)->getKind() ==
clang::BuiltinType::ObjCId) {
if (dynamic_pointee_type)
dynamic_pointee_type->SetCompilerType(this, type);
return true;
}
break;
case clang::Type::ObjCObjectPointer:
if (check_objc) {
if (const auto *objc_pointee_type =
qual_type->getPointeeType().getTypePtrOrNull()) {
if (const auto *objc_object_type =
llvm::dyn_cast_or_null<clang::ObjCObjectType>(
objc_pointee_type)) {
if (objc_object_type->isObjCClass())
return false;
}
}
if (dynamic_pointee_type)
dynamic_pointee_type->SetCompilerType(
this, llvm::cast<clang::ObjCObjectPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
}
break;
case clang::Type::Pointer:
pointee_qual_type =
llvm::cast<clang::PointerType>(qual_type)->getPointeeType();
success = true;
break;
case clang::Type::LValueReference:
case clang::Type::RValueReference:
pointee_qual_type =
llvm::cast<clang::ReferenceType>(qual_type)->getPointeeType();
success = true;
break;
default:
break;
}
if (success) {
// Check to make sure what we are pointing to is a possible dynamic C++
// type. We currently accept any "void *" (in case we have a class that
// has been watered down to an opaque pointer) and virtual C++ classes.
const clang::Type::TypeClass pointee_type_class =
pointee_qual_type.getCanonicalType()->getTypeClass();
switch (pointee_type_class) {
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(pointee_qual_type)->getKind()) {
case clang::BuiltinType::UnknownAny:
case clang::BuiltinType::Void:
if (dynamic_pointee_type)
dynamic_pointee_type->SetCompilerType(
this, pointee_qual_type.getAsOpaquePtr());
return true;
default:
break;
}
break;
case clang::Type::Record:
if (check_cplusplus) {
clang::CXXRecordDecl *cxx_record_decl =
pointee_qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
bool is_complete = cxx_record_decl->isCompleteDefinition();
if (is_complete)
success = cxx_record_decl->isDynamicClass();
else {
ClangASTMetadata *metadata = GetMetadata(cxx_record_decl);
if (metadata)
success = metadata->GetIsDynamicCXXType();
else {
is_complete = GetType(pointee_qual_type).GetCompleteType();
if (is_complete)
success = cxx_record_decl->isDynamicClass();
else
success = false;
}
}
if (success) {
if (dynamic_pointee_type)
dynamic_pointee_type->SetCompilerType(
this, pointee_qual_type.getAsOpaquePtr());
return true;
}
}
}
break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (check_objc) {
if (dynamic_pointee_type)
dynamic_pointee_type->SetCompilerType(
this, pointee_qual_type.getAsOpaquePtr());
return true;
}
break;
default:
break;
}
}
}
if (dynamic_pointee_type)
dynamic_pointee_type->Clear();
return false;
}
bool TypeSystemClang::IsScalarType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
return (GetTypeInfo(type, nullptr) & eTypeIsScalar) != 0;
}
bool TypeSystemClang::IsTypedefType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
return RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef})
->getTypeClass() == clang::Type::Typedef;
}
bool TypeSystemClang::IsVoidType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
return GetCanonicalQualType(type)->isVoidType();
}
bool TypeSystemClang::CanPassInRegisters(const CompilerType &type) {
if (auto *record_decl =
TypeSystemClang::GetAsRecordDecl(type)) {
return record_decl->canPassInRegisters();
}
return false;
}
bool TypeSystemClang::SupportsLanguage(lldb::LanguageType language) {
return TypeSystemClangSupportsLanguage(language);
}
Optional<std::string>
TypeSystemClang::GetCXXClassName(const CompilerType &type) {
if (!type)
return llvm::None;
clang::QualType qual_type(ClangUtil::GetCanonicalQualType(type));
if (qual_type.isNull())
return llvm::None;
clang::CXXRecordDecl *cxx_record_decl = qual_type->getAsCXXRecordDecl();
if (!cxx_record_decl)
return llvm::None;
return std::string(cxx_record_decl->getIdentifier()->getNameStart());
}
bool TypeSystemClang::IsCXXClassType(const CompilerType &type) {
if (!type)
return false;
clang::QualType qual_type(ClangUtil::GetCanonicalQualType(type));
return !qual_type.isNull() && qual_type->getAsCXXRecordDecl() != nullptr;
}
bool TypeSystemClang::IsBeingDefined(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::TagType *tag_type = llvm::dyn_cast<clang::TagType>(qual_type);
if (tag_type)
return tag_type->isBeingDefined();
return false;
}
bool TypeSystemClang::IsObjCObjectPointerType(const CompilerType &type,
CompilerType *class_type_ptr) {
if (!ClangUtil::IsClangType(type))
return false;
clang::QualType qual_type(ClangUtil::GetCanonicalQualType(type));
if (!qual_type.isNull() && qual_type->isObjCObjectPointerType()) {
if (class_type_ptr) {
if (!qual_type->isObjCClassType() && !qual_type->isObjCIdType()) {
const clang::ObjCObjectPointerType *obj_pointer_type =
llvm::dyn_cast<clang::ObjCObjectPointerType>(qual_type);
if (obj_pointer_type == nullptr)
class_type_ptr->Clear();
else
class_type_ptr->SetCompilerType(
type.GetTypeSystem(),
clang::QualType(obj_pointer_type->getInterfaceType(), 0)
.getAsOpaquePtr());
}
}
return true;
}
if (class_type_ptr)
class_type_ptr->Clear();
return false;
}
// Type Completion
bool TypeSystemClang::GetCompleteType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
const bool allow_completion = true;
return GetCompleteQualType(&getASTContext(), GetQualType(type),
allow_completion);
}
ConstString TypeSystemClang::GetTypeName(lldb::opaque_compiler_type_t type) {
if (!type)
return ConstString();
clang::QualType qual_type(GetQualType(type));
// Remove certain type sugar from the name. Sugar such as elaborated types
// or template types which only serve to improve diagnostics shouldn't
// act as their own types from the user's perspective (e.g., a formatter
// shouldn't format a variable differently depending on how the user has
// specified the type; '::Type' and 'Type' should behave the same).
// Typedefs and atomic derived types are not removed as they are actually
// useful for identifying specific types.
qual_type = RemoveWrappingTypes(qual_type,
{clang::Type::Typedef, clang::Type::Atomic});
// For a typedef just return the qualified name.
if (const auto *typedef_type = qual_type->getAs<clang::TypedefType>()) {
const clang::TypedefNameDecl *typedef_decl = typedef_type->getDecl();
return ConstString(GetTypeNameForDecl(typedef_decl));
}
return ConstString(qual_type.getAsString(GetTypePrintingPolicy()));
}
ConstString
TypeSystemClang::GetDisplayTypeName(lldb::opaque_compiler_type_t type) {
if (!type)
return ConstString();
clang::QualType qual_type(GetQualType(type));
clang::PrintingPolicy printing_policy(getASTContext().getPrintingPolicy());
printing_policy.SuppressTagKeyword = true;
printing_policy.SuppressScope = false;
printing_policy.SuppressUnwrittenScope = true;
printing_policy.SuppressInlineNamespace = true;
return ConstString(qual_type.getAsString(printing_policy));
}
uint32_t
TypeSystemClang::GetTypeInfo(lldb::opaque_compiler_type_t type,
CompilerType *pointee_or_element_clang_type) {
if (!type)
return 0;
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->Clear();
clang::QualType qual_type =
RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef});
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
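// Map the Clang type class onto the lldb eTypeIs* flag bits. Where it makes
// sense (pointers, references, arrays, enums, ...) the pointee or element
// type is also reported through pointee_or_element_clang_type.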
switch (type_class) {
case clang::Type::Attributed:
return GetTypeInfo(qual_type->castAs<clang::AttributedType>()
->getModifiedType()
.getAsOpaquePtr(),
pointee_or_element_clang_type);
case clang::Type::Builtin: {
const clang::BuiltinType *builtin_type =
llvm::cast<clang::BuiltinType>(qual_type->getCanonicalTypeInternal());
uint32_t builtin_type_flags = eTypeIsBuiltIn | eTypeHasValue;
switch (builtin_type->getKind()) {
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, getASTContext().ObjCBuiltinClassTy.getAsOpaquePtr());
builtin_type_flags |= eTypeIsPointer | eTypeIsObjC;
break;
case clang::BuiltinType::ObjCSel:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, getASTContext().CharTy.getAsOpaquePtr());
builtin_type_flags |= eTypeIsPointer | eTypeIsObjC;
break;
case clang::BuiltinType::Bool:
case clang::BuiltinType::Char_U:
case clang::BuiltinType::UChar:
case clang::BuiltinType::WChar_U:
case clang::BuiltinType::Char16:
case clang::BuiltinType::Char32:
case clang::BuiltinType::UShort:
case clang::BuiltinType::UInt:
case clang::BuiltinType::ULong:
case clang::BuiltinType::ULongLong:
case clang::BuiltinType::UInt128:
case clang::BuiltinType::Char_S:
case clang::BuiltinType::SChar:
case clang::BuiltinType::WChar_S:
case clang::BuiltinType::Short:
case clang::BuiltinType::Int:
case clang::BuiltinType::Long:
case clang::BuiltinType::LongLong:
case clang::BuiltinType::Int128:
case clang::BuiltinType::Float:
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
builtin_type_flags |= eTypeIsScalar;
if (builtin_type->isInteger()) {
builtin_type_flags |= eTypeIsInteger;
if (builtin_type->isSignedInteger())
builtin_type_flags |= eTypeIsSigned;
} else if (builtin_type->isFloatingPoint())
builtin_type_flags |= eTypeIsFloat;
break;
default:
break;
}
return builtin_type_flags;
}
case clang::Type::BlockPointer:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, qual_type->getPointeeType().getAsOpaquePtr());
return eTypeIsPointer | eTypeHasChildren | eTypeIsBlock;
case clang::Type::Complex: {
uint32_t complex_type_flags =
eTypeIsBuiltIn | eTypeHasValue | eTypeIsComplex;
const clang::ComplexType *complex_type = llvm::dyn_cast<clang::ComplexType>(
qual_type->getCanonicalTypeInternal());
if (complex_type) {
clang::QualType complex_element_type(complex_type->getElementType());
if (complex_element_type->isIntegerType())
complex_type_flags |= eTypeIsInteger;
else if (complex_element_type->isFloatingType())
complex_type_flags |= eTypeIsFloat;
}
return complex_type_flags;
} break;
case clang::Type::ConstantArray:
case clang::Type::DependentSizedArray:
case clang::Type::IncompleteArray:
case clang::Type::VariableArray:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, llvm::cast<clang::ArrayType>(qual_type.getTypePtr())
->getElementType()
.getAsOpaquePtr());
return eTypeHasChildren | eTypeIsArray;
case clang::Type::DependentName:
return 0;
case clang::Type::DependentSizedExtVector:
return eTypeHasChildren | eTypeIsVector;
case clang::Type::DependentTemplateSpecialization:
return eTypeIsTemplate;
case clang::Type::Enum:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, llvm::cast<clang::EnumType>(qual_type)
->getDecl()
->getIntegerType()
.getAsOpaquePtr());
return eTypeIsEnumeration | eTypeHasValue;
case clang::Type::FunctionProto:
return eTypeIsFuncPrototype | eTypeHasValue;
case clang::Type::FunctionNoProto:
return eTypeIsFuncPrototype | eTypeHasValue;
case clang::Type::InjectedClassName:
return 0;
case clang::Type::LValueReference:
case clang::Type::RValueReference:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, llvm::cast<clang::ReferenceType>(qual_type.getTypePtr())
->getPointeeType()
.getAsOpaquePtr());
return eTypeHasChildren | eTypeIsReference | eTypeHasValue;
case clang::Type::MemberPointer:
return eTypeIsPointer | eTypeIsMember | eTypeHasValue;
case clang::Type::ObjCObjectPointer:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, qual_type->getPointeeType().getAsOpaquePtr());
return eTypeHasChildren | eTypeIsObjC | eTypeIsClass | eTypeIsPointer |
eTypeHasValue;
case clang::Type::ObjCObject:
return eTypeHasChildren | eTypeIsObjC | eTypeIsClass;
case clang::Type::ObjCInterface:
return eTypeHasChildren | eTypeIsObjC | eTypeIsClass;
case clang::Type::Pointer:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, qual_type->getPointeeType().getAsOpaquePtr());
return eTypeHasChildren | eTypeIsPointer | eTypeHasValue;
case clang::Type::Record:
if (qual_type->getAsCXXRecordDecl())
return eTypeHasChildren | eTypeIsClass | eTypeIsCPlusPlus;
else
return eTypeHasChildren | eTypeIsStructUnion;
break;
case clang::Type::SubstTemplateTypeParm:
return eTypeIsTemplate;
case clang::Type::TemplateTypeParm:
return eTypeIsTemplate;
case clang::Type::TemplateSpecialization:
return eTypeIsTemplate;
case clang::Type::Typedef:
return eTypeIsTypedef | GetType(llvm::cast<clang::TypedefType>(qual_type)
->getDecl()
->getUnderlyingType())
.GetTypeInfo(pointee_or_element_clang_type);
case clang::Type::UnresolvedUsing:
return 0;
case clang::Type::ExtVector:
case clang::Type::Vector: {
uint32_t vector_type_flags = eTypeHasChildren | eTypeIsVector;
const clang::VectorType *vector_type = llvm::dyn_cast<clang::VectorType>(
qual_type->getCanonicalTypeInternal());
if (vector_type) {
if (vector_type->isIntegerType())
vector_type_flags |= eTypeIsInteger;
else if (vector_type->isFloatingType())
vector_type_flags |= eTypeIsFloat;
}
return vector_type_flags;
}
default:
return 0;
}
return 0;
}
lldb::LanguageType
TypeSystemClang::GetMinimumLanguage(lldb::opaque_compiler_type_t type) {
if (!type)
return lldb::eLanguageTypeC;
// If the type is a reference, then resolve it to what it refers to first:
clang::QualType qual_type(GetCanonicalQualType(type).getNonReferenceType());
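// For pointer types, infer the language from the pointee: Objective-C object
// pointers and pointers to C++ record types are the strongest hints we have.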
if (qual_type->isAnyPointerType()) {
if (qual_type->isObjCObjectPointerType())
return lldb::eLanguageTypeObjC;
if (qual_type->getPointeeCXXRecordDecl())
return lldb::eLanguageTypeC_plus_plus;
clang::QualType pointee_type(qual_type->getPointeeType());
if (pointee_type->getPointeeCXXRecordDecl())
return lldb::eLanguageTypeC_plus_plus;
if (pointee_type->isObjCObjectOrInterfaceType())
return lldb::eLanguageTypeObjC;
if (pointee_type->isObjCClassType())
return lldb::eLanguageTypeObjC;
if (pointee_type.getTypePtr() ==
getASTContext().ObjCBuiltinIdTy.getTypePtr())
return lldb::eLanguageTypeObjC;
} else {
if (qual_type->isObjCObjectOrInterfaceType())
return lldb::eLanguageTypeObjC;
if (qual_type->getAsCXXRecordDecl())
return lldb::eLanguageTypeC_plus_plus;
switch (qual_type->getTypeClass()) {
default:
break;
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
default:
case clang::BuiltinType::Void:
case clang::BuiltinType::Bool:
case clang::BuiltinType::Char_U:
case clang::BuiltinType::UChar:
case clang::BuiltinType::WChar_U:
case clang::BuiltinType::Char16:
case clang::BuiltinType::Char32:
case clang::BuiltinType::UShort:
case clang::BuiltinType::UInt:
case clang::BuiltinType::ULong:
case clang::BuiltinType::ULongLong:
case clang::BuiltinType::UInt128:
case clang::BuiltinType::Char_S:
case clang::BuiltinType::SChar:
case clang::BuiltinType::WChar_S:
case clang::BuiltinType::Short:
case clang::BuiltinType::Int:
case clang::BuiltinType::Long:
case clang::BuiltinType::LongLong:
case clang::BuiltinType::Int128:
case clang::BuiltinType::Float:
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
break;
case clang::BuiltinType::NullPtr:
return eLanguageTypeC_plus_plus;
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
case clang::BuiltinType::ObjCSel:
return eLanguageTypeObjC;
case clang::BuiltinType::Dependent:
case clang::BuiltinType::Overload:
case clang::BuiltinType::BoundMember:
case clang::BuiltinType::UnknownAny:
break;
}
break;
case clang::Type::Typedef:
return GetType(llvm::cast<clang::TypedefType>(qual_type)
->getDecl()
->getUnderlyingType())
.GetMinimumLanguage();
}
}
return lldb::eLanguageTypeC;
}
lldb::TypeClass
TypeSystemClang::GetTypeClass(lldb::opaque_compiler_type_t type) {
if (!type)
return lldb::eTypeClassInvalid;
clang::QualType qual_type =
RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef});
switch (qual_type->getTypeClass()) {
case clang::Type::Atomic:
case clang::Type::Auto:
case clang::Type::Decltype:
case clang::Type::Elaborated:
case clang::Type::Paren:
case clang::Type::TypeOf:
case clang::Type::TypeOfExpr:
case clang::Type::Using:
llvm_unreachable("Handled in RemoveWrappingTypes!");
case clang::Type::UnaryTransform:
break;
case clang::Type::FunctionNoProto:
return lldb::eTypeClassFunction;
case clang::Type::FunctionProto:
return lldb::eTypeClassFunction;
case clang::Type::IncompleteArray:
return lldb::eTypeClassArray;
case clang::Type::VariableArray:
return lldb::eTypeClassArray;
case clang::Type::ConstantArray:
return lldb::eTypeClassArray;
case clang::Type::DependentSizedArray:
return lldb::eTypeClassArray;
case clang::Type::DependentSizedExtVector:
return lldb::eTypeClassVector;
case clang::Type::DependentVector:
return lldb::eTypeClassVector;
case clang::Type::ExtVector:
return lldb::eTypeClassVector;
case clang::Type::Vector:
return lldb::eTypeClassVector;
case clang::Type::Builtin:
// Ext-Int is just an integer type.
case clang::Type::BitInt:
case clang::Type::DependentBitInt:
return lldb::eTypeClassBuiltin;
case clang::Type::ObjCObjectPointer:
return lldb::eTypeClassObjCObjectPointer;
case clang::Type::BlockPointer:
return lldb::eTypeClassBlockPointer;
case clang::Type::Pointer:
return lldb::eTypeClassPointer;
case clang::Type::LValueReference:
return lldb::eTypeClassReference;
case clang::Type::RValueReference:
return lldb::eTypeClassReference;
case clang::Type::MemberPointer:
return lldb::eTypeClassMemberPointer;
case clang::Type::Complex:
if (qual_type->isComplexType())
return lldb::eTypeClassComplexFloat;
else
return lldb::eTypeClassComplexInteger;
case clang::Type::ObjCObject:
return lldb::eTypeClassObjCObject;
case clang::Type::ObjCInterface:
return lldb::eTypeClassObjCInterface;
case clang::Type::Record: {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
if (record_decl->isUnion())
return lldb::eTypeClassUnion;
else if (record_decl->isStruct())
return lldb::eTypeClassStruct;
else
return lldb::eTypeClassClass;
} break;
case clang::Type::Enum:
return lldb::eTypeClassEnumeration;
case clang::Type::Typedef:
return lldb::eTypeClassTypedef;
case clang::Type::UnresolvedUsing:
break;
case clang::Type::Attributed:
case clang::Type::BTFTagAttributed:
break;
case clang::Type::TemplateTypeParm:
break;
case clang::Type::SubstTemplateTypeParm:
break;
case clang::Type::SubstTemplateTypeParmPack:
break;
case clang::Type::InjectedClassName:
break;
case clang::Type::DependentName:
break;
case clang::Type::DependentTemplateSpecialization:
break;
case clang::Type::PackExpansion:
break;
case clang::Type::TemplateSpecialization:
break;
case clang::Type::DeducedTemplateSpecialization:
break;
case clang::Type::Pipe:
break;
// pointer type decayed from an array or function type.
case clang::Type::Decayed:
break;
case clang::Type::Adjusted:
break;
case clang::Type::ObjCTypeParam:
break;
case clang::Type::DependentAddressSpace:
break;
case clang::Type::MacroQualified:
break;
// Matrix types that we're not sure how to display at the moment.
case clang::Type::ConstantMatrix:
case clang::Type::DependentSizedMatrix:
break;
}
// We don't know how to display this type...
return lldb::eTypeClassOther;
}
unsigned TypeSystemClang::GetTypeQualifiers(lldb::opaque_compiler_type_t type) {
if (type)
return GetQualType(type).getQualifiers().getCVRQualifiers();
return 0;
}
// Creating related types
CompilerType
TypeSystemClang::GetArrayElementType(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) {
if (type) {
clang::QualType qual_type(GetQualType(type));
const clang::Type *array_eletype =
qual_type.getTypePtr()->getArrayElementTypeNoTypeQual();
if (!array_eletype)
return CompilerType();
return GetType(clang::QualType(array_eletype, 0));
}
return CompilerType();
}
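// A size of 0 produces an incomplete array type; any other size produces a
// constant array type with that many elements.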
CompilerType TypeSystemClang::GetArrayType(lldb::opaque_compiler_type_t type,
uint64_t size) {
if (type) {
clang::QualType qual_type(GetCanonicalQualType(type));
clang::ASTContext &ast_ctx = getASTContext();
if (size != 0)
return GetType(ast_ctx.getConstantArrayType(
qual_type, llvm::APInt(64, size), nullptr,
clang::ArrayType::ArraySizeModifier::Normal, 0));
else
return GetType(ast_ctx.getIncompleteArrayType(
qual_type, clang::ArrayType::ArraySizeModifier::Normal, 0));
}
return CompilerType();
}
CompilerType
TypeSystemClang::GetCanonicalType(lldb::opaque_compiler_type_t type) {
if (type)
return GetType(GetCanonicalQualType(type));
return CompilerType();
}
static clang::QualType GetFullyUnqualifiedType_Impl(clang::ASTContext *ast,
clang::QualType qual_type) {
if (qual_type->isPointerType())
qual_type = ast->getPointerType(
GetFullyUnqualifiedType_Impl(ast, qual_type->getPointeeType()));
else if (const ConstantArrayType *arr =
ast->getAsConstantArrayType(qual_type)) {
qual_type = ast->getConstantArrayType(
GetFullyUnqualifiedType_Impl(ast, arr->getElementType()),
arr->getSize(), arr->getSizeExpr(), arr->getSizeModifier(),
arr->getIndexTypeQualifiers().getAsOpaqueValue());
} else
qual_type = qual_type.getUnqualifiedType();
qual_type.removeLocalConst();
qual_type.removeLocalRestrict();
qual_type.removeLocalVolatile();
return qual_type;
}
CompilerType
TypeSystemClang::GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) {
if (type)
return GetType(
GetFullyUnqualifiedType_Impl(&getASTContext(), GetQualType(type)));
return CompilerType();
}
CompilerType
TypeSystemClang::GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) {
if (type)
return GetEnumerationIntegerType(GetType(GetCanonicalQualType(type)));
return CompilerType();
}
int TypeSystemClang::GetFunctionArgumentCount(
lldb::opaque_compiler_type_t type) {
if (type) {
const clang::FunctionProtoType *func =
llvm::dyn_cast<clang::FunctionProtoType>(GetCanonicalQualType(type));
if (func)
return func->getNumParams();
}
return -1;
}
CompilerType TypeSystemClang::GetFunctionArgumentTypeAtIndex(
lldb::opaque_compiler_type_t type, size_t idx) {
if (type) {
const clang::FunctionProtoType *func =
llvm::dyn_cast<clang::FunctionProtoType>(GetQualType(type));
if (func) {
const uint32_t num_args = func->getNumParams();
if (idx < num_args)
return GetType(func->getParamType(idx));
}
}
return CompilerType();
}
CompilerType
TypeSystemClang::GetFunctionReturnType(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetQualType(type));
const clang::FunctionProtoType *func =
llvm::dyn_cast<clang::FunctionProtoType>(qual_type.getTypePtr());
if (func)
return GetType(func->getReturnType());
}
return CompilerType();
}
size_t
TypeSystemClang::GetNumMemberFunctions(lldb::opaque_compiler_type_t type) {
size_t num_functions = 0;
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
switch (qual_type->getTypeClass()) {
case clang::Type::Record:
if (GetCompleteQualType(&getASTContext(), qual_type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl)
num_functions = std::distance(cxx_record_decl->method_begin(),
cxx_record_decl->method_end());
}
break;
case clang::Type::ObjCObjectPointer: {
const clang::ObjCObjectPointerType *objc_class_type =
qual_type->castAs<clang::ObjCObjectPointerType>();
const clang::ObjCInterfaceType *objc_interface_type =
objc_class_type->getInterfaceType();
if (objc_interface_type &&
GetCompleteType(static_cast<lldb::opaque_compiler_type_t>(
const_cast<clang::ObjCInterfaceType *>(objc_interface_type)))) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getDecl();
if (class_interface_decl) {
num_functions = std::distance(class_interface_decl->meth_begin(),
class_interface_decl->meth_end());
}
}
break;
}
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl)
num_functions = std::distance(class_interface_decl->meth_begin(),
class_interface_decl->meth_end());
}
}
break;
default:
break;
}
}
return num_functions;
}
TypeMemberFunctionImpl
TypeSystemClang::GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type,
size_t idx) {
std::string name;
MemberFunctionKind kind(MemberFunctionKind::eMemberFunctionKindUnknown);
CompilerType clang_type;
CompilerDecl clang_decl;
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
switch (qual_type->getTypeClass()) {
case clang::Type::Record:
if (GetCompleteQualType(&getASTContext(), qual_type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
auto method_iter = cxx_record_decl->method_begin();
auto method_end = cxx_record_decl->method_end();
if (idx <
static_cast<size_t>(std::distance(method_iter, method_end))) {
std::advance(method_iter, idx);
clang::CXXMethodDecl *cxx_method_decl =
method_iter->getCanonicalDecl();
if (cxx_method_decl) {
name = cxx_method_decl->getDeclName().getAsString();
if (cxx_method_decl->isStatic())
kind = lldb::eMemberFunctionKindStaticMethod;
else if (llvm::isa<clang::CXXConstructorDecl>(cxx_method_decl))
kind = lldb::eMemberFunctionKindConstructor;
else if (llvm::isa<clang::CXXDestructorDecl>(cxx_method_decl))
kind = lldb::eMemberFunctionKindDestructor;
else
kind = lldb::eMemberFunctionKindInstanceMethod;
clang_type = GetType(cxx_method_decl->getType());
clang_decl = GetCompilerDecl(cxx_method_decl);
}
}
}
}
break;
case clang::Type::ObjCObjectPointer: {
const clang::ObjCObjectPointerType *objc_class_type =
qual_type->castAs<clang::ObjCObjectPointerType>();
const clang::ObjCInterfaceType *objc_interface_type =
objc_class_type->getInterfaceType();
if (objc_interface_type &&
GetCompleteType(static_cast<lldb::opaque_compiler_type_t>(
const_cast<clang::ObjCInterfaceType *>(objc_interface_type)))) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getDecl();
if (class_interface_decl) {
auto method_iter = class_interface_decl->meth_begin();
auto method_end = class_interface_decl->meth_end();
if (idx <
static_cast<size_t>(std::distance(method_iter, method_end))) {
std::advance(method_iter, idx);
clang::ObjCMethodDecl *objc_method_decl =
method_iter->getCanonicalDecl();
if (objc_method_decl) {
clang_decl = GetCompilerDecl(objc_method_decl);
name = objc_method_decl->getSelector().getAsString();
if (objc_method_decl->isClassMethod())
kind = lldb::eMemberFunctionKindStaticMethod;
else
kind = lldb::eMemberFunctionKindInstanceMethod;
}
}
}
}
break;
}
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
auto method_iter = class_interface_decl->meth_begin();
auto method_end = class_interface_decl->meth_end();
if (idx <
static_cast<size_t>(std::distance(method_iter, method_end))) {
std::advance(method_iter, idx);
clang::ObjCMethodDecl *objc_method_decl =
method_iter->getCanonicalDecl();
if (objc_method_decl) {
clang_decl = GetCompilerDecl(objc_method_decl);
name = objc_method_decl->getSelector().getAsString();
if (objc_method_decl->isClassMethod())
kind = lldb::eMemberFunctionKindStaticMethod;
else
kind = lldb::eMemberFunctionKindInstanceMethod;
}
}
}
}
}
break;
default:
break;
}
}
if (kind == eMemberFunctionKindUnknown)
return TypeMemberFunctionImpl();
else
return TypeMemberFunctionImpl(clang_type, clang_decl, name, kind);
}
CompilerType
TypeSystemClang::GetNonReferenceType(lldb::opaque_compiler_type_t type) {
if (type)
return GetType(GetQualType(type).getNonReferenceType());
return CompilerType();
}
CompilerType
TypeSystemClang::GetPointeeType(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetQualType(type));
return GetType(qual_type.getTypePtr()->getPointeeType());
}
return CompilerType();
}
CompilerType
TypeSystemClang::GetPointerType(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetQualType(type));
switch (qual_type.getDesugaredType(getASTContext())->getTypeClass()) {
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
return GetType(getASTContext().getObjCObjectPointerType(qual_type));
default:
return GetType(getASTContext().getPointerType(qual_type));
}
}
return CompilerType();
}
CompilerType
TypeSystemClang::GetLValueReferenceType(lldb::opaque_compiler_type_t type) {
if (type)
return GetType(getASTContext().getLValueReferenceType(GetQualType(type)));
else
return CompilerType();
}
CompilerType
TypeSystemClang::GetRValueReferenceType(lldb::opaque_compiler_type_t type) {
if (type)
return GetType(getASTContext().getRValueReferenceType(GetQualType(type)));
else
return CompilerType();
}
CompilerType TypeSystemClang::GetAtomicType(lldb::opaque_compiler_type_t type) {
if (!type)
return CompilerType();
return GetType(getASTContext().getAtomicType(GetQualType(type)));
}
CompilerType
TypeSystemClang::AddConstModifier(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType result(GetQualType(type));
result.addConst();
return GetType(result);
}
return CompilerType();
}
CompilerType
TypeSystemClang::AddVolatileModifier(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType result(GetQualType(type));
result.addVolatile();
return GetType(result);
}
return CompilerType();
}
CompilerType
TypeSystemClang::AddRestrictModifier(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType result(GetQualType(type));
result.addRestrict();
return GetType(result);
}
return CompilerType();
}
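// Example usage (a sketch with a hypothetical alias name; passing an empty
// CompilerDeclContext makes the typedef land in the translation unit):
//   CompilerType alias = ts.CreateTypedef(int_type.GetOpaqueQualType(),
//                                         "MyAlias", CompilerDeclContext(),
//                                         /*payload=*/0);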
CompilerType TypeSystemClang::CreateTypedef(
lldb::opaque_compiler_type_t type, const char *typedef_name,
const CompilerDeclContext &compiler_decl_ctx, uint32_t payload) {
if (type && typedef_name && typedef_name[0]) {
clang::ASTContext &clang_ast = getASTContext();
clang::QualType qual_type(GetQualType(type));
clang::DeclContext *decl_ctx =
TypeSystemClang::DeclContextGetAsDeclContext(compiler_decl_ctx);
if (!decl_ctx)
decl_ctx = getASTContext().getTranslationUnitDecl();
clang::TypedefDecl *decl =
clang::TypedefDecl::CreateDeserialized(clang_ast, 0);
decl->setDeclContext(decl_ctx);
decl->setDeclName(&clang_ast.Idents.get(typedef_name));
decl->setTypeSourceInfo(clang_ast.getTrivialTypeSourceInfo(qual_type));
decl_ctx->addDecl(decl);
SetOwningModule(decl, TypePayloadClang(payload).GetOwningModule());
clang::TagDecl *tdecl = nullptr;
if (!qual_type.isNull()) {
if (const clang::RecordType *rt = qual_type->getAs<clang::RecordType>())
tdecl = rt->getDecl();
if (const clang::EnumType *et = qual_type->getAs<clang::EnumType>())
tdecl = et->getDecl();
}
// Check whether this declaration is an anonymous struct, union, or enum,
// hidden behind a typedef. If so, we try to check whether we have a
// typedef tag to attach to the original record declaration.
if (tdecl && !tdecl->getIdentifier() && !tdecl->getTypedefNameForAnonDecl())
tdecl->setTypedefNameForAnonDecl(decl);
decl->setAccess(clang::AS_public); // TODO respect proper access specifier
// Get a uniqued clang::QualType for the typedef decl type
return GetType(clang_ast.getTypedefType(decl));
}
return CompilerType();
}
CompilerType
TypeSystemClang::GetTypedefedType(lldb::opaque_compiler_type_t type) {
if (type) {
const clang::TypedefType *typedef_type = llvm::dyn_cast<clang::TypedefType>(
RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef}));
if (typedef_type)
return GetType(typedef_type->getDecl()->getUnderlyingType());
}
return CompilerType();
}
// Create related types using the current type's AST
CompilerType TypeSystemClang::GetBasicTypeFromAST(lldb::BasicType basic_type) {
return TypeSystemClang::GetBasicType(basic_type);
}
// Exploring the type
const llvm::fltSemantics &
TypeSystemClang::GetFloatTypeSemantics(size_t byte_size) {
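// Match the requested byte size against this AST's float, double, long
// double and half types and return the corresponding APFloat semantics;
// fall back to Bogus() if nothing matches.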
clang::ASTContext &ast = getASTContext();
const size_t bit_size = byte_size * 8;
if (bit_size == ast.getTypeSize(ast.FloatTy))
return ast.getFloatTypeSemantics(ast.FloatTy);
else if (bit_size == ast.getTypeSize(ast.DoubleTy))
return ast.getFloatTypeSemantics(ast.DoubleTy);
else if (bit_size == ast.getTypeSize(ast.LongDoubleTy))
return ast.getFloatTypeSemantics(ast.LongDoubleTy);
else if (bit_size == ast.getTypeSize(ast.HalfTy))
return ast.getFloatTypeSemantics(ast.HalfTy);
return llvm::APFloatBase::Bogus();
}
Optional<uint64_t>
TypeSystemClang::GetBitSize(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) {
if (GetCompleteType(type)) {
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type))
return getASTContext().getTypeSize(qual_type);
else
return None;
break;
case clang::Type::ObjCInterface:
case clang::Type::ObjCObject: {
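// The size of an Objective-C object is only known to the Objective-C
// language runtime, so ask the process' runtime when we have one.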
ExecutionContext exe_ctx(exe_scope);
Process *process = exe_ctx.GetProcessPtr();
if (process) {
ObjCLanguageRuntime *objc_runtime = ObjCLanguageRuntime::Get(*process);
if (objc_runtime) {
uint64_t bit_size = 0;
if (objc_runtime->GetTypeBitSize(GetType(qual_type), bit_size))
return bit_size;
}
} else {
static bool g_printed = false;
if (!g_printed) {
StreamString s;
DumpTypeDescription(type, &s);
llvm::outs() << "warning: trying to determine the size of type ";
llvm::outs() << s.GetString() << "\n";
llvm::outs() << "without a valid ExecutionContext. this is not "
"reliable. please file a bug against LLDB.\n";
llvm::outs() << "backtrace:\n";
llvm::sys::PrintStackTrace(llvm::outs());
llvm::outs() << "\n";
g_printed = true;
}
}
}
LLVM_FALLTHROUGH;
default:
const uint32_t bit_size = getASTContext().getTypeSize(qual_type);
if (bit_size == 0) {
if (qual_type->isIncompleteArrayType())
return getASTContext().getTypeSize(
qual_type->getArrayElementTypeNoTypeQual()
->getCanonicalTypeUnqualified());
}
if (qual_type->isObjCObjectOrInterfaceType())
return bit_size +
getASTContext().getTypeSize(getASTContext().ObjCBuiltinClassTy);
// Function types actually have a size of 0, that's not an error.
if (qual_type->isFunctionProtoType())
return bit_size;
if (bit_size)
return bit_size;
}
}
return None;
}
llvm::Optional<size_t>
TypeSystemClang::GetTypeBitAlign(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) {
if (GetCompleteType(type))
return getASTContext().getTypeAlign(GetQualType(type));
return {};
}
lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type,
uint64_t &count) {
if (!type)
return lldb::eEncodingInvalid;
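// count reports how many scalar values make up this type (2 for complex
// types, 1 for everything else we understand).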
count = 1;
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
switch (qual_type->getTypeClass()) {
case clang::Type::Atomic:
case clang::Type::Auto:
case clang::Type::Decltype:
case clang::Type::Elaborated:
case clang::Type::Paren:
case clang::Type::Typedef:
case clang::Type::TypeOf:
case clang::Type::TypeOfExpr:
case clang::Type::Using:
llvm_unreachable("Handled in RemoveWrappingTypes!");
case clang::Type::UnaryTransform:
break;
case clang::Type::FunctionNoProto:
case clang::Type::FunctionProto:
break;
case clang::Type::IncompleteArray:
case clang::Type::VariableArray:
break;
case clang::Type::ConstantArray:
break;
case clang::Type::DependentVector:
case clang::Type::ExtVector:
case clang::Type::Vector:
// TODO: Set this to more than one???
break;
case clang::Type::BitInt:
case clang::Type::DependentBitInt:
return qual_type->isUnsignedIntegerType() ? lldb::eEncodingUint
: lldb::eEncodingSint;
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
case clang::BuiltinType::Void:
break;
case clang::BuiltinType::Char_S:
case clang::BuiltinType::SChar:
case clang::BuiltinType::WChar_S:
case clang::BuiltinType::Short:
case clang::BuiltinType::Int:
case clang::BuiltinType::Long:
case clang::BuiltinType::LongLong:
case clang::BuiltinType::Int128:
return lldb::eEncodingSint;
case clang::BuiltinType::Bool:
case clang::BuiltinType::Char_U:
case clang::BuiltinType::UChar:
case clang::BuiltinType::WChar_U:
case clang::BuiltinType::Char8:
case clang::BuiltinType::Char16:
case clang::BuiltinType::Char32:
case clang::BuiltinType::UShort:
case clang::BuiltinType::UInt:
case clang::BuiltinType::ULong:
case clang::BuiltinType::ULongLong:
case clang::BuiltinType::UInt128:
return lldb::eEncodingUint;
// Fixed point types. Note that they are currently ignored.
case clang::BuiltinType::ShortAccum:
case clang::BuiltinType::Accum:
case clang::BuiltinType::LongAccum:
case clang::BuiltinType::UShortAccum:
case clang::BuiltinType::UAccum:
case clang::BuiltinType::ULongAccum:
case clang::BuiltinType::ShortFract:
case clang::BuiltinType::Fract:
case clang::BuiltinType::LongFract:
case clang::BuiltinType::UShortFract:
case clang::BuiltinType::UFract:
case clang::BuiltinType::ULongFract:
case clang::BuiltinType::SatShortAccum:
case clang::BuiltinType::SatAccum:
case clang::BuiltinType::SatLongAccum:
case clang::BuiltinType::SatUShortAccum:
case clang::BuiltinType::SatUAccum:
case clang::BuiltinType::SatULongAccum:
case clang::BuiltinType::SatShortFract:
case clang::BuiltinType::SatFract:
case clang::BuiltinType::SatLongFract:
case clang::BuiltinType::SatUShortFract:
case clang::BuiltinType::SatUFract:
case clang::BuiltinType::SatULongFract:
break;
case clang::BuiltinType::Half:
case clang::BuiltinType::Float:
case clang::BuiltinType::Float16:
case clang::BuiltinType::Float128:
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
case clang::BuiltinType::BFloat16:
case clang::BuiltinType::Ibm128:
return lldb::eEncodingIEEE754;
case clang::BuiltinType::ObjCClass:
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCSel:
return lldb::eEncodingUint;
case clang::BuiltinType::NullPtr:
return lldb::eEncodingUint;
case clang::BuiltinType::Kind::ARCUnbridgedCast:
case clang::BuiltinType::Kind::BoundMember:
case clang::BuiltinType::Kind::BuiltinFn:
case clang::BuiltinType::Kind::Dependent:
case clang::BuiltinType::Kind::OCLClkEvent:
case clang::BuiltinType::Kind::OCLEvent:
case clang::BuiltinType::Kind::OCLImage1dRO:
case clang::BuiltinType::Kind::OCLImage1dWO:
case clang::BuiltinType::Kind::OCLImage1dRW:
case clang::BuiltinType::Kind::OCLImage1dArrayRO:
case clang::BuiltinType::Kind::OCLImage1dArrayWO:
case clang::BuiltinType::Kind::OCLImage1dArrayRW:
case clang::BuiltinType::Kind::OCLImage1dBufferRO:
case clang::BuiltinType::Kind::OCLImage1dBufferWO:
case clang::BuiltinType::Kind::OCLImage1dBufferRW:
case clang::BuiltinType::Kind::OCLImage2dRO:
case clang::BuiltinType::Kind::OCLImage2dWO:
case clang::BuiltinType::Kind::OCLImage2dRW:
case clang::BuiltinType::Kind::OCLImage2dArrayRO:
case clang::BuiltinType::Kind::OCLImage2dArrayWO:
case clang::BuiltinType::Kind::OCLImage2dArrayRW:
case clang::BuiltinType::Kind::OCLImage2dArrayDepthRO:
case clang::BuiltinType::Kind::OCLImage2dArrayDepthWO:
case clang::BuiltinType::Kind::OCLImage2dArrayDepthRW:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAARO:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAAWO:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAARW:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAADepthRO:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAADepthWO:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAADepthRW:
case clang::BuiltinType::Kind::OCLImage2dDepthRO:
case clang::BuiltinType::Kind::OCLImage2dDepthWO:
case clang::BuiltinType::Kind::OCLImage2dDepthRW:
case clang::BuiltinType::Kind::OCLImage2dMSAARO:
case clang::BuiltinType::Kind::OCLImage2dMSAAWO:
case clang::BuiltinType::Kind::OCLImage2dMSAARW:
case clang::BuiltinType::Kind::OCLImage2dMSAADepthRO:
case clang::BuiltinType::Kind::OCLImage2dMSAADepthWO:
case clang::BuiltinType::Kind::OCLImage2dMSAADepthRW:
case clang::BuiltinType::Kind::OCLImage3dRO:
case clang::BuiltinType::Kind::OCLImage3dWO:
case clang::BuiltinType::Kind::OCLImage3dRW:
case clang::BuiltinType::Kind::OCLQueue:
case clang::BuiltinType::Kind::OCLReserveID:
case clang::BuiltinType::Kind::OCLSampler:
case clang::BuiltinType::Kind::OMPArraySection:
case clang::BuiltinType::Kind::OMPArrayShaping:
case clang::BuiltinType::Kind::OMPIterator:
case clang::BuiltinType::Kind::Overload:
case clang::BuiltinType::Kind::PseudoObject:
case clang::BuiltinType::Kind::UnknownAny:
break;
case clang::BuiltinType::OCLIntelSubgroupAVCMcePayload:
case clang::BuiltinType::OCLIntelSubgroupAVCImePayload:
case clang::BuiltinType::OCLIntelSubgroupAVCRefPayload:
case clang::BuiltinType::OCLIntelSubgroupAVCSicPayload:
case clang::BuiltinType::OCLIntelSubgroupAVCMceResult:
case clang::BuiltinType::OCLIntelSubgroupAVCImeResult:
case clang::BuiltinType::OCLIntelSubgroupAVCRefResult:
case clang::BuiltinType::OCLIntelSubgroupAVCSicResult:
case clang::BuiltinType::OCLIntelSubgroupAVCImeResultSingleRefStreamout:
case clang::BuiltinType::OCLIntelSubgroupAVCImeResultDualRefStreamout:
case clang::BuiltinType::OCLIntelSubgroupAVCImeSingleRefStreamin:
case clang::BuiltinType::OCLIntelSubgroupAVCImeDualRefStreamin:
break;
// PowerPC -- Matrix Multiply Assist
case clang::BuiltinType::VectorPair:
case clang::BuiltinType::VectorQuad:
break;
// ARM -- Scalable Vector Extension
case clang::BuiltinType::SveBool:
case clang::BuiltinType::SveInt8:
case clang::BuiltinType::SveInt8x2:
case clang::BuiltinType::SveInt8x3:
case clang::BuiltinType::SveInt8x4:
case clang::BuiltinType::SveInt16:
case clang::BuiltinType::SveInt16x2:
case clang::BuiltinType::SveInt16x3:
case clang::BuiltinType::SveInt16x4:
case clang::BuiltinType::SveInt32:
case clang::BuiltinType::SveInt32x2:
case clang::BuiltinType::SveInt32x3:
case clang::BuiltinType::SveInt32x4:
case clang::BuiltinType::SveInt64:
case clang::BuiltinType::SveInt64x2:
case clang::BuiltinType::SveInt64x3:
case clang::BuiltinType::SveInt64x4:
case clang::BuiltinType::SveUint8:
case clang::BuiltinType::SveUint8x2:
case clang::BuiltinType::SveUint8x3:
case clang::BuiltinType::SveUint8x4:
case clang::BuiltinType::SveUint16:
case clang::BuiltinType::SveUint16x2:
case clang::BuiltinType::SveUint16x3:
case clang::BuiltinType::SveUint16x4:
case clang::BuiltinType::SveUint32:
case clang::BuiltinType::SveUint32x2:
case clang::BuiltinType::SveUint32x3:
case clang::BuiltinType::SveUint32x4:
case clang::BuiltinType::SveUint64:
case clang::BuiltinType::SveUint64x2:
case clang::BuiltinType::SveUint64x3:
case clang::BuiltinType::SveUint64x4:
case clang::BuiltinType::SveFloat16:
case clang::BuiltinType::SveBFloat16:
case clang::BuiltinType::SveBFloat16x2:
case clang::BuiltinType::SveBFloat16x3:
case clang::BuiltinType::SveBFloat16x4:
case clang::BuiltinType::SveFloat16x2:
case clang::BuiltinType::SveFloat16x3:
case clang::BuiltinType::SveFloat16x4:
case clang::BuiltinType::SveFloat32:
case clang::BuiltinType::SveFloat32x2:
case clang::BuiltinType::SveFloat32x3:
case clang::BuiltinType::SveFloat32x4:
case clang::BuiltinType::SveFloat64:
case clang::BuiltinType::SveFloat64x2:
case clang::BuiltinType::SveFloat64x3:
case clang::BuiltinType::SveFloat64x4:
break;
// RISC-V V builtin types.
case clang::BuiltinType::RvvInt8mf8:
case clang::BuiltinType::RvvInt8mf4:
case clang::BuiltinType::RvvInt8mf2:
case clang::BuiltinType::RvvInt8m1:
case clang::BuiltinType::RvvInt8m2:
case clang::BuiltinType::RvvInt8m4:
case clang::BuiltinType::RvvInt8m8:
case clang::BuiltinType::RvvUint8mf8:
case clang::BuiltinType::RvvUint8mf4:
case clang::BuiltinType::RvvUint8mf2:
case clang::BuiltinType::RvvUint8m1:
case clang::BuiltinType::RvvUint8m2:
case clang::BuiltinType::RvvUint8m4:
case clang::BuiltinType::RvvUint8m8:
case clang::BuiltinType::RvvInt16mf4:
case clang::BuiltinType::RvvInt16mf2:
case clang::BuiltinType::RvvInt16m1:
case clang::BuiltinType::RvvInt16m2:
case clang::BuiltinType::RvvInt16m4:
case clang::BuiltinType::RvvInt16m8:
case clang::BuiltinType::RvvUint16mf4:
case clang::BuiltinType::RvvUint16mf2:
case clang::BuiltinType::RvvUint16m1:
case clang::BuiltinType::RvvUint16m2:
case clang::BuiltinType::RvvUint16m4:
case clang::BuiltinType::RvvUint16m8:
case clang::BuiltinType::RvvInt32mf2:
case clang::BuiltinType::RvvInt32m1:
case clang::BuiltinType::RvvInt32m2:
case clang::BuiltinType::RvvInt32m4:
case clang::BuiltinType::RvvInt32m8:
case clang::BuiltinType::RvvUint32mf2:
case clang::BuiltinType::RvvUint32m1:
case clang::BuiltinType::RvvUint32m2:
case clang::BuiltinType::RvvUint32m4:
case clang::BuiltinType::RvvUint32m8:
case clang::BuiltinType::RvvInt64m1:
case clang::BuiltinType::RvvInt64m2:
case clang::BuiltinType::RvvInt64m4:
case clang::BuiltinType::RvvInt64m8:
case clang::BuiltinType::RvvUint64m1:
case clang::BuiltinType::RvvUint64m2:
case clang::BuiltinType::RvvUint64m4:
case clang::BuiltinType::RvvUint64m8:
case clang::BuiltinType::RvvFloat16mf4:
case clang::BuiltinType::RvvFloat16mf2:
case clang::BuiltinType::RvvFloat16m1:
case clang::BuiltinType::RvvFloat16m2:
case clang::BuiltinType::RvvFloat16m4:
case clang::BuiltinType::RvvFloat16m8:
case clang::BuiltinType::RvvFloat32mf2:
case clang::BuiltinType::RvvFloat32m1:
case clang::BuiltinType::RvvFloat32m2:
case clang::BuiltinType::RvvFloat32m4:
case clang::BuiltinType::RvvFloat32m8:
case clang::BuiltinType::RvvFloat64m1:
case clang::BuiltinType::RvvFloat64m2:
case clang::BuiltinType::RvvFloat64m4:
case clang::BuiltinType::RvvFloat64m8:
case clang::BuiltinType::RvvBool1:
case clang::BuiltinType::RvvBool2:
case clang::BuiltinType::RvvBool4:
case clang::BuiltinType::RvvBool8:
case clang::BuiltinType::RvvBool16:
case clang::BuiltinType::RvvBool32:
case clang::BuiltinType::RvvBool64:
break;
case clang::BuiltinType::IncompleteMatrixIdx:
break;
}
break;
// All pointer types are represented as unsigned integer encodings. We may
// need to add an eEncodingPointer if we ever need to know the difference.
case clang::Type::ObjCObjectPointer:
case clang::Type::BlockPointer:
case clang::Type::Pointer:
case clang::Type::LValueReference:
case clang::Type::RValueReference:
case clang::Type::MemberPointer:
return lldb::eEncodingUint;
case clang::Type::Complex: {
lldb::Encoding encoding = lldb::eEncodingIEEE754;
if (qual_type->isComplexType())
encoding = lldb::eEncodingIEEE754;
else {
const clang::ComplexType *complex_type =
qual_type->getAsComplexIntegerType();
if (complex_type)
encoding = GetType(complex_type->getElementType()).GetEncoding(count);
else
encoding = lldb::eEncodingSint;
}
count = 2;
return encoding;
}
case clang::Type::ObjCInterface:
break;
case clang::Type::Record:
break;
case clang::Type::Enum:
return lldb::eEncodingSint;
case clang::Type::DependentSizedArray:
case clang::Type::DependentSizedExtVector:
case clang::Type::UnresolvedUsing:
case clang::Type::Attributed:
case clang::Type::BTFTagAttributed:
case clang::Type::TemplateTypeParm:
case clang::Type::SubstTemplateTypeParm:
case clang::Type::SubstTemplateTypeParmPack:
case clang::Type::InjectedClassName:
case clang::Type::DependentName:
case clang::Type::DependentTemplateSpecialization:
case clang::Type::PackExpansion:
case clang::Type::ObjCObject:
case clang::Type::TemplateSpecialization:
case clang::Type::DeducedTemplateSpecialization:
case clang::Type::Adjusted:
case clang::Type::Pipe:
break;
// pointer type decayed from an array or function type.
case clang::Type::Decayed:
break;
case clang::Type::ObjCTypeParam:
break;
case clang::Type::DependentAddressSpace:
break;
case clang::Type::MacroQualified:
break;
case clang::Type::ConstantMatrix:
case clang::Type::DependentSizedMatrix:
break;
}
count = 0;
return lldb::eEncodingInvalid;
}
lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) {
if (!type)
return lldb::eFormatDefault;
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
switch (qual_type->getTypeClass()) {
case clang::Type::Atomic:
case clang::Type::Auto:
case clang::Type::Decltype:
case clang::Type::Elaborated:
case clang::Type::Paren:
case clang::Type::Typedef:
case clang::Type::TypeOf:
case clang::Type::TypeOfExpr:
case clang::Type::Using:
llvm_unreachable("Handled in RemoveWrappingTypes!");
case clang::Type::UnaryTransform:
break;
case clang::Type::FunctionNoProto:
case clang::Type::FunctionProto:
break;
case clang::Type::IncompleteArray:
case clang::Type::VariableArray:
break;
case clang::Type::ConstantArray:
return lldb::eFormatVoid; // no value
case clang::Type::DependentVector:
case clang::Type::ExtVector:
case clang::Type::Vector:
break;
case clang::Type::BitInt:
case clang::Type::DependentBitInt:
return qual_type->isUnsignedIntegerType() ? lldb::eFormatUnsigned
: lldb::eFormatDecimal;
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
case clang::BuiltinType::UnknownAny:
case clang::BuiltinType::Void:
case clang::BuiltinType::BoundMember:
break;
case clang::BuiltinType::Bool:
return lldb::eFormatBoolean;
case clang::BuiltinType::Char_S:
case clang::BuiltinType::SChar:
case clang::BuiltinType::WChar_S:
case clang::BuiltinType::Char_U:
case clang::BuiltinType::UChar:
case clang::BuiltinType::WChar_U:
return lldb::eFormatChar;
case clang::BuiltinType::Char8:
return lldb::eFormatUnicode8;
case clang::BuiltinType::Char16:
return lldb::eFormatUnicode16;
case clang::BuiltinType::Char32:
return lldb::eFormatUnicode32;
case clang::BuiltinType::UShort:
return lldb::eFormatUnsigned;
case clang::BuiltinType::Short:
return lldb::eFormatDecimal;
case clang::BuiltinType::UInt:
return lldb::eFormatUnsigned;
case clang::BuiltinType::Int:
return lldb::eFormatDecimal;
case clang::BuiltinType::ULong:
return lldb::eFormatUnsigned;
case clang::BuiltinType::Long:
return lldb::eFormatDecimal;
case clang::BuiltinType::ULongLong:
return lldb::eFormatUnsigned;
case clang::BuiltinType::LongLong:
return lldb::eFormatDecimal;
case clang::BuiltinType::UInt128:
return lldb::eFormatUnsigned;
case clang::BuiltinType::Int128:
return lldb::eFormatDecimal;
case clang::BuiltinType::Half:
case clang::BuiltinType::Float:
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
return lldb::eFormatFloat;
default:
return lldb::eFormatHex;
}
break;
case clang::Type::ObjCObjectPointer:
return lldb::eFormatHex;
case clang::Type::BlockPointer:
return lldb::eFormatHex;
case clang::Type::Pointer:
return lldb::eFormatHex;
case clang::Type::LValueReference:
case clang::Type::RValueReference:
return lldb::eFormatHex;
case clang::Type::MemberPointer:
break;
case clang::Type::Complex: {
if (qual_type->isComplexType())
return lldb::eFormatComplex;
else
return lldb::eFormatComplexInteger;
}
case clang::Type::ObjCInterface:
break;
case clang::Type::Record:
break;
case clang::Type::Enum:
return lldb::eFormatEnum;
case clang::Type::DependentSizedArray:
case clang::Type::DependentSizedExtVector:
case clang::Type::UnresolvedUsing:
case clang::Type::Attributed:
case clang::Type::BTFTagAttributed:
case clang::Type::TemplateTypeParm:
case clang::Type::SubstTemplateTypeParm:
case clang::Type::SubstTemplateTypeParmPack:
case clang::Type::InjectedClassName:
case clang::Type::DependentName:
case clang::Type::DependentTemplateSpecialization:
case clang::Type::PackExpansion:
case clang::Type::ObjCObject:
case clang::Type::TemplateSpecialization:
case clang::Type::DeducedTemplateSpecialization:
case clang::Type::Adjusted:
case clang::Type::Pipe:
break;
// pointer type decayed from an array or function type.
case clang::Type::Decayed:
break;
case clang::Type::ObjCTypeParam:
break;
case clang::Type::DependentAddressSpace:
break;
case clang::Type::MacroQualified:
break;
// Matrix types we're not sure how to display yet.
case clang::Type::ConstantMatrix:
case clang::Type::DependentSizedMatrix:
break;
}
// We don't know how to display this type...
return lldb::eFormatBytes;
}
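// Return true if the given interface (or, when check_superclass is set, any
// of its superclasses) declares at least one instance variable.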
static bool ObjCDeclHasIVars(clang::ObjCInterfaceDecl *class_interface_decl,
bool check_superclass) {
while (class_interface_decl) {
if (class_interface_decl->ivar_size() > 0)
return true;
if (check_superclass)
class_interface_decl = class_interface_decl->getSuperClass();
else
break;
}
return false;
}
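// For incomplete array types, ask the symbol file for dynamic array bounds
// using the user ID stashed in the type's metadata (if any).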
static Optional<SymbolFile::ArrayInfo>
GetDynamicArrayInfo(TypeSystemClang &ast, SymbolFile *sym_file,
clang::QualType qual_type,
const ExecutionContext *exe_ctx) {
if (qual_type->isIncompleteArrayType())
if (auto *metadata = ast.GetMetadata(qual_type.getTypePtr()))
return sym_file->GetDynamicArrayInfoForUID(metadata->GetUserID(),
exe_ctx);
return llvm::None;
}
uint32_t TypeSystemClang::GetNumChildren(lldb::opaque_compiler_type_t type,
bool omit_empty_base_classes,
const ExecutionContext *exe_ctx) {
if (!type)
return 0;
uint32_t num_children = 0;
clang::QualType qual_type(RemoveWrappingTypes(GetQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
case clang::BuiltinType::ObjCId: // child is Class
case clang::BuiltinType::ObjCClass: // child is Class
num_children = 1;
break;
default:
break;
}
break;
case clang::Type::Complex:
return 0;
case clang::Type::Record:
if (GetCompleteQualType(&getASTContext(), qual_type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
if (omit_empty_base_classes) {
// Check each base class to see if it or any of its base classes
// contain any fields. This can help limit the noise in variable
// views by not having to show base classes that contain no members.
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
const clang::CXXRecordDecl *base_class_decl =
llvm::cast<clang::CXXRecordDecl>(
base_class->getType()
->getAs<clang::RecordType>()
->getDecl());
// Skip empty base classes
if (!TypeSystemClang::RecordHasFields(base_class_decl))
continue;
num_children++;
}
} else {
// Include all base classes
num_children += cxx_record_decl->getNumBases();
}
}
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field)
++num_children;
}
break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteQualType(&getASTContext(), qual_type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
if (superclass_interface_decl) {
if (omit_empty_base_classes) {
if (ObjCDeclHasIVars(superclass_interface_decl, true))
++num_children;
} else
++num_children;
}
num_children += class_interface_decl->ivar_size();
}
}
}
break;
case clang::Type::LValueReference:
case clang::Type::RValueReference:
case clang::Type::ObjCObjectPointer: {
CompilerType pointee_clang_type(GetPointeeType(type));
uint32_t num_pointee_children = 0;
if (pointee_clang_type.IsAggregateType())
num_pointee_children =
pointee_clang_type.GetNumChildren(omit_empty_base_classes, exe_ctx);
// If this type points to a simple type, then it has 1 child
if (num_pointee_children == 0)
num_children = 1;
else
num_children = num_pointee_children;
} break;
case clang::Type::Vector:
case clang::Type::ExtVector:
num_children =
llvm::cast<clang::VectorType>(qual_type.getTypePtr())->getNumElements();
break;
case clang::Type::ConstantArray:
num_children = llvm::cast<clang::ConstantArrayType>(qual_type.getTypePtr())
->getSize()
.getLimitedValue();
break;
case clang::Type::IncompleteArray:
if (auto array_info =
GetDynamicArrayInfo(*this, GetSymbolFile(), qual_type, exe_ctx))
// Only 1-dimensional arrays are supported.
num_children = array_info->element_orders.size()
? array_info->element_orders.back()
: 0;
break;
case clang::Type::Pointer: {
const clang::PointerType *pointer_type =
llvm::cast<clang::PointerType>(qual_type.getTypePtr());
clang::QualType pointee_type(pointer_type->getPointeeType());
CompilerType pointee_clang_type(GetType(pointee_type));
uint32_t num_pointee_children = 0;
if (pointee_clang_type.IsAggregateType())
num_pointee_children =
pointee_clang_type.GetNumChildren(omit_empty_base_classes, exe_ctx);
if (num_pointee_children == 0) {
// We have a pointer to a pointee type that claims it has no children. We
// will want to look at the pointee type itself to figure out how many
// children to report (see GetNumPointeeChildren).
num_children = GetNumPointeeChildren(pointee_type);
} else
num_children = num_pointee_children;
} break;
default:
break;
}
return num_children;
}
CompilerType TypeSystemClang::GetBuiltinTypeByName(ConstString name) {
return GetBasicType(GetBasicTypeEnumeration(name));
}
lldb::BasicType
TypeSystemClang::GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
if (type_class == clang::Type::Builtin) {
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
case clang::BuiltinType::Void:
return eBasicTypeVoid;
case clang::BuiltinType::Bool:
return eBasicTypeBool;
case clang::BuiltinType::Char_S:
return eBasicTypeSignedChar;
case clang::BuiltinType::Char_U:
return eBasicTypeUnsignedChar;
case clang::BuiltinType::Char8:
return eBasicTypeChar8;
case clang::BuiltinType::Char16:
return eBasicTypeChar16;
case clang::BuiltinType::Char32:
return eBasicTypeChar32;
case clang::BuiltinType::UChar:
return eBasicTypeUnsignedChar;
case clang::BuiltinType::SChar:
return eBasicTypeSignedChar;
case clang::BuiltinType::WChar_S:
return eBasicTypeSignedWChar;
case clang::BuiltinType::WChar_U:
return eBasicTypeUnsignedWChar;
case clang::BuiltinType::Short:
return eBasicTypeShort;
case clang::BuiltinType::UShort:
return eBasicTypeUnsignedShort;
case clang::BuiltinType::Int:
return eBasicTypeInt;
case clang::BuiltinType::UInt:
return eBasicTypeUnsignedInt;
case clang::BuiltinType::Long:
return eBasicTypeLong;
case clang::BuiltinType::ULong:
return eBasicTypeUnsignedLong;
case clang::BuiltinType::LongLong:
return eBasicTypeLongLong;
case clang::BuiltinType::ULongLong:
return eBasicTypeUnsignedLongLong;
case clang::BuiltinType::Int128:
return eBasicTypeInt128;
case clang::BuiltinType::UInt128:
return eBasicTypeUnsignedInt128;
case clang::BuiltinType::Half:
return eBasicTypeHalf;
case clang::BuiltinType::Float:
return eBasicTypeFloat;
case clang::BuiltinType::Double:
return eBasicTypeDouble;
case clang::BuiltinType::LongDouble:
return eBasicTypeLongDouble;
case clang::BuiltinType::NullPtr:
return eBasicTypeNullPtr;
case clang::BuiltinType::ObjCId:
return eBasicTypeObjCID;
case clang::BuiltinType::ObjCClass:
return eBasicTypeObjCClass;
case clang::BuiltinType::ObjCSel:
return eBasicTypeObjCSel;
default:
return eBasicTypeOther;
}
}
}
return eBasicTypeInvalid;
}
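// Example usage (a sketch; enum_type and target_name are hypothetical): visit
// an enum's enumerators and stop as soon as a match is found. Returning false
// from the callback terminates the iteration early.
//   ts.ForEachEnumerator(enum_type, [&](const CompilerType &int_type,
//                                       ConstString name,
//                                       const llvm::APSInt &value) {
//     return name != target_name;
//   });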
void TypeSystemClang::ForEachEnumerator(
lldb::opaque_compiler_type_t type,
std::function<bool(const CompilerType &integer_type,
ConstString name,
const llvm::APSInt &value)> const &callback) {
const clang::EnumType *enum_type =
llvm::dyn_cast<clang::EnumType>(GetCanonicalQualType(type));
if (enum_type) {
const clang::EnumDecl *enum_decl = enum_type->getDecl();
if (enum_decl) {
CompilerType integer_type = GetType(enum_decl->getIntegerType());
clang::EnumDecl::enumerator_iterator enum_pos, enum_end_pos;
for (enum_pos = enum_decl->enumerator_begin(),
enum_end_pos = enum_decl->enumerator_end();
enum_pos != enum_end_pos; ++enum_pos) {
ConstString name(enum_pos->getNameAsString().c_str());
if (!callback(integer_type, name, enum_pos->getInitVal()))
break;
}
}
}
}
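// A minimal usage sketch (not part of the original source): counting an
// enum's enumerators via the ForEachEnumerator callback above. The helper
// name and its parameters are hypothetical; returning true from the callback
// continues the iteration, returning false stops it early.
static size_t ExampleCountEnumerators(TypeSystemClang &ts,
                                      lldb::opaque_compiler_type_t enum_type) {
  size_t count = 0;
  ts.ForEachEnumerator(enum_type,
                       [&count](const CompilerType & /*integer_type*/,
                                ConstString /*name*/,
                                const llvm::APSInt & /*value*/) -> bool {
                         ++count;
                         return true; // keep iterating over enumerators
                       });
  return count;
}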
#pragma mark Aggregate Types
uint32_t TypeSystemClang::GetNumFields(lldb::opaque_compiler_type_t type) {
if (!type)
return 0;
uint32_t count = 0;
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::dyn_cast<clang::RecordType>(qual_type.getTypePtr());
if (record_type) {
clang::RecordDecl *record_decl = record_type->getDecl();
if (record_decl) {
uint32_t field_idx = 0;
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field)
++field_idx;
count = field_idx;
}
}
}
break;
case clang::Type::ObjCObjectPointer: {
const clang::ObjCObjectPointerType *objc_class_type =
qual_type->castAs<clang::ObjCObjectPointerType>();
const clang::ObjCInterfaceType *objc_interface_type =
objc_class_type->getInterfaceType();
if (objc_interface_type &&
GetCompleteType(static_cast<lldb::opaque_compiler_type_t>(
const_cast<clang::ObjCInterfaceType *>(objc_interface_type)))) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getDecl();
if (class_interface_decl) {
count = class_interface_decl->ivar_size();
}
}
break;
}
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl)
count = class_interface_decl->ivar_size();
}
}
break;
default:
break;
}
return count;
}
static lldb::opaque_compiler_type_t
GetObjCFieldAtIndex(clang::ASTContext *ast,
clang::ObjCInterfaceDecl *class_interface_decl, size_t idx,
std::string &name, uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr, bool *is_bitfield_ptr) {
if (class_interface_decl) {
if (idx < (class_interface_decl->ivar_size())) {
clang::ObjCInterfaceDecl::ivar_iterator ivar_pos,
ivar_end = class_interface_decl->ivar_end();
uint32_t ivar_idx = 0;
for (ivar_pos = class_interface_decl->ivar_begin(); ivar_pos != ivar_end;
++ivar_pos, ++ivar_idx) {
if (ivar_idx == idx) {
const clang::ObjCIvarDecl *ivar_decl = *ivar_pos;
clang::QualType ivar_qual_type(ivar_decl->getType());
name.assign(ivar_decl->getNameAsString());
if (bit_offset_ptr) {
const clang::ASTRecordLayout &interface_layout =
ast->getASTObjCInterfaceLayout(class_interface_decl);
*bit_offset_ptr = interface_layout.getFieldOffset(ivar_idx);
}
const bool is_bitfield = ivar_pos->isBitField();
if (bitfield_bit_size_ptr) {
*bitfield_bit_size_ptr = 0;
if (is_bitfield && ast) {
clang::Expr *bitfield_bit_size_expr = ivar_pos->getBitWidth();
clang::Expr::EvalResult result;
if (bitfield_bit_size_expr &&
bitfield_bit_size_expr->EvaluateAsInt(result, *ast)) {
llvm::APSInt bitfield_apsint = result.Val.getInt();
*bitfield_bit_size_ptr = bitfield_apsint.getLimitedValue();
}
}
}
if (is_bitfield_ptr)
*is_bitfield_ptr = is_bitfield;
return ivar_qual_type.getAsOpaquePtr();
}
}
}
}
return nullptr;
}
CompilerType TypeSystemClang::GetFieldAtIndex(lldb::opaque_compiler_type_t type,
size_t idx, std::string &name,
uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) {
if (!type)
return CompilerType();
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
uint32_t field_idx = 0;
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field, ++field_idx) {
if (idx == field_idx) {
// Copy the field name out for the caller.
name.assign(field->getNameAsString());
// If requested, report the field's bit offset from the record layout.
if (bit_offset_ptr) {
const clang::ASTRecordLayout &record_layout =
getASTContext().getASTRecordLayout(record_decl);
*bit_offset_ptr = record_layout.getFieldOffset(field_idx);
}
const bool is_bitfield = field->isBitField();
if (bitfield_bit_size_ptr) {
*bitfield_bit_size_ptr = 0;
if (is_bitfield) {
clang::Expr *bitfield_bit_size_expr = field->getBitWidth();
clang::Expr::EvalResult result;
if (bitfield_bit_size_expr &&
bitfield_bit_size_expr->EvaluateAsInt(result,
getASTContext())) {
llvm::APSInt bitfield_apsint = result.Val.getInt();
*bitfield_bit_size_ptr = bitfield_apsint.getLimitedValue();
}
}
}
if (is_bitfield_ptr)
*is_bitfield_ptr = is_bitfield;
return GetType(field->getType());
}
}
}
break;
case clang::Type::ObjCObjectPointer: {
const clang::ObjCObjectPointerType *objc_class_type =
qual_type->castAs<clang::ObjCObjectPointerType>();
const clang::ObjCInterfaceType *objc_interface_type =
objc_class_type->getInterfaceType();
if (objc_interface_type &&
GetCompleteType(static_cast<lldb::opaque_compiler_type_t>(
const_cast<clang::ObjCInterfaceType *>(objc_interface_type)))) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getDecl();
if (class_interface_decl) {
return CompilerType(
this, GetObjCFieldAtIndex(&getASTContext(), class_interface_decl,
idx, name, bit_offset_ptr,
bitfield_bit_size_ptr, is_bitfield_ptr));
}
}
break;
}
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
return CompilerType(
this, GetObjCFieldAtIndex(&getASTContext(), class_interface_decl,
idx, name, bit_offset_ptr,
bitfield_bit_size_ptr, is_bitfield_ptr));
}
}
break;
default:
break;
}
return CompilerType();
}
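// A minimal usage sketch (not part of the original source): enumerating the
// fields of a record with GetNumFields/GetFieldAtIndex. The helper name is
// hypothetical; all pointer out-parameters are optional and may be nullptr.
static void ExampleVisitFields(TypeSystemClang &ts,
                               lldb::opaque_compiler_type_t record_type) {
  const uint32_t num_fields = ts.GetNumFields(record_type);
  for (uint32_t i = 0; i < num_fields; ++i) {
    std::string field_name;
    uint64_t bit_offset = 0;
    uint32_t bitfield_bit_size = 0;
    bool is_bitfield = false;
    CompilerType field_type = ts.GetFieldAtIndex(
        record_type, i, field_name, &bit_offset, &bitfield_bit_size,
        &is_bitfield);
    if (!field_type.IsValid())
      continue;
    // field_name, bit_offset and the bitfield information now describe
    // field 'i'; a real caller would record or display them here.
    (void)is_bitfield;
  }
}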
uint32_t
TypeSystemClang::GetNumDirectBaseClasses(lldb::opaque_compiler_type_t type) {
uint32_t count = 0;
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl)
count = cxx_record_decl->getNumBases();
}
break;
case clang::Type::ObjCObjectPointer:
count = GetPointeeType(type).GetNumDirectBaseClasses();
break;
case clang::Type::ObjCObject:
if (GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
qual_type->getAsObjCQualifiedInterfaceType();
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl && class_interface_decl->getSuperClass())
count = 1;
}
}
break;
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
const clang::ObjCInterfaceType *objc_interface_type =
qual_type->getAs<clang::ObjCInterfaceType>();
if (objc_interface_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getInterface();
if (class_interface_decl && class_interface_decl->getSuperClass())
count = 1;
}
}
break;
default:
break;
}
return count;
}
uint32_t
TypeSystemClang::GetNumVirtualBaseClasses(lldb::opaque_compiler_type_t type) {
uint32_t count = 0;
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl)
count = cxx_record_decl->getNumVBases();
}
break;
default:
break;
}
return count;
}
CompilerType TypeSystemClang::GetDirectBaseClassAtIndex(
lldb::opaque_compiler_type_t type, size_t idx, uint32_t *bit_offset_ptr) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
uint32_t curr_idx = 0;
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class, ++curr_idx) {
if (curr_idx == idx) {
if (bit_offset_ptr) {
const clang::ASTRecordLayout &record_layout =
getASTContext().getASTRecordLayout(cxx_record_decl);
const clang::CXXRecordDecl *base_class_decl =
llvm::cast<clang::CXXRecordDecl>(
base_class->getType()
->castAs<clang::RecordType>()
->getDecl());
if (base_class->isVirtual())
*bit_offset_ptr =
record_layout.getVBaseClassOffset(base_class_decl)
.getQuantity() *
8;
else
*bit_offset_ptr =
record_layout.getBaseClassOffset(base_class_decl)
.getQuantity() *
8;
}
return GetType(base_class->getType());
}
}
}
}
break;
case clang::Type::ObjCObjectPointer:
return GetPointeeType(type).GetDirectBaseClassAtIndex(idx, bit_offset_ptr);
case clang::Type::ObjCObject:
if (idx == 0 && GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
qual_type->getAsObjCQualifiedInterfaceType();
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
if (superclass_interface_decl) {
if (bit_offset_ptr)
*bit_offset_ptr = 0;
return GetType(getASTContext().getObjCInterfaceType(
superclass_interface_decl));
}
}
}
}
break;
case clang::Type::ObjCInterface:
if (idx == 0 && GetCompleteType(type)) {
const clang::ObjCObjectType *objc_interface_type =
qual_type->getAs<clang::ObjCInterfaceType>();
if (objc_interface_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getInterface();
if (class_interface_decl) {
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
if (superclass_interface_decl) {
if (bit_offset_ptr)
*bit_offset_ptr = 0;
return GetType(getASTContext().getObjCInterfaceType(
superclass_interface_decl));
}
}
}
}
break;
default:
break;
}
return CompilerType();
}
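// A minimal usage sketch (not part of the original source): walking the
// direct base classes of a record and their offsets. The helper name is
// hypothetical; bit_offset_ptr may be nullptr when the offset is not needed.
static void ExampleVisitDirectBases(TypeSystemClang &ts,
                                    lldb::opaque_compiler_type_t record_type) {
  const uint32_t num_bases = ts.GetNumDirectBaseClasses(record_type);
  for (uint32_t i = 0; i < num_bases; ++i) {
    uint32_t bit_offset = 0;
    CompilerType base_type =
        ts.GetDirectBaseClassAtIndex(record_type, i, &bit_offset);
    if (base_type.IsValid()) {
      // The base subobject starts bit_offset / 8 bytes into the derived
      // object (virtual bases report their offset from the record layout).
      (void)bit_offset;
    }
  }
}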
CompilerType TypeSystemClang::GetVirtualBaseClassAtIndex(
lldb::opaque_compiler_type_t type, size_t idx, uint32_t *bit_offset_ptr) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
uint32_t curr_idx = 0;
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->vbases_begin(),
base_class_end = cxx_record_decl->vbases_end();
base_class != base_class_end; ++base_class, ++curr_idx) {
if (curr_idx == idx) {
if (bit_offset_ptr) {
const clang::ASTRecordLayout &record_layout =
getASTContext().getASTRecordLayout(cxx_record_decl);
const clang::CXXRecordDecl *base_class_decl =
llvm::cast<clang::CXXRecordDecl>(
base_class->getType()
->castAs<clang::RecordType>()
->getDecl());
*bit_offset_ptr =
record_layout.getVBaseClassOffset(base_class_decl)
.getQuantity() *
8;
}
return GetType(base_class->getType());
}
}
}
}
break;
default:
break;
}
return CompilerType();
}
// If a pointer to a pointee type (the clang_type arg) says that it has no
// children, then we either need to trust it, or override it and return a
// different result. For example, an "int *" has one child that is an integer,
// but a function pointer doesn't have any children. Likewise if a Record type
// claims it has no children, then there really is nothing to show.
uint32_t TypeSystemClang::GetNumPointeeChildren(clang::QualType type) {
if (type.isNull())
return 0;
clang::QualType qual_type = RemoveWrappingTypes(type.getCanonicalType());
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
case clang::BuiltinType::UnknownAny:
case clang::BuiltinType::Void:
case clang::BuiltinType::NullPtr:
case clang::BuiltinType::OCLEvent:
case clang::BuiltinType::OCLImage1dRO:
case clang::BuiltinType::OCLImage1dWO:
case clang::BuiltinType::OCLImage1dRW:
case clang::BuiltinType::OCLImage1dArrayRO:
case clang::BuiltinType::OCLImage1dArrayWO:
case clang::BuiltinType::OCLImage1dArrayRW:
case clang::BuiltinType::OCLImage1dBufferRO:
case clang::BuiltinType::OCLImage1dBufferWO:
case clang::BuiltinType::OCLImage1dBufferRW:
case clang::BuiltinType::OCLImage2dRO:
case clang::BuiltinType::OCLImage2dWO:
case clang::BuiltinType::OCLImage2dRW:
case clang::BuiltinType::OCLImage2dArrayRO:
case clang::BuiltinType::OCLImage2dArrayWO:
case clang::BuiltinType::OCLImage2dArrayRW:
case clang::BuiltinType::OCLImage3dRO:
case clang::BuiltinType::OCLImage3dWO:
case clang::BuiltinType::OCLImage3dRW:
case clang::BuiltinType::OCLSampler:
return 0;
case clang::BuiltinType::Bool:
case clang::BuiltinType::Char_U:
case clang::BuiltinType::UChar:
case clang::BuiltinType::WChar_U:
case clang::BuiltinType::Char16:
case clang::BuiltinType::Char32:
case clang::BuiltinType::UShort:
case clang::BuiltinType::UInt:
case clang::BuiltinType::ULong:
case clang::BuiltinType::ULongLong:
case clang::BuiltinType::UInt128:
case clang::BuiltinType::Char_S:
case clang::BuiltinType::SChar:
case clang::BuiltinType::WChar_S:
case clang::BuiltinType::Short:
case clang::BuiltinType::Int:
case clang::BuiltinType::Long:
case clang::BuiltinType::LongLong:
case clang::BuiltinType::Int128:
case clang::BuiltinType::Float:
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
case clang::BuiltinType::Dependent:
case clang::BuiltinType::Overload:
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
case clang::BuiltinType::ObjCSel:
case clang::BuiltinType::BoundMember:
case clang::BuiltinType::Half:
case clang::BuiltinType::ARCUnbridgedCast:
case clang::BuiltinType::PseudoObject:
case clang::BuiltinType::BuiltinFn:
case clang::BuiltinType::OMPArraySection:
return 1;
default:
return 0;
}
break;
case clang::Type::Complex:
return 1;
case clang::Type::Pointer:
return 1;
case clang::Type::BlockPointer:
return 0; // Block pointers without debug info have no children.
case clang::Type::LValueReference:
return 1;
case clang::Type::RValueReference:
return 1;
case clang::Type::MemberPointer:
return 0;
case clang::Type::ConstantArray:
return 0;
case clang::Type::IncompleteArray:
return 0;
case clang::Type::VariableArray:
return 0;
case clang::Type::DependentSizedArray:
return 0;
case clang::Type::DependentSizedExtVector:
return 0;
case clang::Type::Vector:
return 0;
case clang::Type::ExtVector:
return 0;
case clang::Type::FunctionProto:
return 0; // Function pointers have no children.
case clang::Type::FunctionNoProto:
return 0; // Function pointers have no children.
case clang::Type::UnresolvedUsing:
return 0;
case clang::Type::Record:
return 0;
case clang::Type::Enum:
return 1;
case clang::Type::TemplateTypeParm:
return 1;
case clang::Type::SubstTemplateTypeParm:
return 1;
case clang::Type::TemplateSpecialization:
return 1;
case clang::Type::InjectedClassName:
return 0;
case clang::Type::DependentName:
return 1;
case clang::Type::DependentTemplateSpecialization:
return 1;
case clang::Type::ObjCObject:
return 0;
case clang::Type::ObjCInterface:
return 0;
case clang::Type::ObjCObjectPointer:
return 1;
default:
break;
}
return 0;
}
CompilerType TypeSystemClang::GetChildCompilerTypeAtIndex(
lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, size_t idx,
bool transparent_pointers, bool omit_empty_base_classes,
bool ignore_array_bounds, std::string &child_name,
uint32_t &child_byte_size, int32_t &child_byte_offset,
uint32_t &child_bitfield_bit_size, uint32_t &child_bitfield_bit_offset,
bool &child_is_base_class, bool &child_is_deref_of_parent,
ValueObject *valobj, uint64_t &language_flags) {
if (!type)
return CompilerType();
auto get_exe_scope = [&exe_ctx]() {
return exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr;
};
clang::QualType parent_qual_type(
RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass parent_type_class =
parent_qual_type->getTypeClass();
child_bitfield_bit_size = 0;
child_bitfield_bit_offset = 0;
child_is_base_class = false;
language_flags = 0;
const bool idx_is_valid =
idx < GetNumChildren(type, omit_empty_base_classes, exe_ctx);
int32_t bit_offset;
switch (parent_type_class) {
case clang::Type::Builtin:
if (idx_is_valid) {
switch (llvm::cast<clang::BuiltinType>(parent_qual_type)->getKind()) {
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
child_name = "isa";
child_byte_size =
getASTContext().getTypeSize(getASTContext().ObjCBuiltinClassTy) /
CHAR_BIT;
return GetType(getASTContext().ObjCBuiltinClassTy);
default:
break;
}
}
break;
case clang::Type::Record:
if (idx_is_valid && GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(parent_qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
const clang::ASTRecordLayout &record_layout =
getASTContext().getASTRecordLayout(record_decl);
uint32_t child_idx = 0;
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
// We might have base classes to report first
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
const clang::CXXRecordDecl *base_class_decl = nullptr;
// Skip empty base classes
if (omit_empty_base_classes) {
base_class_decl = llvm::cast<clang::CXXRecordDecl>(
base_class->getType()->getAs<clang::RecordType>()->getDecl());
if (!TypeSystemClang::RecordHasFields(base_class_decl))
continue;
}
if (idx == child_idx) {
if (base_class_decl == nullptr)
base_class_decl = llvm::cast<clang::CXXRecordDecl>(
base_class->getType()->getAs<clang::RecordType>()->getDecl());
if (base_class->isVirtual()) {
bool handled = false;
if (valobj) {
clang::VTableContextBase *vtable_ctx =
getASTContext().getVTableContext();
if (vtable_ctx)
handled = GetVBaseBitOffset(*vtable_ctx, *valobj,
record_layout, cxx_record_decl,
base_class_decl, bit_offset);
}
if (!handled)
bit_offset = record_layout.getVBaseClassOffset(base_class_decl)
.getQuantity() *
8;
} else
bit_offset = record_layout.getBaseClassOffset(base_class_decl)
.getQuantity() *
8;
// Base classes should be a multiple of 8 bits in size
child_byte_offset = bit_offset / 8;
CompilerType base_class_clang_type = GetType(base_class->getType());
child_name = base_class_clang_type.GetTypeName().AsCString("");
Optional<uint64_t> size =
base_class_clang_type.GetBitSize(get_exe_scope());
if (!size)
return {};
uint64_t base_class_clang_type_bit_size = *size;
// Base classes bit sizes should be a multiple of 8 bits in size
assert(base_class_clang_type_bit_size % 8 == 0);
child_byte_size = base_class_clang_type_bit_size / 8;
child_is_base_class = true;
return base_class_clang_type;
}
// We don't increment the child index in the for loop since we might
// be skipping empty base classes
++child_idx;
}
}
// Make sure index is in range...
uint32_t field_idx = 0;
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field, ++field_idx, ++child_idx) {
if (idx == child_idx) {
// Copy the field name out for the caller.
child_name.assign(field->getNameAsString());
// Compute the field's byte size from its type.
CompilerType field_clang_type = GetType(field->getType());
assert(field_idx < record_layout.getFieldCount());
Optional<uint64_t> size =
field_clang_type.GetByteSize(get_exe_scope());
if (!size)
return {};
child_byte_size = *size;
const uint32_t child_bit_size = child_byte_size * 8;
// Figure out the field offset within the current struct/union/class
// type
bit_offset = record_layout.getFieldOffset(field_idx);
if (FieldIsBitfield(*field, child_bitfield_bit_size)) {
child_bitfield_bit_offset = bit_offset % child_bit_size;
const uint32_t child_bit_offset =
bit_offset - child_bitfield_bit_offset;
child_byte_offset = child_bit_offset / 8;
} else {
child_byte_offset = bit_offset / 8;
}
return field_clang_type;
}
}
}
break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (idx_is_valid && GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(parent_qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
uint32_t child_idx = 0;
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
const clang::ASTRecordLayout &interface_layout =
getASTContext().getASTObjCInterfaceLayout(class_interface_decl);
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
if (superclass_interface_decl) {
if (omit_empty_base_classes) {
CompilerType base_class_clang_type =
GetType(getASTContext().getObjCInterfaceType(
superclass_interface_decl));
if (base_class_clang_type.GetNumChildren(omit_empty_base_classes,
exe_ctx) > 0) {
if (idx == 0) {
clang::QualType ivar_qual_type(
getASTContext().getObjCInterfaceType(
superclass_interface_decl));
child_name.assign(
superclass_interface_decl->getNameAsString());
clang::TypeInfo ivar_type_info =
getASTContext().getTypeInfo(ivar_qual_type.getTypePtr());
child_byte_size = ivar_type_info.Width / 8;
child_byte_offset = 0;
child_is_base_class = true;
return GetType(ivar_qual_type);
}
++child_idx;
}
} else
++child_idx;
}
const uint32_t superclass_idx = child_idx;
if (idx < (child_idx + class_interface_decl->ivar_size())) {
clang::ObjCInterfaceDecl::ivar_iterator ivar_pos,
ivar_end = class_interface_decl->ivar_end();
for (ivar_pos = class_interface_decl->ivar_begin();
ivar_pos != ivar_end; ++ivar_pos) {
if (child_idx == idx) {
clang::ObjCIvarDecl *ivar_decl = *ivar_pos;
clang::QualType ivar_qual_type(ivar_decl->getType());
child_name.assign(ivar_decl->getNameAsString());
clang::TypeInfo ivar_type_info =
getASTContext().getTypeInfo(ivar_qual_type.getTypePtr());
child_byte_size = ivar_type_info.Width / 8;
// Figure out the field offset within the current
// struct/union/class type For ObjC objects, we can't trust the
// bit offset we get from the Clang AST, since that doesn't
// account for the space taken up by unbacked properties, or
// from the changing size of base classes that are newer than
// this class. So if we have a process around that we can ask
// about this object, do so.
child_byte_offset = LLDB_INVALID_IVAR_OFFSET;
Process *process = nullptr;
if (exe_ctx)
process = exe_ctx->GetProcessPtr();
if (process) {
ObjCLanguageRuntime *objc_runtime =
ObjCLanguageRuntime::Get(*process);
if (objc_runtime != nullptr) {
CompilerType parent_ast_type = GetType(parent_qual_type);
child_byte_offset = objc_runtime->GetByteOffsetForIvar(
parent_ast_type, ivar_decl->getNameAsString().c_str());
}
}
// Setting this to INT32_MAX to make sure we don't compute it
// twice...
bit_offset = INT32_MAX;
if (child_byte_offset ==
static_cast<int32_t>(LLDB_INVALID_IVAR_OFFSET)) {
bit_offset = interface_layout.getFieldOffset(child_idx -
superclass_idx);
child_byte_offset = bit_offset / 8;
}
// Note, the ObjC Ivar Byte offset is just that, it doesn't
// account for the bit offset of a bitfield within its
// containing object. So regardless of where we get the byte
// offset from, we still need to get the bit offset for
// bitfields from the layout.
if (FieldIsBitfield(ivar_decl, child_bitfield_bit_size)) {
if (bit_offset == INT32_MAX)
bit_offset = interface_layout.getFieldOffset(
child_idx - superclass_idx);
child_bitfield_bit_offset = bit_offset % 8;
}
return GetType(ivar_qual_type);
}
++child_idx;
}
}
}
}
}
break;
case clang::Type::ObjCObjectPointer:
if (idx_is_valid) {
CompilerType pointee_clang_type(GetPointeeType(type));
if (transparent_pointers && pointee_clang_type.IsAggregateType()) {
child_is_deref_of_parent = false;
bool tmp_child_is_deref_of_parent = false;
return pointee_clang_type.GetChildCompilerTypeAtIndex(
exe_ctx, idx, transparent_pointers, omit_empty_base_classes,
ignore_array_bounds, child_name, child_byte_size, child_byte_offset,
child_bitfield_bit_size, child_bitfield_bit_offset,
child_is_base_class, tmp_child_is_deref_of_parent, valobj,
language_flags);
} else {
child_is_deref_of_parent = true;
const char *parent_name =
valobj ? valobj->GetName().GetCString() : nullptr;
if (parent_name) {
child_name.assign(1, '*');
child_name += parent_name;
}
// We have a pointer to a simple type
if (idx == 0 && pointee_clang_type.GetCompleteType()) {
if (Optional<uint64_t> size =
pointee_clang_type.GetByteSize(get_exe_scope())) {
child_byte_size = *size;
child_byte_offset = 0;
return pointee_clang_type;
}
}
}
}
break;
case clang::Type::Vector:
case clang::Type::ExtVector:
if (idx_is_valid) {
const clang::VectorType *array =
llvm::cast<clang::VectorType>(parent_qual_type.getTypePtr());
if (array) {
CompilerType element_type = GetType(array->getElementType());
if (element_type.GetCompleteType()) {
char element_name[64];
::snprintf(element_name, sizeof(element_name), "[%" PRIu64 "]",
static_cast<uint64_t>(idx));
child_name.assign(element_name);
if (Optional<uint64_t> size =
element_type.GetByteSize(get_exe_scope())) {
child_byte_size = *size;
child_byte_offset = (int32_t)idx * (int32_t)child_byte_size;
return element_type;
}
}
}
}
break;
case clang::Type::ConstantArray:
case clang::Type::IncompleteArray:
if (ignore_array_bounds || idx_is_valid) {
const clang::ArrayType *array = GetQualType(type)->getAsArrayTypeUnsafe();
if (array) {
CompilerType element_type = GetType(array->getElementType());
if (element_type.GetCompleteType()) {
child_name = std::string(llvm::formatv("[{0}]", idx));
if (Optional<uint64_t> size =
element_type.GetByteSize(get_exe_scope())) {
child_byte_size = *size;
child_byte_offset = (int32_t)idx * (int32_t)child_byte_size;
return element_type;
}
}
}
}
break;
case clang::Type::Pointer: {
CompilerType pointee_clang_type(GetPointeeType(type));
// Don't dereference "void *" pointers
if (pointee_clang_type.IsVoidType())
return CompilerType();
if (transparent_pointers && pointee_clang_type.IsAggregateType()) {
child_is_deref_of_parent = false;
bool tmp_child_is_deref_of_parent = false;
return pointee_clang_type.GetChildCompilerTypeAtIndex(
exe_ctx, idx, transparent_pointers, omit_empty_base_classes,
ignore_array_bounds, child_name, child_byte_size, child_byte_offset,
child_bitfield_bit_size, child_bitfield_bit_offset,
child_is_base_class, tmp_child_is_deref_of_parent, valobj,
language_flags);
} else {
child_is_deref_of_parent = true;
const char *parent_name =
valobj ? valobj->GetName().GetCString() : nullptr;
if (parent_name) {
child_name.assign(1, '*');
child_name += parent_name;
}
// We have a pointer to a simple type
if (idx == 0) {
if (Optional<uint64_t> size =
pointee_clang_type.GetByteSize(get_exe_scope())) {
child_byte_size = *size;
child_byte_offset = 0;
return pointee_clang_type;
}
}
}
break;
}
case clang::Type::LValueReference:
case clang::Type::RValueReference:
if (idx_is_valid) {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(
RemoveWrappingTypes(GetQualType(type)).getTypePtr());
CompilerType pointee_clang_type =
GetType(reference_type->getPointeeType());
if (transparent_pointers && pointee_clang_type.IsAggregateType()) {
child_is_deref_of_parent = false;
bool tmp_child_is_deref_of_parent = false;
return pointee_clang_type.GetChildCompilerTypeAtIndex(
exe_ctx, idx, transparent_pointers, omit_empty_base_classes,
ignore_array_bounds, child_name, child_byte_size, child_byte_offset,
child_bitfield_bit_size, child_bitfield_bit_offset,
child_is_base_class, tmp_child_is_deref_of_parent, valobj,
language_flags);
} else {
const char *parent_name =
valobj ? valobj->GetName().GetCString() : nullptr;
if (parent_name) {
child_name.assign(1, '&');
child_name += parent_name;
}
// We have a reference to a simple type
if (idx == 0) {
if (Optional<uint64_t> size =
pointee_clang_type.GetByteSize(get_exe_scope())) {
child_byte_size = *size;
child_byte_offset = 0;
return pointee_clang_type;
}
}
}
}
break;
default:
break;
}
return CompilerType();
}
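// A minimal usage sketch (not part of the original source): fetching a single
// child of a type with GetChildCompilerTypeAtIndex. The helper name is
// hypothetical; every reference out-parameter must be supplied, while
// 'exe_ctx' and 'valobj' may be null when no process or value is available.
static CompilerType ExampleGetChild(TypeSystemClang &ts,
                                    lldb::opaque_compiler_type_t type,
                                    size_t idx) {
  std::string child_name;
  uint32_t child_byte_size = 0;
  int32_t child_byte_offset = 0;
  uint32_t child_bitfield_bit_size = 0;
  uint32_t child_bitfield_bit_offset = 0;
  bool child_is_base_class = false;
  bool child_is_deref_of_parent = false;
  uint64_t language_flags = 0;
  return ts.GetChildCompilerTypeAtIndex(
      type, /*exe_ctx=*/nullptr, idx,
      /*transparent_pointers=*/true, /*omit_empty_base_classes=*/true,
      /*ignore_array_bounds=*/false, child_name, child_byte_size,
      child_byte_offset, child_bitfield_bit_size, child_bitfield_bit_offset,
      child_is_base_class, child_is_deref_of_parent, /*valobj=*/nullptr,
      language_flags);
}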
static uint32_t GetIndexForRecordBase(const clang::RecordDecl *record_decl,
const clang::CXXBaseSpecifier *base_spec,
bool omit_empty_base_classes) {
uint32_t child_idx = 0;
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
clang::CXXRecordDecl::base_class_const_iterator base_class, base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
if (omit_empty_base_classes) {
if (BaseSpecifierIsEmpty(base_class))
continue;
}
if (base_class == base_spec)
return child_idx;
++child_idx;
}
}
return UINT32_MAX;
}
static uint32_t GetIndexForRecordChild(const clang::RecordDecl *record_decl,
clang::NamedDecl *canonical_decl,
bool omit_empty_base_classes) {
uint32_t child_idx = TypeSystemClang::GetNumBaseClasses(
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl),
omit_empty_base_classes);
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(), field_end = record_decl->field_end();
field != field_end; ++field, ++child_idx) {
if (field->getCanonicalDecl() == canonical_decl)
return child_idx;
}
return UINT32_MAX;
}
// Look for a child member (doesn't include base classes, but it does include
// their members) in the type hierarchy. Returns an index path into
// "clang_type" on how to reach the appropriate member.
//
// class A
// {
// public:
// int m_a;
// int m_b;
// };
//
// class B
// {
// };
//
// class C :
// public B,
// public A
// {
// };
//
// If we have a clang type that describes "class C", and we wanted to look up
// "m_b" in it:
//
// With omit_empty_base_classes == false we would get an integer array back
// with: { 1, 1 }. The first index 1 is the child index for "class A" within
// class C; the second index 1 is the child index for "m_b" within class A.
//
// With omit_empty_base_classes == true we would get an integer array back
// with: { 0, 1 }. The first index 0 is the child index for "class A" within
// class C (since class B doesn't have any members it doesn't count); the
// second index 1 is the child index for "m_b" within class A.
size_t TypeSystemClang::GetIndexOfChildMemberWithName(
lldb::opaque_compiler_type_t type, const char *name,
bool omit_empty_base_classes, std::vector<uint32_t> &child_indexes) {
if (type && name && name[0]) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
uint32_t child_idx = 0;
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
// Try to find a field whose name matches "name".
clang::RecordDecl::field_iterator field, field_end;
llvm::StringRef name_sref(name);
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field, ++child_idx) {
llvm::StringRef field_name = field->getName();
if (field_name.empty()) {
CompilerType field_type = GetType(field->getType());
child_indexes.push_back(child_idx);
if (field_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes))
return child_indexes.size();
child_indexes.pop_back();
} else if (field_name.equals(name_sref)) {
// We have to add on the number of base classes to this index!
child_indexes.push_back(
child_idx + TypeSystemClang::GetNumBaseClasses(
cxx_record_decl, omit_empty_base_classes));
return child_indexes.size();
}
}
if (cxx_record_decl) {
const clang::RecordDecl *parent_record_decl = cxx_record_decl;
// We didn't find the member directly; let clang's lookup machinery do the work.
clang::IdentifierInfo &ident_ref =
getASTContext().Idents.get(name_sref);
clang::DeclarationName decl_name(&ident_ref);
clang::CXXBasePaths paths;
if (cxx_record_decl->lookupInBases(
[decl_name](const clang::CXXBaseSpecifier *specifier,
clang::CXXBasePath &path) {
CXXRecordDecl *record =
specifier->getType()->getAsCXXRecordDecl();
auto r = record->lookup(decl_name);
path.Decls = r.begin();
return !r.empty();
},
paths)) {
clang::CXXBasePaths::const_paths_iterator path,
path_end = paths.end();
for (path = paths.begin(); path != path_end; ++path) {
const size_t num_path_elements = path->size();
for (size_t e = 0; e < num_path_elements; ++e) {
clang::CXXBasePathElement elem = (*path)[e];
child_idx = GetIndexForRecordBase(parent_record_decl, elem.Base,
omit_empty_base_classes);
if (child_idx == UINT32_MAX) {
child_indexes.clear();
return 0;
} else {
child_indexes.push_back(child_idx);
parent_record_decl = llvm::cast<clang::RecordDecl>(
elem.Base->getType()
->castAs<clang::RecordType>()
->getDecl());
}
}
for (clang::DeclContext::lookup_iterator I = path->Decls, E;
I != E; ++I) {
child_idx = GetIndexForRecordChild(
parent_record_decl, *I, omit_empty_base_classes);
if (child_idx == UINT32_MAX) {
child_indexes.clear();
return 0;
} else {
child_indexes.push_back(child_idx);
}
}
}
return child_indexes.size();
}
}
}
break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
llvm::StringRef name_sref(name);
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
uint32_t child_idx = 0;
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
clang::ObjCInterfaceDecl::ivar_iterator ivar_pos,
ivar_end = class_interface_decl->ivar_end();
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
for (ivar_pos = class_interface_decl->ivar_begin();
ivar_pos != ivar_end; ++ivar_pos, ++child_idx) {
const clang::ObjCIvarDecl *ivar_decl = *ivar_pos;
if (ivar_decl->getName().equals(name_sref)) {
if ((!omit_empty_base_classes && superclass_interface_decl) ||
(omit_empty_base_classes &&
ObjCDeclHasIVars(superclass_interface_decl, true)))
++child_idx;
child_indexes.push_back(child_idx);
return child_indexes.size();
}
}
if (superclass_interface_decl) {
// The super class index is always zero for ObjC classes, so we
// push it onto the child indexes in case we find an ivar in our
// superclass...
child_indexes.push_back(0);
CompilerType superclass_clang_type =
GetType(getASTContext().getObjCInterfaceType(
superclass_interface_decl));
if (superclass_clang_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes)) {
// We did find an ivar in a superclass so just return the
// results!
return child_indexes.size();
}
// We didn't find an ivar matching "name" in our superclass, pop
// the superclass zero index that we pushed on above.
child_indexes.pop_back();
}
}
}
}
break;
case clang::Type::ObjCObjectPointer: {
CompilerType objc_object_clang_type = GetType(
llvm::cast<clang::ObjCObjectPointerType>(qual_type.getTypePtr())
->getPointeeType());
return objc_object_clang_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes);
} break;
case clang::Type::ConstantArray: {
// const clang::ConstantArrayType *array =
// llvm::cast<clang::ConstantArrayType>(parent_qual_type.getTypePtr());
// const uint64_t element_count =
// array->getSize().getLimitedValue();
//
// if (idx < element_count)
// {
// std::pair<uint64_t, unsigned> field_type_info =
// ast->getTypeInfo(array->getElementType());
//
// char element_name[32];
// ::snprintf (element_name, sizeof (element_name),
// "%s[%u]", parent_name ? parent_name : "", idx);
//
// child_name.assign(element_name);
// assert(field_type_info.first % 8 == 0);
// child_byte_size = field_type_info.first / 8;
// child_byte_offset = idx * child_byte_size;
// return array->getElementType().getAsOpaquePtr();
// }
} break;
// case clang::Type::MemberPointerType:
// {
// MemberPointerType *mem_ptr_type =
// llvm::cast<MemberPointerType>(qual_type.getTypePtr());
// clang::QualType pointee_type =
// mem_ptr_type->getPointeeType();
//
// if (TypeSystemClang::IsAggregateType
// (pointee_type.getAsOpaquePtr()))
// {
// return GetIndexOfChildWithName (ast,
// mem_ptr_type->getPointeeType().getAsOpaquePtr(),
// name);
// }
// }
// break;
//
case clang::Type::LValueReference:
case clang::Type::RValueReference: {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(qual_type.getTypePtr());
clang::QualType pointee_type(reference_type->getPointeeType());
CompilerType pointee_clang_type = GetType(pointee_type);
if (pointee_clang_type.IsAggregateType()) {
return pointee_clang_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes);
}
} break;
case clang::Type::Pointer: {
CompilerType pointee_clang_type(GetPointeeType(type));
if (pointee_clang_type.IsAggregateType()) {
return pointee_clang_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes);
}
} break;
default:
break;
}
}
return 0;
}
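// A minimal usage sketch (not part of the original source): resolving a
// member name to an index path with GetIndexOfChildMemberWithName. For the
// "class C : public B, public A" example described before this function,
// looking up "m_b" yields {1, 1}, or {0, 1} when empty base classes are
// omitted. The helper name is hypothetical; a return value of 0 means the
// member was not found.
static bool ExampleFindMemberPath(TypeSystemClang &ts,
                                  lldb::opaque_compiler_type_t type,
                                  const char *member_name,
                                  std::vector<uint32_t> &path) {
  path.clear();
  return ts.GetIndexOfChildMemberWithName(type, member_name,
                                          /*omit_empty_base_classes=*/false,
                                          path) != 0;
}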
// Get the index of the child of "clang_type" whose name matches. This
// function doesn't descend into the children; it only looks one level deep,
// and name matches can include base class names.
uint32_t
TypeSystemClang::GetIndexOfChildWithName(lldb::opaque_compiler_type_t type,
const char *name,
bool omit_empty_base_classes) {
if (type && name && name[0]) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
uint32_t child_idx = 0;
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
// Skip empty base classes
clang::CXXRecordDecl *base_class_decl =
llvm::cast<clang::CXXRecordDecl>(
base_class->getType()
->castAs<clang::RecordType>()
->getDecl());
if (omit_empty_base_classes &&
!TypeSystemClang::RecordHasFields(base_class_decl))
continue;
CompilerType base_class_clang_type = GetType(base_class->getType());
std::string base_class_type_name(
base_class_clang_type.GetTypeName().AsCString(""));
if (base_class_type_name == name)
return child_idx;
++child_idx;
}
}
// Try to find a field whose name matches "name".
clang::RecordDecl::field_iterator field, field_end;
llvm::StringRef name_sref(name);
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field, ++child_idx) {
if (field->getName().equals(name_sref))
return child_idx;
}
}
break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
llvm::StringRef name_sref(name);
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
uint32_t child_idx = 0;
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
clang::ObjCInterfaceDecl::ivar_iterator ivar_pos,
ivar_end = class_interface_decl->ivar_end();
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
for (ivar_pos = class_interface_decl->ivar_begin();
ivar_pos != ivar_end; ++ivar_pos, ++child_idx) {
const clang::ObjCIvarDecl *ivar_decl = *ivar_pos;
if (ivar_decl->getName().equals(name_sref)) {
if ((!omit_empty_base_classes && superclass_interface_decl) ||
(omit_empty_base_classes &&
ObjCDeclHasIVars(superclass_interface_decl, true)))
++child_idx;
return child_idx;
}
}
if (superclass_interface_decl) {
if (superclass_interface_decl->getName().equals(name_sref))
return 0;
}
}
}
}
break;
case clang::Type::ObjCObjectPointer: {
CompilerType pointee_clang_type = GetType(
llvm::cast<clang::ObjCObjectPointerType>(qual_type.getTypePtr())
->getPointeeType());
return pointee_clang_type.GetIndexOfChildWithName(
name, omit_empty_base_classes);
} break;
case clang::Type::ConstantArray: {
// const clang::ConstantArrayType *array =
// llvm::cast<clang::ConstantArrayType>(parent_qual_type.getTypePtr());
// const uint64_t element_count =
// array->getSize().getLimitedValue();
//
// if (idx < element_count)
// {
// std::pair<uint64_t, unsigned> field_type_info =
// ast->getTypeInfo(array->getElementType());
//
// char element_name[32];
// ::snprintf (element_name, sizeof (element_name),
// "%s[%u]", parent_name ? parent_name : "", idx);
//
// child_name.assign(element_name);
// assert(field_type_info.first % 8 == 0);
// child_byte_size = field_type_info.first / 8;
// child_byte_offset = idx * child_byte_size;
// return array->getElementType().getAsOpaquePtr();
// }
} break;
// case clang::Type::MemberPointerType:
// {
// MemberPointerType *mem_ptr_type =
// llvm::cast<MemberPointerType>(qual_type.getTypePtr());
// clang::QualType pointee_type =
// mem_ptr_type->getPointeeType();
//
// if (TypeSystemClang::IsAggregateType
// (pointee_type.getAsOpaquePtr()))
// {
// return GetIndexOfChildWithName (ast,
// mem_ptr_type->getPointeeType().getAsOpaquePtr(),
// name);
// }
// }
// break;
//
case clang::Type::LValueReference:
case clang::Type::RValueReference: {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(qual_type.getTypePtr());
CompilerType pointee_type = GetType(reference_type->getPointeeType());
if (pointee_type.IsAggregateType()) {
return pointee_type.GetIndexOfChildWithName(name,
omit_empty_base_classes);
}
} break;
case clang::Type::Pointer: {
const clang::PointerType *pointer_type =
llvm::cast<clang::PointerType>(qual_type.getTypePtr());
CompilerType pointee_type = GetType(pointer_type->getPointeeType());
if (pointee_type.IsAggregateType()) {
return pointee_type.GetIndexOfChildWithName(name,
omit_empty_base_classes);
} else {
// if (parent_name)
// {
// child_name.assign(1, '*');
// child_name += parent_name;
// }
//
// // We have a pointer to an simple type
// if (idx == 0)
// {
// std::pair<uint64_t, unsigned> clang_type_info
// = ast->getTypeInfo(pointee_type);
// assert(clang_type_info.first % 8 == 0);
// child_byte_size = clang_type_info.first / 8;
// child_byte_offset = 0;
// return pointee_type.getAsOpaquePtr();
// }
}
} break;
default:
break;
}
}
return UINT32_MAX;
}
size_t
-TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) {
+TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+ bool expand_pack) {
if (!type)
return 0;
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
const clang::ClassTemplateSpecializationDecl *template_decl =
llvm::dyn_cast<clang::ClassTemplateSpecializationDecl>(
cxx_record_decl);
- if (template_decl)
- return template_decl->getTemplateArgs().size();
+ if (template_decl) {
+ const auto &template_arg_list = template_decl->getTemplateArgs();
+ size_t num_args = template_arg_list.size();
+ assert(num_args && "template specialization without any args");
+ if (expand_pack && num_args) {
+ const auto &pack = template_arg_list[num_args - 1];
+ if (pack.getKind() == clang::TemplateArgument::Pack)
+ num_args += pack.pack_size() - 1;
+ }
+ return num_args;
+ }
}
}
break;
default:
break;
}
return 0;
}
const clang::ClassTemplateSpecializationDecl *
TypeSystemClang::GetAsTemplateSpecialization(
lldb::opaque_compiler_type_t type) {
if (!type)
return nullptr;
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record: {
if (! GetCompleteType(type))
return nullptr;
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (!cxx_record_decl)
return nullptr;
return llvm::dyn_cast<clang::ClassTemplateSpecializationDecl>(
cxx_record_decl);
}
default:
return nullptr;
}
}
+const TemplateArgument *
+GetNthTemplateArgument(const clang::ClassTemplateSpecializationDecl *decl,
+ size_t idx, bool expand_pack) {
+ const auto &args = decl->getTemplateArgs();
+ const size_t args_size = args.size();
+
+ assert(args_size && "template specialization without any args");
+ if (!args_size)
+ return nullptr;
+
+ const size_t last_idx = args_size - 1;
+
+ // We're asked for a template argument that can't be a parameter pack, so
+ // return it without worrying about 'expand_pack'.
+ if (idx < last_idx)
+ return &args[idx];
+
+ // We're asked for the last template argument but we don't want/need to
+ // expand it.
+ if (!expand_pack || args[last_idx].getKind() != clang::TemplateArgument::Pack)
+ return idx >= args.size() ? nullptr : &args[idx];
+
+ // Index into the expanded pack.
+ // Note that 'idx' counts from the beginning of all template arguments
+ // (including the ones preceding the parameter pack).
+ const auto &pack = args[last_idx];
+ const size_t pack_idx = idx - last_idx;
+ const size_t pack_size = pack.pack_size();
+ assert(pack_idx < pack_size && "parameter pack index out-of-bounds");
+ return &pack.pack_elements()[pack_idx];
+}
+
lldb::TemplateArgumentKind
TypeSystemClang::GetTemplateArgumentKind(lldb::opaque_compiler_type_t type,
- size_t arg_idx) {
+ size_t arg_idx, bool expand_pack) {
const clang::ClassTemplateSpecializationDecl *template_decl =
GetAsTemplateSpecialization(type);
- if (! template_decl || arg_idx >= template_decl->getTemplateArgs().size())
+ if (!template_decl)
+ return eTemplateArgumentKindNull;
+
+ const auto *arg = GetNthTemplateArgument(template_decl, arg_idx, expand_pack);
+ if (!arg)
return eTemplateArgumentKindNull;
- switch (template_decl->getTemplateArgs()[arg_idx].getKind()) {
+ switch (arg->getKind()) {
case clang::TemplateArgument::Null:
return eTemplateArgumentKindNull;
case clang::TemplateArgument::NullPtr:
return eTemplateArgumentKindNullPtr;
case clang::TemplateArgument::Type:
return eTemplateArgumentKindType;
case clang::TemplateArgument::Declaration:
return eTemplateArgumentKindDeclaration;
case clang::TemplateArgument::Integral:
return eTemplateArgumentKindIntegral;
case clang::TemplateArgument::Template:
return eTemplateArgumentKindTemplate;
case clang::TemplateArgument::TemplateExpansion:
return eTemplateArgumentKindTemplateExpansion;
case clang::TemplateArgument::Expression:
return eTemplateArgumentKindExpression;
case clang::TemplateArgument::Pack:
return eTemplateArgumentKindPack;
}
llvm_unreachable("Unhandled clang::TemplateArgument::ArgKind");
}
CompilerType
TypeSystemClang::GetTypeTemplateArgument(lldb::opaque_compiler_type_t type,
- size_t idx) {
+ size_t idx, bool expand_pack) {
const clang::ClassTemplateSpecializationDecl *template_decl =
GetAsTemplateSpecialization(type);
- if (!template_decl || idx >= template_decl->getTemplateArgs().size())
+ if (!template_decl)
return CompilerType();
- const clang::TemplateArgument &template_arg =
- template_decl->getTemplateArgs()[idx];
- if (template_arg.getKind() != clang::TemplateArgument::Type)
+ const auto *arg = GetNthTemplateArgument(template_decl, idx, expand_pack);
+ if (!arg || arg->getKind() != clang::TemplateArgument::Type)
return CompilerType();
- return GetType(template_arg.getAsType());
+ return GetType(arg->getAsType());
}
Optional<CompilerType::IntegralTemplateArgument>
TypeSystemClang::GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type,
- size_t idx) {
+ size_t idx, bool expand_pack) {
const clang::ClassTemplateSpecializationDecl *template_decl =
GetAsTemplateSpecialization(type);
- if (! template_decl || idx >= template_decl->getTemplateArgs().size())
+ if (!template_decl)
return llvm::None;
- const clang::TemplateArgument &template_arg =
- template_decl->getTemplateArgs()[idx];
- if (template_arg.getKind() != clang::TemplateArgument::Integral)
+ const auto *arg = GetNthTemplateArgument(template_decl, idx, expand_pack);
+ if (!arg || arg->getKind() != clang::TemplateArgument::Integral)
return llvm::None;
- return {
- {template_arg.getAsIntegral(), GetType(template_arg.getIntegralType())}};
+ return {{arg->getAsIntegral(), GetType(arg->getIntegralType())}};
}
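// A minimal usage sketch (not part of the original change): iterating
// template arguments with the new 'expand_pack' flag. For a type such as
// std::tuple<int, double, bool>, passing expand_pack == true reports three
// Type arguments instead of a single Pack argument. The helper name is
// hypothetical.
static void ExampleVisitTemplateArgs(TypeSystemClang &ts,
                                     lldb::opaque_compiler_type_t type) {
  const size_t num_args =
      ts.GetNumTemplateArguments(type, /*expand_pack=*/true);
  for (size_t i = 0; i < num_args; ++i) {
    switch (ts.GetTemplateArgumentKind(type, i, /*expand_pack=*/true)) {
    case eTemplateArgumentKindType: {
      CompilerType arg_type =
          ts.GetTypeTemplateArgument(type, i, /*expand_pack=*/true);
      (void)arg_type; // a real caller would use the argument type here
      break;
    }
    case eTemplateArgumentKindIntegral: {
      auto integral_arg =
          ts.GetIntegralTemplateArgument(type, i, /*expand_pack=*/true);
      (void)integral_arg; // value and type of a non-type template argument
      break;
    }
    default:
      break;
    }
  }
}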
CompilerType TypeSystemClang::GetTypeForFormatters(void *type) {
if (type)
return ClangUtil::RemoveFastQualifiers(CompilerType(this, type));
return CompilerType();
}
clang::EnumDecl *TypeSystemClang::GetAsEnumDecl(const CompilerType &type) {
const clang::EnumType *enutype =
llvm::dyn_cast<clang::EnumType>(ClangUtil::GetCanonicalQualType(type));
if (enutype)
return enutype->getDecl();
return nullptr;
}
clang::RecordDecl *TypeSystemClang::GetAsRecordDecl(const CompilerType &type) {
const clang::RecordType *record_type =
llvm::dyn_cast<clang::RecordType>(ClangUtil::GetCanonicalQualType(type));
if (record_type)
return record_type->getDecl();
return nullptr;
}
clang::TagDecl *TypeSystemClang::GetAsTagDecl(const CompilerType &type) {
return ClangUtil::GetAsTagDecl(type);
}
clang::TypedefNameDecl *
TypeSystemClang::GetAsTypedefDecl(const CompilerType &type) {
const clang::TypedefType *typedef_type =
llvm::dyn_cast<clang::TypedefType>(ClangUtil::GetQualType(type));
if (typedef_type)
return typedef_type->getDecl();
return nullptr;
}
clang::CXXRecordDecl *
TypeSystemClang::GetAsCXXRecordDecl(lldb::opaque_compiler_type_t type) {
return GetCanonicalQualType(type)->getAsCXXRecordDecl();
}
clang::ObjCInterfaceDecl *
TypeSystemClang::GetAsObjCInterfaceDecl(const CompilerType &type) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(
ClangUtil::GetCanonicalQualType(type));
if (objc_class_type)
return objc_class_type->getInterface();
return nullptr;
}
clang::FieldDecl *TypeSystemClang::AddFieldToRecordType(
const CompilerType &type, llvm::StringRef name,
const CompilerType &field_clang_type, AccessType access,
uint32_t bitfield_bit_size) {
if (!type.IsValid() || !field_clang_type.IsValid())
return nullptr;
TypeSystemClang *ast =
llvm::dyn_cast_or_null<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return nullptr;
clang::ASTContext &clang_ast = ast->getASTContext();
clang::IdentifierInfo *ident = nullptr;
if (!name.empty())
ident = &clang_ast.Idents.get(name);
clang::FieldDecl *field = nullptr;
clang::Expr *bit_width = nullptr;
if (bitfield_bit_size != 0) {
llvm::APInt bitfield_bit_size_apint(clang_ast.getTypeSize(clang_ast.IntTy),
bitfield_bit_size);
bit_width = new (clang_ast)
clang::IntegerLiteral(clang_ast, bitfield_bit_size_apint,
clang_ast.IntTy, clang::SourceLocation());
}
clang::RecordDecl *record_decl = ast->GetAsRecordDecl(type);
if (record_decl) {
field = clang::FieldDecl::CreateDeserialized(clang_ast, 0);
field->setDeclContext(record_decl);
field->setDeclName(ident);
field->setType(ClangUtil::GetQualType(field_clang_type));
if (bit_width)
field->setBitWidth(bit_width);
SetMemberOwningModule(field, record_decl);
if (name.empty()) {
// Determine whether this field corresponds to an anonymous struct or
// union.
if (const clang::TagType *TagT =
field->getType()->getAs<clang::TagType>()) {
if (clang::RecordDecl *Rec =
llvm::dyn_cast<clang::RecordDecl>(TagT->getDecl()))
if (!Rec->getDeclName()) {
Rec->setAnonymousStructOrUnion(true);
field->setImplicit();
}
}
}
if (field) {
clang::AccessSpecifier access_specifier =
TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access);
field->setAccess(access_specifier);
if (clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<CXXRecordDecl>(record_decl)) {
AddAccessSpecifierDecl(cxx_record_decl, ast->getASTContext(),
ast->GetCXXRecordDeclAccess(cxx_record_decl),
access_specifier);
ast->SetCXXRecordDeclAccess(cxx_record_decl, access_specifier);
}
record_decl->addDecl(field);
VerifyDecl(field);
}
} else {
clang::ObjCInterfaceDecl *class_interface_decl =
ast->GetAsObjCInterfaceDecl(type);
if (class_interface_decl) {
const bool is_synthesized = false;
field_clang_type.GetCompleteType();
auto *ivar = clang::ObjCIvarDecl::CreateDeserialized(clang_ast, 0);
ivar->setDeclContext(class_interface_decl);
ivar->setDeclName(ident);
ivar->setType(ClangUtil::GetQualType(field_clang_type));
ivar->setAccessControl(ConvertAccessTypeToObjCIvarAccessControl(access));
if (bit_width)
ivar->setBitWidth(bit_width);
ivar->setSynthesize(is_synthesized);
field = ivar;
SetMemberOwningModule(field, class_interface_decl);
if (field) {
class_interface_decl->addDecl(field);
VerifyDecl(field);
}
}
}
return field;
}
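// A minimal usage sketch (not part of the original source): adding a plain
// member and a 4-bit bitfield to a record that is still being defined. The
// helper name and field names are hypothetical; a bitfield_bit_size of 0
// produces an ordinary (non-bitfield) member.
static void ExampleAddFields(const CompilerType &record_type,
                             const CompilerType &int_type) {
  TypeSystemClang::AddFieldToRecordType(record_type, "value", int_type,
                                        lldb::eAccessPublic,
                                        /*bitfield_bit_size=*/0);
  TypeSystemClang::AddFieldToRecordType(record_type, "flags", int_type,
                                        lldb::eAccessPublic,
                                        /*bitfield_bit_size=*/4);
}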
void TypeSystemClang::BuildIndirectFields(const CompilerType &type) {
if (!type)
return;
TypeSystemClang *ast = llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return;
clang::RecordDecl *record_decl = ast->GetAsRecordDecl(type);
if (!record_decl)
return;
typedef llvm::SmallVector<clang::IndirectFieldDecl *, 1> IndirectFieldVector;
IndirectFieldVector indirect_fields;
clang::RecordDecl::field_iterator field_pos;
clang::RecordDecl::field_iterator field_end_pos = record_decl->field_end();
clang::RecordDecl::field_iterator last_field_pos = field_end_pos;
for (field_pos = record_decl->field_begin(); field_pos != field_end_pos;
last_field_pos = field_pos++) {
if (field_pos->isAnonymousStructOrUnion()) {
clang::QualType field_qual_type = field_pos->getType();
const clang::RecordType *field_record_type =
field_qual_type->getAs<clang::RecordType>();
if (!field_record_type)
continue;
clang::RecordDecl *field_record_decl = field_record_type->getDecl();
if (!field_record_decl)
continue;
for (clang::RecordDecl::decl_iterator
di = field_record_decl->decls_begin(),
de = field_record_decl->decls_end();
di != de; ++di) {
if (clang::FieldDecl *nested_field_decl =
llvm::dyn_cast<clang::FieldDecl>(*di)) {
clang::NamedDecl **chain =
new (ast->getASTContext()) clang::NamedDecl *[2];
chain[0] = *field_pos;
chain[1] = nested_field_decl;
clang::IndirectFieldDecl *indirect_field =
clang::IndirectFieldDecl::Create(
ast->getASTContext(), record_decl, clang::SourceLocation(),
nested_field_decl->getIdentifier(),
nested_field_decl->getType(), {chain, 2});
SetMemberOwningModule(indirect_field, record_decl);
indirect_field->setImplicit();
indirect_field->setAccess(TypeSystemClang::UnifyAccessSpecifiers(
field_pos->getAccess(), nested_field_decl->getAccess()));
indirect_fields.push_back(indirect_field);
} else if (clang::IndirectFieldDecl *nested_indirect_field_decl =
llvm::dyn_cast<clang::IndirectFieldDecl>(*di)) {
size_t nested_chain_size =
nested_indirect_field_decl->getChainingSize();
clang::NamedDecl **chain = new (ast->getASTContext())
clang::NamedDecl *[nested_chain_size + 1];
chain[0] = *field_pos;
int chain_index = 1;
for (clang::IndirectFieldDecl::chain_iterator
nci = nested_indirect_field_decl->chain_begin(),
nce = nested_indirect_field_decl->chain_end();
nci < nce; ++nci) {
chain[chain_index] = *nci;
chain_index++;
}
clang::IndirectFieldDecl *indirect_field =
clang::IndirectFieldDecl::Create(
ast->getASTContext(), record_decl, clang::SourceLocation(),
nested_indirect_field_decl->getIdentifier(),
nested_indirect_field_decl->getType(),
{chain, nested_chain_size + 1});
SetMemberOwningModule(indirect_field, record_decl);
indirect_field->setImplicit();
indirect_field->setAccess(TypeSystemClang::UnifyAccessSpecifiers(
field_pos->getAccess(), nested_indirect_field_decl->getAccess()));
indirect_fields.push_back(indirect_field);
}
}
}
}
// Check the last field to see if it has an incomplete array type as its
// last member and, if it does, tell the record decl about it.
if (last_field_pos != field_end_pos) {
if (last_field_pos->getType()->isIncompleteArrayType())
record_decl->setHasFlexibleArrayMember(true);
}
for (IndirectFieldVector::iterator ifi = indirect_fields.begin(),
ife = indirect_fields.end();
ifi < ife; ++ifi) {
record_decl->addDecl(*ifi);
}
}
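// Mark the record behind "type" as packed by attaching an implicit
// clang::PackedAttr to its RecordDecl.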
void TypeSystemClang::SetIsPacked(const CompilerType &type) {
if (type) {
TypeSystemClang *ast =
llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (ast) {
clang::RecordDecl *record_decl = GetAsRecordDecl(type);
if (!record_decl)
return;
record_decl->addAttr(
clang::PackedAttr::CreateImplicit(ast->getASTContext()));
}
}
}
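// Add a static data member with the given name, type, and access to the
// record behind "type"; returns the new VarDecl, or nullptr on failure.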
clang::VarDecl *TypeSystemClang::AddVariableToRecordType(
const CompilerType &type, llvm::StringRef name,
const CompilerType &var_type, AccessType access) {
if (!type.IsValid() || !var_type.IsValid())
return nullptr;
TypeSystemClang *ast = llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return nullptr;
clang::RecordDecl *record_decl = ast->GetAsRecordDecl(type);
if (!record_decl)
return nullptr;
clang::VarDecl *var_decl = nullptr;
clang::IdentifierInfo *ident = nullptr;
if (!name.empty())
ident = &ast->getASTContext().Idents.get(name);
var_decl = clang::VarDecl::CreateDeserialized(ast->getASTContext(), 0);
var_decl->setDeclContext(record_decl);
var_decl->setDeclName(ident);
var_decl->setType(ClangUtil::GetQualType(var_type));
var_decl->setStorageClass(clang::SC_Static);
SetMemberOwningModule(var_decl, record_decl);
var_decl->setAccess(
TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access));
record_decl->addDecl(var_decl);
VerifyDecl(var_decl);
return var_decl;
}
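// Give an integral (or enumeration) typed variable an IntegerLiteral
// initializer; for enums the literal uses the underlying integer type.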
void TypeSystemClang::SetIntegerInitializerForVariable(
VarDecl *var, const llvm::APInt &init_value) {
assert(!var->hasInit() && "variable already initialized");
clang::ASTContext &ast = var->getASTContext();
QualType qt = var->getType();
assert(qt->isIntegralOrEnumerationType() &&
"only integer or enum types supported");
// If the variable is an enum type, take the underlying integer type as
// the type of the integer literal.
if (const EnumType *enum_type = qt->getAs<EnumType>()) {
const EnumDecl *enum_decl = enum_type->getDecl();
qt = enum_decl->getIntegerType();
}
var->setInit(IntegerLiteral::Create(ast, init_value, qt.getUnqualifiedType(),
SourceLocation()));
}
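// Give a floating-point typed variable a FloatingLiteral initializer.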
void TypeSystemClang::SetFloatingInitializerForVariable(
clang::VarDecl *var, const llvm::APFloat &init_value) {
assert(!var->hasInit() && "variable already initialized");
clang::ASTContext &ast = var->getASTContext();
QualType qt = var->getType();
assert(qt->isFloatingType() && "only floating point types supported");
var->setInit(FloatingLiteral::Create(
ast, init_value, true, qt.getUnqualifiedType(), SourceLocation()));
}
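// Add a method to a C++ record. Depending on the name and prototype this
// creates a destructor, constructor, conversion function, or plain
// CXXMethodDecl, fills in its parameters, and registers it with the record.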
clang::CXXMethodDecl *TypeSystemClang::AddMethodToCXXRecordType(
lldb::opaque_compiler_type_t type, llvm::StringRef name,
const char *mangled_name, const CompilerType &method_clang_type,
lldb::AccessType access, bool is_virtual, bool is_static, bool is_inline,
bool is_explicit, bool is_attr_used, bool is_artificial) {
if (!type || !method_clang_type.IsValid() || name.empty())
return nullptr;
clang::QualType record_qual_type(GetCanonicalQualType(type));
clang::CXXRecordDecl *cxx_record_decl =
record_qual_type->getAsCXXRecordDecl();
if (cxx_record_decl == nullptr)
return nullptr;
clang::QualType method_qual_type(ClangUtil::GetQualType(method_clang_type));
clang::CXXMethodDecl *cxx_method_decl = nullptr;
clang::DeclarationName decl_name(&getASTContext().Idents.get(name));
const clang::FunctionType *function_type =
llvm::dyn_cast<clang::FunctionType>(method_qual_type.getTypePtr());
if (function_type == nullptr)
return nullptr;
const clang::FunctionProtoType *method_function_prototype(
llvm::dyn_cast<clang::FunctionProtoType>(function_type));
if (!method_function_prototype)
return nullptr;
unsigned int num_params = method_function_prototype->getNumParams();
clang::CXXDestructorDecl *cxx_dtor_decl(nullptr);
clang::CXXConstructorDecl *cxx_ctor_decl(nullptr);
if (is_artificial)
return nullptr; // skip everything artificial
const clang::ExplicitSpecifier explicit_spec(
nullptr /*expr*/, is_explicit ? clang::ExplicitSpecKind::ResolvedTrue
: clang::ExplicitSpecKind::ResolvedFalse);
if (name.startswith("~")) {
cxx_dtor_decl =
clang::CXXDestructorDecl::CreateDeserialized(getASTContext(), 0);
cxx_dtor_decl->setDeclContext(cxx_record_decl);
cxx_dtor_decl->setDeclName(
getASTContext().DeclarationNames.getCXXDestructorName(
getASTContext().getCanonicalType(record_qual_type)));
cxx_dtor_decl->setType(method_qual_type);
cxx_dtor_decl->setImplicit(is_artificial);
cxx_dtor_decl->setInlineSpecified(is_inline);
cxx_dtor_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
cxx_method_decl = cxx_dtor_decl;
} else if (decl_name == cxx_record_decl->getDeclName()) {
cxx_ctor_decl = clang::CXXConstructorDecl::CreateDeserialized(
getASTContext(), 0, 0);
cxx_ctor_decl->setDeclContext(cxx_record_decl);
cxx_ctor_decl->setDeclName(
getASTContext().DeclarationNames.getCXXConstructorName(
getASTContext().getCanonicalType(record_qual_type)));
cxx_ctor_decl->setType(method_qual_type);
cxx_ctor_decl->setImplicit(is_artificial);
cxx_ctor_decl->setInlineSpecified(is_inline);
cxx_ctor_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
cxx_ctor_decl->setNumCtorInitializers(0);
cxx_ctor_decl->setExplicitSpecifier(explicit_spec);
cxx_method_decl = cxx_ctor_decl;
} else {
clang::StorageClass SC = is_static ? clang::SC_Static : clang::SC_None;
clang::OverloadedOperatorKind op_kind = clang::NUM_OVERLOADED_OPERATORS;
if (IsOperator(name, op_kind)) {
if (op_kind != clang::NUM_OVERLOADED_OPERATORS) {
// Check the number of operator parameters. Sometimes we have seen bad
// DWARF that doesn't correctly describe operators and if we try to
// create a method and add it to the class, clang will assert and
// crash, so we need to make sure things are acceptable.
const bool is_method = true;
if (!TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
is_method, op_kind, num_params))
return nullptr;
cxx_method_decl =
clang::CXXMethodDecl::CreateDeserialized(getASTContext(), 0);
cxx_method_decl->setDeclContext(cxx_record_decl);
cxx_method_decl->setDeclName(
getASTContext().DeclarationNames.getCXXOperatorName(op_kind));
cxx_method_decl->setType(method_qual_type);
cxx_method_decl->setStorageClass(SC);
cxx_method_decl->setInlineSpecified(is_inline);
cxx_method_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
} else if (num_params == 0) {
// Conversion operators don't take params...
auto *cxx_conversion_decl =
clang::CXXConversionDecl::CreateDeserialized(getASTContext(), 0);
cxx_conversion_decl->setDeclContext(cxx_record_decl);
cxx_conversion_decl->setDeclName(
getASTContext().DeclarationNames.getCXXConversionFunctionName(
getASTContext().getCanonicalType(
function_type->getReturnType())));
cxx_conversion_decl->setType(method_qual_type);
cxx_conversion_decl->setInlineSpecified(is_inline);
cxx_conversion_decl->setExplicitSpecifier(explicit_spec);
cxx_conversion_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
cxx_method_decl = cxx_conversion_decl;
}
}
if (cxx_method_decl == nullptr) {
cxx_method_decl =
clang::CXXMethodDecl::CreateDeserialized(getASTContext(), 0);
cxx_method_decl->setDeclContext(cxx_record_decl);
cxx_method_decl->setDeclName(decl_name);
cxx_method_decl->setType(method_qual_type);
cxx_method_decl->setInlineSpecified(is_inline);
cxx_method_decl->setStorageClass(SC);
cxx_method_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
}
}
SetMemberOwningModule(cxx_method_decl, cxx_record_decl);
clang::AccessSpecifier access_specifier =
TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access);
cxx_method_decl->setAccess(access_specifier);
cxx_method_decl->setVirtualAsWritten(is_virtual);
if (is_attr_used)
cxx_method_decl->addAttr(clang::UsedAttr::CreateImplicit(getASTContext()));
if (mangled_name != nullptr) {
cxx_method_decl->addAttr(clang::AsmLabelAttr::CreateImplicit(
getASTContext(), mangled_name, /*literal=*/false));
}
// Populate the method decl with parameter decls
llvm::SmallVector<clang::ParmVarDecl *, 12> params;
for (unsigned param_index = 0; param_index < num_params; ++param_index) {
params.push_back(clang::ParmVarDecl::Create(
getASTContext(), cxx_method_decl, clang::SourceLocation(),
clang::SourceLocation(),
nullptr, // anonymous
method_function_prototype->getParamType(param_index), nullptr,
clang::SC_None, nullptr));
}
cxx_method_decl->setParams(llvm::ArrayRef<clang::ParmVarDecl *>(params));
AddAccessSpecifierDecl(cxx_record_decl, getASTContext(),
GetCXXRecordDeclAccess(cxx_record_decl),
access_specifier);
SetCXXRecordDeclAccess(cxx_record_decl, access_specifier);
cxx_record_decl->addDecl(cxx_method_decl);
// Sometimes the debug info will mention a constructor (default/copy/move),
// destructor, or assignment operator (copy/move) even though no version of
// it exists in the code. So we check whether the function was artificially
// generated and whether it is trivial. This lets the compiler/backend know
// that it can inline the IR for these when it needs to, and we avoid a
// "missing function" error when running expressions.
if (is_artificial) {
if (cxx_ctor_decl && ((cxx_ctor_decl->isDefaultConstructor() &&
cxx_record_decl->hasTrivialDefaultConstructor()) ||
(cxx_ctor_decl->isCopyConstructor() &&
cxx_record_decl->hasTrivialCopyConstructor()) ||
(cxx_ctor_decl->isMoveConstructor() &&
cxx_record_decl->hasTrivialMoveConstructor()))) {
cxx_ctor_decl->setDefaulted();
cxx_ctor_decl->setTrivial(true);
} else if (cxx_dtor_decl) {
if (cxx_record_decl->hasTrivialDestructor()) {
cxx_dtor_decl->setDefaulted();
cxx_dtor_decl->setTrivial(true);
}
} else if ((cxx_method_decl->isCopyAssignmentOperator() &&
cxx_record_decl->hasTrivialCopyAssignment()) ||
(cxx_method_decl->isMoveAssignmentOperator() &&
cxx_record_decl->hasTrivialMoveAssignment())) {
cxx_method_decl->setDefaulted();
cxx_method_decl->setTrivial(true);
}
}
VerifyDecl(cxx_method_decl);
return cxx_method_decl;
}
void TypeSystemClang::AddMethodOverridesForCXXRecordType(
lldb::opaque_compiler_type_t type) {
if (auto *record = GetAsCXXRecordDecl(type))
for (auto *method : record->methods())
addOverridesForMethod(method);
}
#pragma mark C++ Base Classes
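// Describe "type" as a (possibly virtual) base class with the given access;
// the resulting specifiers are handed to TransferBaseClasses() below.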
std::unique_ptr<clang::CXXBaseSpecifier>
TypeSystemClang::CreateBaseClassSpecifier(lldb::opaque_compiler_type_t type,
AccessType access, bool is_virtual,
bool base_of_class) {
if (!type)
return nullptr;
return std::make_unique<clang::CXXBaseSpecifier>(
clang::SourceRange(), is_virtual, base_of_class,
TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access),
getASTContext().getTrivialTypeSourceInfo(GetQualType(type)),
clang::SourceLocation());
}
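// Install the given base class specifiers on a C++ record. A minimal usage
// sketch (assuming "ts" is this TypeSystemClang and "derived"/"base" are
// record types it created):
//   std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> bases;
//   bases.push_back(ts.CreateBaseClassSpecifier(
//       base.GetOpaqueQualType(), lldb::eAccessPublic,
//       /*is_virtual=*/false, /*base_of_class=*/false));
//   ts.TransferBaseClasses(derived.GetOpaqueQualType(), std::move(bases));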
bool TypeSystemClang::TransferBaseClasses(
lldb::opaque_compiler_type_t type,
std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> bases) {
if (!type)
return false;
clang::CXXRecordDecl *cxx_record_decl = GetAsCXXRecordDecl(type);
if (!cxx_record_decl)
return false;
std::vector<clang::CXXBaseSpecifier *> raw_bases;
raw_bases.reserve(bases.size());
// Clang will make a copy of them, so it's ok that we pass pointers that we're
// about to destroy.
for (auto &b : bases)
raw_bases.push_back(b.get());
cxx_record_decl->setBases(raw_bases.data(), raw_bases.size());
return true;
}
bool TypeSystemClang::SetObjCSuperClass(
const CompilerType &type, const CompilerType &superclass_clang_type) {
TypeSystemClang *ast =
llvm::dyn_cast_or_null<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return false;
clang::ASTContext &clang_ast = ast->getASTContext();
if (type && superclass_clang_type.IsValid() &&
superclass_clang_type.GetTypeSystem() == type.GetTypeSystem()) {
clang::ObjCInterfaceDecl *class_interface_decl =
GetAsObjCInterfaceDecl(type);
clang::ObjCInterfaceDecl *super_interface_decl =
GetAsObjCInterfaceDecl(superclass_clang_type);
if (class_interface_decl && super_interface_decl) {
class_interface_decl->setSuperClass(clang_ast.getTrivialTypeSourceInfo(
clang_ast.getObjCInterfaceType(super_interface_decl)));
return true;
}
}
return false;
}
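// Add an Objective-C property to the interface behind "type", creating
// implicit getter/setter ObjCMethodDecls when the interface doesn't already
// declare them.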
bool TypeSystemClang::AddObjCClassProperty(
const CompilerType &type, const char *property_name,
const CompilerType &property_clang_type, clang::ObjCIvarDecl *ivar_decl,
const char *property_setter_name, const char *property_getter_name,
uint32_t property_attributes, ClangASTMetadata *metadata) {
if (!type || !property_clang_type.IsValid() || property_name == nullptr ||
property_name[0] == '\0')
return false;
TypeSystemClang *ast = llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return false;
clang::ASTContext &clang_ast = ast->getASTContext();
clang::ObjCInterfaceDecl *class_interface_decl = GetAsObjCInterfaceDecl(type);
if (!class_interface_decl)
return false;
CompilerType property_clang_type_to_access;
if (property_clang_type.IsValid())
property_clang_type_to_access = property_clang_type;
else if (ivar_decl)
property_clang_type_to_access = ast->GetType(ivar_decl->getType());
if (!class_interface_decl || !property_clang_type_to_access.IsValid())
return false;
clang::TypeSourceInfo *prop_type_source;
if (ivar_decl)
prop_type_source = clang_ast.getTrivialTypeSourceInfo(ivar_decl->getType());
else
prop_type_source = clang_ast.getTrivialTypeSourceInfo(
ClangUtil::GetQualType(property_clang_type));
clang::ObjCPropertyDecl *property_decl =
clang::ObjCPropertyDecl::CreateDeserialized(clang_ast, 0);
property_decl->setDeclContext(class_interface_decl);
property_decl->setDeclName(&clang_ast.Idents.get(property_name));
property_decl->setType(ivar_decl
? ivar_decl->getType()
: ClangUtil::GetQualType(property_clang_type),
prop_type_source);
SetMemberOwningModule(property_decl, class_interface_decl);
if (metadata)
ast->SetMetadata(property_decl, *metadata);
class_interface_decl->addDecl(property_decl);
clang::Selector setter_sel, getter_sel;
if (property_setter_name) {
std::string property_setter_no_colon(property_setter_name,
strlen(property_setter_name) - 1);
clang::IdentifierInfo *setter_ident =
&clang_ast.Idents.get(property_setter_no_colon);
setter_sel = clang_ast.Selectors.getSelector(1, &setter_ident);
} else if (!(property_attributes & DW_APPLE_PROPERTY_readonly)) {
std::string setter_sel_string("set");
setter_sel_string.push_back(::toupper(property_name[0]));
setter_sel_string.append(&property_name[1]);
clang::IdentifierInfo *setter_ident =
&clang_ast.Idents.get(setter_sel_string);
setter_sel = clang_ast.Selectors.getSelector(1, &setter_ident);
}
property_decl->setSetterName(setter_sel);
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_setter);
if (property_getter_name != nullptr) {
clang::IdentifierInfo *getter_ident =
&clang_ast.Idents.get(property_getter_name);
getter_sel = clang_ast.Selectors.getSelector(0, &getter_ident);
} else {
clang::IdentifierInfo *getter_ident = &clang_ast.Idents.get(property_name);
getter_sel = clang_ast.Selectors.getSelector(0, &getter_ident);
}
property_decl->setGetterName(getter_sel);
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_getter);
if (ivar_decl)
property_decl->setPropertyIvarDecl(ivar_decl);
if (property_attributes & DW_APPLE_PROPERTY_readonly)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_readonly);
if (property_attributes & DW_APPLE_PROPERTY_readwrite)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_readwrite);
if (property_attributes & DW_APPLE_PROPERTY_assign)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_assign);
if (property_attributes & DW_APPLE_PROPERTY_retain)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_retain);
if (property_attributes & DW_APPLE_PROPERTY_copy)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_copy);
if (property_attributes & DW_APPLE_PROPERTY_nonatomic)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_nonatomic);
if (property_attributes & ObjCPropertyAttribute::kind_nullability)
property_decl->setPropertyAttributes(
ObjCPropertyAttribute::kind_nullability);
if (property_attributes & ObjCPropertyAttribute::kind_null_resettable)
property_decl->setPropertyAttributes(
ObjCPropertyAttribute::kind_null_resettable);
if (property_attributes & ObjCPropertyAttribute::kind_class)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_class);
const bool isInstance =
(property_attributes & ObjCPropertyAttribute::kind_class) == 0;
clang::ObjCMethodDecl *getter = nullptr;
if (!getter_sel.isNull())
getter = isInstance ? class_interface_decl->lookupInstanceMethod(getter_sel)
: class_interface_decl->lookupClassMethod(getter_sel);
if (!getter_sel.isNull() && !getter) {
const bool isVariadic = false;
const bool isPropertyAccessor = true;
const bool isSynthesizedAccessorStub = false;
const bool isImplicitlyDeclared = true;
const bool isDefined = false;
const clang::ObjCMethodDecl::ImplementationControl impControl =
clang::ObjCMethodDecl::None;
const bool HasRelatedResultType = false;
getter = clang::ObjCMethodDecl::CreateDeserialized(clang_ast, 0);
getter->setDeclName(getter_sel);
getter->setReturnType(ClangUtil::GetQualType(property_clang_type_to_access));
getter->setDeclContext(class_interface_decl);
getter->setInstanceMethod(isInstance);
getter->setVariadic(isVariadic);
getter->setPropertyAccessor(isPropertyAccessor);
getter->setSynthesizedAccessorStub(isSynthesizedAccessorStub);
getter->setImplicit(isImplicitlyDeclared);
getter->setDefined(isDefined);
getter->setDeclImplementation(impControl);
getter->setRelatedResultType(HasRelatedResultType);
SetMemberOwningModule(getter, class_interface_decl);
if (metadata)
ast->SetMetadata(getter, *metadata);
getter->setMethodParams(clang_ast, llvm::ArrayRef<clang::ParmVarDecl *>(),
llvm::ArrayRef<clang::SourceLocation>());
class_interface_decl->addDecl(getter);
}
if (getter) {
getter->setPropertyAccessor(true);
property_decl->setGetterMethodDecl(getter);
}
clang::ObjCMethodDecl *setter = nullptr;
if (!setter_sel.isNull())
setter = isInstance ? class_interface_decl->lookupInstanceMethod(setter_sel)
: class_interface_decl->lookupClassMethod(setter_sel);
if (!setter_sel.isNull() && !setter) {
clang::QualType result_type = clang_ast.VoidTy;
const bool isVariadic = false;
const bool isPropertyAccessor = true;
const bool isSynthesizedAccessorStub = false;
const bool isImplicitlyDeclared = true;
const bool isDefined = false;
const clang::ObjCMethodDecl::ImplementationControl impControl =
clang::ObjCMethodDecl::None;
const bool HasRelatedResultType = false;
setter = clang::ObjCMethodDecl::CreateDeserialized(clang_ast, 0);
setter->setDeclName(setter_sel);
setter->setReturnType(result_type);
setter->setDeclContext(class_interface_decl);
setter->setInstanceMethod(isInstance);
setter->setVariadic(isVariadic);
setter->setPropertyAccessor(isPropertyAccessor);
setter->setSynthesizedAccessorStub(isSynthesizedAccessorStub);
setter->setImplicit(isImplicitlyDeclared);
setter->setDefined(isDefined);
setter->setDeclImplementation(impControl);
setter->setRelatedResultType(HasRelatedResultType);
SetMemberOwningModule(setter, class_interface_decl);
if (metadata)
ast->SetMetadata(setter, *metadata);
llvm::SmallVector<clang::ParmVarDecl *, 1> params;
params.push_back(clang::ParmVarDecl::Create(
clang_ast, setter, clang::SourceLocation(), clang::SourceLocation(),
nullptr, // anonymous
ClangUtil::GetQualType(property_clang_type_to_access), nullptr,
clang::SC_Auto, nullptr));
setter->setMethodParams(clang_ast,
llvm::ArrayRef<clang::ParmVarDecl *>(params),
llvm::ArrayRef<clang::SourceLocation>());
class_interface_decl->addDecl(setter);
}
if (setter) {
setter->setPropertyAccessor(true);
property_decl->setSetterMethodDecl(setter);
}
return true;
}
bool TypeSystemClang::IsObjCClassTypeAndHasIVars(const CompilerType &type,
bool check_superclass) {
clang::ObjCInterfaceDecl *class_interface_decl = GetAsObjCInterfaceDecl(type);
if (class_interface_decl)
return ObjCDeclHasIVars(class_interface_decl, check_superclass);
return false;
}
clang::ObjCMethodDecl *TypeSystemClang::AddMethodToObjCObjectType(
const CompilerType &type,
const char *name, // the full symbol name as seen in the symbol table
// (e.g., "-[NSString stringWithCString:]")
const CompilerType &method_clang_type, lldb::AccessType access,
bool is_artificial, bool is_variadic, bool is_objc_direct_call) {
if (!type || !method_clang_type.IsValid())
return nullptr;
clang::ObjCInterfaceDecl *class_interface_decl = GetAsObjCInterfaceDecl(type);
if (class_interface_decl == nullptr)
return nullptr;
TypeSystemClang *lldb_ast =
llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (lldb_ast == nullptr)
return nullptr;
clang::ASTContext &ast = lldb_ast->getASTContext();
const char *selector_start = ::strchr(name, ' ');
if (selector_start == nullptr)
return nullptr;
selector_start++;
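// Split the selector portion of the name into its colon-separated pieces;
// each piece that is followed by a ':' takes an argument.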
llvm::SmallVector<clang::IdentifierInfo *, 12> selector_idents;
size_t len = 0;
const char *start;
unsigned num_selectors_with_args = 0;
for (start = selector_start; start && *start != '\0' && *start != ']';
start += len) {
len = ::strcspn(start, ":]");
bool has_arg = (start[len] == ':');
if (has_arg)
++num_selectors_with_args;
selector_idents.push_back(&ast.Idents.get(llvm::StringRef(start, len)));
if (has_arg)
len += 1;
}
if (selector_idents.size() == 0)
return nullptr;
clang::Selector method_selector = ast.Selectors.getSelector(
num_selectors_with_args ? selector_idents.size() : 0,
selector_idents.data());
clang::QualType method_qual_type(ClangUtil::GetQualType(method_clang_type));
// Populate the method decl with parameter decls
const clang::Type *method_type(method_qual_type.getTypePtr());
if (method_type == nullptr)
return nullptr;
const clang::FunctionProtoType *method_function_prototype(
llvm::dyn_cast<clang::FunctionProtoType>(method_type));
if (!method_function_prototype)
return nullptr;
const bool isInstance = (name[0] == '-');
const bool isVariadic = is_variadic;
const bool isPropertyAccessor = false;
const bool isSynthesizedAccessorStub = false;
// Force this to true because we don't have source locations.
const bool isImplicitlyDeclared = true;
const bool isDefined = false;
const clang::ObjCMethodDecl::ImplementationControl impControl =
clang::ObjCMethodDecl::None;
const bool HasRelatedResultType = false;
const unsigned num_args = method_function_prototype->getNumParams();
if (num_args != num_selectors_with_args)
return nullptr; // some debug information is corrupt. We are not going to
// deal with it.
auto *objc_method_decl = clang::ObjCMethodDecl::CreateDeserialized(ast, 0);
objc_method_decl->setDeclName(method_selector);
objc_method_decl->setReturnType(method_function_prototype->getReturnType());
objc_method_decl->setDeclContext(
lldb_ast->GetDeclContextForType(ClangUtil::GetQualType(type)));
objc_method_decl->setInstanceMethod(isInstance);
objc_method_decl->setVariadic(isVariadic);
objc_method_decl->setPropertyAccessor(isPropertyAccessor);
objc_method_decl->setSynthesizedAccessorStub(isSynthesizedAccessorStub);
objc_method_decl->setImplicit(isImplicitlyDeclared);
objc_method_decl->setDefined(isDefined);
objc_method_decl->setDeclImplementation(impControl);
objc_method_decl->setRelatedResultType(HasRelatedResultType);
SetMemberOwningModule(objc_method_decl, class_interface_decl);
if (num_args > 0) {
llvm::SmallVector<clang::ParmVarDecl *, 12> params;
for (unsigned param_index = 0; param_index < num_args; ++param_index) {
params.push_back(clang::ParmVarDecl::Create(
ast, objc_method_decl, clang::SourceLocation(),
clang::SourceLocation(),
nullptr, // anonymous
method_function_prototype->getParamType(param_index), nullptr,
clang::SC_Auto, nullptr));
}
objc_method_decl->setMethodParams(
ast, llvm::ArrayRef<clang::ParmVarDecl *>(params),
llvm::ArrayRef<clang::SourceLocation>());
}
if (is_objc_direct_call) {
// Add the objc_direct attribute to the declaration we generate so that
// we generate a direct method call for this ObjCMethodDecl.
objc_method_decl->addAttr(
clang::ObjCDirectAttr::CreateImplicit(ast, SourceLocation()));
// Usually Sema creates the implicit parameters (e.g., self) when it
// parses the method. We don't have a parsing Sema when we build our own
// AST here, so we need to create these implicit parameters manually to
// make the direct-call code generation happy.
objc_method_decl->createImplicitParams(ast, class_interface_decl);
}
class_interface_decl->addDecl(objc_method_decl);
VerifyDecl(objc_method_decl);
return objc_method_decl;
}
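// Toggle whether the record/enum/interface behind "type" claims external
// lexical and visible storage, i.e. whether clang should consult the
// ExternalASTSource when it needs the members.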
bool TypeSystemClang::SetHasExternalStorage(lldb::opaque_compiler_type_t type,
bool has_extern) {
if (!type)
return false;
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record: {
clang::CXXRecordDecl *cxx_record_decl = qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
cxx_record_decl->setHasExternalLexicalStorage(has_extern);
cxx_record_decl->setHasExternalVisibleStorage(has_extern);
return true;
}
} break;
case clang::Type::Enum: {
clang::EnumDecl *enum_decl =
llvm::cast<clang::EnumType>(qual_type)->getDecl();
if (enum_decl) {
enum_decl->setHasExternalLexicalStorage(has_extern);
enum_decl->setHasExternalVisibleStorage(has_extern);
return true;
}
} break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface: {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
class_interface_decl->setHasExternalLexicalStorage(has_extern);
class_interface_decl->setHasExternalVisibleStorage(has_extern);
return true;
}
}
} break;
default:
break;
}
return false;
}
#pragma mark TagDecl
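// Begin the definition of a tag type (struct/union/class/enum) or
// Objective-C interface so that members can be added to it.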
bool TypeSystemClang::StartTagDeclarationDefinition(const CompilerType &type) {
clang::QualType qual_type(ClangUtil::GetQualType(type));
if (!qual_type.isNull()) {
const clang::TagType *tag_type = qual_type->getAs<clang::TagType>();
if (tag_type) {
clang::TagDecl *tag_decl = tag_type->getDecl();
if (tag_decl) {
tag_decl->startDefinition();
return true;
}
}
const clang::ObjCObjectType *object_type =
qual_type->getAs<clang::ObjCObjectType>();
if (object_type) {
clang::ObjCInterfaceDecl *interface_decl = object_type->getInterface();
if (interface_decl) {
interface_decl->startDefinition();
return true;
}
}
}
return false;
}
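// Finish a definition started with StartTagDeclarationDefinition(), doing
// the extra bookkeeping Sema would normally handle (deleting implicit copy
// operations when a move operation is user-declared, choosing an enum
// promotion type, etc.).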
bool TypeSystemClang::CompleteTagDeclarationDefinition(
const CompilerType &type) {
clang::QualType qual_type(ClangUtil::GetQualType(type));
if (qual_type.isNull())
return false;
TypeSystemClang *lldb_ast =
llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (lldb_ast == nullptr)
return false;
// Make sure we use the same methodology as
// TypeSystemClang::StartTagDeclarationDefinition() as to how we start/end
// the definition.
const clang::TagType *tag_type = qual_type->getAs<clang::TagType>();
if (tag_type) {
clang::TagDecl *tag_decl = tag_type->getDecl();
if (auto *cxx_record_decl = llvm::dyn_cast<CXXRecordDecl>(tag_decl)) {
// If we have a move constructor declared but no copy constructor we
// need to explicitly mark it as deleted. Usually Sema would do this for
// us in Sema::DeclareImplicitCopyConstructor but we don't have a Sema
// when building an AST from debug information.
// See also:
// C++11 [class.copy]p7, p18:
// If the class definition declares a move constructor or move assignment
// operator, an implicitly declared copy constructor or copy assignment
// operator is defined as deleted.
if (cxx_record_decl->hasUserDeclaredMoveConstructor() ||
cxx_record_decl->hasUserDeclaredMoveAssignment()) {
if (cxx_record_decl->needsImplicitCopyConstructor())
cxx_record_decl->setImplicitCopyConstructorIsDeleted();
if (cxx_record_decl->needsImplicitCopyAssignment())
cxx_record_decl->setImplicitCopyAssignmentIsDeleted();
}
if (!cxx_record_decl->isCompleteDefinition())
cxx_record_decl->completeDefinition();
cxx_record_decl->setHasLoadedFieldsFromExternalStorage(true);
cxx_record_decl->setHasExternalLexicalStorage(false);
cxx_record_decl->setHasExternalVisibleStorage(false);
lldb_ast->SetCXXRecordDeclAccess(cxx_record_decl,
clang::AccessSpecifier::AS_none);
return true;
}
}
const clang::EnumType *enutype = qual_type->getAs<clang::EnumType>();
if (!enutype)
return false;
clang::EnumDecl *enum_decl = enutype->getDecl();
if (enum_decl->isCompleteDefinition())
return true;
clang::ASTContext &ast = lldb_ast->getASTContext();
// TODO: This really needs to be fixed.
QualType integer_type(enum_decl->getIntegerType());
if (!integer_type.isNull()) {
unsigned NumPositiveBits = 1;
unsigned NumNegativeBits = 0;
clang::QualType promotion_qual_type;
// If the enum integer type is less than an integer in bit width,
// then we must promote it to an integer size.
if (ast.getTypeSize(enum_decl->getIntegerType()) <
ast.getTypeSize(ast.IntTy)) {
if (enum_decl->getIntegerType()->isSignedIntegerType())
promotion_qual_type = ast.IntTy;
else
promotion_qual_type = ast.UnsignedIntTy;
} else
promotion_qual_type = enum_decl->getIntegerType();
enum_decl->completeDefinition(enum_decl->getIntegerType(),
promotion_qual_type, NumPositiveBits,
NumNegativeBits);
}
return true;
}
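// Add an enumerator with an explicit value to the enumeration behind
// "enum_type"; returns the new EnumConstantDecl, or nullptr on failure.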
clang::EnumConstantDecl *TypeSystemClang::AddEnumerationValueToEnumerationType(
const CompilerType &enum_type, const Declaration &decl, const char *name,
const llvm::APSInt &value) {
if (!enum_type || ConstString(name).IsEmpty())
return nullptr;
lldbassert(enum_type.GetTypeSystem() == static_cast<TypeSystem *>(this));
lldb::opaque_compiler_type_t enum_opaque_compiler_type =
enum_type.GetOpaqueQualType();
if (!enum_opaque_compiler_type)
return nullptr;
clang::QualType enum_qual_type(
GetCanonicalQualType(enum_opaque_compiler_type));
const clang::Type *clang_type = enum_qual_type.getTypePtr();
if (!clang_type)
return nullptr;
const clang::EnumType *enutype = llvm::dyn_cast<clang::EnumType>(clang_type);
if (!enutype)
return nullptr;
clang::EnumConstantDecl *enumerator_decl =
clang::EnumConstantDecl::CreateDeserialized(getASTContext(), 0);
enumerator_decl->setDeclContext(enutype->getDecl());
if (name && name[0])
enumerator_decl->setDeclName(&getASTContext().Idents.get(name));
enumerator_decl->setType(clang::QualType(enutype, 0));
enumerator_decl->setInitVal(value);
SetMemberOwningModule(enumerator_decl, enutype->getDecl());
enutype->getDecl()->addDecl(enumerator_decl);
VerifyDecl(enumerator_decl);
return enumerator_decl;
}
clang::EnumConstantDecl *TypeSystemClang::AddEnumerationValueToEnumerationType(
const CompilerType &enum_type, const Declaration &decl, const char *name,
int64_t enum_value, uint32_t enum_value_bit_size) {
CompilerType underlying_type = GetEnumerationIntegerType(enum_type);
bool is_signed = false;
underlying_type.IsIntegerType(is_signed);
llvm::APSInt value(enum_value_bit_size, is_signed);
value = enum_value;
return AddEnumerationValueToEnumerationType(enum_type, decl, name, value);
}
CompilerType TypeSystemClang::GetEnumerationIntegerType(CompilerType type) {
clang::QualType qt(ClangUtil::GetQualType(type));
const clang::Type *clang_type = qt.getTypePtrOrNull();
const auto *enum_type = llvm::dyn_cast_or_null<clang::EnumType>(clang_type);
if (!enum_type)
return CompilerType();
return GetType(enum_type->getDecl()->getIntegerType());
}
CompilerType
TypeSystemClang::CreateMemberPointerType(const CompilerType &type,
const CompilerType &pointee_type) {
if (type && pointee_type.IsValid() &&
type.GetTypeSystem() == pointee_type.GetTypeSystem()) {
TypeSystemClang *ast =
llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return CompilerType();
return ast->GetType(ast->getASTContext().getMemberPointerType(
ClangUtil::GetQualType(pointee_type),
ClangUtil::GetQualType(type).getTypePtr()));
}
return CompilerType();
}
// Dumping types
#define DEPTH_INCREMENT 2
#ifndef NDEBUG
LLVM_DUMP_METHOD void
TypeSystemClang::dump(lldb::opaque_compiler_type_t type) const {
if (!type)
return;
clang::QualType qual_type(GetQualType(type));
qual_type.dump();
}
#endif
void TypeSystemClang::Dump(llvm::raw_ostream &output) {
GetTranslationUnitDecl()->dump(output);
}
void TypeSystemClang::DumpFromSymbolFile(Stream &s,
llvm::StringRef symbol_name) {
SymbolFile *symfile = GetSymbolFile();
if (!symfile)
return;
lldb_private::TypeList type_list;
symfile->GetTypes(nullptr, eTypeClassAny, type_list);
size_t ntypes = type_list.GetSize();
for (size_t i = 0; i < ntypes; ++i) {
TypeSP type = type_list.GetTypeAtIndex(i);
if (!symbol_name.empty())
if (symbol_name != type->GetName().GetStringRef())
continue;
s << type->GetName().AsCString() << "\n";
CompilerType full_type = type->GetFullCompilerType();
if (clang::TagDecl *tag_decl = GetAsTagDecl(full_type)) {
tag_decl->dump(s.AsRawOstream());
continue;
}
if (clang::TypedefNameDecl *typedef_decl = GetAsTypedefDecl(full_type)) {
typedef_decl->dump(s.AsRawOstream());
continue;
}
if (auto *objc_obj = llvm::dyn_cast<clang::ObjCObjectType>(
ClangUtil::GetQualType(full_type).getTypePtr())) {
if (clang::ObjCInterfaceDecl *interface_decl = objc_obj->getInterface()) {
interface_decl->dump(s.AsRawOstream());
continue;
}
}
GetCanonicalQualType(full_type.GetOpaqueQualType())
.dump(s.AsRawOstream(), getASTContext());
}
}
void TypeSystemClang::DumpValue(
lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, Stream *s,
lldb::Format format, const lldb_private::DataExtractor &data,
lldb::offset_t data_byte_offset, size_t data_byte_size,
uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, bool show_types,
bool show_summary, bool verbose, uint32_t depth) {
if (!type)
return;
clang::QualType qual_type(GetQualType(type));
switch (qual_type->getTypeClass()) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
uint32_t field_bit_offset = 0;
uint32_t field_byte_offset = 0;
const clang::ASTRecordLayout &record_layout =
getASTContext().getASTRecordLayout(record_decl);
uint32_t child_idx = 0;
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
// We might have base classes to print out first
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
const clang::CXXRecordDecl *base_class_decl =
llvm::cast<clang::CXXRecordDecl>(
base_class->getType()->getAs<clang::RecordType>()->getDecl());
// Skip empty base classes
if (!verbose && !TypeSystemClang::RecordHasFields(base_class_decl))
continue;
if (base_class->isVirtual())
field_bit_offset =
record_layout.getVBaseClassOffset(base_class_decl)
.getQuantity() *
8;
else
field_bit_offset = record_layout.getBaseClassOffset(base_class_decl)
.getQuantity() *
8;
field_byte_offset = field_bit_offset / 8;
assert(field_bit_offset % 8 == 0);
if (child_idx == 0)
s->PutChar('{');
else
s->PutChar(',');
clang::QualType base_class_qual_type = base_class->getType();
std::string base_class_type_name(base_class_qual_type.getAsString());
// Indent and print the base class type name
s->Format("\n{0}{1}", llvm::fmt_repeat(" ", depth + DEPTH_INCREMENT),
base_class_type_name);
clang::TypeInfo base_class_type_info =
getASTContext().getTypeInfo(base_class_qual_type);
// Dump the value of the member
CompilerType base_clang_type = GetType(base_class_qual_type);
base_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
base_clang_type
.GetFormat(), // The format with which to display the member
data, // Data buffer containing all bytes for this type
data_byte_offset + field_byte_offset, // Offset into "data" where
// to grab value from
base_class_type_info.Width / 8, // Size of this type in bytes
0, // Bitfield bit size
0, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable
// types
show_summary, // Boolean indicating if we should show a summary
// for the current type
verbose, // Verbose output?
depth + DEPTH_INCREMENT); // Scope depth for any types that have
// children
++child_idx;
}
}
uint32_t field_idx = 0;
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field, ++field_idx, ++child_idx) {
// Print the starting squiggly bracket (if this is the first member) or
// comma (for member 2 and beyond) for the struct/union/class member.
if (child_idx == 0)
s->PutChar('{');
else
s->PutChar(',');
// Indent
s->Printf("\n%*s", depth + DEPTH_INCREMENT, "");
clang::QualType field_type = field->getType();
// Print the member type if requested
// Figure out the type byte size (field_type_info.first) and alignment
// (field_type_info.second) from the AST context.
clang::TypeInfo field_type_info =
getASTContext().getTypeInfo(field_type);
assert(field_idx < record_layout.getFieldCount());
// Figure out the field offset within the current struct/union/class
// type
field_bit_offset = record_layout.getFieldOffset(field_idx);
field_byte_offset = field_bit_offset / 8;
uint32_t field_bitfield_bit_size = 0;
uint32_t field_bitfield_bit_offset = 0;
if (FieldIsBitfield(*field, field_bitfield_bit_size))
field_bitfield_bit_offset = field_bit_offset % 8;
if (show_types) {
std::string field_type_name(field_type.getAsString());
if (field_bitfield_bit_size > 0)
s->Printf("(%s:%u) ", field_type_name.c_str(),
field_bitfield_bit_size);
else
s->Printf("(%s) ", field_type_name.c_str());
}
// Print the member name and equal sign
s->Printf("%s = ", field->getNameAsString().c_str());
// Dump the value of the member
CompilerType field_clang_type = GetType(field_type);
field_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
field_clang_type
.GetFormat(), // The format with which to display the member
data, // Data buffer containing all bytes for this type
data_byte_offset + field_byte_offset, // Offset into "data" where to
// grab value from
field_type_info.Width / 8, // Size of this type in bytes
field_bitfield_bit_size, // Bitfield bit size
field_bitfield_bit_offset, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable
// types
show_summary, // Boolean indicating if we should show a summary for
// the current type
verbose, // Verbose output?
depth + DEPTH_INCREMENT); // Scope depth for any types that have
// children
}
// Indent the trailing squiggly bracket
if (child_idx > 0)
s->Printf("\n%*s}", depth, "");
}
return;
case clang::Type::Enum:
if (GetCompleteType(type)) {
const clang::EnumType *enutype =
llvm::cast<clang::EnumType>(qual_type.getTypePtr());
const clang::EnumDecl *enum_decl = enutype->getDecl();
assert(enum_decl);
clang::EnumDecl::enumerator_iterator enum_pos, enum_end_pos;
lldb::offset_t offset = data_byte_offset;
const int64_t enum_value = data.GetMaxU64Bitfield(
&offset, data_byte_size, bitfield_bit_size, bitfield_bit_offset);
for (enum_pos = enum_decl->enumerator_begin(),
enum_end_pos = enum_decl->enumerator_end();
enum_pos != enum_end_pos; ++enum_pos) {
if (enum_pos->getInitVal() == enum_value) {
s->Printf("%s", enum_pos->getNameAsString().c_str());
return;
}
}
// If we have gotten here we didn't find the enumerator in the enum
// decl, so just print the integer.
s->Printf("%" PRIi64, enum_value);
}
return;
case clang::Type::ConstantArray: {
const clang::ConstantArrayType *array =
llvm::cast<clang::ConstantArrayType>(qual_type.getTypePtr());
bool is_array_of_characters = false;
clang::QualType element_qual_type = array->getElementType();
const clang::Type *canonical_type =
element_qual_type->getCanonicalTypeInternal().getTypePtr();
if (canonical_type)
is_array_of_characters = canonical_type->isCharType();
const uint64_t element_count = array->getSize().getLimitedValue();
clang::TypeInfo field_type_info =
getASTContext().getTypeInfo(element_qual_type);
uint32_t element_idx = 0;
uint32_t element_offset = 0;
uint64_t element_byte_size = field_type_info.Width / 8;
uint32_t element_stride = element_byte_size;
if (is_array_of_characters) {
s->PutChar('"');
DumpDataExtractor(data, s, data_byte_offset, lldb::eFormatChar,
element_byte_size, element_count, UINT32_MAX,
LLDB_INVALID_ADDRESS, 0, 0);
s->PutChar('"');
return;
} else {
CompilerType element_clang_type = GetType(element_qual_type);
lldb::Format element_format = element_clang_type.GetFormat();
for (element_idx = 0; element_idx < element_count; ++element_idx) {
// Print the starting squiggly bracket (if this is the first member) or
// comma (for member 2 and beyond) for the struct/union/class member.
if (element_idx == 0)
s->PutChar('{');
else
s->PutChar(',');
// Indent and print the index
s->Printf("\n%*s[%u] ", depth + DEPTH_INCREMENT, "", element_idx);
// Figure out the field offset within the current struct/union/class
// type
element_offset = element_idx * element_stride;
// Dump the value of the member
element_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
element_format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
data_byte_offset +
element_offset, // Offset into "data" where to grab value from
element_byte_size, // Size of this type in bytes
0, // Bitfield bit size
0, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable
// types
show_summary, // Boolean indicating if we should show a summary for
// the current type
verbose, // Verbose output?
depth + DEPTH_INCREMENT); // Scope depth for any types that have
// children
}
// Indent the trailing squiggly bracket
if (element_idx > 0)
s->Printf("\n%*s}", depth, "");
}
}
return;
case clang::Type::Typedef: {
clang::QualType typedef_qual_type =
llvm::cast<clang::TypedefType>(qual_type)
->getDecl()
->getUnderlyingType();
CompilerType typedef_clang_type = GetType(typedef_qual_type);
lldb::Format typedef_format = typedef_clang_type.GetFormat();
clang::TypeInfo typedef_type_info =
getASTContext().getTypeInfo(typedef_qual_type);
uint64_t typedef_byte_size = typedef_type_info.Width / 8;
return typedef_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
typedef_format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
data_byte_offset, // Offset into "data" where to grab value from
typedef_byte_size, // Size of this type in bytes
bitfield_bit_size, // Bitfield bit size
bitfield_bit_offset, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable types
show_summary, // Boolean indicating if we should show a summary for the
// current type
verbose, // Verbose output?
depth); // Scope depth for any types that have children
} break;
case clang::Type::Auto: {
clang::QualType elaborated_qual_type =
llvm::cast<clang::AutoType>(qual_type)->getDeducedType();
CompilerType elaborated_clang_type = GetType(elaborated_qual_type);
lldb::Format elaborated_format = elaborated_clang_type.GetFormat();
clang::TypeInfo elaborated_type_info =
getASTContext().getTypeInfo(elaborated_qual_type);
uint64_t elaborated_byte_size = elaborated_type_info.Width / 8;
return elaborated_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
elaborated_format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
data_byte_offset, // Offset into "data" where to grab value from
elaborated_byte_size, // Size of this type in bytes
bitfield_bit_size, // Bitfield bit size
bitfield_bit_offset, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable types
show_summary, // Boolean indicating if we should show a summary for the
// current type
verbose, // Verbose output?
depth); // Scope depth for any types that have children
} break;
case clang::Type::Elaborated: {
clang::QualType elaborated_qual_type =
llvm::cast<clang::ElaboratedType>(qual_type)->getNamedType();
CompilerType elaborated_clang_type = GetType(elaborated_qual_type);
lldb::Format elaborated_format = elaborated_clang_type.GetFormat();
clang::TypeInfo elaborated_type_info =
getASTContext().getTypeInfo(elaborated_qual_type);
uint64_t elaborated_byte_size = elaborated_type_info.Width / 8;
return elaborated_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
elaborated_format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
data_byte_offset, // Offset into "data" where to grab value from
elaborated_byte_size, // Size of this type in bytes
bitfield_bit_size, // Bitfield bit size
bitfield_bit_offset, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable types
show_summary, // Boolean indicating if we should show a summary for the
// current type
verbose, // Verbose output?
depth); // Scope depth for any types that have children
} break;
case clang::Type::Paren: {
clang::QualType desugar_qual_type =
llvm::cast<clang::ParenType>(qual_type)->desugar();
CompilerType desugar_clang_type = GetType(desugar_qual_type);
lldb::Format desugar_format = desugar_clang_type.GetFormat();
clang::TypeInfo desugar_type_info =
getASTContext().getTypeInfo(desugar_qual_type);
uint64_t desugar_byte_size = desugar_type_info.Width / 8;
return desugar_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
desugar_format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
data_byte_offset, // Offset into "data" where to grab value from
desugar_byte_size, // Size of this type in bytes
bitfield_bit_size, // Bitfield bit size
bitfield_bit_offset, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable types
show_summary, // Boolean indicating if we should show a summary for the
// current type
verbose, // Verbose output?
depth); // Scope depth for any types that have children
} break;
default:
// We are down to a scalar type that we just need to display.
DumpDataExtractor(data, s, data_byte_offset, format, data_byte_size, 1,
UINT32_MAX, LLDB_INVALID_ADDRESS, bitfield_bit_size,
bitfield_bit_offset);
if (show_summary)
DumpSummary(type, exe_ctx, s, data, data_byte_offset, data_byte_size);
break;
}
}
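// Print an enum value either as a matching enumerator name, as a bitwise
// combination of enumerator names (when the enum looks like a set of
// flags), or as a plain integer when neither interpretation fits.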
static bool DumpEnumValue(const clang::QualType &qual_type, Stream *s,
const DataExtractor &data, lldb::offset_t byte_offset,
size_t byte_size, uint32_t bitfield_bit_offset,
uint32_t bitfield_bit_size) {
const clang::EnumType *enutype =
llvm::cast<clang::EnumType>(qual_type.getTypePtr());
const clang::EnumDecl *enum_decl = enutype->getDecl();
assert(enum_decl);
lldb::offset_t offset = byte_offset;
const uint64_t enum_svalue = data.GetMaxS64Bitfield(
&offset, byte_size, bitfield_bit_size, bitfield_bit_offset);
bool can_be_bitfield = true;
uint64_t covered_bits = 0;
int num_enumerators = 0;
// Try to find an exact match for the value.
// At the same time, we're applying a heuristic to determine whether we want
// to print this enum as a bitfield. We're likely dealing with a bitfield if
// every enumerator is either a one bit value or a superset of the previous
// enumerators. Also 0 doesn't make sense when the enumerators are used as
// flags.
for (auto *enumerator : enum_decl->enumerators()) {
uint64_t val = enumerator->getInitVal().getSExtValue();
val = llvm::SignExtend64(val, 8*byte_size);
if (llvm::countPopulation(val) != 1 && (val & ~covered_bits) != 0)
can_be_bitfield = false;
covered_bits |= val;
++num_enumerators;
if (val == enum_svalue) {
// Found an exact match, that's all we need to do.
s->PutCString(enumerator->getNameAsString());
return true;
}
}
// Unsigned values make more sense for flags.
offset = byte_offset;
const uint64_t enum_uvalue = data.GetMaxU64Bitfield(
&offset, byte_size, bitfield_bit_size, bitfield_bit_offset);
// No exact match, but we don't think this is a bitfield. Print the value as
// decimal.
if (!can_be_bitfield) {
if (qual_type->isSignedIntegerOrEnumerationType())
s->Printf("%" PRIi64, enum_svalue);
else
s->Printf("%" PRIu64, enum_uvalue);
return true;
}
uint64_t remaining_value = enum_uvalue;
std::vector<std::pair<uint64_t, llvm::StringRef>> values;
values.reserve(num_enumerators);
for (auto *enumerator : enum_decl->enumerators())
if (auto val = enumerator->getInitVal().getZExtValue())
values.emplace_back(val, enumerator->getName());
// Sort in descending order of population count so that in
// `enum {A, B, ALL = A|B }` we visit ALL first. Use a stable sort so that
// A | C where A is declared before C is displayed in this order.
std::stable_sort(values.begin(), values.end(), [](const auto &a, const auto &b) {
return llvm::countPopulation(a.first) > llvm::countPopulation(b.first);
});
for (const auto &val : values) {
if ((remaining_value & val.first) != val.first)
continue;
remaining_value &= ~val.first;
s->PutCString(val.second);
if (remaining_value)
s->PutCString(" | ");
}
// If there is a remainder that is not covered by the value, print it as hex.
if (remaining_value)
s->Printf("0x%" PRIx64, remaining_value);
return true;
}
bool TypeSystemClang::DumpTypeValue(
lldb::opaque_compiler_type_t type, Stream *s, lldb::Format format,
const lldb_private::DataExtractor &data, lldb::offset_t byte_offset,
size_t byte_size, uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset,
ExecutionContextScope *exe_scope) {
if (!type)
return false;
if (IsAggregateType(type)) {
return false;
} else {
clang::QualType qual_type(GetQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
if (type_class == clang::Type::Elaborated) {
qual_type = llvm::cast<clang::ElaboratedType>(qual_type)->getNamedType();
return DumpTypeValue(qual_type.getAsOpaquePtr(), s, format, data, byte_offset, byte_size,
bitfield_bit_size, bitfield_bit_offset, exe_scope);
}
switch (type_class) {
case clang::Type::Typedef: {
clang::QualType typedef_qual_type =
llvm::cast<clang::TypedefType>(qual_type)
->getDecl()
->getUnderlyingType();
CompilerType typedef_clang_type = GetType(typedef_qual_type);
if (format == eFormatDefault)
format = typedef_clang_type.GetFormat();
clang::TypeInfo typedef_type_info =
getASTContext().getTypeInfo(typedef_qual_type);
uint64_t typedef_byte_size = typedef_type_info.Width / 8;
return typedef_clang_type.DumpTypeValue(
s,
format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
byte_offset, // Offset into "data" where to grab value from
typedef_byte_size, // Size of this type in bytes
bitfield_bit_size, // Size in bits of a bitfield value, if zero don't
// treat as a bitfield
bitfield_bit_offset, // Offset in bits of a bitfield value if
// bitfield_bit_size != 0
exe_scope);
} break;
case clang::Type::Enum:
// If our format is enum or default, show the enumeration value as its
// enumeration string value, else just display it as requested.
if ((format == eFormatEnum || format == eFormatDefault) &&
GetCompleteType(type))
return DumpEnumValue(qual_type, s, data, byte_offset, byte_size,
bitfield_bit_offset, bitfield_bit_size);
// The format was not enum; just fall through and dump the value as
// requested...
LLVM_FALLTHROUGH;
default:
// We are down to a scalar type that we just need to display.
{
uint32_t item_count = 1;
// For a few formats we might need to modify our size and count
// depending on how we are trying to display the value...
switch (format) {
default:
case eFormatBoolean:
case eFormatBinary:
case eFormatComplex:
case eFormatCString: // NULL terminated C strings
case eFormatDecimal:
case eFormatEnum:
case eFormatHex:
case eFormatHexUppercase:
case eFormatFloat:
case eFormatOctal:
case eFormatOSType:
case eFormatUnsigned:
case eFormatPointer:
case eFormatVectorOfChar:
case eFormatVectorOfSInt8:
case eFormatVectorOfUInt8:
case eFormatVectorOfSInt16:
case eFormatVectorOfUInt16:
case eFormatVectorOfSInt32:
case eFormatVectorOfUInt32:
case eFormatVectorOfSInt64:
case eFormatVectorOfUInt64:
case eFormatVectorOfFloat32:
case eFormatVectorOfFloat64:
case eFormatVectorOfUInt128:
break;
case eFormatChar:
case eFormatCharPrintable:
case eFormatCharArray:
case eFormatBytes:
case eFormatUnicode8:
case eFormatBytesWithASCII:
item_count = byte_size;
byte_size = 1;
break;
case eFormatUnicode16:
item_count = byte_size / 2;
byte_size = 2;
break;
case eFormatUnicode32:
item_count = byte_size / 4;
byte_size = 4;
break;
}
return DumpDataExtractor(data, s, byte_offset, format, byte_size,
item_count, UINT32_MAX, LLDB_INVALID_ADDRESS,
bitfield_bit_size, bitfield_bit_offset,
exe_scope);
}
break;
}
}
return false;
}
void TypeSystemClang::DumpSummary(lldb::opaque_compiler_type_t type,
ExecutionContext *exe_ctx, Stream *s,
const lldb_private::DataExtractor &data,
lldb::offset_t data_byte_offset,
size_t data_byte_size) {
uint32_t length = 0;
if (IsCStringType(type, length)) {
if (exe_ctx) {
Process *process = exe_ctx->GetProcessPtr();
if (process) {
lldb::offset_t offset = data_byte_offset;
lldb::addr_t pointer_address = data.GetMaxU64(&offset, data_byte_size);
std::vector<uint8_t> buf;
if (length > 0)
buf.resize(length);
else
buf.resize(256);
DataExtractor cstr_data(&buf.front(), buf.size(),
process->GetByteOrder(), 4);
buf.back() = '\0';
size_t bytes_read;
size_t total_cstr_len = 0;
Status error;
while ((bytes_read = process->ReadMemory(pointer_address, &buf.front(),
buf.size(), error)) > 0) {
const size_t len = strlen((const char *)&buf.front());
if (len == 0)
break;
if (total_cstr_len == 0)
s->PutCString(" \"");
DumpDataExtractor(cstr_data, s, 0, lldb::eFormatChar, 1, len,
UINT32_MAX, LLDB_INVALID_ADDRESS, 0, 0);
total_cstr_len += len;
if (len < buf.size())
break;
pointer_address += total_cstr_len;
}
if (total_cstr_len > 0)
s->PutChar('"');
}
}
}
}
void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type,
lldb::DescriptionLevel level) {
StreamFile s(stdout, false);
DumpTypeDescription(type, &s, level);
CompilerType ct(this, type);
const clang::Type *clang_type = ClangUtil::GetQualType(ct).getTypePtr();
ClangASTMetadata *metadata = GetMetadata(clang_type);
if (metadata) {
metadata->Dump(&s);
}
}
void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type,
Stream *s,
lldb::DescriptionLevel level) {
if (type) {
clang::QualType qual_type =
RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef});
llvm::SmallVector<char, 1024> buf;
llvm::raw_svector_ostream llvm_ostrm(buf);
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface: {
GetCompleteType(type);
auto *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (!objc_class_type)
break;
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (!class_interface_decl)
break;
if (level == eDescriptionLevelVerbose)
class_interface_decl->dump(llvm_ostrm);
else
class_interface_decl->print(llvm_ostrm,
getASTContext().getPrintingPolicy(),
s->GetIndentLevel());
} break;
case clang::Type::Typedef: {
auto *typedef_type = qual_type->getAs<clang::TypedefType>();
if (!typedef_type)
break;
const clang::TypedefNameDecl *typedef_decl = typedef_type->getDecl();
if (level == eDescriptionLevelVerbose)
typedef_decl->dump(llvm_ostrm);
else {
std::string clang_typedef_name(GetTypeNameForDecl(typedef_decl));
if (!clang_typedef_name.empty()) {
s->PutCString("typedef ");
s->PutCString(clang_typedef_name);
}
}
} break;
case clang::Type::Record: {
GetCompleteType(type);
auto *record_type = llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
if (level == eDescriptionLevelVerbose)
record_decl->dump(llvm_ostrm);
else {
if (auto *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl))
cxx_record_decl->print(llvm_ostrm,
getASTContext().getPrintingPolicy(),
s->GetIndentLevel());
else
record_decl->print(llvm_ostrm, getASTContext().getPrintingPolicy(),
s->GetIndentLevel());
}
} break;
default: {
if (auto *tag_type =
llvm::dyn_cast<clang::TagType>(qual_type.getTypePtr())) {
if (clang::TagDecl *tag_decl = tag_type->getDecl()) {
if (level == eDescriptionLevelVerbose)
tag_decl->dump(llvm_ostrm);
else
tag_decl->print(llvm_ostrm, 0);
}
} else {
if (level == eDescriptionLevelVerbose)
qual_type->dump(llvm_ostrm, getASTContext());
else {
std::string clang_type_name(qual_type.getAsString());
if (!clang_type_name.empty())
s->PutCString(clang_type_name);
}
}
}
}
if (buf.size() > 0) {
s->Write(buf.data(), buf.size());
}
}
}
void TypeSystemClang::DumpTypeName(const CompilerType &type) {
if (ClangUtil::IsClangType(type)) {
clang::QualType qual_type(
ClangUtil::GetCanonicalQualType(ClangUtil::RemoveFastQualifiers(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record: {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl)
printf("class %s", cxx_record_decl->getName().str().c_str());
} break;
case clang::Type::Enum: {
clang::EnumDecl *enum_decl =
llvm::cast<clang::EnumType>(qual_type)->getDecl();
if (enum_decl) {
printf("enum %s", enum_decl->getName().str().c_str());
}
} break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface: {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
// We currently can't complete Objective-C types through the newly
// added ASTContext because it only supports TagDecl objects right
// now...
if (class_interface_decl)
printf("@class %s", class_interface_decl->getName().str().c_str());
}
} break;
case clang::Type::Typedef:
printf("typedef %s", llvm::cast<clang::TypedefType>(qual_type)
->getDecl()
->getName()
.str()
.c_str());
break;
case clang::Type::Auto:
printf("auto ");
return DumpTypeName(CompilerType(type.GetTypeSystem(),
llvm::cast<clang::AutoType>(qual_type)
->getDeducedType()
.getAsOpaquePtr()));
case clang::Type::Elaborated:
printf("elaborated ");
return DumpTypeName(CompilerType(
type.GetTypeSystem(), llvm::cast<clang::ElaboratedType>(qual_type)
->getNamedType()
.getAsOpaquePtr()));
case clang::Type::Paren:
printf("paren ");
return DumpTypeName(CompilerType(
type.GetTypeSystem(),
llvm::cast<clang::ParenType>(qual_type)->desugar().getAsOpaquePtr()));
default:
printf("TypeSystemClang::DumpTypeName() type_class = %u", type_class);
break;
}
}
}
clang::ClassTemplateDecl *TypeSystemClang::ParseClassTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
lldb::AccessType access_type, const char *parent_name, int tag_decl_kind,
const TypeSystemClang::TemplateParameterInfos &template_param_infos) {
if (template_param_infos.IsValid()) {
std::string template_basename(parent_name);
template_basename.erase(template_basename.find('<'));
return CreateClassTemplateDecl(decl_ctx, owning_module, access_type,
template_basename.c_str(), tag_decl_kind,
template_param_infos);
}
return nullptr;
}
void TypeSystemClang::CompleteTagDecl(clang::TagDecl *decl) {
SymbolFile *sym_file = GetSymbolFile();
if (sym_file) {
CompilerType clang_type = GetTypeForDecl(decl);
if (clang_type)
sym_file->CompleteType(clang_type);
}
}
void TypeSystemClang::CompleteObjCInterfaceDecl(
clang::ObjCInterfaceDecl *decl) {
SymbolFile *sym_file = GetSymbolFile();
if (sym_file) {
CompilerType clang_type = GetTypeForDecl(decl);
if (clang_type)
sym_file->CompleteType(clang_type);
}
}
DWARFASTParser *TypeSystemClang::GetDWARFParser() {
if (!m_dwarf_ast_parser_up)
m_dwarf_ast_parser_up = std::make_unique<DWARFASTParserClang>(*this);
return m_dwarf_ast_parser_up.get();
}
PDBASTParser *TypeSystemClang::GetPDBParser() {
if (!m_pdb_ast_parser_up)
m_pdb_ast_parser_up = std::make_unique<PDBASTParser>(*this);
return m_pdb_ast_parser_up.get();
}
bool TypeSystemClang::LayoutRecordType(
const clang::RecordDecl *record_decl, uint64_t &bit_size,
uint64_t &alignment,
llvm::DenseMap<const clang::FieldDecl *, uint64_t> &field_offsets,
llvm::DenseMap<const clang::CXXRecordDecl *, clang::CharUnits>
&base_offsets,
llvm::DenseMap<const clang::CXXRecordDecl *, clang::CharUnits>
&vbase_offsets) {
lldb_private::ClangASTImporter *importer = nullptr;
if (m_dwarf_ast_parser_up)
importer = &m_dwarf_ast_parser_up->GetClangASTImporter();
if (!importer && m_pdb_ast_parser_up)
importer = &m_pdb_ast_parser_up->GetClangASTImporter();
if (!importer)
return false;
return importer->LayoutRecordType(record_decl, bit_size, alignment,
field_offsets, base_offsets, vbase_offsets);
}
// CompilerDecl override functions
ConstString TypeSystemClang::DeclGetName(void *opaque_decl) {
if (opaque_decl) {
clang::NamedDecl *nd =
llvm::dyn_cast<NamedDecl>((clang::Decl *)opaque_decl);
if (nd != nullptr)
return ConstString(nd->getDeclName().getAsString());
}
return ConstString();
}
ConstString TypeSystemClang::DeclGetMangledName(void *opaque_decl) {
if (opaque_decl) {
clang::NamedDecl *nd =
llvm::dyn_cast<clang::NamedDecl>((clang::Decl *)opaque_decl);
if (nd != nullptr && !llvm::isa<clang::ObjCMethodDecl>(nd)) {
clang::MangleContext *mc = getMangleContext();
if (mc && mc->shouldMangleCXXName(nd)) {
llvm::SmallVector<char, 1024> buf;
llvm::raw_svector_ostream llvm_ostrm(buf);
if (llvm::isa<clang::CXXConstructorDecl>(nd)) {
mc->mangleName(
clang::GlobalDecl(llvm::dyn_cast<clang::CXXConstructorDecl>(nd),
Ctor_Complete),
llvm_ostrm);
} else if (llvm::isa<clang::CXXDestructorDecl>(nd)) {
mc->mangleName(
clang::GlobalDecl(llvm::dyn_cast<clang::CXXDestructorDecl>(nd),
Dtor_Complete),
llvm_ostrm);
} else {
mc->mangleName(nd, llvm_ostrm);
}
if (buf.size() > 0)
return ConstString(buf.data(), buf.size());
}
}
}
return ConstString();
}
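// Usage sketch (not part of this patch; the decl's origin is hypothetical):
// for a free function such as `void foo();`, an Itanium-ABI mangle context
// produces "_Z3foov" here, while an ObjCMethodDecl is skipped above because
// Objective-C methods are never C++-mangled.
//
//   CompilerDecl decl = ...; // a CompilerDecl backed by TypeSystemClang
//   ConstString mangled = decl.GetMangledName();
//   if (mangled)
//     printf("mangled: %s\n", mangled.GetCString());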
CompilerDeclContext TypeSystemClang::DeclGetDeclContext(void *opaque_decl) {
if (opaque_decl)
return CreateDeclContext(((clang::Decl *)opaque_decl)->getDeclContext());
return CompilerDeclContext();
}
CompilerType TypeSystemClang::DeclGetFunctionReturnType(void *opaque_decl) {
if (clang::FunctionDecl *func_decl =
llvm::dyn_cast<clang::FunctionDecl>((clang::Decl *)opaque_decl))
return GetType(func_decl->getReturnType());
if (clang::ObjCMethodDecl *objc_method =
llvm::dyn_cast<clang::ObjCMethodDecl>((clang::Decl *)opaque_decl))
return GetType(objc_method->getReturnType());
else
return CompilerType();
}
size_t TypeSystemClang::DeclGetFunctionNumArguments(void *opaque_decl) {
if (clang::FunctionDecl *func_decl =
llvm::dyn_cast<clang::FunctionDecl>((clang::Decl *)opaque_decl))
return func_decl->param_size();
if (clang::ObjCMethodDecl *objc_method =
llvm::dyn_cast<clang::ObjCMethodDecl>((clang::Decl *)opaque_decl))
return objc_method->param_size();
else
return 0;
}
CompilerType TypeSystemClang::DeclGetFunctionArgumentType(void *opaque_decl,
size_t idx) {
if (clang::FunctionDecl *func_decl =
llvm::dyn_cast<clang::FunctionDecl>((clang::Decl *)opaque_decl)) {
if (idx < func_decl->param_size()) {
ParmVarDecl *var_decl = func_decl->getParamDecl(idx);
if (var_decl)
return GetType(var_decl->getOriginalType());
}
} else if (clang::ObjCMethodDecl *objc_method =
llvm::dyn_cast<clang::ObjCMethodDecl>(
(clang::Decl *)opaque_decl)) {
if (idx < objc_method->param_size())
return GetType(objc_method->parameters()[idx]->getOriginalType());
}
return CompilerType();
}
// CompilerDeclContext functions
std::vector<CompilerDecl> TypeSystemClang::DeclContextFindDeclByName(
void *opaque_decl_ctx, ConstString name, const bool ignore_using_decls) {
std::vector<CompilerDecl> found_decls;
SymbolFile *symbol_file = GetSymbolFile();
if (opaque_decl_ctx && symbol_file) {
DeclContext *root_decl_ctx = (DeclContext *)opaque_decl_ctx;
std::set<DeclContext *> searched;
std::multimap<DeclContext *, DeclContext *> search_queue;
for (clang::DeclContext *decl_context = root_decl_ctx;
decl_context != nullptr && found_decls.empty();
decl_context = decl_context->getParent()) {
search_queue.insert(std::make_pair(decl_context, decl_context));
for (auto it = search_queue.find(decl_context); it != search_queue.end();
it++) {
if (!searched.insert(it->second).second)
continue;
symbol_file->ParseDeclsForContext(
CreateDeclContext(it->second));
for (clang::Decl *child : it->second->decls()) {
if (clang::UsingDirectiveDecl *ud =
llvm::dyn_cast<clang::UsingDirectiveDecl>(child)) {
if (ignore_using_decls)
continue;
clang::DeclContext *from = ud->getCommonAncestor();
if (searched.find(ud->getNominatedNamespace()) == searched.end())
search_queue.insert(
std::make_pair(from, ud->getNominatedNamespace()));
} else if (clang::UsingDecl *ud =
llvm::dyn_cast<clang::UsingDecl>(child)) {
if (ignore_using_decls)
continue;
for (clang::UsingShadowDecl *usd : ud->shadows()) {
clang::Decl *target = usd->getTargetDecl();
if (clang::NamedDecl *nd =
llvm::dyn_cast<clang::NamedDecl>(target)) {
IdentifierInfo *ii = nd->getIdentifier();
if (ii != nullptr &&
ii->getName().equals(name.AsCString(nullptr)))
found_decls.push_back(GetCompilerDecl(nd));
}
}
} else if (clang::NamedDecl *nd =
llvm::dyn_cast<clang::NamedDecl>(child)) {
IdentifierInfo *ii = nd->getIdentifier();
if (ii != nullptr && ii->getName().equals(name.AsCString(nullptr)))
found_decls.push_back(GetCompilerDecl(nd));
}
}
}
}
}
return found_decls;
}
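// Illustrative input (a sketch, not from this patch): with the source below,
// looking up "foo" from bar()'s DeclContext walks the parent chain and, via
// the UsingDirectiveDecl handling above, also queues namespace ns, so ns::foo
// is found even though it isn't a direct child of the starting context.
//
//   namespace ns { void foo(); }
//   void bar() {
//     using namespace ns;
//     // DeclContextFindDeclByName(ctx_of_bar, "foo") -> { ns::foo }
//   }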
// Look for child_decl_ctx's lookup scope in frame_decl_ctx and its parents,
// and return the number of levels it took to find it, or
// LLDB_INVALID_DECL_LEVEL if not found. If the decl was imported via a using
// declaration, its name and/or type, if set, will be used to check that the
// decl found in the scope is a match.
//
// The optional name is required by languages (like C++) to handle using
// declarations like:
//
// void poo();
// namespace ns {
// void foo();
// void goo();
// }
// void bar() {
// using ns::foo;
// // CountDeclLevels returns 0 for 'foo', 1 for 'poo', and
// // LLDB_INVALID_DECL_LEVEL for 'goo'.
// }
//
// The optional type is useful in the case that there's a specific overload
// that we're looking for that might otherwise be shadowed, like:
//
// void foo(int);
// namespace ns {
// void foo();
// }
// void bar() {
// using ns::foo;
// // CountDeclLevels returns 0 for { 'foo', void() },
// // 1 for { 'foo', void(int) }, and
// // LLDB_INVALID_DECL_LEVEL for { 'foo', void(int, int) }.
// }
//
// NOTE: Because file statics are at the TranslationUnit level along with
// globals, a function at file scope will return the same level as a function
// at global scope. Ideally we'd like to treat the file scope as an additional
// scope just below the global scope. More work needs to be done to recognise
// this: if the decl we're trying to look up is static, we should compare its
// source file with that of the current scope and return a lower number for it.
uint32_t TypeSystemClang::CountDeclLevels(clang::DeclContext *frame_decl_ctx,
clang::DeclContext *child_decl_ctx,
ConstString *child_name,
CompilerType *child_type) {
SymbolFile *symbol_file = GetSymbolFile();
if (frame_decl_ctx && symbol_file) {
std::set<DeclContext *> searched;
std::multimap<DeclContext *, DeclContext *> search_queue;
// Get the lookup scope for the decl we're trying to find.
clang::DeclContext *parent_decl_ctx = child_decl_ctx->getParent();
// Look for it in our scope's decl context and its parents.
uint32_t level = 0;
for (clang::DeclContext *decl_ctx = frame_decl_ctx; decl_ctx != nullptr;
decl_ctx = decl_ctx->getParent()) {
if (!decl_ctx->isLookupContext())
continue;
if (decl_ctx == parent_decl_ctx)
// Found it!
return level;
search_queue.insert(std::make_pair(decl_ctx, decl_ctx));
for (auto it = search_queue.find(decl_ctx); it != search_queue.end();
it++) {
if (searched.find(it->second) != searched.end())
continue;
// Currently DWARF has one shared translation unit for all Decls at top
// level, so this would erroneously find using statements anywhere. So
// don't look at the top-level translation unit.
// TODO fix this and add a testcase that depends on it.
if (llvm::isa<clang::TranslationUnitDecl>(it->second))
continue;
searched.insert(it->second);
symbol_file->ParseDeclsForContext(
CreateDeclContext(it->second));
for (clang::Decl *child : it->second->decls()) {
if (clang::UsingDirectiveDecl *ud =
llvm::dyn_cast<clang::UsingDirectiveDecl>(child)) {
clang::DeclContext *ns = ud->getNominatedNamespace();
if (ns == parent_decl_ctx)
// Found it!
return level;
clang::DeclContext *from = ud->getCommonAncestor();
if (searched.find(ns) == searched.end())
search_queue.insert(std::make_pair(from, ns));
} else if (child_name) {
if (clang::UsingDecl *ud =
llvm::dyn_cast<clang::UsingDecl>(child)) {
for (clang::UsingShadowDecl *usd : ud->shadows()) {
clang::Decl *target = usd->getTargetDecl();
clang::NamedDecl *nd = llvm::dyn_cast<clang::NamedDecl>(target);
if (!nd)
continue;
// Check names.
IdentifierInfo *ii = nd->getIdentifier();
if (ii == nullptr ||
!ii->getName().equals(child_name->AsCString(nullptr)))
continue;
// Check types, if one was provided.
if (child_type) {
CompilerType clang_type = GetTypeForDecl(nd);
if (!AreTypesSame(clang_type, *child_type,
/*ignore_qualifiers=*/true))
continue;
}
// Found it!
return level;
}
}
}
}
}
++level;
}
}
return LLDB_INVALID_DECL_LEVEL;
}
ConstString TypeSystemClang::DeclContextGetName(void *opaque_decl_ctx) {
if (opaque_decl_ctx) {
clang::NamedDecl *named_decl =
llvm::dyn_cast<clang::NamedDecl>((clang::DeclContext *)opaque_decl_ctx);
if (named_decl)
return ConstString(named_decl->getName());
}
return ConstString();
}
ConstString
TypeSystemClang::DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) {
if (opaque_decl_ctx) {
clang::NamedDecl *named_decl =
llvm::dyn_cast<clang::NamedDecl>((clang::DeclContext *)opaque_decl_ctx);
if (named_decl)
return ConstString(GetTypeNameForDecl(named_decl));
}
return ConstString();
}
bool TypeSystemClang::DeclContextIsClassMethod(
void *opaque_decl_ctx, lldb::LanguageType *language_ptr,
bool *is_instance_method_ptr, ConstString *language_object_name_ptr) {
if (opaque_decl_ctx) {
clang::DeclContext *decl_ctx = (clang::DeclContext *)opaque_decl_ctx;
if (ObjCMethodDecl *objc_method =
llvm::dyn_cast<clang::ObjCMethodDecl>(decl_ctx)) {
if (is_instance_method_ptr)
*is_instance_method_ptr = objc_method->isInstanceMethod();
if (language_ptr)
*language_ptr = eLanguageTypeObjC;
if (language_object_name_ptr)
language_object_name_ptr->SetCString("self");
return true;
} else if (CXXMethodDecl *cxx_method =
llvm::dyn_cast<clang::CXXMethodDecl>(decl_ctx)) {
if (is_instance_method_ptr)
*is_instance_method_ptr = cxx_method->isInstance();
if (language_ptr)
*language_ptr = eLanguageTypeC_plus_plus;
if (language_object_name_ptr)
language_object_name_ptr->SetCString("this");
return true;
} else if (clang::FunctionDecl *function_decl =
llvm::dyn_cast<clang::FunctionDecl>(decl_ctx)) {
ClangASTMetadata *metadata = GetMetadata(function_decl);
if (metadata && metadata->HasObjectPtr()) {
if (is_instance_method_ptr)
*is_instance_method_ptr = true;
if (language_ptr)
*language_ptr = eLanguageTypeObjC;
if (language_object_name_ptr)
language_object_name_ptr->SetCString(metadata->GetObjectPtrName());
return true;
}
}
}
return false;
}
bool TypeSystemClang::DeclContextIsContainedInLookup(
void *opaque_decl_ctx, void *other_opaque_decl_ctx) {
auto *decl_ctx = (clang::DeclContext *)opaque_decl_ctx;
auto *other = (clang::DeclContext *)other_opaque_decl_ctx;
do {
// A decl context always includes its own contents in its lookup.
if (decl_ctx == other)
return true;
// If we have an inline namespace, then the lookup of the parent context
// also includes the inline namespace contents.
} while (other->isInlineNamespace() && (other = other->getParent()));
return false;
}
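// Example of the inline-namespace case this loop handles (a sketch): given
//
//   namespace ns { inline namespace v1 { struct S; } }
//
// a lookup in `ns` also covers `ns::v1`, so passing ns as opaque_decl_ctx and
// ns::v1 as other_opaque_decl_ctx returns true, while a non-inline nested
// namespace would return false.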
static bool IsClangDeclContext(const CompilerDeclContext &dc) {
return dc.IsValid() && isa<TypeSystemClang>(dc.GetTypeSystem());
}
clang::DeclContext *
TypeSystemClang::DeclContextGetAsDeclContext(const CompilerDeclContext &dc) {
if (IsClangDeclContext(dc))
return (clang::DeclContext *)dc.GetOpaqueDeclContext();
return nullptr;
}
ObjCMethodDecl *
TypeSystemClang::DeclContextGetAsObjCMethodDecl(const CompilerDeclContext &dc) {
if (IsClangDeclContext(dc))
return llvm::dyn_cast<clang::ObjCMethodDecl>(
(clang::DeclContext *)dc.GetOpaqueDeclContext());
return nullptr;
}
CXXMethodDecl *
TypeSystemClang::DeclContextGetAsCXXMethodDecl(const CompilerDeclContext &dc) {
if (IsClangDeclContext(dc))
return llvm::dyn_cast<clang::CXXMethodDecl>(
(clang::DeclContext *)dc.GetOpaqueDeclContext());
return nullptr;
}
clang::FunctionDecl *
TypeSystemClang::DeclContextGetAsFunctionDecl(const CompilerDeclContext &dc) {
if (IsClangDeclContext(dc))
return llvm::dyn_cast<clang::FunctionDecl>(
(clang::DeclContext *)dc.GetOpaqueDeclContext());
return nullptr;
}
clang::NamespaceDecl *
TypeSystemClang::DeclContextGetAsNamespaceDecl(const CompilerDeclContext &dc) {
if (IsClangDeclContext(dc))
return llvm::dyn_cast<clang::NamespaceDecl>(
(clang::DeclContext *)dc.GetOpaqueDeclContext());
return nullptr;
}
ClangASTMetadata *
TypeSystemClang::DeclContextGetMetaData(const CompilerDeclContext &dc,
const Decl *object) {
TypeSystemClang *ast = llvm::cast<TypeSystemClang>(dc.GetTypeSystem());
return ast->GetMetadata(object);
}
clang::ASTContext *
TypeSystemClang::DeclContextGetTypeSystemClang(const CompilerDeclContext &dc) {
TypeSystemClang *ast =
llvm::dyn_cast_or_null<TypeSystemClang>(dc.GetTypeSystem());
if (ast)
return &ast->getASTContext();
return nullptr;
}
namespace {
/// A specialized scratch AST used within ScratchTypeSystemClang.
/// These are the ASTs backing the different IsolatedASTKinds. They behave
/// like a normal ScratchTypeSystemClang but they don't own their own
/// persistent storage or target reference.
class SpecializedScratchAST : public TypeSystemClang {
public:
/// \param name The display name of the TypeSystemClang instance.
/// \param triple The triple used for the TypeSystemClang instance.
/// \param ast_source The ClangASTSource that should be used to complete
/// type information.
SpecializedScratchAST(llvm::StringRef name, llvm::Triple triple,
std::unique_ptr<ClangASTSource> ast_source)
: TypeSystemClang(name, triple),
m_scratch_ast_source_up(std::move(ast_source)) {
// Setup the ClangASTSource to complete this AST.
m_scratch_ast_source_up->InstallASTContext(*this);
llvm::IntrusiveRefCntPtr<clang::ExternalASTSource> proxy_ast_source(
m_scratch_ast_source_up->CreateProxy());
SetExternalSource(proxy_ast_source);
}
/// The ExternalASTSource that performs lookups and completes types.
std::unique_ptr<ClangASTSource> m_scratch_ast_source_up;
};
} // namespace
char ScratchTypeSystemClang::ID;
const llvm::NoneType ScratchTypeSystemClang::DefaultAST = llvm::None;
ScratchTypeSystemClang::ScratchTypeSystemClang(Target &target,
llvm::Triple triple)
: TypeSystemClang("scratch ASTContext", triple), m_triple(triple),
m_target_wp(target.shared_from_this()),
m_persistent_variables(
new ClangPersistentVariables(target.shared_from_this())) {
m_scratch_ast_source_up = CreateASTSource();
m_scratch_ast_source_up->InstallASTContext(*this);
llvm::IntrusiveRefCntPtr<clang::ExternalASTSource> proxy_ast_source(
m_scratch_ast_source_up->CreateProxy());
SetExternalSource(proxy_ast_source);
}
void ScratchTypeSystemClang::Finalize() {
TypeSystemClang::Finalize();
m_scratch_ast_source_up.reset();
}
TypeSystemClang *
ScratchTypeSystemClang::GetForTarget(Target &target,
llvm::Optional<IsolatedASTKind> ast_kind,
bool create_on_demand) {
auto type_system_or_err = target.GetScratchTypeSystemForLanguage(
lldb::eLanguageTypeC, create_on_demand);
if (auto err = type_system_or_err.takeError()) {
LLDB_LOG_ERROR(GetLog(LLDBLog::Target), std::move(err),
"Couldn't get scratch TypeSystemClang");
return nullptr;
}
ScratchTypeSystemClang &scratch_ast =
llvm::cast<ScratchTypeSystemClang>(type_system_or_err.get());
// If no dedicated sub-AST was requested, just return the main AST.
if (ast_kind == DefaultAST)
return &scratch_ast;
// Search the sub-ASTs.
return &scratch_ast.GetIsolatedAST(*ast_kind);
}
/// Returns a human-readable name that uniquely identifies the sub-AST kind.
static llvm::StringRef
GetNameForIsolatedASTKind(ScratchTypeSystemClang::IsolatedASTKind kind) {
switch (kind) {
case ScratchTypeSystemClang::IsolatedASTKind::CppModules:
return "C++ modules";
}
llvm_unreachable("Unimplemented IsolatedASTKind?");
}
void ScratchTypeSystemClang::Dump(llvm::raw_ostream &output) {
// First dump the main scratch AST.
output << "State of scratch Clang type system:\n";
TypeSystemClang::Dump(output);
// Now sort the isolated sub-ASTs.
typedef std::pair<IsolatedASTKey, TypeSystem *> KeyAndTS;
std::vector<KeyAndTS> sorted_typesystems;
for (const auto &a : m_isolated_asts)
sorted_typesystems.emplace_back(a.first, a.second.get());
llvm::stable_sort(sorted_typesystems, llvm::less_first());
// Dump each sub-AST too.
for (const auto &a : sorted_typesystems) {
IsolatedASTKind kind =
static_cast<ScratchTypeSystemClang::IsolatedASTKind>(a.first);
output << "State of scratch Clang type subsystem "
<< GetNameForIsolatedASTKind(kind) << ":\n";
a.second->Dump(output);
}
}
UserExpression *ScratchTypeSystemClang::GetUserExpression(
llvm::StringRef expr, llvm::StringRef prefix, lldb::LanguageType language,
Expression::ResultType desired_type,
const EvaluateExpressionOptions &options, ValueObject *ctx_obj) {
TargetSP target_sp = m_target_wp.lock();
if (!target_sp)
return nullptr;
return new ClangUserExpression(*target_sp.get(), expr, prefix, language,
desired_type, options, ctx_obj);
}
FunctionCaller *ScratchTypeSystemClang::GetFunctionCaller(
const CompilerType &return_type, const Address &function_address,
const ValueList &arg_value_list, const char *name) {
TargetSP target_sp = m_target_wp.lock();
if (!target_sp)
return nullptr;
Process *process = target_sp->GetProcessSP().get();
if (!process)
return nullptr;
return new ClangFunctionCaller(*process, return_type, function_address,
arg_value_list, name);
}
std::unique_ptr<UtilityFunction>
ScratchTypeSystemClang::CreateUtilityFunction(std::string text,
std::string name) {
TargetSP target_sp = m_target_wp.lock();
if (!target_sp)
return {};
return std::make_unique<ClangUtilityFunction>(
*target_sp.get(), std::move(text), std::move(name),
target_sp->GetDebugUtilityExpression());
}
PersistentExpressionState *
ScratchTypeSystemClang::GetPersistentExpressionState() {
return m_persistent_variables.get();
}
void ScratchTypeSystemClang::ForgetSource(ASTContext *src_ctx,
ClangASTImporter &importer) {
// Remove it as a source from the main AST.
importer.ForgetSource(&getASTContext(), src_ctx);
// Remove it as a source from all created sub-ASTs.
for (const auto &a : m_isolated_asts)
importer.ForgetSource(&a.second->getASTContext(), src_ctx);
}
std::unique_ptr<ClangASTSource> ScratchTypeSystemClang::CreateASTSource() {
return std::make_unique<ClangASTSource>(
m_target_wp.lock()->shared_from_this(),
m_persistent_variables->GetClangASTImporter());
}
static llvm::StringRef
GetSpecializedASTName(ScratchTypeSystemClang::IsolatedASTKind feature) {
switch (feature) {
case ScratchTypeSystemClang::IsolatedASTKind::CppModules:
return "scratch ASTContext for C++ module types";
}
llvm_unreachable("Unimplemented ASTFeature kind?");
}
TypeSystemClang &ScratchTypeSystemClang::GetIsolatedAST(
ScratchTypeSystemClang::IsolatedASTKind feature) {
auto found_ast = m_isolated_asts.find(feature);
if (found_ast != m_isolated_asts.end())
return *found_ast->second;
// Couldn't find the requested sub-AST, so create it now.
std::unique_ptr<TypeSystemClang> new_ast;
new_ast.reset(new SpecializedScratchAST(GetSpecializedASTName(feature),
m_triple, CreateASTSource()));
m_isolated_asts[feature] = std::move(new_ast);
return *m_isolated_asts[feature];
}
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
index 24dbb71c8f4d..7f25a6df548f 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
@@ -1,1234 +1,1235 @@
//===-- TypeSystemClang.h ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_SOURCE_PLUGINS_TYPESYSTEM_CLANG_TYPESYSTEMCLANG_H
#define LLDB_SOURCE_PLUGINS_TYPESYSTEM_CLANG_TYPESYSTEMCLANG_H
#include <cstdint>
#include <functional>
#include <initializer_list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTFwd.h"
#include "clang/AST/TemplateBase.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
#include "Plugins/ExpressionParser/Clang/ClangPersistentVariables.h"
#include "lldb/Expression/ExpressionVariable.h"
#include "lldb/Symbol/CompilerType.h"
#include "lldb/Symbol/TypeSystem.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/Flags.h"
#include "lldb/Utility/Log.h"
#include "lldb/lldb-enumerations.h"
class DWARFASTParserClang;
class PDBASTParser;
namespace clang {
class FileManager;
class HeaderSearch;
class ModuleMap;
} // namespace clang
namespace lldb_private {
class ClangASTMetadata;
class ClangASTSource;
class Declaration;
/// A Clang module ID.
class OptionalClangModuleID {
unsigned m_id = 0;
public:
OptionalClangModuleID() = default;
explicit OptionalClangModuleID(unsigned id) : m_id(id) {}
bool HasValue() const { return m_id != 0; }
unsigned GetValue() const { return m_id; }
};
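// Usage sketch: ID 0 is the sentinel for "no module", so a
// default-constructed OptionalClangModuleID reports no value, while any
// non-zero ID counts as present:
//
//   OptionalClangModuleID none;   // HasValue() == false
//   OptionalClangModuleID mod(1); // HasValue() == true, GetValue() == 1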
/// The implementation of lldb::Type's m_payload field for TypeSystemClang.
class TypePayloadClang {
/// The Layout is as follows:
/// \verbatim
/// bit 0..30 ... Owning Module ID.
/// bit 31 ...... IsCompleteObjCClass.
/// \endverbatim
Type::Payload m_payload = 0;
public:
TypePayloadClang() = default;
explicit TypePayloadClang(OptionalClangModuleID owning_module,
bool is_complete_objc_class = false);
explicit TypePayloadClang(uint32_t opaque_payload) : m_payload(opaque_payload) {}
operator Type::Payload() { return m_payload; }
static constexpr unsigned ObjCClassBit = 1 << 31;
bool IsCompleteObjCClass() { return Flags(m_payload).Test(ObjCClassBit); }
void SetIsCompleteObjCClass(bool is_complete_objc_class) {
m_payload = is_complete_objc_class ? Flags(m_payload).Set(ObjCClassBit)
: Flags(m_payload).Clear(ObjCClassBit);
}
OptionalClangModuleID GetOwningModule() {
return OptionalClangModuleID(Flags(m_payload).Clear(ObjCClassBit));
}
void SetOwningModule(OptionalClangModuleID id);
/// \}
};
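// A worked example of the payload layout above (a sketch, values
// hypothetical): owning-module IDs occupy bits 0..30 and bit 31 flags a
// complete Objective-C class, so a payload for module 5 with the flag set is
//
//   uint32_t payload = 5u | TypePayloadClang::ObjCClassBit; // 0x80000005
//
// and GetOwningModule() masks the flag bit back off to recover the ID 5.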
-
+
/// A TypeSystem implementation based on Clang.
///
/// This class uses a single clang::ASTContext as the backend for storing
/// its types and declarations. Every clang::ASTContext should also just have
/// a single associated TypeSystemClang instance that manages it.
///
/// The clang::ASTContext instance can either be created by TypeSystemClang
/// itself or it can adopt an existing clang::ASTContext (for example, when
/// it is necessary to provide a TypeSystem interface for an existing
/// clang::ASTContext that was created by clang::CompilerInstance).
class TypeSystemClang : public TypeSystem {
// LLVM RTTI support
static char ID;
public:
typedef void (*CompleteTagDeclCallback)(void *baton, clang::TagDecl *);
typedef void (*CompleteObjCInterfaceDeclCallback)(void *baton,
clang::ObjCInterfaceDecl *);
// llvm casting support
bool isA(const void *ClassID) const override { return ClassID == &ID; }
static bool classof(const TypeSystem *ts) { return ts->isA(&ID); }
/// Constructs a TypeSystemClang with an ASTContext using the given triple.
///
/// \param name The name for the TypeSystemClang (for logging purposes)
/// \param triple The llvm::Triple used for the ASTContext. The triple defines
/// certain characteristics of the ASTContext and its types
/// (e.g., whether certain primitive types exist or what their
/// signedness is).
explicit TypeSystemClang(llvm::StringRef name, llvm::Triple triple);
/// Constructs a TypeSystemClang that uses an existing ASTContext internally.
/// Useful when there is an existing ASTContext that was created by Clang.
///
/// \param name The name for the TypeSystemClang (for logging purposes)
/// \param existing_ctxt An existing ASTContext.
explicit TypeSystemClang(llvm::StringRef name,
clang::ASTContext &existing_ctxt);
~TypeSystemClang() override;
void Finalize() override;
// PluginInterface functions
llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
static llvm::StringRef GetPluginNameStatic() { return "clang"; }
static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language,
Module *module, Target *target);
static LanguageSet GetSupportedLanguagesForTypes();
static LanguageSet GetSupportedLanguagesForExpressions();
static void Initialize();
static void Terminate();
static TypeSystemClang *GetASTContext(clang::ASTContext *ast_ctx);
/// Returns the display name of this TypeSystemClang that indicates what
/// purpose it serves in LLDB. Used for example in logs.
llvm::StringRef getDisplayName() const { return m_display_name; }
/// Returns the clang::ASTContext instance managed by this TypeSystemClang.
clang::ASTContext &getASTContext();
clang::MangleContext *getMangleContext();
std::shared_ptr<clang::TargetOptions> &getTargetOptions();
clang::TargetInfo *getTargetInfo();
void setSema(clang::Sema *s);
clang::Sema *getSema() { return m_sema; }
const char *GetTargetTriple();
void SetExternalSource(
llvm::IntrusiveRefCntPtr<clang::ExternalASTSource> &ast_source_up);
bool GetCompleteDecl(clang::Decl *decl) {
return TypeSystemClang::GetCompleteDecl(&getASTContext(), decl);
}
static void DumpDeclHiearchy(clang::Decl *decl);
static void DumpDeclContextHiearchy(clang::DeclContext *decl_ctx);
static bool DeclsAreEquivalent(clang::Decl *lhs_decl, clang::Decl *rhs_decl);
static bool GetCompleteDecl(clang::ASTContext *ast, clang::Decl *decl);
void SetMetadataAsUserID(const clang::Decl *decl, lldb::user_id_t user_id);
void SetMetadataAsUserID(const clang::Type *type, lldb::user_id_t user_id);
void SetMetadata(const clang::Decl *object, ClangASTMetadata &meta_data);
void SetMetadata(const clang::Type *object, ClangASTMetadata &meta_data);
ClangASTMetadata *GetMetadata(const clang::Decl *object);
ClangASTMetadata *GetMetadata(const clang::Type *object);
void SetCXXRecordDeclAccess(const clang::CXXRecordDecl *object,
clang::AccessSpecifier access);
clang::AccessSpecifier
GetCXXRecordDeclAccess(const clang::CXXRecordDecl *object);
// Basic Types
CompilerType GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding,
size_t bit_size) override;
CompilerType GetBasicType(lldb::BasicType type);
static lldb::BasicType GetBasicTypeEnumeration(ConstString name);
CompilerType
GetBuiltinTypeForDWARFEncodingAndBitSize(llvm::StringRef type_name,
uint32_t dw_ate, uint32_t bit_size);
CompilerType GetCStringType(bool is_const);
static clang::DeclContext *GetDeclContextForType(clang::QualType type);
static clang::DeclContext *GetDeclContextForType(const CompilerType &type);
uint32_t GetPointerByteSize() override;
clang::TranslationUnitDecl *GetTranslationUnitDecl() {
return getASTContext().getTranslationUnitDecl();
}
static bool AreTypesSame(CompilerType type1, CompilerType type2,
bool ignore_qualifiers = false);
/// Creates a CompilerType from the given QualType with the current
/// TypeSystemClang instance as the CompilerType's typesystem.
/// \param qt The QualType for a type that belongs to the ASTContext of this
/// TypeSystemClang.
/// \return The CompilerType representing the given QualType. If the
/// QualType's type pointer is a nullptr then the function returns an
/// invalid CompilerType.
CompilerType GetType(clang::QualType qt) {
if (qt.getTypePtrOrNull() == nullptr)
return CompilerType();
// Check that the type actually belongs to this TypeSystemClang.
assert(qt->getAsTagDecl() == nullptr ||
&qt->getAsTagDecl()->getASTContext() == &getASTContext());
return CompilerType(this, qt.getAsOpaquePtr());
}
CompilerType GetTypeForDecl(clang::NamedDecl *decl);
CompilerType GetTypeForDecl(clang::TagDecl *decl);
CompilerType GetTypeForDecl(clang::ObjCInterfaceDecl *objc_decl);
template <typename RecordDeclType>
CompilerType
GetTypeForIdentifier(ConstString type_name,
clang::DeclContext *decl_context = nullptr) {
CompilerType compiler_type;
if (type_name.GetLength()) {
clang::ASTContext &ast = getASTContext();
if (!decl_context)
decl_context = ast.getTranslationUnitDecl();
clang::IdentifierInfo &myIdent = ast.Idents.get(type_name.GetCString());
clang::DeclarationName myName =
ast.DeclarationNames.getIdentifier(&myIdent);
clang::DeclContext::lookup_result result = decl_context->lookup(myName);
if (!result.empty()) {
clang::NamedDecl *named_decl = *result.begin();
if (const RecordDeclType *record_decl =
llvm::dyn_cast<RecordDeclType>(named_decl))
compiler_type.SetCompilerType(
this, clang::QualType(record_decl->getTypeForDecl(), 0)
.getAsOpaquePtr());
}
}
return compiler_type;
}
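// Usage sketch (type name hypothetical; ts is a TypeSystemClang instance):
// look up a record type by name in the translation unit, with the template
// parameter selecting the expected decl kind:
//
//   CompilerType t = ts.GetTypeForIdentifier<clang::CXXRecordDecl>(
//       ConstString("MyStruct"));
//   if (t)
//     ... // "MyStruct" exists and is a C++ record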
CompilerType CreateStructForIdentifier(
ConstString type_name,
const std::initializer_list<std::pair<const char *, CompilerType>>
&type_fields,
bool packed = false);
CompilerType GetOrCreateStructForIdentifier(
ConstString type_name,
const std::initializer_list<std::pair<const char *, CompilerType>>
&type_fields,
bool packed = false);
static bool IsOperator(llvm::StringRef name,
clang::OverloadedOperatorKind &op_kind);
// Structure, Unions, Classes
static clang::AccessSpecifier
ConvertAccessTypeToAccessSpecifier(lldb::AccessType access);
static clang::AccessSpecifier
UnifyAccessSpecifiers(clang::AccessSpecifier lhs, clang::AccessSpecifier rhs);
static uint32_t GetNumBaseClasses(const clang::CXXRecordDecl *cxx_record_decl,
bool omit_empty_base_classes);
/// Synthesize a clang::Module and return its ID or a default-constructed ID.
OptionalClangModuleID GetOrCreateClangModule(llvm::StringRef name,
OptionalClangModuleID parent,
bool is_framework = false,
bool is_explicit = false);
CompilerType CreateRecordType(clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
lldb::AccessType access_type,
llvm::StringRef name, int kind,
lldb::LanguageType language,
ClangASTMetadata *metadata = nullptr,
bool exports_symbols = false);
class TemplateParameterInfos {
public:
bool IsValid() const {
// Having a pack name but no packed args doesn't make sense, so mark
// these template parameters as invalid.
if (pack_name && !packed_args)
return false;
return args.size() == names.size() &&
(!packed_args || !packed_args->packed_args);
}
bool hasParameterPack() const { return static_cast<bool>(packed_args); }
llvm::SmallVector<const char *, 2> names;
llvm::SmallVector<clang::TemplateArgument, 2> args;
-
+
const char * pack_name = nullptr;
std::unique_ptr<TemplateParameterInfos> packed_args;
};
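// Sketch of filling TemplateParameterInfos for `template <typename T> class V`
// instantiated as V<int> (a minimal example; ast is the clang::ASTContext):
//
//   TypeSystemClang::TemplateParameterInfos infos;
//   infos.names.push_back("T");
//   infos.args.push_back(clang::TemplateArgument(ast.IntTy));
//
// IsValid() then holds because names and args have equal size and no
// parameter pack is involved.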
clang::FunctionTemplateDecl *CreateFunctionTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
clang::FunctionDecl *func_decl, const TemplateParameterInfos &infos);
void CreateFunctionTemplateSpecializationInfo(
clang::FunctionDecl *func_decl, clang::FunctionTemplateDecl *Template,
const TemplateParameterInfos &infos);
clang::ClassTemplateDecl *CreateClassTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
lldb::AccessType access_type, llvm::StringRef class_name, int kind,
const TemplateParameterInfos &infos);
clang::TemplateTemplateParmDecl *
CreateTemplateTemplateParmDecl(const char *template_name);
clang::ClassTemplateSpecializationDecl *CreateClassTemplateSpecializationDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
clang::ClassTemplateDecl *class_template_decl, int kind,
const TemplateParameterInfos &infos);
CompilerType
CreateClassTemplateSpecializationType(clang::ClassTemplateSpecializationDecl *
class_template_specialization_decl);
static clang::DeclContext *
GetAsDeclContext(clang::FunctionDecl *function_decl);
static bool CheckOverloadedOperatorKindParameterCount(
bool is_method, clang::OverloadedOperatorKind op_kind,
uint32_t num_params);
bool FieldIsBitfield(clang::FieldDecl *field, uint32_t &bitfield_bit_size);
static bool RecordHasFields(const clang::RecordDecl *record_decl);
CompilerType CreateObjCClass(llvm::StringRef name,
clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
bool isForwardDecl, bool isInternal,
ClangASTMetadata *metadata = nullptr);
// Returns a mask containing bits from the TypeSystemClang::eTypeXXX
// enumerations
// Namespace Declarations
clang::NamespaceDecl *
GetUniqueNamespaceDeclaration(const char *name, clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
bool is_inline = false);
// Function Types
clang::FunctionDecl *CreateFunctionDeclaration(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
llvm::StringRef name, const CompilerType &function_Type,
clang::StorageClass storage, bool is_inline);
CompilerType CreateFunctionType(const CompilerType &result_type,
const CompilerType *args, unsigned num_args,
bool is_variadic, unsigned type_quals,
clang::CallingConv cc = clang::CC_C);
clang::ParmVarDecl *
CreateParameterDeclaration(clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
const char *name, const CompilerType &param_type,
int storage, bool add_decl = false);
void SetFunctionParameters(clang::FunctionDecl *function_decl,
llvm::ArrayRef<clang::ParmVarDecl *> params);
CompilerType CreateBlockPointerType(const CompilerType &function_type);
// Array Types
CompilerType CreateArrayType(const CompilerType &element_type,
size_t element_count, bool is_vector);
// Enumeration Types
CompilerType CreateEnumerationType(llvm::StringRef name,
clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
const Declaration &decl,
const CompilerType &integer_qual_type,
bool is_scoped);
// Integer type functions
CompilerType GetIntTypeFromBitSize(size_t bit_size, bool is_signed);
CompilerType GetPointerSizedIntType(bool is_signed);
// Floating point functions
static CompilerType GetFloatTypeFromBitSize(clang::ASTContext *ast,
size_t bit_size);
// TypeSystem methods
DWARFASTParser *GetDWARFParser() override;
PDBASTParser *GetPDBParser() override;
// TypeSystemClang callbacks for external source lookups.
void CompleteTagDecl(clang::TagDecl *);
void CompleteObjCInterfaceDecl(clang::ObjCInterfaceDecl *);
bool LayoutRecordType(
const clang::RecordDecl *record_decl, uint64_t &size, uint64_t &alignment,
llvm::DenseMap<const clang::FieldDecl *, uint64_t> &field_offsets,
llvm::DenseMap<const clang::CXXRecordDecl *, clang::CharUnits>
&base_offsets,
llvm::DenseMap<const clang::CXXRecordDecl *, clang::CharUnits>
&vbase_offsets);
/// Creates a CompilerDecl from the given Decl with the current
/// TypeSystemClang instance as its typesystem.
/// The Decl has to come from the ASTContext of this
/// TypeSystemClang.
CompilerDecl GetCompilerDecl(clang::Decl *decl) {
assert(&decl->getASTContext() == &getASTContext() &&
"CreateCompilerDecl for Decl from wrong ASTContext?");
return CompilerDecl(this, decl);
}
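// Usage sketch (decl origin hypothetical): wrapping a decl created through
// this type system; the assert above fires in asserts builds if the decl
// belongs to a different ASTContext.
//
//   clang::FunctionDecl *fd = ...; // must come from this ASTContext
//   CompilerDecl cd = ts.GetCompilerDecl(fd);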
// CompilerDecl override functions
ConstString DeclGetName(void *opaque_decl) override;
ConstString DeclGetMangledName(void *opaque_decl) override;
CompilerDeclContext DeclGetDeclContext(void *opaque_decl) override;
CompilerType DeclGetFunctionReturnType(void *opaque_decl) override;
size_t DeclGetFunctionNumArguments(void *opaque_decl) override;
CompilerType DeclGetFunctionArgumentType(void *opaque_decl,
size_t arg_idx) override;
CompilerType GetTypeForDecl(void *opaque_decl) override;
// CompilerDeclContext override functions
/// Creates a CompilerDeclContext from the given DeclContext
/// with the current TypeSystemClang instance as its typesystem.
/// The DeclContext has to come from the ASTContext of this
/// TypeSystemClang.
CompilerDeclContext CreateDeclContext(clang::DeclContext *ctx);
/// Set the owning module for \p decl.
static void SetOwningModule(clang::Decl *decl,
OptionalClangModuleID owning_module);
std::vector<CompilerDecl>
DeclContextFindDeclByName(void *opaque_decl_ctx, ConstString name,
const bool ignore_using_decls) override;
ConstString DeclContextGetName(void *opaque_decl_ctx) override;
ConstString DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) override;
bool DeclContextIsClassMethod(void *opaque_decl_ctx,
lldb::LanguageType *language_ptr,
bool *is_instance_method_ptr,
ConstString *language_object_name_ptr) override;
bool DeclContextIsContainedInLookup(void *opaque_decl_ctx,
void *other_opaque_decl_ctx) override;
// Clang specific clang::DeclContext functions
static clang::DeclContext *
DeclContextGetAsDeclContext(const CompilerDeclContext &dc);
static clang::ObjCMethodDecl *
DeclContextGetAsObjCMethodDecl(const CompilerDeclContext &dc);
static clang::CXXMethodDecl *
DeclContextGetAsCXXMethodDecl(const CompilerDeclContext &dc);
static clang::FunctionDecl *
DeclContextGetAsFunctionDecl(const CompilerDeclContext &dc);
static clang::NamespaceDecl *
DeclContextGetAsNamespaceDecl(const CompilerDeclContext &dc);
static ClangASTMetadata *DeclContextGetMetaData(const CompilerDeclContext &dc,
const clang::Decl *object);
static clang::ASTContext *
DeclContextGetTypeSystemClang(const CompilerDeclContext &dc);
// Tests
#ifndef NDEBUG
bool Verify(lldb::opaque_compiler_type_t type) override;
#endif
-
+
bool IsArrayType(lldb::opaque_compiler_type_t type,
CompilerType *element_type, uint64_t *size,
bool *is_incomplete) override;
bool IsVectorType(lldb::opaque_compiler_type_t type,
CompilerType *element_type, uint64_t *size) override;
bool IsAggregateType(lldb::opaque_compiler_type_t type) override;
bool IsAnonymousType(lldb::opaque_compiler_type_t type) override;
bool IsBeingDefined(lldb::opaque_compiler_type_t type) override;
bool IsCharType(lldb::opaque_compiler_type_t type) override;
bool IsCompleteType(lldb::opaque_compiler_type_t type) override;
bool IsConst(lldb::opaque_compiler_type_t type) override;
bool IsCStringType(lldb::opaque_compiler_type_t type,
uint32_t &length) override;
static bool IsCXXClassType(const CompilerType &type);
bool IsDefined(lldb::opaque_compiler_type_t type) override;
bool IsFloatingPointType(lldb::opaque_compiler_type_t type, uint32_t &count,
bool &is_complex) override;
bool IsFunctionType(lldb::opaque_compiler_type_t type) override;
uint32_t IsHomogeneousAggregate(lldb::opaque_compiler_type_t type,
CompilerType *base_type_ptr) override;
size_t
GetNumberOfFunctionArguments(lldb::opaque_compiler_type_t type) override;
CompilerType GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type,
const size_t index) override;
bool IsFunctionPointerType(lldb::opaque_compiler_type_t type) override;
bool IsBlockPointerType(lldb::opaque_compiler_type_t type,
CompilerType *function_pointer_type_ptr) override;
bool IsIntegerType(lldb::opaque_compiler_type_t type,
bool &is_signed) override;
bool IsEnumerationType(lldb::opaque_compiler_type_t type,
bool &is_signed) override;
bool IsScopedEnumerationType(lldb::opaque_compiler_type_t type) override;
static bool IsObjCClassType(const CompilerType &type);
static bool IsObjCClassTypeAndHasIVars(const CompilerType &type,
bool check_superclass);
static bool IsObjCObjectOrInterfaceType(const CompilerType &type);
static bool IsObjCObjectPointerType(const CompilerType &type,
CompilerType *target_type = nullptr);
bool IsPolymorphicClass(lldb::opaque_compiler_type_t type) override;
static bool IsClassType(lldb::opaque_compiler_type_t type);
static bool IsEnumType(lldb::opaque_compiler_type_t type);
bool IsPossibleDynamicType(lldb::opaque_compiler_type_t type,
CompilerType *target_type, // Can pass nullptr
bool check_cplusplus, bool check_objc) override;
bool IsRuntimeGeneratedType(lldb::opaque_compiler_type_t type) override;
bool IsPointerType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type) override;
bool IsPointerOrReferenceType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type) override;
bool IsReferenceType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type, bool *is_rvalue) override;
bool IsScalarType(lldb::opaque_compiler_type_t type) override;
bool IsTypedefType(lldb::opaque_compiler_type_t type) override;
bool IsVoidType(lldb::opaque_compiler_type_t type) override;
bool CanPassInRegisters(const CompilerType &type) override;
bool SupportsLanguage(lldb::LanguageType language) override;
static llvm::Optional<std::string> GetCXXClassName(const CompilerType &type);
// Type Completion
bool GetCompleteType(lldb::opaque_compiler_type_t type) override;
// Accessors
ConstString GetTypeName(lldb::opaque_compiler_type_t type) override;
ConstString GetDisplayTypeName(lldb::opaque_compiler_type_t type) override;
uint32_t GetTypeInfo(lldb::opaque_compiler_type_t type,
CompilerType *pointee_or_element_compiler_type) override;
lldb::LanguageType
GetMinimumLanguage(lldb::opaque_compiler_type_t type) override;
lldb::TypeClass GetTypeClass(lldb::opaque_compiler_type_t type) override;
unsigned GetTypeQualifiers(lldb::opaque_compiler_type_t type) override;
// Creating related types
CompilerType GetArrayElementType(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) override;
CompilerType GetArrayType(lldb::opaque_compiler_type_t type,
uint64_t size) override;
CompilerType GetCanonicalType(lldb::opaque_compiler_type_t type) override;
CompilerType
GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) override;
CompilerType
GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) override;
// Returns -1 if this isn't a function or if the function doesn't have a
// prototype. Returns a value >= 0 if there is a prototype.
int GetFunctionArgumentCount(lldb::opaque_compiler_type_t type) override;
CompilerType GetFunctionArgumentTypeAtIndex(lldb::opaque_compiler_type_t type,
size_t idx) override;
CompilerType
GetFunctionReturnType(lldb::opaque_compiler_type_t type) override;
size_t GetNumMemberFunctions(lldb::opaque_compiler_type_t type) override;
TypeMemberFunctionImpl
GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type,
size_t idx) override;
CompilerType GetNonReferenceType(lldb::opaque_compiler_type_t type) override;
CompilerType GetPointeeType(lldb::opaque_compiler_type_t type) override;
CompilerType GetPointerType(lldb::opaque_compiler_type_t type) override;
CompilerType
GetLValueReferenceType(lldb::opaque_compiler_type_t type) override;
CompilerType
GetRValueReferenceType(lldb::opaque_compiler_type_t type) override;
CompilerType GetAtomicType(lldb::opaque_compiler_type_t type) override;
CompilerType AddConstModifier(lldb::opaque_compiler_type_t type) override;
CompilerType AddVolatileModifier(lldb::opaque_compiler_type_t type) override;
CompilerType AddRestrictModifier(lldb::opaque_compiler_type_t type) override;
/// Using the current type, create a new typedef to that type using
/// "typedef_name" as the name and "decl_ctx" as the decl context.
/// \param opaque_payload is an opaque TypePayloadClang.
CompilerType CreateTypedef(lldb::opaque_compiler_type_t type,
const char *name,
const CompilerDeclContext &decl_ctx,
uint32_t opaque_payload) override;
// If the current object represents a typedef type, get the underlying type
CompilerType GetTypedefedType(lldb::opaque_compiler_type_t type) override;
// Create related types using the current type's AST
CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) override;
// Exploring the type
const llvm::fltSemantics &GetFloatTypeSemantics(size_t byte_size) override;
llvm::Optional<uint64_t> GetByteSize(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) {
if (llvm::Optional<uint64_t> bit_size = GetBitSize(type, exe_scope))
return (*bit_size + 7) / 8;
return llvm::None;
}
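// Worked example of the rounding above: a 1-bit or 7-bit type reports
// (bits + 7) / 8 == 1 byte, a 9-bit type reports 2 bytes, and a type with no
// known bit size propagates llvm::None unchanged.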
llvm::Optional<uint64_t>
GetBitSize(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) override;
lldb::Encoding GetEncoding(lldb::opaque_compiler_type_t type,
uint64_t &count) override;
lldb::Format GetFormat(lldb::opaque_compiler_type_t type) override;
llvm::Optional<size_t>
GetTypeBitAlign(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) override;
uint32_t GetNumChildren(lldb::opaque_compiler_type_t type,
bool omit_empty_base_classes,
const ExecutionContext *exe_ctx) override;
CompilerType GetBuiltinTypeByName(ConstString name) override;
lldb::BasicType
GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) override;
static lldb::BasicType
GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type,
ConstString name);
void ForEachEnumerator(
lldb::opaque_compiler_type_t type,
std::function<bool(const CompilerType &integer_type,
ConstString name,
const llvm::APSInt &value)> const &callback) override;
uint32_t GetNumFields(lldb::opaque_compiler_type_t type) override;
CompilerType GetFieldAtIndex(lldb::opaque_compiler_type_t type, size_t idx,
std::string &name, uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) override;
uint32_t GetNumDirectBaseClasses(lldb::opaque_compiler_type_t type) override;
uint32_t GetNumVirtualBaseClasses(lldb::opaque_compiler_type_t type) override;
CompilerType GetDirectBaseClassAtIndex(lldb::opaque_compiler_type_t type,
size_t idx,
uint32_t *bit_offset_ptr) override;
CompilerType GetVirtualBaseClassAtIndex(lldb::opaque_compiler_type_t type,
size_t idx,
uint32_t *bit_offset_ptr) override;
static uint32_t GetNumPointeeChildren(clang::QualType type);
CompilerType GetChildCompilerTypeAtIndex(
lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, size_t idx,
bool transparent_pointers, bool omit_empty_base_classes,
bool ignore_array_bounds, std::string &child_name,
uint32_t &child_byte_size, int32_t &child_byte_offset,
uint32_t &child_bitfield_bit_size, uint32_t &child_bitfield_bit_offset,
bool &child_is_base_class, bool &child_is_deref_of_parent,
ValueObject *valobj, uint64_t &language_flags) override;
// Lookup a child given a name. This function will match base class names and
// member names in "clang_type" only, not descendants.
uint32_t GetIndexOfChildWithName(lldb::opaque_compiler_type_t type,
const char *name,
bool omit_empty_base_classes) override;
// Lookup a child member given a name. This function will match member names
// only and will descend into "clang_type" children in search of the first
// member in this class or any base class that matches "name".
// TODO: Return all matches for a given name by returning a
// vector<vector<uint32_t>>
// so we catch all names that match a given child name, not just the first.
size_t
GetIndexOfChildMemberWithName(lldb::opaque_compiler_type_t type,
const char *name, bool omit_empty_base_classes,
std::vector<uint32_t> &child_indexes) override;
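// Sketch of the index-path output (hypothetical types): for
//
//   struct Base { int b; };
//   struct Derived : Base { int d; };
//
// looking up "b" on Derived yields child_indexes == {0, 0} -- index 0 selects
// the Base subobject, then index 0 selects member b within it -- while "d"
// yields {1}.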
- size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type) override;
+ size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+ bool expand_pack) override;
lldb::TemplateArgumentKind
- GetTemplateArgumentKind(lldb::opaque_compiler_type_t type,
- size_t idx) override;
+ GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx,
+ bool expand_pack) override;
CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type,
- size_t idx) override;
+ size_t idx, bool expand_pack) override;
llvm::Optional<CompilerType::IntegralTemplateArgument>
- GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type,
- size_t idx) override;
+ GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx,
+ bool expand_pack) override;
CompilerType GetTypeForFormatters(void *type) override;
#define LLDB_INVALID_DECL_LEVEL UINT32_MAX
// LLDB_INVALID_DECL_LEVEL is returned by CountDeclLevels if child_decl_ctx
// could not be found in decl_ctx.
uint32_t CountDeclLevels(clang::DeclContext *frame_decl_ctx,
clang::DeclContext *child_decl_ctx,
ConstString *child_name = nullptr,
CompilerType *child_type = nullptr);
// Modifying RecordType
static clang::FieldDecl *AddFieldToRecordType(const CompilerType &type,
llvm::StringRef name,
const CompilerType &field_type,
lldb::AccessType access,
uint32_t bitfield_bit_size);
static void BuildIndirectFields(const CompilerType &type);
static void SetIsPacked(const CompilerType &type);
static clang::VarDecl *AddVariableToRecordType(const CompilerType &type,
llvm::StringRef name,
const CompilerType &var_type,
lldb::AccessType access);
/// Initializes a variable with an integer value.
/// \param var The variable to initialize. Must not already have an
/// initializer and must have an integer or enum type.
/// \param init_value The integer value that the variable should be
/// initialized to. Has to match the bit width of the
/// variable type.
static void SetIntegerInitializerForVariable(clang::VarDecl *var,
const llvm::APInt &init_value);
/// Initializes a variable with a floating point value.
/// \param var The variable to initialize. Must not already have an
/// initializer and must have a floating point type.
/// \param init_value The float value that the variable should be
/// initialized to.
static void
SetFloatingInitializerForVariable(clang::VarDecl *var,
const llvm::APFloat &init_value);
clang::CXXMethodDecl *AddMethodToCXXRecordType(
lldb::opaque_compiler_type_t type, llvm::StringRef name,
const char *mangled_name, const CompilerType &method_type,
lldb::AccessType access, bool is_virtual, bool is_static, bool is_inline,
bool is_explicit, bool is_attr_used, bool is_artificial);
void AddMethodOverridesForCXXRecordType(lldb::opaque_compiler_type_t type);
// C++ Base Classes
std::unique_ptr<clang::CXXBaseSpecifier>
CreateBaseClassSpecifier(lldb::opaque_compiler_type_t type,
lldb::AccessType access, bool is_virtual,
bool base_of_class);
bool TransferBaseClasses(
lldb::opaque_compiler_type_t type,
std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> bases);
static bool SetObjCSuperClass(const CompilerType &type,
const CompilerType &superclass_compiler_type);
static bool AddObjCClassProperty(const CompilerType &type,
const char *property_name,
const CompilerType &property_compiler_type,
clang::ObjCIvarDecl *ivar_decl,
const char *property_setter_name,
const char *property_getter_name,
uint32_t property_attributes,
ClangASTMetadata *metadata);
static clang::ObjCMethodDecl *AddMethodToObjCObjectType(
const CompilerType &type,
const char *name, // the full symbol name as seen in the symbol table
// (lldb::opaque_compiler_type_t type, "-[NString
// stringWithCString:]")
const CompilerType &method_compiler_type, lldb::AccessType access,
bool is_artificial, bool is_variadic, bool is_objc_direct_call);
static bool SetHasExternalStorage(lldb::opaque_compiler_type_t type,
bool has_extern);
// Tag Declarations
static bool StartTagDeclarationDefinition(const CompilerType &type);
static bool CompleteTagDeclarationDefinition(const CompilerType &type);
// Modifying Enumeration types
clang::EnumConstantDecl *AddEnumerationValueToEnumerationType(
const CompilerType &enum_type, const Declaration &decl, const char *name,
int64_t enum_value, uint32_t enum_value_bit_size);
clang::EnumConstantDecl *AddEnumerationValueToEnumerationType(
const CompilerType &enum_type, const Declaration &decl, const char *name,
const llvm::APSInt &value);
/// Returns the underlying integer type for an enum type. If the given type
/// is invalid or not an enum-type, the function returns an invalid
/// CompilerType.
CompilerType GetEnumerationIntegerType(CompilerType type);
// Pointers & References
// Call this function using the class type when you want to make a member
// pointer type to pointee_type.
static CompilerType CreateMemberPointerType(const CompilerType &type,
const CompilerType &pointee_type);
// Dumping types
#ifndef NDEBUG
/// Convenience LLVM-style dump method for use in the debugger only.
/// In contrast to the other \p Dump() methods this directly invokes
/// \p clang::QualType::dump().
LLVM_DUMP_METHOD void dump(lldb::opaque_compiler_type_t type) const override;
#endif
/// \see lldb_private::TypeSystem::Dump
void Dump(llvm::raw_ostream &output) override;
/// Dump clang AST types from the symbol file.
///
/// \param[in] s
/// A stream to send the dumped AST node(s) to.
/// \param[in] symbol_name
/// The name of the symbol to dump; if it is empty, dump all the symbols.
void DumpFromSymbolFile(Stream &s, llvm::StringRef symbol_name);
void DumpValue(lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx,
Stream *s, lldb::Format format, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size,
uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset,
bool show_types, bool show_summary, bool verbose,
uint32_t depth) override;
bool DumpTypeValue(lldb::opaque_compiler_type_t type, Stream *s,
lldb::Format format, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size,
uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset,
ExecutionContextScope *exe_scope) override;
void DumpSummary(lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx,
Stream *s, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size) override;
void DumpTypeDescription(
lldb::opaque_compiler_type_t type,
lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override;
void DumpTypeDescription(
lldb::opaque_compiler_type_t type, Stream *s,
lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override;
static void DumpTypeName(const CompilerType &type);
static clang::EnumDecl *GetAsEnumDecl(const CompilerType &type);
static clang::RecordDecl *GetAsRecordDecl(const CompilerType &type);
static clang::TagDecl *GetAsTagDecl(const CompilerType &type);
static clang::TypedefNameDecl *GetAsTypedefDecl(const CompilerType &type);
static clang::CXXRecordDecl *
GetAsCXXRecordDecl(lldb::opaque_compiler_type_t type);
static clang::ObjCInterfaceDecl *
GetAsObjCInterfaceDecl(const CompilerType &type);
clang::ClassTemplateDecl *ParseClassTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
lldb::AccessType access_type, const char *parent_name, int tag_decl_kind,
const TypeSystemClang::TemplateParameterInfos &template_param_infos);
clang::BlockDecl *CreateBlockDeclaration(clang::DeclContext *ctx,
OptionalClangModuleID owning_module);
clang::UsingDirectiveDecl *
CreateUsingDirectiveDeclaration(clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
clang::NamespaceDecl *ns_decl);
clang::UsingDecl *CreateUsingDeclaration(clang::DeclContext *current_decl_ctx,
OptionalClangModuleID owning_module,
clang::NamedDecl *target);
clang::VarDecl *CreateVariableDeclaration(clang::DeclContext *decl_context,
OptionalClangModuleID owning_module,
const char *name,
clang::QualType type);
static lldb::opaque_compiler_type_t
GetOpaqueCompilerType(clang::ASTContext *ast, lldb::BasicType basic_type);
static clang::QualType GetQualType(lldb::opaque_compiler_type_t type) {
if (type)
return clang::QualType::getFromOpaquePtr(type);
return clang::QualType();
}
static clang::QualType
GetCanonicalQualType(lldb::opaque_compiler_type_t type) {
if (type)
return clang::QualType::getFromOpaquePtr(type).getCanonicalType();
return clang::QualType();
}
clang::DeclarationName
GetDeclarationName(llvm::StringRef name,
const CompilerType &function_clang_type);
clang::LangOptions *GetLangOpts() const {
return m_language_options_up.get();
}
clang::SourceManager *GetSourceMgr() const {
return m_source_manager_up.get();
}
private:
/// Returns the PrintingPolicy used when generating the internal type names.
/// These type names are mostly used for the formatter selection.
clang::PrintingPolicy GetTypePrintingPolicy();
/// Returns the internal type name for the given NamedDecl using the
/// type printing policy.
std::string GetTypeNameForDecl(const clang::NamedDecl *named_decl);
const clang::ClassTemplateSpecializationDecl *
GetAsTemplateSpecialization(lldb::opaque_compiler_type_t type);
// Classes that inherit from TypeSystemClang can see and modify these
std::string m_target_triple;
std::unique_ptr<clang::ASTContext> m_ast_up;
std::unique_ptr<clang::LangOptions> m_language_options_up;
std::unique_ptr<clang::FileManager> m_file_manager_up;
std::unique_ptr<clang::SourceManager> m_source_manager_up;
std::unique_ptr<clang::DiagnosticsEngine> m_diagnostics_engine_up;
std::unique_ptr<clang::DiagnosticConsumer> m_diagnostic_consumer_up;
std::shared_ptr<clang::TargetOptions> m_target_options_rp;
std::unique_ptr<clang::TargetInfo> m_target_info_up;
std::unique_ptr<clang::IdentifierTable> m_identifier_table_up;
std::unique_ptr<clang::SelectorTable> m_selector_table_up;
std::unique_ptr<clang::Builtin::Context> m_builtins_up;
std::unique_ptr<clang::HeaderSearch> m_header_search_up;
std::unique_ptr<clang::ModuleMap> m_module_map_up;
std::unique_ptr<DWARFASTParserClang> m_dwarf_ast_parser_up;
std::unique_ptr<PDBASTParser> m_pdb_ast_parser_up;
std::unique_ptr<clang::MangleContext> m_mangle_ctx_up;
uint32_t m_pointer_byte_size = 0;
bool m_ast_owned = false;
/// A string describing what this TypeSystemClang represents (e.g.,
/// AST for debug information, an expression, some other utility ClangAST).
/// Useful for logging and debugging.
std::string m_display_name;
typedef llvm::DenseMap<const clang::Decl *, ClangASTMetadata> DeclMetadataMap;
/// Maps Decls to their associated ClangASTMetadata.
DeclMetadataMap m_decl_metadata;
typedef llvm::DenseMap<const clang::Type *, ClangASTMetadata> TypeMetadataMap;
/// Maps Types to their associated ClangASTMetadata.
TypeMetadataMap m_type_metadata;
typedef llvm::DenseMap<const clang::CXXRecordDecl *, clang::AccessSpecifier>
CXXRecordDeclAccessMap;
/// Maps CXXRecordDecl to their most recent added method/field's
/// AccessSpecifier.
CXXRecordDeclAccessMap m_cxx_record_decl_access;
/// The Sema instance that is currently being used to build this ASTContext.
/// May be null if we are already done parsing this ASTContext or the
/// ASTContext wasn't created by parsing source code.
clang::Sema *m_sema = nullptr;
// For TypeSystemClang only
TypeSystemClang(const TypeSystemClang &);
const TypeSystemClang &operator=(const TypeSystemClang &);
/// Creates the internal ASTContext.
void CreateASTContext();
void SetTargetTriple(llvm::StringRef target_triple);
};
/// The TypeSystemClang instance used for the scratch ASTContext in a
/// lldb::Target.
class ScratchTypeSystemClang : public TypeSystemClang {
/// LLVM RTTI support
static char ID;
public:
ScratchTypeSystemClang(Target &target, llvm::Triple triple);
~ScratchTypeSystemClang() override = default;
void Finalize() override;
/// The different kinds of isolated ASTs within the scratch TypeSystem.
///
/// These ASTs are isolated from the main scratch AST and are each
/// dedicated to a special language option/feature that makes the contained
/// AST nodes incompatible with other AST nodes.
enum IsolatedASTKind {
/// The isolated AST for declarations/types from expressions that imported
/// type information from a C++ module. The templates from a C++ module
/// often conflict with the templates we generate from debug information,
/// so we put these types in their own AST.
CppModules
};
/// Alias for requesting the default scratch TypeSystemClang in GetForTarget.
// This isn't constexpr as gtest/llvm::Optional comparison logic is trying
// to get the address of this for pretty-printing.
static const llvm::NoneType DefaultAST;
/// Infers the appropriate sub-AST from Clang's LangOptions.
static llvm::Optional<IsolatedASTKind>
InferIsolatedASTKindFromLangOpts(const clang::LangOptions &l) {
// If modules are activated we want the dedicated C++ module AST.
// See IsolatedASTKind::CppModules for more info.
if (l.Modules)
return IsolatedASTKind::CppModules;
return DefaultAST;
}
/// Returns the scratch TypeSystemClang for the given target.
/// \param target The Target for which the scratch TypeSystemClang should be
/// returned.
/// \param ast_kind Allows requesting a specific sub-AST instead of the
/// default scratch AST. See also `IsolatedASTKind`.
/// \param create_on_demand Whether the scratch TypeSystemClang instance may
/// be created by this call if it doesn't exist yet. If it doesn't exist yet
/// and this parameter is false, this function returns nullptr.
/// \return The scratch type system of the target, or nullptr if an
/// error occurred.
static TypeSystemClang *
GetForTarget(Target &target,
llvm::Optional<IsolatedASTKind> ast_kind = DefaultAST,
bool create_on_demand = true);
/// Returns the scratch TypeSystemClang for the given target. The returned
/// TypeSystemClang will be the scratch AST or a sub-AST, depending on which
/// fits best to the passed LangOptions.
/// \param target The Target for which the scratch TypeSystemClang should be
/// returned.
/// \param lang_opts The LangOptions of a clang ASTContext that the caller
/// wants to export type information from. This is used to
/// find the best matching sub-AST that will be returned.
static TypeSystemClang *GetForTarget(Target &target,
const clang::LangOptions &lang_opts) {
return GetForTarget(target, InferIsolatedASTKindFromLangOpts(lang_opts));
}
/// \see lldb_private::TypeSystem::Dump
void Dump(llvm::raw_ostream &output) override;
UserExpression *
GetUserExpression(llvm::StringRef expr, llvm::StringRef prefix,
lldb::LanguageType language,
Expression::ResultType desired_type,
const EvaluateExpressionOptions &options,
ValueObject *ctx_obj) override;
FunctionCaller *GetFunctionCaller(const CompilerType &return_type,
const Address &function_address,
const ValueList &arg_value_list,
const char *name) override;
std::unique_ptr<UtilityFunction>
CreateUtilityFunction(std::string text, std::string name) override;
PersistentExpressionState *GetPersistentExpressionState() override;
/// Unregisters the given ASTContext as a source from the scratch AST (and
/// all sub-ASTs).
/// \see ClangASTImporter::ForgetSource
void ForgetSource(clang::ASTContext *src_ctx, ClangASTImporter &importer);
// llvm casting support
bool isA(const void *ClassID) const override {
return ClassID == &ID || TypeSystemClang::isA(ClassID);
}
static bool classof(const TypeSystem *ts) { return ts->isA(&ID); }
private:
std::unique_ptr<ClangASTSource> CreateASTSource();
/// Returns the requested sub-AST.
/// Will lazily create the sub-AST if it hasn't been created before.
TypeSystemClang &GetIsolatedAST(IsolatedASTKind feature);
/// The target triple.
/// This was potentially adjusted and might not be identical to the triple
/// of `m_target_wp`.
llvm::Triple m_triple;
lldb::TargetWP m_target_wp;
/// The persistent variables associated with this process for the expression
/// parser.
std::unique_ptr<ClangPersistentVariables> m_persistent_variables;
/// The ExternalASTSource that performs lookups and completes minimally
/// imported types.
std::unique_ptr<ClangASTSource> m_scratch_ast_source_up;
// FIXME: GCC 5.x doesn't support enum as map keys.
typedef int IsolatedASTKey;
/// Map from IsolatedASTKind to their actual TypeSystemClang instance.
/// This map is lazily filled with sub-ASTs and should be accessed via
/// `GetSubAST` (which lazily fills this map).
std::unordered_map<IsolatedASTKey, std::unique_ptr<TypeSystemClang>>
m_isolated_asts;
};
} // namespace lldb_private
#endif // LLDB_SOURCE_PLUGINS_TYPESYSTEM_CLANG_TYPESYSTEMCLANG_H
diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp
index ac98352c235e..bef456583687 100644
--- a/lldb/source/Symbol/CompilerType.cpp
+++ b/lldb/source/Symbol/CompilerType.cpp
@@ -1,896 +1,898 @@
//===-- CompilerType.cpp --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/Symbol/CompilerType.h"
#include "lldb/Core/Debugger.h"
#include "lldb/Core/StreamFile.h"
#include "lldb/Symbol/Type.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Process.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/DataBufferHeap.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/Scalar.h"
#include "lldb/Utility/Stream.h"
#include "lldb/Utility/StreamString.h"
#include <iterator>
#include <mutex>
using namespace lldb;
using namespace lldb_private;
// Tests
bool CompilerType::IsAggregateType() const {
if (IsValid())
return m_type_system->IsAggregateType(m_type);
return false;
}
bool CompilerType::IsAnonymousType() const {
if (IsValid())
return m_type_system->IsAnonymousType(m_type);
return false;
}
bool CompilerType::IsScopedEnumerationType() const {
if (IsValid())
return m_type_system->IsScopedEnumerationType(m_type);
return false;
}
bool CompilerType::IsArrayType(CompilerType *element_type_ptr, uint64_t *size,
bool *is_incomplete) const {
if (IsValid())
return m_type_system->IsArrayType(m_type, element_type_ptr, size,
is_incomplete);
if (element_type_ptr)
element_type_ptr->Clear();
if (size)
*size = 0;
if (is_incomplete)
*is_incomplete = false;
return false;
}
bool CompilerType::IsVectorType(CompilerType *element_type,
uint64_t *size) const {
if (IsValid())
return m_type_system->IsVectorType(m_type, element_type, size);
return false;
}
bool CompilerType::IsRuntimeGeneratedType() const {
if (IsValid())
return m_type_system->IsRuntimeGeneratedType(m_type);
return false;
}
bool CompilerType::IsCharType() const {
if (IsValid())
return m_type_system->IsCharType(m_type);
return false;
}
bool CompilerType::IsCompleteType() const {
if (IsValid())
return m_type_system->IsCompleteType(m_type);
return false;
}
bool CompilerType::IsConst() const {
if (IsValid())
return m_type_system->IsConst(m_type);
return false;
}
bool CompilerType::IsCStringType(uint32_t &length) const {
if (IsValid())
return m_type_system->IsCStringType(m_type, length);
return false;
}
bool CompilerType::IsFunctionType() const {
if (IsValid())
return m_type_system->IsFunctionType(m_type);
return false;
}
// Used to detect "Homogeneous Floating-point Aggregates"
uint32_t
CompilerType::IsHomogeneousAggregate(CompilerType *base_type_ptr) const {
if (IsValid())
return m_type_system->IsHomogeneousAggregate(m_type, base_type_ptr);
return 0;
}
size_t CompilerType::GetNumberOfFunctionArguments() const {
if (IsValid())
return m_type_system->GetNumberOfFunctionArguments(m_type);
return 0;
}
CompilerType
CompilerType::GetFunctionArgumentAtIndex(const size_t index) const {
if (IsValid())
return m_type_system->GetFunctionArgumentAtIndex(m_type, index);
return CompilerType();
}
bool CompilerType::IsFunctionPointerType() const {
if (IsValid())
return m_type_system->IsFunctionPointerType(m_type);
return false;
}
bool CompilerType::IsBlockPointerType(
CompilerType *function_pointer_type_ptr) const {
if (IsValid())
return m_type_system->IsBlockPointerType(m_type, function_pointer_type_ptr);
return false;
}
bool CompilerType::IsIntegerType(bool &is_signed) const {
if (IsValid())
return m_type_system->IsIntegerType(m_type, is_signed);
return false;
}
bool CompilerType::IsEnumerationType(bool &is_signed) const {
if (IsValid())
return m_type_system->IsEnumerationType(m_type, is_signed);
return false;
}
bool CompilerType::IsIntegerOrEnumerationType(bool &is_signed) const {
return IsIntegerType(is_signed) || IsEnumerationType(is_signed);
}
bool CompilerType::IsPointerType(CompilerType *pointee_type) const {
if (IsValid()) {
return m_type_system->IsPointerType(m_type, pointee_type);
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool CompilerType::IsPointerOrReferenceType(CompilerType *pointee_type) const {
if (IsValid()) {
return m_type_system->IsPointerOrReferenceType(m_type, pointee_type);
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool CompilerType::IsReferenceType(CompilerType *pointee_type,
bool *is_rvalue) const {
if (IsValid()) {
return m_type_system->IsReferenceType(m_type, pointee_type, is_rvalue);
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool CompilerType::ShouldTreatScalarValueAsAddress() const {
if (IsValid())
return m_type_system->ShouldTreatScalarValueAsAddress(m_type);
return false;
}
bool CompilerType::IsFloatingPointType(uint32_t &count,
bool &is_complex) const {
if (IsValid()) {
return m_type_system->IsFloatingPointType(m_type, count, is_complex);
}
count = 0;
is_complex = false;
return false;
}
bool CompilerType::IsDefined() const {
if (IsValid())
return m_type_system->IsDefined(m_type);
return true;
}
bool CompilerType::IsPolymorphicClass() const {
if (IsValid()) {
return m_type_system->IsPolymorphicClass(m_type);
}
return false;
}
bool CompilerType::IsPossibleDynamicType(CompilerType *dynamic_pointee_type,
bool check_cplusplus,
bool check_objc) const {
if (IsValid())
return m_type_system->IsPossibleDynamicType(m_type, dynamic_pointee_type,
check_cplusplus, check_objc);
return false;
}
bool CompilerType::IsScalarType() const {
if (!IsValid())
return false;
return m_type_system->IsScalarType(m_type);
}
bool CompilerType::IsTypedefType() const {
if (!IsValid())
return false;
return m_type_system->IsTypedefType(m_type);
}
bool CompilerType::IsVoidType() const {
if (!IsValid())
return false;
return m_type_system->IsVoidType(m_type);
}
bool CompilerType::IsPointerToScalarType() const {
if (!IsValid())
return false;
return IsPointerType() && GetPointeeType().IsScalarType();
}
bool CompilerType::IsArrayOfScalarType() const {
CompilerType element_type;
if (IsArrayType(&element_type))
return element_type.IsScalarType();
return false;
}
bool CompilerType::IsBeingDefined() const {
if (!IsValid())
return false;
return m_type_system->IsBeingDefined(m_type);
}
// Type Completion
bool CompilerType::GetCompleteType() const {
if (!IsValid())
return false;
return m_type_system->GetCompleteType(m_type);
}
// AST related queries
size_t CompilerType::GetPointerByteSize() const {
if (m_type_system)
return m_type_system->GetPointerByteSize();
return 0;
}
ConstString CompilerType::GetTypeName() const {
if (IsValid()) {
return m_type_system->GetTypeName(m_type);
}
return ConstString("<invalid>");
}
ConstString CompilerType::GetDisplayTypeName() const {
if (IsValid())
return m_type_system->GetDisplayTypeName(m_type);
return ConstString("<invalid>");
}
uint32_t CompilerType::GetTypeInfo(
CompilerType *pointee_or_element_compiler_type) const {
if (!IsValid())
return 0;
return m_type_system->GetTypeInfo(m_type, pointee_or_element_compiler_type);
}
lldb::LanguageType CompilerType::GetMinimumLanguage() {
if (!IsValid())
return lldb::eLanguageTypeC;
return m_type_system->GetMinimumLanguage(m_type);
}
lldb::TypeClass CompilerType::GetTypeClass() const {
if (!IsValid())
return lldb::eTypeClassInvalid;
return m_type_system->GetTypeClass(m_type);
}
void CompilerType::SetCompilerType(TypeSystem *type_system,
lldb::opaque_compiler_type_t type) {
m_type_system = type_system;
m_type = type;
}
unsigned CompilerType::GetTypeQualifiers() const {
if (IsValid())
return m_type_system->GetTypeQualifiers(m_type);
return 0;
}
// Creating related types
CompilerType
CompilerType::GetArrayElementType(ExecutionContextScope *exe_scope) const {
if (IsValid()) {
return m_type_system->GetArrayElementType(m_type, exe_scope);
}
return CompilerType();
}
CompilerType CompilerType::GetArrayType(uint64_t size) const {
if (IsValid()) {
return m_type_system->GetArrayType(m_type, size);
}
return CompilerType();
}
CompilerType CompilerType::GetCanonicalType() const {
if (IsValid())
return m_type_system->GetCanonicalType(m_type);
return CompilerType();
}
CompilerType CompilerType::GetFullyUnqualifiedType() const {
if (IsValid())
return m_type_system->GetFullyUnqualifiedType(m_type);
return CompilerType();
}
CompilerType CompilerType::GetEnumerationIntegerType() const {
if (IsValid())
return m_type_system->GetEnumerationIntegerType(m_type);
return CompilerType();
}
int CompilerType::GetFunctionArgumentCount() const {
if (IsValid()) {
return m_type_system->GetFunctionArgumentCount(m_type);
}
return -1;
}
CompilerType CompilerType::GetFunctionArgumentTypeAtIndex(size_t idx) const {
if (IsValid()) {
return m_type_system->GetFunctionArgumentTypeAtIndex(m_type, idx);
}
return CompilerType();
}
CompilerType CompilerType::GetFunctionReturnType() const {
if (IsValid()) {
return m_type_system->GetFunctionReturnType(m_type);
}
return CompilerType();
}
size_t CompilerType::GetNumMemberFunctions() const {
if (IsValid()) {
return m_type_system->GetNumMemberFunctions(m_type);
}
return 0;
}
TypeMemberFunctionImpl CompilerType::GetMemberFunctionAtIndex(size_t idx) {
if (IsValid()) {
return m_type_system->GetMemberFunctionAtIndex(m_type, idx);
}
return TypeMemberFunctionImpl();
}
CompilerType CompilerType::GetNonReferenceType() const {
if (IsValid())
return m_type_system->GetNonReferenceType(m_type);
return CompilerType();
}
CompilerType CompilerType::GetPointeeType() const {
if (IsValid()) {
return m_type_system->GetPointeeType(m_type);
}
return CompilerType();
}
CompilerType CompilerType::GetPointerType() const {
if (IsValid()) {
return m_type_system->GetPointerType(m_type);
}
return CompilerType();
}
CompilerType CompilerType::GetLValueReferenceType() const {
if (IsValid())
return m_type_system->GetLValueReferenceType(m_type);
else
return CompilerType();
}
CompilerType CompilerType::GetRValueReferenceType() const {
if (IsValid())
return m_type_system->GetRValueReferenceType(m_type);
else
return CompilerType();
}
CompilerType CompilerType::GetAtomicType() const {
if (IsValid())
return m_type_system->GetAtomicType(m_type);
return CompilerType();
}
CompilerType CompilerType::AddConstModifier() const {
if (IsValid())
return m_type_system->AddConstModifier(m_type);
else
return CompilerType();
}
CompilerType CompilerType::AddVolatileModifier() const {
if (IsValid())
return m_type_system->AddVolatileModifier(m_type);
else
return CompilerType();
}
CompilerType CompilerType::AddRestrictModifier() const {
if (IsValid())
return m_type_system->AddRestrictModifier(m_type);
else
return CompilerType();
}
CompilerType CompilerType::CreateTypedef(const char *name,
const CompilerDeclContext &decl_ctx,
uint32_t payload) const {
if (IsValid())
return m_type_system->CreateTypedef(m_type, name, decl_ctx, payload);
else
return CompilerType();
}
CompilerType CompilerType::GetTypedefedType() const {
if (IsValid())
return m_type_system->GetTypedefedType(m_type);
else
return CompilerType();
}
// Create related types using the current type's AST
CompilerType
CompilerType::GetBasicTypeFromAST(lldb::BasicType basic_type) const {
if (IsValid())
return m_type_system->GetBasicTypeFromAST(basic_type);
return CompilerType();
}
// Exploring the type
llvm::Optional<uint64_t>
CompilerType::GetBitSize(ExecutionContextScope *exe_scope) const {
if (IsValid())
return m_type_system->GetBitSize(m_type, exe_scope);
return {};
}
llvm::Optional<uint64_t>
CompilerType::GetByteSize(ExecutionContextScope *exe_scope) const {
if (llvm::Optional<uint64_t> bit_size = GetBitSize(exe_scope))
return (*bit_size + 7) / 8;
return {};
}
llvm::Optional<size_t> CompilerType::GetTypeBitAlign(ExecutionContextScope *exe_scope) const {
if (IsValid())
return m_type_system->GetTypeBitAlign(m_type, exe_scope);
return {};
}
lldb::Encoding CompilerType::GetEncoding(uint64_t &count) const {
if (!IsValid())
return lldb::eEncodingInvalid;
return m_type_system->GetEncoding(m_type, count);
}
lldb::Format CompilerType::GetFormat() const {
if (!IsValid())
return lldb::eFormatDefault;
return m_type_system->GetFormat(m_type);
}
uint32_t CompilerType::GetNumChildren(bool omit_empty_base_classes,
const ExecutionContext *exe_ctx) const {
if (!IsValid())
return 0;
return m_type_system->GetNumChildren(m_type, omit_empty_base_classes,
exe_ctx);
}
lldb::BasicType CompilerType::GetBasicTypeEnumeration() const {
if (IsValid())
return m_type_system->GetBasicTypeEnumeration(m_type);
return eBasicTypeInvalid;
}
void CompilerType::ForEachEnumerator(
std::function<bool(const CompilerType &integer_type,
ConstString name,
const llvm::APSInt &value)> const &callback) const {
if (IsValid())
return m_type_system->ForEachEnumerator(m_type, callback);
}
uint32_t CompilerType::GetNumFields() const {
if (!IsValid())
return 0;
return m_type_system->GetNumFields(m_type);
}
CompilerType CompilerType::GetFieldAtIndex(size_t idx, std::string &name,
uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) const {
if (!IsValid())
return CompilerType();
return m_type_system->GetFieldAtIndex(m_type, idx, name, bit_offset_ptr,
bitfield_bit_size_ptr, is_bitfield_ptr);
}
uint32_t CompilerType::GetNumDirectBaseClasses() const {
if (IsValid())
return m_type_system->GetNumDirectBaseClasses(m_type);
return 0;
}
uint32_t CompilerType::GetNumVirtualBaseClasses() const {
if (IsValid())
return m_type_system->GetNumVirtualBaseClasses(m_type);
return 0;
}
CompilerType
CompilerType::GetDirectBaseClassAtIndex(size_t idx,
uint32_t *bit_offset_ptr) const {
if (IsValid())
return m_type_system->GetDirectBaseClassAtIndex(m_type, idx,
bit_offset_ptr);
return CompilerType();
}
CompilerType
CompilerType::GetVirtualBaseClassAtIndex(size_t idx,
uint32_t *bit_offset_ptr) const {
if (IsValid())
return m_type_system->GetVirtualBaseClassAtIndex(m_type, idx,
bit_offset_ptr);
return CompilerType();
}
uint32_t CompilerType::GetIndexOfFieldWithName(
const char *name, CompilerType *field_compiler_type_ptr,
uint64_t *bit_offset_ptr, uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) const {
unsigned count = GetNumFields();
std::string field_name;
for (unsigned index = 0; index < count; index++) {
CompilerType field_compiler_type(
GetFieldAtIndex(index, field_name, bit_offset_ptr,
bitfield_bit_size_ptr, is_bitfield_ptr));
if (strcmp(field_name.c_str(), name) == 0) {
if (field_compiler_type_ptr)
*field_compiler_type_ptr = field_compiler_type;
return index;
}
}
return UINT32_MAX;
}
CompilerType CompilerType::GetChildCompilerTypeAtIndex(
ExecutionContext *exe_ctx, size_t idx, bool transparent_pointers,
bool omit_empty_base_classes, bool ignore_array_bounds,
std::string &child_name, uint32_t &child_byte_size,
int32_t &child_byte_offset, uint32_t &child_bitfield_bit_size,
uint32_t &child_bitfield_bit_offset, bool &child_is_base_class,
bool &child_is_deref_of_parent, ValueObject *valobj,
uint64_t &language_flags) const {
if (!IsValid())
return CompilerType();
return m_type_system->GetChildCompilerTypeAtIndex(
m_type, exe_ctx, idx, transparent_pointers, omit_empty_base_classes,
ignore_array_bounds, child_name, child_byte_size, child_byte_offset,
child_bitfield_bit_size, child_bitfield_bit_offset, child_is_base_class,
child_is_deref_of_parent, valobj, language_flags);
}
// Look for a child member (doesn't include base classes, but it does include
// their members) in the type hierarchy. Returns an index path into
// "clang_type" describing how to reach the appropriate member.
//
// class A
// {
// public:
// int m_a;
// int m_b;
// };
//
// class B
// {
// };
//
// class C :
// public B,
// public A
// {
// };
//
// If we have a clang type that describes "class C", and we wanted to look up
// "m_b" in it:
//
// With omit_empty_base_classes == false we would get an integer array back
// with: { 1, 1 }. The first index 1 is the child index for "class A" within
// class C. The second index 1 is the child index for "m_b" within class A.
//
// With omit_empty_base_classes == true we would get an integer array back
// with: { 0, 1 }. The first index 0 is the child index for "class A" within
// class C (since class B doesn't have any members it doesn't count). The
// second index 1 is the child index for "m_b" within class A.
size_t CompilerType::GetIndexOfChildMemberWithName(
const char *name, bool omit_empty_base_classes,
std::vector<uint32_t> &child_indexes) const {
if (IsValid() && name && name[0]) {
return m_type_system->GetIndexOfChildMemberWithName(
m_type, name, omit_empty_base_classes, child_indexes);
}
return 0;
}
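// A minimal usage sketch reusing the "class C" example from the comment
// above (`c_type` is a hypothetical CompilerType for C, not part of this
// file):
//
//   std::vector<uint32_t> child_indexes;
//   c_type.GetIndexOfChildMemberWithName(
//       "m_b", /*omit_empty_base_classes=*/true, child_indexes);
//   // child_indexes == { 0, 1 }: step into "class A" (index 0, because the
//   // empty "class B" is skipped), then into "m_b" (index 1 within A).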
-size_t CompilerType::GetNumTemplateArguments() const {
+size_t CompilerType::GetNumTemplateArguments(bool expand_pack) const {
if (IsValid()) {
- return m_type_system->GetNumTemplateArguments(m_type);
+ return m_type_system->GetNumTemplateArguments(m_type, expand_pack);
}
return 0;
}
-TemplateArgumentKind CompilerType::GetTemplateArgumentKind(size_t idx) const {
+TemplateArgumentKind
+CompilerType::GetTemplateArgumentKind(size_t idx, bool expand_pack) const {
if (IsValid())
- return m_type_system->GetTemplateArgumentKind(m_type, idx);
+ return m_type_system->GetTemplateArgumentKind(m_type, idx, expand_pack);
return eTemplateArgumentKindNull;
}
-CompilerType CompilerType::GetTypeTemplateArgument(size_t idx) const {
+CompilerType CompilerType::GetTypeTemplateArgument(size_t idx,
+ bool expand_pack) const {
if (IsValid()) {
- return m_type_system->GetTypeTemplateArgument(m_type, idx);
+ return m_type_system->GetTypeTemplateArgument(m_type, idx, expand_pack);
}
return CompilerType();
}
llvm::Optional<CompilerType::IntegralTemplateArgument>
-CompilerType::GetIntegralTemplateArgument(size_t idx) const {
+CompilerType::GetIntegralTemplateArgument(size_t idx, bool expand_pack) const {
if (IsValid())
- return m_type_system->GetIntegralTemplateArgument(m_type, idx);
+ return m_type_system->GetIntegralTemplateArgument(m_type, idx, expand_pack);
return llvm::None;
}
CompilerType CompilerType::GetTypeForFormatters() const {
if (IsValid())
return m_type_system->GetTypeForFormatters(m_type);
return CompilerType();
}
LazyBool CompilerType::ShouldPrintAsOneLiner(ValueObject *valobj) const {
if (IsValid())
return m_type_system->ShouldPrintAsOneLiner(m_type, valobj);
return eLazyBoolCalculate;
}
bool CompilerType::IsMeaninglessWithoutDynamicResolution() const {
if (IsValid())
return m_type_system->IsMeaninglessWithoutDynamicResolution(m_type);
return false;
}
// Get the index of the child of "clang_type" whose name matches. This function
// doesn't descend into the children, but only looks one level deep; name
// matches can include base class names.
uint32_t
CompilerType::GetIndexOfChildWithName(const char *name,
bool omit_empty_base_classes) const {
if (IsValid() && name && name[0]) {
return m_type_system->GetIndexOfChildWithName(m_type, name,
omit_empty_base_classes);
}
return UINT32_MAX;
}
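// A hedged contrast with GetIndexOfChildMemberWithName above, assuming the
// same hypothetical `c_type` for "class C" and that base-class children are
// named after their type:
//
//   c_type.GetIndexOfChildWithName("A", true);   // index of the base class
//   c_type.GetIndexOfChildWithName("m_b", true); // UINT32_MAX: only one
//                                                // level deep, and "m_b"
//                                                // lives inside base A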
// Dumping types
void CompilerType::DumpValue(ExecutionContext *exe_ctx, Stream *s,
lldb::Format format, const DataExtractor &data,
lldb::offset_t data_byte_offset,
size_t data_byte_size, uint32_t bitfield_bit_size,
uint32_t bitfield_bit_offset, bool show_types,
bool show_summary, bool verbose, uint32_t depth) {
if (!IsValid())
return;
m_type_system->DumpValue(m_type, exe_ctx, s, format, data, data_byte_offset,
data_byte_size, bitfield_bit_size,
bitfield_bit_offset, show_types, show_summary,
verbose, depth);
}
bool CompilerType::DumpTypeValue(Stream *s, lldb::Format format,
const DataExtractor &data,
lldb::offset_t byte_offset, size_t byte_size,
uint32_t bitfield_bit_size,
uint32_t bitfield_bit_offset,
ExecutionContextScope *exe_scope) {
if (!IsValid())
return false;
return m_type_system->DumpTypeValue(m_type, s, format, data, byte_offset,
byte_size, bitfield_bit_size,
bitfield_bit_offset, exe_scope);
}
void CompilerType::DumpSummary(ExecutionContext *exe_ctx, Stream *s,
const DataExtractor &data,
lldb::offset_t data_byte_offset,
size_t data_byte_size) {
if (IsValid())
m_type_system->DumpSummary(m_type, exe_ctx, s, data, data_byte_offset,
data_byte_size);
}
void CompilerType::DumpTypeDescription(lldb::DescriptionLevel level) const {
if (IsValid())
m_type_system->DumpTypeDescription(m_type, level);
}
void CompilerType::DumpTypeDescription(Stream *s,
lldb::DescriptionLevel level) const {
if (IsValid()) {
m_type_system->DumpTypeDescription(m_type, s, level);
}
}
#ifndef NDEBUG
LLVM_DUMP_METHOD void CompilerType::dump() const {
if (IsValid())
m_type_system->dump(m_type);
else
llvm::errs() << "<invalid>\n";
}
#endif
bool CompilerType::GetValueAsScalar(const lldb_private::DataExtractor &data,
lldb::offset_t data_byte_offset,
size_t data_byte_size, Scalar &value,
ExecutionContextScope *exe_scope) const {
if (!IsValid())
return false;
if (IsAggregateType()) {
return false; // Aggregate types don't have scalar values
} else {
uint64_t count = 0;
lldb::Encoding encoding = GetEncoding(count);
if (encoding == lldb::eEncodingInvalid || count != 1)
return false;
llvm::Optional<uint64_t> byte_size = GetByteSize(exe_scope);
if (!byte_size)
return false;
lldb::offset_t offset = data_byte_offset;
switch (encoding) {
case lldb::eEncodingInvalid:
break;
case lldb::eEncodingVector:
break;
case lldb::eEncodingUint:
if (*byte_size <= sizeof(unsigned long long)) {
uint64_t uval64 = data.GetMaxU64(&offset, *byte_size);
if (*byte_size <= sizeof(unsigned int)) {
value = (unsigned int)uval64;
return true;
} else if (*byte_size <= sizeof(unsigned long)) {
value = (unsigned long)uval64;
return true;
} else if (*byte_size <= sizeof(unsigned long long)) {
value = (unsigned long long)uval64;
return true;
} else
value.Clear();
}
break;
case lldb::eEncodingSint:
if (*byte_size <= sizeof(long long)) {
int64_t sval64 = data.GetMaxS64(&offset, *byte_size);
if (*byte_size <= sizeof(int)) {
value = (int)sval64;
return true;
} else if (*byte_size <= sizeof(long)) {
value = (long)sval64;
return true;
} else if (*byte_size <= sizeof(long long)) {
value = (long long)sval64;
return true;
} else
value.Clear();
}
break;
case lldb::eEncodingIEEE754:
if (*byte_size <= sizeof(long double)) {
uint32_t u32;
uint64_t u64;
if (*byte_size == sizeof(float)) {
if (sizeof(float) == sizeof(uint32_t)) {
u32 = data.GetU32(&offset);
value = *((float *)&u32);
return true;
} else if (sizeof(float) == sizeof(uint64_t)) {
u64 = data.GetU64(&offset);
value = *((float *)&u64);
return true;
}
} else if (*byte_size == sizeof(double)) {
if (sizeof(double) == sizeof(uint32_t)) {
u32 = data.GetU32(&offset);
value = *((double *)&u32);
return true;
} else if (sizeof(double) == sizeof(uint64_t)) {
u64 = data.GetU64(&offset);
value = *((double *)&u64);
return true;
}
} else if (*byte_size == sizeof(long double)) {
if (sizeof(long double) == sizeof(uint32_t)) {
u32 = data.GetU32(&offset);
value = *((long double *)&u32);
return true;
} else if (sizeof(long double) == sizeof(uint64_t)) {
u64 = data.GetU64(&offset);
value = *((long double *)&u64);
return true;
}
}
}
break;
}
}
return false;
}
#ifndef NDEBUG
bool CompilerType::Verify() const {
return !IsValid() || m_type_system->Verify(m_type);
}
#endif
bool lldb_private::operator==(const lldb_private::CompilerType &lhs,
const lldb_private::CompilerType &rhs) {
return lhs.GetTypeSystem() == rhs.GetTypeSystem() &&
lhs.GetOpaqueQualType() == rhs.GetOpaqueQualType();
}
bool lldb_private::operator!=(const lldb_private::CompilerType &lhs,
const lldb_private::CompilerType &rhs) {
return !(lhs == rhs);
}
diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp
index 3092dc0bf0a4..412373533aab 100644
--- a/lldb/source/Symbol/TypeSystem.cpp
+++ b/lldb/source/Symbol/TypeSystem.cpp
@@ -1,300 +1,302 @@
//===-- TypeSystem.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/Symbol/TypeSystem.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Expression/UtilityFunction.h"
#include "lldb/Symbol/CompilerType.h"
#include "lldb/Target/Language.h"
#include <set>
using namespace lldb_private;
using namespace lldb;
/// A 64-bit SmallBitVector is only small up to 64-7 bits, and the
/// setBitsInMask interface wants to write full bytes.
static const size_t g_num_small_bitvector_bits = 64 - 8;
static_assert(eNumLanguageTypes < g_num_small_bitvector_bits,
"Languages bit vector is no longer small on 64 bit systems");
LanguageSet::LanguageSet() : bitvector(eNumLanguageTypes, false) {}
llvm::Optional<LanguageType> LanguageSet::GetSingularLanguage() {
if (bitvector.count() == 1)
return (LanguageType)bitvector.find_first();
return {};
}
void LanguageSet::Insert(LanguageType language) { bitvector.set(language); }
size_t LanguageSet::Size() const { return bitvector.count(); }
bool LanguageSet::Empty() const { return bitvector.none(); }
bool LanguageSet::operator[](unsigned i) const { return bitvector[i]; }
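// A minimal usage sketch for LanguageSet:
//
//   LanguageSet set;
//   set.Insert(lldb::eLanguageTypeC_plus_plus);
//   set.Insert(lldb::eLanguageTypeObjC);
//   // Two languages are set, so there is no singular language.
//   assert(set.Size() == 2 && !set.GetSingularLanguage());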
TypeSystem::~TypeSystem() = default;
static lldb::TypeSystemSP CreateInstanceHelper(lldb::LanguageType language,
Module *module, Target *target) {
uint32_t i = 0;
TypeSystemCreateInstance create_callback;
while ((create_callback = PluginManager::GetTypeSystemCreateCallbackAtIndex(
i++)) != nullptr) {
lldb::TypeSystemSP type_system_sp =
create_callback(language, module, target);
if (type_system_sp)
return type_system_sp;
}
return lldb::TypeSystemSP();
}
lldb::TypeSystemSP TypeSystem::CreateInstance(lldb::LanguageType language,
Module *module) {
return CreateInstanceHelper(language, module, nullptr);
}
lldb::TypeSystemSP TypeSystem::CreateInstance(lldb::LanguageType language,
Target *target) {
return CreateInstanceHelper(language, nullptr, target);
}
#ifndef NDEBUG
bool TypeSystem::Verify(lldb::opaque_compiler_type_t type) { return true; }
#endif
bool TypeSystem::IsAnonymousType(lldb::opaque_compiler_type_t type) {
return false;
}
CompilerType TypeSystem::GetArrayType(lldb::opaque_compiler_type_t type,
uint64_t size) {
return CompilerType();
}
CompilerType
TypeSystem::GetLValueReferenceType(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType
TypeSystem::GetRValueReferenceType(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType TypeSystem::GetAtomicType(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType TypeSystem::AddConstModifier(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType
TypeSystem::AddVolatileModifier(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType
TypeSystem::AddRestrictModifier(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType TypeSystem::CreateTypedef(lldb::opaque_compiler_type_t type,
const char *name,
const CompilerDeclContext &decl_ctx,
uint32_t opaque_payload) {
return CompilerType();
}
CompilerType TypeSystem::GetBuiltinTypeByName(ConstString name) {
return CompilerType();
}
CompilerType TypeSystem::GetTypeForFormatters(void *type) {
return CompilerType(this, type);
}
-size_t TypeSystem::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) {
+size_t TypeSystem::GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+ bool expand_pack) {
return 0;
}
TemplateArgumentKind
-TypeSystem::GetTemplateArgumentKind(opaque_compiler_type_t type, size_t idx) {
+TypeSystem::GetTemplateArgumentKind(opaque_compiler_type_t type, size_t idx,
+ bool expand_pack) {
return eTemplateArgumentKindNull;
}
CompilerType TypeSystem::GetTypeTemplateArgument(opaque_compiler_type_t type,
- size_t idx) {
+ size_t idx, bool expand_pack) {
return CompilerType();
}
llvm::Optional<CompilerType::IntegralTemplateArgument>
-TypeSystem::GetIntegralTemplateArgument(opaque_compiler_type_t type,
- size_t idx) {
+TypeSystem::GetIntegralTemplateArgument(opaque_compiler_type_t type, size_t idx,
+ bool expand_pack) {
return llvm::None;
}
LazyBool TypeSystem::ShouldPrintAsOneLiner(void *type, ValueObject *valobj) {
return eLazyBoolCalculate;
}
bool TypeSystem::IsMeaninglessWithoutDynamicResolution(void *type) {
return false;
}
ConstString TypeSystem::DeclGetMangledName(void *opaque_decl) {
return ConstString();
}
CompilerDeclContext TypeSystem::DeclGetDeclContext(void *opaque_decl) {
return CompilerDeclContext();
}
CompilerType TypeSystem::DeclGetFunctionReturnType(void *opaque_decl) {
return CompilerType();
}
size_t TypeSystem::DeclGetFunctionNumArguments(void *opaque_decl) { return 0; }
CompilerType TypeSystem::DeclGetFunctionArgumentType(void *opaque_decl,
size_t arg_idx) {
return CompilerType();
}
std::vector<CompilerDecl>
TypeSystem::DeclContextFindDeclByName(void *opaque_decl_ctx, ConstString name,
bool ignore_imported_decls) {
return std::vector<CompilerDecl>();
}
std::unique_ptr<UtilityFunction>
TypeSystem::CreateUtilityFunction(std::string text, std::string name) {
return {};
}
#pragma mark TypeSystemMap
TypeSystemMap::TypeSystemMap() : m_mutex(), m_map() {}
TypeSystemMap::~TypeSystemMap() = default;
void TypeSystemMap::Clear() {
collection map;
{
std::lock_guard<std::mutex> guard(m_mutex);
map = m_map;
m_clear_in_progress = true;
}
std::set<TypeSystem *> visited;
for (auto pair : map) {
TypeSystem *type_system = pair.second.get();
if (type_system && !visited.count(type_system)) {
visited.insert(type_system);
type_system->Finalize();
}
}
map.clear();
{
std::lock_guard<std::mutex> guard(m_mutex);
m_map.clear();
m_clear_in_progress = false;
}
}
void TypeSystemMap::ForEach(std::function<bool(TypeSystem *)> const &callback) {
std::lock_guard<std::mutex> guard(m_mutex);
// Use a std::set so we only call the callback once for each unique
// TypeSystem instance
std::set<TypeSystem *> visited;
for (auto pair : m_map) {
TypeSystem *type_system = pair.second.get();
if (type_system && !visited.count(type_system)) {
visited.insert(type_system);
if (!callback(type_system))
break;
}
}
}
llvm::Expected<TypeSystem &> TypeSystemMap::GetTypeSystemForLanguage(
lldb::LanguageType language,
llvm::Optional<CreateCallback> create_callback) {
std::lock_guard<std::mutex> guard(m_mutex);
if (m_clear_in_progress)
return llvm::make_error<llvm::StringError>(
"Unable to get TypeSystem because TypeSystemMap is being cleared",
llvm::inconvertibleErrorCode());
collection::iterator pos = m_map.find(language);
if (pos != m_map.end()) {
auto *type_system = pos->second.get();
if (type_system)
return *type_system;
return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
llvm::inconvertibleErrorCode());
}
for (const auto &pair : m_map) {
if (pair.second && pair.second->SupportsLanguage(language)) {
// Add a new mapping for "language" to point to an already existing
// TypeSystem that supports this language
m_map[language] = pair.second;
if (pair.second.get())
return *pair.second.get();
return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
llvm::inconvertibleErrorCode());
}
}
if (!create_callback)
return llvm::make_error<llvm::StringError>(
"Unable to find type system for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)),
llvm::inconvertibleErrorCode());
// Cache even if we get a shared pointer that contains a null type system
// back
TypeSystemSP type_system_sp = (*create_callback)();
m_map[language] = type_system_sp;
if (type_system_sp.get())
return *type_system_sp.get();
return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
llvm::inconvertibleErrorCode());
}
llvm::Expected<TypeSystem &>
TypeSystemMap::GetTypeSystemForLanguage(lldb::LanguageType language,
Module *module, bool can_create) {
if (can_create) {
return GetTypeSystemForLanguage(
language, llvm::Optional<CreateCallback>([language, module]() {
return TypeSystem::CreateInstance(language, module);
}));
}
return GetTypeSystemForLanguage(language);
}
llvm::Expected<TypeSystem &>
TypeSystemMap::GetTypeSystemForLanguage(lldb::LanguageType language,
Target *target, bool can_create) {
if (can_create) {
return GetTypeSystemForLanguage(
language, llvm::Optional<CreateCallback>([language, target]() {
return TypeSystem::CreateInstance(language, target);
}));
}
return GetTypeSystemForLanguage(language);
}
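// A minimal call-site sketch (the `map` and `module` variables here are
// hypothetical): request a C++ type system, allowing creation on demand.
//
//   llvm::Expected<TypeSystem &> ts_or_err = map.GetTypeSystemForLanguage(
//       lldb::eLanguageTypeC_plus_plus, module, /*can_create=*/true);
//   if (!ts_or_err)
//     llvm::consumeError(ts_or_err.takeError());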
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index c4795a80ead2..bc20f33f174c 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -1,1554 +1,1559 @@
//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines vectorizer utilities.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#define DEBUG_TYPE "vectorutils"
using namespace llvm;
using namespace llvm::PatternMatch;
/// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
"max-interleave-group-factor", cl::Hidden,
cl::desc("Maximum factor for an interleaved access group (default = 8)"),
cl::init(8));
/// Return true if all of the intrinsic's arguments and return type are scalars
/// for the scalar form of the intrinsic, and vectors for the vector form of the
/// intrinsic (except operands that are marked as always being scalar by
/// isVectorIntrinsicWithScalarOpAtArg).
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::abs: // Begin integer bit-manipulation.
case Intrinsic::bswap:
case Intrinsic::bitreverse:
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::fshl:
case Intrinsic::fshr:
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat:
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix:
case Intrinsic::umul_fix_sat:
case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::fabs:
case Intrinsic::minnum:
case Intrinsic::maxnum:
case Intrinsic::minimum:
case Intrinsic::maximum:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::powi:
case Intrinsic::canonicalize:
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
return true;
default:
return false;
}
}
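// For example, llvm.sqrt is trivially vectorizable: the scalar form
//   declare float @llvm.sqrt.f32(float)
// maps element-wise onto the vector form
//   declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
// so isTriviallyVectorizable(Intrinsic::sqrt) returns true.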
/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
unsigned ScalarOpdIdx) {
switch (ID) {
case Intrinsic::abs:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::powi:
return (ScalarOpdIdx == 1);
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix:
case Intrinsic::umul_fix_sat:
return (ScalarOpdIdx == 2);
default:
return false;
}
}
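// For example, the exponent operand of llvm.powi stays scalar when the call
// is vectorized:
//   isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 1); // true
//   isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 0); // false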
bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
unsigned OpdIdx) {
switch (ID) {
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
return OpdIdx == 0;
case Intrinsic::powi:
return OpdIdx == 1;
default:
return false;
}
}
/// Returns the intrinsic ID for a call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its ID; if it does not find one, it returns not_intrinsic.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
const TargetLibraryInfo *TLI) {
Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
if (ID == Intrinsic::not_intrinsic)
return Intrinsic::not_intrinsic;
if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
ID == Intrinsic::experimental_noalias_scope_decl ||
ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
return ID;
return Intrinsic::not_intrinsic;
}
/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
const DataLayout &DL = Gep->getModule()->getDataLayout();
unsigned LastOperand = Gep->getNumOperands() - 1;
TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
// Walk backwards and try to peel off zeros.
while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
// Find the type we're currently indexing into.
gep_type_iterator GEPTI = gep_type_begin(Gep);
std::advance(GEPTI, LastOperand - 2);
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
break;
--LastOperand;
}
return LastOperand;
}
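// A minimal illustration (IR names hypothetical): for
//   %gep = getelementptr float, ptr %p, i64 %i
// there are no trailing indices to peel, so the induction operand is
// operand 1 (%i). Trailing zero indices are only peeled off when the type
// they index has the same allocation size as the GEP's result element type.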
/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP)
return Ptr;
unsigned InductionOperand = getGEPInductionOperand(GEP);
// Check that all of the gep indices are uniform except for our induction
// operand.
for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
if (i != InductionOperand &&
!SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
return Ptr;
return GEP->getOperand(InductionOperand);
}
/// If a value has only one user that is a CastInst, return it.
Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
Value *UniqueCast = nullptr;
for (User *U : Ptr->users()) {
CastInst *CI = dyn_cast<CastInst>(U);
if (CI && CI->getType() == Ty) {
if (!UniqueCast)
UniqueCast = CI;
else
return nullptr;
}
}
return UniqueCast;
}
/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy || PtrTy->isAggregateType())
return nullptr;
// Try to remove a gep instruction to make the pointer (actually the index at
// this point) easier to analyze. If OrigPtr is equal to Ptr, we are analyzing
// the pointer; otherwise, we are analyzing the index.
Value *OrigPtr = Ptr;
// The size of the pointer access.
int64_t PtrAccessSize = 1;
Ptr = stripGetElementPtr(Ptr, SE, Lp);
const SCEV *V = SE->getSCEV(Ptr);
if (Ptr != OrigPtr)
// Strip off casts.
while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
V = C->getOperand();
const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
if (!S)
return nullptr;
V = S->getStepRecurrence(*SE);
if (!V)
return nullptr;
// Strip off the size of access multiplication if we are still analyzing the
// pointer.
if (OrigPtr == Ptr) {
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
if (M->getOperand(0)->getSCEVType() != scConstant)
return nullptr;
const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
return nullptr;
int64_t StepVal = APStepVal.getSExtValue();
if (PtrAccessSize != StepVal)
return nullptr;
V = M->getOperand(1);
}
}
// Strip off casts.
Type *StripedOffRecurrenceCast = nullptr;
if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
StripedOffRecurrenceCast = C->getType();
V = C->getOperand();
}
// Look for the loop invariant symbolic value.
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
if (!U)
return nullptr;
Value *Stride = U->getValue();
if (!Lp->isLoopInvariant(Stride))
return nullptr;
// If we have stripped off the recurrence cast we have to make sure that we
// return the value that is used in this loop so that we can replace it later.
if (StripedOffRecurrenceCast)
Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
return Stride;
}
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
assert(V->getType()->isVectorTy() && "Not looking at a vector?");
VectorType *VTy = cast<VectorType>(V->getType());
// For fixed-length vector, return undef for out of range access.
if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
unsigned Width = FVTy->getNumElements();
if (EltNo >= Width)
return UndefValue::get(FVTy->getElementType());
}
if (Constant *C = dyn_cast<Constant>(V))
return C->getAggregateElement(EltNo);
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
if (!isa<ConstantInt>(III->getOperand(2)))
return nullptr;
unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
// If this is an insert to the element we are looking for, return the
// inserted value.
if (EltNo == IIElt)
return III->getOperand(1);
// Guard against infinite loop on malformed, unreachable IR.
if (III == III->getOperand(0))
return nullptr;
// Otherwise, the insertelement doesn't modify the value, recurse on its
// vector input.
return findScalarElement(III->getOperand(0), EltNo);
}
ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
// Restrict the following transformation to fixed-length vector.
if (SVI && isa<FixedVectorType>(SVI->getType())) {
unsigned LHSWidth =
cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
int InEl = SVI->getMaskValue(EltNo);
if (InEl < 0)
return UndefValue::get(VTy->getElementType());
if (InEl < (int)LHSWidth)
return findScalarElement(SVI->getOperand(0), InEl);
return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
}
// Extract a value from a vector add operation with a constant zero.
// TODO: Use getBinOpIdentity() to generalize this.
Value *Val; Constant *C;
if (match(V, m_Add(m_Value(Val), m_Constant(C))))
if (Constant *Elt = C->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
// If the vector is a splat then we can trivially find the scalar element.
if (isa<ScalableVectorType>(VTy))
if (Value *Splat = getSplatValue(V))
if (EltNo < VTy->getElementCount().getKnownMinValue())
return Splat;
// Otherwise, we don't know.
return nullptr;
}
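// A minimal sketch (IR names hypothetical): if %v was built as
//   %v = insertelement <4 x float> %w, float %s, i32 2
// then findScalarElement(%v, 2) returns %s directly, while
// findScalarElement(%v, 1) recurses into %w (and returns nullptr if %w is an
// opaque value such as a function argument).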
int llvm::getSplatIndex(ArrayRef<int> Mask) {
int SplatIndex = -1;
for (int M : Mask) {
// Ignore invalid (undefined) mask elements.
if (M < 0)
continue;
// There can be only 1 non-negative mask element value if this is a splat.
if (SplatIndex != -1 && SplatIndex != M)
return -1;
// Initialize the splat index to the 1st non-negative mask element.
SplatIndex = M;
}
assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
return SplatIndex;
}
/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcasts a scalar at element 0.
Value *llvm::getSplatValue(const Value *V) {
if (isa<VectorType>(V->getType()))
if (auto *C = dyn_cast<Constant>(V))
return C->getSplatValue();
// shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
Value *Splat;
if (match(V,
m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
m_Value(), m_ZeroMask())))
return Splat;
return nullptr;
}
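// A sketch of the broadcast pattern matched above (IR names hypothetical):
//   %ins = insertelement <4 x i32> poison, i32 %x, i64 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison,
//                          <4 x i32> zeroinitializer
// getSplatValue(%splat) returns %x; for a constant splat such as
// <4 x i32> <i32 7, i32 7, i32 7, i32 7> it returns the i32 7 constant.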
bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
if (isa<VectorType>(V->getType())) {
if (isa<UndefValue>(V))
return true;
// FIXME: We can allow undefs, but if Index was specified, we may want to
// check that the constant is defined at that index.
if (auto *C = dyn_cast<Constant>(V))
return C->getSplatValue() != nullptr;
}
if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
// FIXME: We can safely allow undefs here. If Index was specified, we will
// check that the mask elt is defined at the required index.
if (!is_splat(Shuf->getShuffleMask()))
return false;
// Match any index.
if (Index == -1)
return true;
// Match a specific element. The mask should be defined at and match the
// specified index.
return Shuf->getMaskValue(Index) == Index;
}
// The remaining tests are all recursive, so bail out if we hit the limit.
if (Depth++ == MaxAnalysisRecursionDepth)
return false;
// If both operands of a binop are splats, the result is a splat.
Value *X, *Y, *Z;
if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
// If all operands of a select are splats, the result is a splat.
if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
isSplatValue(Z, Index, Depth);
// TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
return false;
}
void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask) {
assert(Scale > 0 && "Unexpected scaling factor");
// Fast-path: if no scaling, then it is just a copy.
if (Scale == 1) {
ScaledMask.assign(Mask.begin(), Mask.end());
return;
}
ScaledMask.clear();
for (int MaskElt : Mask) {
if (MaskElt >= 0) {
assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
"Overflowed 32-bits");
}
for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
}
}
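// A worked example: narrowing mask <0, 2> by Scale == 2 expands each wide
// element into two consecutive narrow elements:
//
//   SmallVector<int> Scaled;
//   narrowShuffleMaskElts(2, {0, 2}, Scaled); // Scaled == <0, 1, 4, 5>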
bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask) {
assert(Scale > 0 && "Unexpected scaling factor");
// Fast-path: if no scaling, then it is just a copy.
if (Scale == 1) {
ScaledMask.assign(Mask.begin(), Mask.end());
return true;
}
// We must map the original elements down evenly to a type with fewer elements.
int NumElts = Mask.size();
if (NumElts % Scale != 0)
return false;
ScaledMask.clear();
ScaledMask.reserve(NumElts / Scale);
// Step through the input mask by splitting into Scale-sized slices.
do {
ArrayRef<int> MaskSlice = Mask.take_front(Scale);
assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
// The first element of the slice determines how we evaluate this slice.
int SliceFront = MaskSlice.front();
if (SliceFront < 0) {
// Negative values (undef or other "sentinel" values) must be equal across
// the entire slice.
if (!is_splat(MaskSlice))
return false;
ScaledMask.push_back(SliceFront);
} else {
// A positive mask element must be cleanly divisible.
if (SliceFront % Scale != 0)
return false;
// Elements of the slice must be consecutive.
for (int i = 1; i < Scale; ++i)
if (MaskSlice[i] != SliceFront + i)
return false;
ScaledMask.push_back(SliceFront / Scale);
}
Mask = Mask.drop_front(Scale);
} while (!Mask.empty());
assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
// All elements of the original mask can be scaled down to map to the elements
// of a mask with wider elements.
return true;
}
void llvm::processShuffleMasks(
ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
// Try to perform better estimation of the permutation.
// 1. Split the source/destination vectors into real registers.
// 2. Do the mask analysis to identify which real registers are
// permuted.
int Sz = Mask.size();
unsigned SzDest = Sz / NumOfDestRegs;
unsigned SzSrc = Sz / NumOfSrcRegs;
for (unsigned I = 0; I < NumOfDestRegs; ++I) {
auto &RegMasks = Res[I];
RegMasks.assign(NumOfSrcRegs, {});
// Record, for each element of this dest register, which source register it
// comes from.
for (unsigned K = 0; K < SzDest; ++K) {
int Idx = I * SzDest + K;
if (Idx == Sz)
break;
if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
continue;
int SrcRegIdx = Mask[Idx] / SzSrc;
// Add a cost of PermuteTwoSrc for each new source register permute,
// if we have more than one source register.
if (RegMasks[SrcRegIdx].empty())
RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
}
}
// Process split mask.
for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
auto &Dest = Res[I];
int NumSrcRegs =
count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
switch (NumSrcRegs) {
case 0:
// No input vectors were used!
NoInputAction();
break;
case 1: {
// Find the only non-empty mask (there is exactly one source register).
auto *It =
find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
unsigned SrcReg = std::distance(Dest.begin(), It);
SingleInputAction(*It, SrcReg, I);
break;
}
default: {
// The first mask is a permutation of a single register. Since we have two or
// more input registers to shuffle, we merge the masks for the first two
// registers and generate a shuffle of two registers rather than reordering
// the first register and then shuffling with the second one. Next, we
// generate the shuffles of the resulting register plus the remaining
// registers from the list.
auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
ArrayRef<int> SecondMask) {
for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
if (SecondMask[Idx] != UndefMaskElem) {
assert(FirstMask[Idx] == UndefMaskElem &&
"Expected undefined mask element.");
FirstMask[Idx] = SecondMask[Idx] + VF;
}
}
};
auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
if (Mask[Idx] != UndefMaskElem)
Mask[Idx] = Idx;
}
};
int SecondIdx;
do {
int FirstIdx = -1;
SecondIdx = -1;
MutableArrayRef<int> FirstMask, SecondMask;
for (unsigned I = 0; I < NumOfDestRegs; ++I) {
SmallVectorImpl<int> &RegMask = Dest[I];
if (RegMask.empty())
continue;
if (FirstIdx == SecondIdx) {
FirstIdx = I;
FirstMask = RegMask;
continue;
}
SecondIdx = I;
SecondMask = RegMask;
CombineMasks(FirstMask, SecondMask);
ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
NormalizeMask(FirstMask);
RegMask.clear();
SecondMask = FirstMask;
SecondIdx = FirstIdx;
}
if (FirstIdx != SecondIdx && SecondIdx >= 0) {
CombineMasks(SecondMask, FirstMask);
ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
Dest[FirstIdx].clear();
NormalizeMask(SecondMask);
}
} while (SecondIdx >= 0);
break;
}
}
}
}
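// For example, if a chain of i64 instructions is only ever truncated to i8
// (so only the low 8 bits are demanded anywhere in the chain), the
// instructions in that equivalence class are typically mapped to a minimum
// width of 8.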
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
const TargetTransformInfo *TTI) {
// DemandedBits will give us every value's live-out bits. But we want
// to ensure no extra casts would need to be inserted, so every DAG
// of connected values must have the same minimum bitwidth.
EquivalenceClasses<Value *> ECs;
SmallVector<Value *, 16> Worklist;
SmallPtrSet<Value *, 4> Roots;
SmallPtrSet<Value *, 16> Visited;
DenseMap<Value *, uint64_t> DBits;
SmallPtrSet<Instruction *, 4> InstructionSet;
MapVector<Instruction *, uint64_t> MinBWs;
// Determine the roots. We work bottom-up, from truncs or icmps.
bool SeenExtFromIllegalType = false;
for (auto *BB : Blocks)
for (auto &I : *BB) {
InstructionSet.insert(&I);
if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
!TTI->isTypeLegal(I.getOperand(0)->getType()))
SeenExtFromIllegalType = true;
// Only deal with non-vector integers up to 64-bits wide.
if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
!I.getType()->isVectorTy() &&
I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
// Don't make work for ourselves. If we know the loaded type is legal,
// don't add it to the worklist.
if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
continue;
Worklist.push_back(&I);
Roots.insert(&I);
}
}
// Early exit.
if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
return MinBWs;
// Now proceed breadth-first, unioning values together.
while (!Worklist.empty()) {
Value *Val = Worklist.pop_back_val();
Value *Leader = ECs.getOrInsertLeaderValue(Val);
if (!Visited.insert(Val).second)
continue;
// Non-instructions terminate a chain successfully.
if (!isa<Instruction>(Val))
continue;
Instruction *I = cast<Instruction>(Val);
// If we encounter a type that is larger than 64 bits, we can't represent
// it so bail out.
if (DB.getDemandedBits(I).getBitWidth() > 64)
return MapVector<Instruction *, uint64_t>();
uint64_t V = DB.getDemandedBits(I).getZExtValue();
DBits[Leader] |= V;
DBits[I] = V;
// Casts, loads and instructions outside of our range terminate a chain
// successfully.
if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
!InstructionSet.count(I))
continue;
// Unsafe casts terminate a chain unsuccessfully. We can't do anything
// useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
// transform anything that relies on them.
if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
!I->getType()->isIntegerTy()) {
DBits[Leader] |= ~0ULL;
continue;
}
// We don't modify the types of PHIs. Reductions will already have been
// truncated if possible, and inductions' sizes will have been chosen by
// indvars.
if (isa<PHINode>(I))
continue;
if (DBits[Leader] == ~0ULL)
// All bits demanded, no point continuing.
continue;
for (Value *O : cast<User>(I)->operands()) {
ECs.unionSets(Leader, O);
Worklist.push_back(O);
}
}
// Now we've discovered all values, walk them to see if there are
// any users we didn't see. If there are, we can't optimize that
// chain.
for (auto &I : DBits)
for (auto *U : I.first->users())
if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
uint64_t LeaderDemandedBits = 0;
for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
LeaderDemandedBits |= DBits[M];
uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
llvm::countLeadingZeros(LeaderDemandedBits);
// Round up to a power of 2
if (!isPowerOf2_64((uint64_t)MinBW))
MinBW = NextPowerOf2(MinBW);
// We don't modify the types of PHIs. Reductions will already have been
// truncated if possible, and inductions' sizes will have been chosen by
// indvars.
// If we are required to shrink a PHI, abandon this entire equivalence class.
bool Abort = false;
for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
Abort = true;
break;
}
if (Abort)
continue;
for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) {
if (!isa<Instruction>(M))
continue;
Type *Ty = M->getType();
if (Roots.count(M))
Ty = cast<Instruction>(M)->getOperand(0)->getType();
if (MinBW < Ty->getScalarSizeInBits())
MinBWs[cast<Instruction>(M)] = MinBW;
}
}
return MinBWs;
}
/// Add all access groups in @p AccGroups to @p List.
template <typename ListT>
static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
// Interpret an access group as a list containing itself.
if (AccGroups->getNumOperands() == 0) {
assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
List.insert(AccGroups);
return;
}
for (const auto &AccGroupListOp : AccGroups->operands()) {
auto *Item = cast<MDNode>(AccGroupListOp.get());
assert(isValidAsAccessGroup(Item) && "List item must be an access group");
List.insert(Item);
}
}
MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
if (!AccGroups1)
return AccGroups2;
if (!AccGroups2)
return AccGroups1;
if (AccGroups1 == AccGroups2)
return AccGroups1;
SmallSetVector<Metadata *, 4> Union;
addToAccessGroupList(Union, AccGroups1);
addToAccessGroupList(Union, AccGroups2);
if (Union.size() == 0)
return nullptr;
if (Union.size() == 1)
return cast<MDNode>(Union.front());
LLVMContext &Ctx = AccGroups1->getContext();
return MDNode::get(Ctx, Union.getArrayRef());
}
MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
const Instruction *Inst2) {
bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
if (!MayAccessMem1 && !MayAccessMem2)
return nullptr;
if (!MayAccessMem1)
return Inst2->getMetadata(LLVMContext::MD_access_group);
if (!MayAccessMem2)
return Inst1->getMetadata(LLVMContext::MD_access_group);
MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
if (!MD1 || !MD2)
return nullptr;
if (MD1 == MD2)
return MD1;
// Use set for scalable 'contains' check.
SmallPtrSet<Metadata *, 4> AccGroupSet2;
addToAccessGroupList(AccGroupSet2, MD2);
SmallVector<Metadata *, 4> Intersection;
if (MD1->getNumOperands() == 0) {
assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
if (AccGroupSet2.count(MD1))
Intersection.push_back(MD1);
} else {
for (const MDOperand &Node : MD1->operands()) {
auto *Item = cast<MDNode>(Node.get());
assert(isValidAsAccessGroup(Item) && "List item must be an access group");
if (AccGroupSet2.count(Item))
Intersection.push_back(Item);
}
}
if (Intersection.size() == 0)
return nullptr;
if (Intersection.size() == 1)
return cast<MDNode>(Intersection.front());
LLVMContext &Ctx = Inst1->getContext();
return MDNode::get(Ctx, Intersection);
}
/// \returns \p I after propagating metadata from \p VL.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
if (VL.empty())
return Inst;
Instruction *I0 = cast<Instruction>(VL[0]);
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
I0->getAllMetadataOtherThanDebugLoc(Metadata);
for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
LLVMContext::MD_access_group}) {
MDNode *MD = I0->getMetadata(Kind);
for (int J = 1, E = VL.size(); MD && J != E; ++J) {
const Instruction *IJ = cast<Instruction>(VL[J]);
MDNode *IMD = IJ->getMetadata(Kind);
switch (Kind) {
case LLVMContext::MD_tbaa:
MD = MDNode::getMostGenericTBAA(MD, IMD);
break;
case LLVMContext::MD_alias_scope:
MD = MDNode::getMostGenericAliasScope(MD, IMD);
break;
case LLVMContext::MD_fpmath:
MD = MDNode::getMostGenericFPMath(MD, IMD);
break;
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
case LLVMContext::MD_invariant_load:
MD = MDNode::intersect(MD, IMD);
break;
case LLVMContext::MD_access_group:
MD = intersectAccessGroups(Inst, IJ);
break;
default:
llvm_unreachable("unhandled metadata");
}
}
Inst->setMetadata(Kind, MD);
}
return Inst;
}
Constant *
llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
const InterleaveGroup<Instruction> &Group) {
// All 1's means mask is not needed.
if (Group.getNumMembers() == Group.getFactor())
return nullptr;
// TODO: support reversed access.
assert(!Group.isReverse() && "Reversed group not supported.");
SmallVector<Constant *, 16> Mask;
for (unsigned i = 0; i < VF; i++)
for (unsigned j = 0; j < Group.getFactor(); ++j) {
unsigned HasMember = Group.getMember(j) ? 1 : 0;
Mask.push_back(Builder.getInt1(HasMember));
}
return ConstantVector::get(Mask);
}
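// For example, createReplicatedMask(3, 2) produces <0, 0, 0, 1, 1, 1>.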
llvm::SmallVector<int, 16>
llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
SmallVector<int, 16> MaskVec;
for (unsigned i = 0; i < VF; i++)
for (unsigned j = 0; j < ReplicationFactor; j++)
MaskVec.push_back(i);
return MaskVec;
}
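// For example, createInterleaveMask(4, 2) produces <0, 4, 1, 5, 2, 6, 3, 7>,
// interleaving the lanes of two 4-element vectors.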
llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
unsigned NumVecs) {
SmallVector<int, 16> Mask;
for (unsigned i = 0; i < VF; i++)
for (unsigned j = 0; j < NumVecs; j++)
Mask.push_back(j * VF + i);
return Mask;
}
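// For example, createStrideMask(0, 2, 4) produces <0, 2, 4, 6>, selecting
// every other element starting at index 0.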
llvm::SmallVector<int, 16>
llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
SmallVector<int, 16> Mask;
for (unsigned i = 0; i < VF; i++)
Mask.push_back(Start + i * Stride);
return Mask;
}
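// For example, createSequentialMask(0, 4, 2) produces <0, 1, 2, 3, -1, -1>,
// where the trailing -1s act as undef padding.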
llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
unsigned NumInts,
unsigned NumUndefs) {
SmallVector<int, 16> Mask;
for (unsigned i = 0; i < NumInts; i++)
Mask.push_back(Start + i);
for (unsigned i = 0; i < NumUndefs; i++)
Mask.push_back(-1);
return Mask;
}
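// For example, with NumElts == 4 the two-operand mask <0, 5, 2, 7> becomes
// the single-operand mask <0, 1, 2, 3>.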
llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
unsigned NumElts) {
// Avoid casts in the loop and make sure we have a reasonable number.
int NumEltsSigned = NumElts;
assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
// If the mask chooses an element from operand 1, reduce it to choose from the
// corresponding element of operand 0. Undef mask elements are unchanged.
SmallVector<int, 16> UnaryMask;
for (int MaskElt : Mask) {
assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
UnaryMask.push_back(UnaryElt);
}
return UnaryMask;
}
/// A helper function for concatenating vectors. This function concatenates two
/// vectors having the same element type. If the second vector has fewer
/// elements than the first, it is padded with undefs.
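/// For example, concatenating a <2 x float> with a <1 x float> first pads the
/// second operand with one undef element and then produces a <3 x float>
/// holding V1[0], V1[1], V2[0].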
static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
Value *V2) {
VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
assert(VecTy1 && VecTy2 &&
VecTy1->getScalarType() == VecTy2->getScalarType() &&
"Expect two vectors with the same element type");
unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
assert(NumElts1 >= NumElts2 && "Unexpected: the first vector has fewer elements");
if (NumElts1 > NumElts2) {
// Extend with UNDEFs.
V2 = Builder.CreateShuffleVector(
V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
}
return Builder.CreateShuffleVector(
V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
}
Value *llvm::concatenateVectors(IRBuilderBase &Builder,
ArrayRef<Value *> Vecs) {
unsigned NumVecs = Vecs.size();
assert(NumVecs > 1 && "Should be at least two vectors");
SmallVector<Value *, 8> ResList;
ResList.append(Vecs.begin(), Vecs.end());
do {
SmallVector<Value *, 8> TmpList;
for (unsigned i = 0; i < NumVecs - 1; i += 2) {
Value *V0 = ResList[i], *V1 = ResList[i + 1];
assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
"Only the last vector may have a different type");
TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
}
// Push the last vector if the total number of vectors is odd.
if (NumVecs % 2 != 0)
TmpList.push_back(ResList[NumVecs - 1]);
ResList = TmpList;
NumVecs = ResList.size();
} while (NumVecs > 1);
return ResList[0];
}
bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
assert(isa<VectorType>(Mask->getType()) &&
isa<IntegerType>(Mask->getType()->getScalarType()) &&
cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1 &&
"Mask must be a vector of i1");
auto *ConstMask = dyn_cast<Constant>(Mask);
if (!ConstMask)
return false;
if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
return true;
if (isa<ScalableVectorType>(ConstMask->getType()))
return false;
for (unsigned
I = 0,
E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
I != E; ++I) {
if (auto *MaskElt = ConstMask->getAggregateElement(I))
if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
continue;
return false;
}
return true;
}
bool llvm::maskIsAllOneOrUndef(Value *Mask) {
assert(isa<VectorType>(Mask->getType()) &&
isa<IntegerType>(Mask->getType()->getScalarType()) &&
cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1 &&
"Mask must be a vector of i1");
auto *ConstMask = dyn_cast<Constant>(Mask);
if (!ConstMask)
return false;
if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
return true;
if (isa<ScalableVectorType>(ConstMask->getType()))
return false;
for (unsigned
I = 0,
E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
I != E; ++I) {
if (auto *MaskElt = ConstMask->getAggregateElement(I))
if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
continue;
return false;
}
return true;
}
/// TODO: This is a lot like known bits, but for
/// vectors. Is there something we can common this with?
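/// For example, the constant mask <i1 1, i1 0, i1 1, i1 1> yields the
/// demanded-elements bitmask 0b1101 (element 1 is known not to be demanded).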
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
assert(isa<FixedVectorType>(Mask->getType()) &&
isa<IntegerType>(Mask->getType()->getScalarType()) &&
cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1 &&
"Mask must be a fixed width vector of i1");
const unsigned VWidth =
cast<FixedVectorType>(Mask->getType())->getNumElements();
APInt DemandedElts = APInt::getAllOnes(VWidth);
if (auto *CV = dyn_cast<ConstantVector>(Mask))
for (unsigned i = 0; i < VWidth; i++)
if (CV->getAggregateElement(i)->isNullValue())
DemandedElts.clearBit(i);
return DemandedElts;
}
bool InterleavedAccessInfo::isStrided(int Stride) {
unsigned Factor = std::abs(Stride);
return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}
void InterleavedAccessInfo::collectConstStrideAccesses(
MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
const ValueToValueMap &Strides) {
auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
// Since it's desired that the load/store instructions be maintained in
// "program order" for the interleaved access analysis, we have to visit the
// blocks in the loop in reverse postorder (i.e., in a topological order).
// Such an ordering will ensure that any load/store that may be executed
// before a second load/store will precede the second load/store in
// AccessStrideInfo.
LoopBlocksDFS DFS(TheLoop);
DFS.perform(LI);
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
for (auto &I : *BB) {
Value *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
continue;
Type *ElementTy = getLoadStoreType(&I);
+ // Currently, codegen doesn't support cases where the type size doesn't
+ // match the alloc size. Skip them for now.
+ uint64_t Size = DL.getTypeAllocSize(ElementTy);
+ if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
+ continue;
+
// We don't check wrapping here because we don't know yet if Ptr will be
// part of a full group or a group with gaps. Checking wrapping for all
// pointers (even those that end up in groups with no gaps) will be overly
// conservative. For full groups, wrapping should be ok since if we would
// wrap around the address space we would do a memory access at nullptr
// even without the transformation. The wrapping checks are therefore
// deferred until after we've formed the interleaved groups.
int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
/*Assume=*/true, /*ShouldCheckWrap=*/false);
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
- uint64_t Size = DL.getTypeAllocSize(ElementTy);
AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
getLoadStoreAlignment(&I));
}
}
// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence: a = A[i]; // (1)
// A[i] = b; // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence: A[i] = a; // (1)
// A[i] = b; // (2)
// A[i + 1] = c; // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
void InterleavedAccessInfo::analyzeInterleaving(
bool EnablePredicatedInterleavedMemAccesses) {
LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
const ValueToValueMap &Strides = LAI->getSymbolicStrides();
// Holds all accesses with a constant stride.
MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
collectConstStrideAccesses(AccessStrideInfo, Strides);
if (AccessStrideInfo.empty())
return;
// Collect the dependences in the loop.
collectDependences();
// Holds all interleaved store groups temporarily.
SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
// Holds all interleaved load groups temporarily.
SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
// Search in bottom-up program order for pairs of accesses (A and B) that can
// form interleaved load or store groups. In the algorithm below, access A
// precedes access B in program order. We initialize a group for B in the
// outer loop of the algorithm, and then in the inner loop, we attempt to
// insert each A into B's group if:
//
// 1. A and B have the same stride,
// 2. A and B have the same memory object size, and
// 3. A belongs in B's group according to its distance from B.
//
// Special care is taken to ensure group formation will not break any
// dependences.
for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
BI != E; ++BI) {
Instruction *B = BI->first;
StrideDescriptor DesB = BI->second;
// Initialize a group for B if it has an allowable stride. Even if we don't
// create a group for B, we continue with the bottom-up algorithm to ensure
// we don't break any of B's dependences.
InterleaveGroup<Instruction> *Group = nullptr;
if (isStrided(DesB.Stride) &&
(!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
Group = getInterleaveGroup(B);
if (!Group) {
LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
<< '\n');
Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
}
if (B->mayWriteToMemory())
StoreGroups.insert(Group);
else
LoadGroups.insert(Group);
}
for (auto AI = std::next(BI); AI != E; ++AI) {
Instruction *A = AI->first;
StrideDescriptor DesA = AI->second;
// Our code motion strategy implies that we can't have dependences
// between accesses in an interleaved group and other accesses located
// between the first and last member of the group. Note that this also
// means that a group can't have more than one member at a given offset.
// The accesses in a group can have dependences with other accesses, but
// we must ensure we don't extend the boundaries of the group such that
// we encompass those dependent accesses.
//
// For example, assume we have the sequence of accesses shown below in a
// stride-2 loop:
//
// (1, 2) is a group | A[i] = a; // (1)
// | A[i-1] = b; // (2) |
// A[i-3] = c; // (3)
// A[i] = d; // (4) | (2, 4) is not a group
//
// Because accesses (2) and (3) are dependent, we can group (2) with (1)
// but not with (4). If we did, the dependent access (3) would be within
// the boundaries of the (2, 4) group.
if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
// If a dependence exists and A is already in a group, we know that A
// must be a store since A precedes B and WAR dependences are allowed.
// Thus, A would be sunk below B. We release A's group to prevent this
// illegal code motion. A will then be free to form another group with
// instructions that precede it.
if (isInterleaved(A)) {
InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
"dependence between " << *A << " and "<< *B << '\n');
StoreGroups.remove(StoreGroup);
releaseGroup(StoreGroup);
}
// If a dependence exists and A is not already in a group (or it was
// and we just released it), B might be hoisted above A (if B is a
// load) or another store might be sunk below A (if B is a store). In
// either case, we can't add additional instructions to B's group. B
// will only form a group with instructions that it precedes.
break;
}
// At this point, we've checked for illegal code motion. If either A or B
// isn't strided, there's nothing left to do.
if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
continue;
// Ignore A if it's already in a group or isn't the same kind of memory
// operation as B.
// Note that mayReadFromMemory() isn't mutually exclusive with
// mayWriteToMemory() in the case of atomic loads. We shouldn't see those
// here; canVectorizeMemory() should have returned false - unless we were
// asked for optimization remarks.
if (isInterleaved(A) ||
(A->mayReadFromMemory() != B->mayReadFromMemory()) ||
(A->mayWriteToMemory() != B->mayWriteToMemory()))
continue;
// Check rules 1 and 2. Ignore A if its stride or size is different from
// that of B.
if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
continue;
// Ignore A if the memory objects of A and B don't belong to the same
// address space.
if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
continue;
// Calculate the distance from A to B.
const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
if (!DistToB)
continue;
int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
// Check rule 3. Ignore A if its distance to B is not a multiple of the
// size.
if (DistanceToB % static_cast<int64_t>(DesB.Size))
continue;
// All members of a predicated interleave-group must have the same predicate,
// and currently must reside in the same BB.
BasicBlock *BlockA = A->getParent();
BasicBlock *BlockB = B->getParent();
if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
(!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
continue;
// The index of A is the index of B plus A's distance to B in multiples
// of the size.
int IndexA =
Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
// Try to insert A into B's group.
if (Group->insertMember(A, IndexA, DesA.Alignment)) {
LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
<< " into the interleave group with" << *B
<< '\n');
InterleaveGroupMap[A] = Group;
// Set the first load in program order as the insert position.
if (A->mayReadFromMemory())
Group->setInsertPos(A);
}
} // Iteration over A accesses.
} // Iteration over B accesses.
auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
int Index,
std::string FirstOrLast) -> bool {
Instruction *Member = Group->getMember(Index);
assert(Member && "Group member does not exist");
Value *MemberPtr = getLoadStorePointerOperand(Member);
Type *AccessTy = getLoadStoreType(Member);
if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
/*Assume=*/false, /*ShouldCheckWrap=*/true))
return false;
LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
<< FirstOrLast
<< " group member potentially pointer-wrapping.\n");
releaseGroup(Group);
return true;
};
// Remove interleaved groups with gaps whose memory
// accesses may wrap around. We have to revisit the getPtrStride analysis,
// this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
// not check wrapping (see documentation there).
// FORNOW we use Assume=false;
// TODO: Change to Assume=true but making sure we don't exceed the threshold
// of runtime SCEV assumptions checks (thereby potentially failing to
// vectorize altogether).
// Additional optional optimizations:
// TODO: If we are peeling the loop and we know that the first pointer doesn't
// wrap then we can deduce that all pointers in the group don't wrap.
// This means that we can forcefully peel the loop in order to only have to
// check the first pointer for no-wrap. Once we change to Assume=true,
// we'll only need at most one runtime check per interleaved group.
for (auto *Group : LoadGroups) {
// Case 1: A full group. We can skip the checks; for full groups, if the wide
// load would wrap around the address space we would do a memory access at
// nullptr even without the transformation.
if (Group->getNumMembers() == Group->getFactor())
continue;
// Case 2: If the first and last members of the group don't wrap, this implies
// that all the pointers in the group don't wrap.
// So we check only group member 0 (which is always guaranteed to exist),
// and group member Factor - 1; if the latter doesn't exist we rely on
// peeling (if it is a non-reversed access -- see Case 3).
if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
continue;
if (Group->getMember(Group->getFactor() - 1))
InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
std::string("last"));
else {
// Case 3: A non-reversed interleaved load group with gaps: We need
// to execute at least one scalar epilogue iteration. This will ensure
// we don't speculatively access memory out-of-bounds. We only need
// to look for a member at index factor - 1, since every group must have
// a member at index zero.
if (Group->isReverse()) {
LLVM_DEBUG(
dbgs() << "LV: Invalidate candidate interleaved group due to "
"a reverse access with gaps.\n");
releaseGroup(Group);
continue;
}
LLVM_DEBUG(
dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
RequiresScalarEpilogue = true;
}
}
for (auto *Group : StoreGroups) {
// Case 1: A full group. We can skip the checks; for full groups, if the wide
// store would wrap around the address space we would do a memory access at
// nullptr even without the transformation.
if (Group->getNumMembers() == Group->getFactor())
continue;
// Interleave-store-group with gaps is implemented using masked wide store.
// Remove interleaved store groups with gaps if
// masked-interleaved-accesses are not enabled by the target.
if (!EnablePredicatedInterleavedMemAccesses) {
LLVM_DEBUG(
dbgs() << "LV: Invalidate candidate interleaved store group due "
"to gaps.\n");
releaseGroup(Group);
continue;
}
// Case 2: If the first and last members of the group don't wrap, this implies
// that all the pointers in the group don't wrap.
// So we check only group member 0 (which is always guaranteed to exist),
// and the last group member. Case 3 (scalar epilog) is not relevant for
// stores with gaps, which are implemented with masked-store (rather than
// speculative access, as in loads).
if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
continue;
for (int Index = Group->getFactor() - 1; Index > 0; Index--)
if (Group->getMember(Index)) {
InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
break;
}
}
}
void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
// If no group had triggered the requirement to create an epilogue loop,
// there is nothing to do.
if (!requiresScalarEpilogue())
return;
bool ReleasedGroup = false;
// Release groups requiring scalar epilogues. Note that this also removes them
// from InterleaveGroups.
for (auto *Group : make_early_inc_range(InterleaveGroups)) {
if (!Group->requiresScalarEpilogue())
continue;
LLVM_DEBUG(
dbgs()
<< "LV: Invalidate candidate interleaved group due to gaps that "
"require a scalar epilogue (not allowed under optsize) and cannot "
"be masked (not enabled). \n");
releaseGroup(Group);
ReleasedGroup = true;
}
assert(ReleasedGroup && "At least one group must be invalidated, as a "
"scalar epilogue was required");
(void)ReleasedGroup;
RequiresScalarEpilogue = false;
}
template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
llvm_unreachable("addMetadata can only be used for Instruction");
}
namespace llvm {
template <>
void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
SmallVector<Value *, 4> VL;
std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
[](std::pair<int, Instruction *> p) { return p.second; });
propagateMetadata(NewInst, VL);
}
}
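// For example, with ScalarName "sin", VectorName "vec_sin", one vector
// argument, and a fixed VF of 4, this produces a mangled name of the form
// "_ZGV<isa>N4v_sin(vec_sin)", where <isa> is the LLVM-internal ISA token.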
std::string VFABI::mangleTLIVectorName(StringRef VectorName,
StringRef ScalarName, unsigned numArgs,
ElementCount VF) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << "_ZGV" << VFABI::_LLVM_ << "N";
if (VF.isScalable())
Out << 'x';
else
Out << VF.getFixedValue();
for (unsigned I = 0; I < numArgs; ++I)
Out << "v";
Out << "_" << ScalarName << "(" << VectorName << ")";
return std::string(Out.str());
}
void VFABI::getVectorVariantNames(
const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
if (S.empty())
return;
SmallVector<StringRef, 8> ListAttr;
S.split(ListAttr, ",");
for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
assert(Info && "Invalid name for a VFABI variant.");
assert(CI.getModule()->getFunction(Info.value().VectorName) &&
"Vector function is missing.");
#endif
VariantMappings.push_back(std::string(S));
}
}
bool VFShape::hasValidParameterList() const {
for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
++Pos) {
assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");
switch (Parameters[Pos].ParamKind) {
default: // Nothing to check.
break;
case VFParamKind::OMP_Linear:
case VFParamKind::OMP_LinearRef:
case VFParamKind::OMP_LinearVal:
case VFParamKind::OMP_LinearUVal:
// Compile time linear steps must be non-zero.
if (Parameters[Pos].LinearStepOrPos == 0)
return false;
break;
case VFParamKind::OMP_LinearPos:
case VFParamKind::OMP_LinearRefPos:
case VFParamKind::OMP_LinearValPos:
case VFParamKind::OMP_LinearUValPos:
// The runtime linear step must be referring to some other
// parameters in the signature.
if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
return false;
// The linear step parameter must be marked as uniform.
if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind !=
VFParamKind::OMP_Uniform)
return false;
// The linear step parameter can't point at itself.
if (Parameters[Pos].LinearStepOrPos == int(Pos))
return false;
break;
case VFParamKind::GlobalPredicate:
// The global predicate must be unique. It can be placed anywhere in the
// signature.
for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate)
return false;
break;
}
}
return true;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 35650b9bd00e..ecdaef0442da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,11354 +1,11355 @@
//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//
#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
#include <iterator>
#include <limits>
#include <tuple>
using namespace llvm;
using namespace PatternMatch;
using namespace SwitchCG;
#define DEBUG_TYPE "isel"
/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;
static cl::opt<bool>
InsertAssertAlign("insert-assert-align", cl::init(true),
cl::desc("Insert the experimental `assertalign` node."),
cl::ReallyHidden);
static cl::opt<unsigned, true>
LimitFPPrecision("limit-float-precision",
cl::desc("Generate low-precision inline sequences "
"for some float libcalls"),
cl::location(LimitFloatPrecision), cl::Hidden,
cl::init(0));
static cl::opt<unsigned> SwitchPeelThreshold(
"switch-peel-threshold", cl::Hidden, cl::init(66),
cl::desc("Set the case probability threshold for peeling the case from a "
"switch statement. A value greater than 100 will void this "
"optimization"));
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
// %buffer = alloca [4096 x i8]
// %data = load [4096 x i8]* %argPtr
// store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
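/// For example, an i64 value passed as two i32 parts is reassembled with a
/// BUILD_PAIR node (swapping the two halves on big-endian targets).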
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC = None,
Optional<ISD::NodeType> AssertOp = None) {
// Let the target assemble the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
PartVT, ValueVT, CC))
return Val;
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
CC);
assert(NumParts > 0 && "No parts to assemble!");
SDValue Val = Parts[0];
if (NumParts > 1) {
// Assemble the value from multiple parts.
if (ValueVT.isInteger()) {
unsigned PartBits = PartVT.getSizeInBits();
unsigned ValueBits = ValueVT.getSizeInBits();
// Assemble the power of 2 part.
unsigned RoundParts =
(NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
unsigned RoundBits = PartBits * RoundParts;
EVT RoundVT = RoundBits == ValueBits ?
ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
SDValue Lo, Hi;
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
PartVT, HalfVT, V);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT, V);
} else {
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
}
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
if (RoundParts < NumParts) {
// Assemble the trailing non-power-of-2 part.
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
OddVT, V, CC);
// Combine the round and odd parts.
Lo = Val;
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
DAG.getConstant(Lo.getValueSizeInBits(), DL,
TLI.getShiftAmountTy(
TotalVT, DAG.getDataLayout())));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
} else if (PartVT.isFloatingPoint()) {
// FP split into multiple FP parts (for ppcf128)
assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
"Unexpected split");
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
} else {
// FP split into integer parts (soft fp)
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
}
}
// There is now one part, held in Val. Correct it to match ValueVT.
// PartEVT is the type of the register class that holds the value.
// ValueVT is the type of the inline asm operation.
EVT PartEVT = Val.getValueType();
if (PartEVT == ValueVT)
return Val;
if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
ValueVT.bitsLT(PartEVT)) {
// For an FP value in an integer part, we need to truncate to the right
// width first.
PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
}
// Handle types that have the same size.
if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
// Handle types with different sizes.
if (PartEVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extension.
if (AssertOp)
Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
DAG.getValueType(ValueVT));
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(
ISD::FP_ROUND, DL, ValueVT, Val,
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
// Handle MMX to a narrower integer type by bitcasting MMX to integer and
// then truncating.
if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
ValueVT.bitsLT(PartEVT)) {
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
report_fatal_error("Unknown mismatch in getCopyFromParts!");
}
static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
const Twine &ErrMsg) {
const Instruction *I = dyn_cast_or_null<Instruction>(V);
if (!V)
return Ctx.emitError(ErrMsg);
const char *AsmError = ", possible invalid constraint for vector type";
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (CI->isInlineAsm())
return Ctx.emitError(I, ErrMsg + AsmError);
return Ctx.emitError(I, ErrMsg);
}
/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const bool IsABIRegCopy = CallConv.has_value();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
// Handle a multi-element vector.
if (NumParts > 1) {
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
*DAG.getContext(), *CallConv, ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(RegisterVT.getSizeInBits() ==
Parts[0].getSimpleValueType().getSizeInBits() &&
"Part type sizes don't match!");
// Assemble the parts into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
if (NumIntermediates == NumParts) {
// If the register was not expanded, truncate or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
PartVT, IntermediateVT, V, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
// operands from the parts.
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
PartVT, IntermediateVT, V, CallConv);
}
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
EVT BuiltVectorTy =
IntermediateVT.isVector()
? EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(),
IntermediateVT.getVectorElementCount() * NumParts)
: EVT::getVectorVT(*DAG.getContext(),
IntermediateVT.getScalarType(),
NumIntermediates);
Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
DL, BuiltVectorTy, Ops);
}
// There is now one part, held in Val. Correct it to match ValueVT.
EVT PartEVT = Val.getValueType();
if (PartEVT == ValueVT)
return Val;
if (PartEVT.isVector()) {
// Vector/Vector bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
ValueVT.getVectorElementCount().getKnownMinValue()) &&
(PartEVT.getVectorElementCount().isScalable() ==
ValueVT.getVectorElementCount().isScalable()) &&
"Cannot narrow, it would be a lossy transformation");
PartEVT =
EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
ValueVT.getVectorElementCount());
Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,
DAG.getVectorIdxConstant(0, DL));
if (PartEVT == ValueVT)
return Val;
}
// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
}
// Trivial bitcast if the types are the same size and the destination
// vector type is legal.
if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
TLI.isTypeLegal(ValueVT))
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
if (ValueVT.getVectorNumElements() != 1) {
// Certain ABIs require that vectors are passed as integers. For vectors
// that are the same size, this is an obvious bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
} else if (ValueVT.bitsLT(PartEVT)) {
const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
// Drop the extra bits.
Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
return DAG.getBitcast(ValueVT, Val);
}
diagnosePossiblyInvalidConstraint(
*DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
return DAG.getUNDEF(ValueVT);
}
// Handle cases such as i8 -> <1 x i1>
EVT ValueSVT = ValueVT.getVectorElementType();
if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits())
Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
else
Val = ValueVT.isFloatingPoint()
? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
: DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
}
return DAG.getBuildVector(ValueVT, DL, Val);
}
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
Optional<CallingConv::ID> CallConv);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
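/// For example, splitting an i64 value into two i32 parts bitcasts the value
/// and then uses EXTRACT_ELEMENT to peel off the low and high halves
/// (reversed on big-endian targets).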
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
SDValue *Parts, unsigned NumParts, MVT PartVT,
const Value *V,
Optional<CallingConv::ID> CallConv = None,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
// Let the target split the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
CallConv))
return;
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
CallConv);
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
"Copying to an illegal type!");
if (NumParts == 0)
return;
assert(!ValueVT.isVector() && "Vector case handled elsewhere");
EVT PartEVT = PartVT;
if (PartEVT == ValueVT) {
assert(NumParts == 1 && "No-op copy with multiple parts!");
Parts[0] = Val;
return;
}
if (NumParts * PartBits > ValueVT.getSizeInBits()) {
// If the parts cover more bits than the value has, promote the value.
if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
if (ValueVT.isFloatingPoint()) {
// FP values need to be bitcast, then extended if they are being put
// into a larger container.
ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
if (PartVT == MVT::x86mmx)
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
assert(NumParts == 1 && PartEVT != ValueVT);
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
if (PartVT == MVT::x86mmx)
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
// The value may have changed - recompute ValueVT.
ValueVT = Val.getValueType();
assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
if (PartEVT != ValueVT) {
diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
"scalar-to-vector conversion failed");
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
Parts[0] = Val;
return;
}
// Expand the value into multiple parts.
if (NumParts & (NumParts - 1)) {
// The number of parts is not a power of 2. Split off and copy the tail.
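// For example, with NumParts == 3 and PartVT == i32 (a 96-bit value),
// RoundParts == 2: the high 32 bits are shifted down and copied out as the
// single odd part, and the remaining 64 bits are then bisected below.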
assert(PartVT.isInteger() && ValueVT.isInteger() &&
"Do not know what to expand to!");
unsigned RoundParts = 1 << Log2_32(NumParts);
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
DAG.getShiftAmountConstant(RoundBits, ValueVT, DL));
getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
CallConv);
if (DAG.getDataLayout().isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
std::reverse(Parts + RoundParts, Parts + NumParts);
NumParts = RoundParts;
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
// The number of parts is a power of 2. Repeatedly bisect the value using
// EXTRACT_ELEMENT.
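// For example, an i128 value with four i32 parts is first bisected into two
// i64 halves, and each half is then bisected into two i32 parts.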
Parts[0] = DAG.getNode(ISD::BITCAST, DL,
EVT::getIntegerVT(*DAG.getContext(),
ValueVT.getSizeInBits()),
Val);
for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
for (unsigned i = 0; i < NumParts; i += StepSize) {
unsigned ThisBits = StepSize * PartBits / 2;
EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
SDValue &Part0 = Parts[i];
SDValue &Part1 = Parts[i+StepSize/2];
Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
if (ThisBits == PartBits && ThisVT != PartVT) {
Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
}
}
}
if (DAG.getDataLayout().isBigEndian())
std::reverse(Parts, Parts + OrigNumParts);
}
static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
const SDLoc &DL, EVT PartVT) {
if (!PartVT.isVector())
return SDValue();
EVT ValueVT = Val.getValueType();
ElementCount PartNumElts = PartVT.getVectorElementCount();
ElementCount ValueNumElts = ValueVT.getVectorElementCount();
// We only support widening vectors with equivalent element types and
// fixed/scalable properties. If a target needs to widen a fixed-length type
// to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
PartNumElts.isScalable() != ValueNumElts.isScalable() ||
PartVT.getVectorElementType() != ValueVT.getVectorElementType())
return SDValue();
// Widening a scalable vector to another scalable vector is done by inserting
// the vector into a larger undef one.
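// For example, <vscale x 2 x i32> is widened to <vscale x 4 x i32> by
// inserting it at index 0 of an undef <vscale x 4 x i32>.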
if (PartNumElts.isScalable())
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
Val, DAG.getVectorIdxConstant(0, DL));
EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
// undef elements.
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(Val, Ops);
SDValue EltUndef = DAG.getUNDEF(ElementVT);
Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);
// FIXME: Use CONCAT for 2x -> 4x.
return DAG.getBuildVector(PartVT, DL, Ops);
}
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
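/// For example, a <3 x float> value may be widened into a single <4 x float>
/// part, or an <8 x i32> value may be split into four <2 x i32> intermediate
/// operands that are then copied out one per register part.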
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
Optional<CallingConv::ID> CallConv) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const bool IsABIRegCopy = CallConv.has_value();
if (NumParts == 1) {
EVT PartEVT = PartVT;
if (PartEVT == ValueVT) {
// Nothing to do.
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
Val = Widened;
} else if (PartVT.isVector() &&
PartEVT.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
PartEVT.getVectorElementCount() ==
ValueVT.getVectorElementCount()) {
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else if (PartEVT.isVector() &&
PartEVT.getVectorElementType() !=
ValueVT.getVectorElementType() &&
TLI.getTypeAction(*DAG.getContext(), ValueVT) ==
TargetLowering::TypeWidenVector) {
// Combination of widening and promotion.
EVT WidenVT =
EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
PartVT.getVectorElementCount());
SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
} else {
if (ValueVT.getVectorElementCount().isScalar()) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getVectorIdxConstant(0, DL));
} else {
uint64_t ValueSize = ValueVT.getFixedSizeInBits();
assert(PartVT.getFixedSizeInBits() > ValueSize &&
"lossy conversion of vector to scalar type");
EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
Val = DAG.getBitcast(IntermediateType, Val);
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
}
}
assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
Parts[0] = Val;
return;
}
// Handle a multi-element vector.
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
*DAG.getContext(), CallConv.value(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
"Mixing scalable and fixed vectors when copying in parts");
Optional<ElementCount> DestEltCnt;
if (IntermediateVT.isVector())
DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
else
DestEltCnt = ElementCount::getFixed(NumIntermediates);
EVT BuiltVectorTy = EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(), *DestEltCnt);
if (ValueVT == BuiltVectorTy) {
// Nothing to do.
} else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
} else {
if (BuiltVectorTy.getVectorElementType().bitsGT(
ValueVT.getVectorElementType())) {
// Integer promotion.
ValueVT = EVT::getVectorVT(*DAG.getContext(),
BuiltVectorTy.getVectorElementType(),
ValueVT.getVectorElementCount());
Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
Val = Widened;
}
}
assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
if (IntermediateVT.isVector()) {
// This does something sensible for scalable vectors - see the
// definition of EXTRACT_SUBVECTOR for further details.
unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
Ops[i] =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
DAG.getVectorIdxConstant(i * IntermediateNumElts, DL));
} else {
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
DAG.getVectorIdxConstant(i, DL));
}
}
// Split the intermediate operands into legal parts.
if (NumParts == NumIntermediates) {
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each intermediate value
// into legal parts.
assert(NumIntermediates != 0 && "division by zero");
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
CallConv);
}
}
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
EVT valuevt, Optional<CallingConv::ID> CC)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
RegCount(1, regs.size()), CallConv(CC) {}
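// For example, for an i64 value on a 32-bit target this constructor records
// two consecutive i32 registers (Reg and Reg + 1), with RegCount[0] == 2.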
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
Optional<CallingConv::ID> CC) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
CallConv = CC;
for (EVT ValueVT : ValueVTs) {
unsigned NumRegs =
isABIMangled()
? TLI.getNumRegistersForCallingConv(Context, CC.value(), ValueVT)
: TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT =
isABIMangled()
? TLI.getRegisterTypeForCallingConv(Context, CC.value(), ValueVT)
: TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
RegCount.push_back(NumRegs);
Reg += NumRegs;
}
}
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain,
SDValue *Flag, const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Assemble the legal parts into the final values.
SmallVector<SDValue, 4> Values(ValueVTs.size());
SmallVector<SDValue, 8> Parts;
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
MVT RegisterVT =
isABIMangled() ? TLI.getRegisterTypeForCallingConv(
*DAG.getContext(), CallConv.value(), RegVTs[Value])
: RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue P;
if (!Flag) {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
} else {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
*Flag = P.getValue(2);
}
Chain = P.getValue(1);
Parts[i] = P;
// If the source register was virtual and if we know something about it,
// add an assert node.
if (!Register::isVirtualRegister(Regs[Part + i]) ||
!RegisterVT.isInteger())
continue;
const FunctionLoweringInfo::LiveOutInfo *LOI =
FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
if (!LOI)
continue;
unsigned RegSize = RegisterVT.getScalarSizeInBits();
unsigned NumSignBits = LOI->NumSignBits;
unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
if (NumZeroBits == RegSize) {
// The current value is a zero.
// Express that explicitly, since it makes it easier for
// optimizations to kick in.
Parts[i] = DAG.getConstant(0, dl, RegisterVT);
continue;
}
// FIXME: We capture more information than the dag can represent. For
// now, just use the tightest assertzext/assertsext possible.
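// For example, if a 32-bit register is known to have 24 leading zero bits,
// we emit AssertZext from i8; if it is known to have 25 sign bits (and no
// known leading zeros), we emit AssertSext from i8 instead.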
bool isSExt;
EVT FromVT(MVT::Other);
if (NumZeroBits) {
FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
isSExt = false;
} else if (NumSignBits > 1) {
FromVT =
EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
isSExt = true;
} else {
continue;
}
// Add an assertion node.
assert(FromVT != MVT::Other);
Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
RegisterVT, P, DAG.getValueType(FromVT));
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
RegisterVT, ValueVT, V, CallConv);
Part += NumRegs;
Parts.clear();
}
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V,
ISD::NodeType PreferredExtendType) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ISD::NodeType ExtendKind = PreferredExtendType;
// Get the list of the value's legal parts.
unsigned NumRegs = Regs.size();
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumParts = RegCount[Value];
MVT RegisterVT =
isABIMangled() ? TLI.getRegisterTypeForCallingConv(
*DAG.getContext(), CallConv.value(), RegVTs[Value])
: RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
NumParts, RegisterVT, V, CallConv, ExtendKind);
Part += NumParts;
}
// Copy the parts into the registers.
SmallVector<SDValue, 8> Chains(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue Part;
if (!Flag) {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
} else {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
*Flag = Part.getValue(1);
}
Chains[i] = Part.getValue(0);
}
if (NumRegs == 1 || Flag)
// If NumRegs > 1 && Flag is used, then the use of the last CopyToReg is
// flagged to it. That is, the CopyToReg nodes and the user are considered
// a single scheduling unit. If we create a TokenFactor and return it as
// chain, then the TokenFactor is both a predecessor (operand) of the
// user as well as a successor (the TF operands are flagged to the user).
// c1, f1 = CopyToReg
// c2, f2 = CopyToReg
// c3 = TokenFactor c1, c2
// ...
// = op c3, ..., f2
Chain = Chains[NumRegs-1];
else
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
if (HasMatching)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
// Put the register class of the virtual registers in the flag word. That
// way, later passes can recompute register class constraints for inline
// assembly as well as normal instructions.
// Don't do this for tied operands that can use the regclass information
// from the def.
const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
}
SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
Ops.push_back(Res);
if (Code == InlineAsm::Kind_Clobber) {
// Clobbers should always have a 1:1 mapping with registers, and may
// reference registers that have illegal (e.g. vector) types. Hence, we
// shouldn't try to apply any sort of splitting logic to them.
assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
"No 1:1 mapping from clobbers to regs?");
Register SP = TLI.getStackPointerRegisterToSaveRestore();
(void)SP;
for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
assert(
(Regs[I] != SP ||
DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
"If we clobbered the stack pointer, MFI should know about it.");
}
return;
}
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
MVT RegisterVT = RegVTs[Value];
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value],
RegisterVT);
for (unsigned i = 0; i != NumRegs; ++i) {
assert(Reg < Regs.size() && "Mismatch in # registers expected");
unsigned TheReg = Regs[Reg++];
Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
}
}
}
SmallVector<std::pair<unsigned, TypeSize>, 4>
RegsForValue::getRegsAndSizes() const {
SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
unsigned I = 0;
for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
unsigned RegCount = std::get<0>(CountAndVT);
MVT RegisterVT = std::get<1>(CountAndVT);
TypeSize RegisterSize = RegisterVT.getSizeInBits();
for (unsigned E = I + RegCount; I != E; ++I)
OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
}
return OutVec;
}
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
const TargetLibraryInfo *li) {
AA = aa;
GFI = gfi;
LibInfo = li;
Context = DAG.getContext();
LPadToCallSiteMap.clear();
SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
}
void SelectionDAGBuilder::clear() {
NodeMap.clear();
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
PendingConstrainedFP.clear();
PendingConstrainedFPStrict.clear();
CurInst = nullptr;
HasTailCall = false;
SDNodeOrder = LowestSDNodeOrder;
StatepointLowering.clear();
}
void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
// Update DAG root to include dependencies on Pending chains.
SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
SDValue Root = DAG.getRoot();
if (Pending.empty())
return Root;
// Add current root to PendingChains, unless we already indirectly
// depend on it.
if (Root.getOpcode() != ISD::EntryToken) {
unsigned i = 0, e = Pending.size();
for (; i != e; ++i) {
assert(Pending[i].getNode()->getNumOperands() > 1);
if (Pending[i].getNode()->getOperand(0) == Root)
break; // Don't add the root if we already indirectly depend on it.
}
if (i == e)
Pending.push_back(Root);
}
if (Pending.size() == 1)
Root = Pending[0];
else
Root = DAG.getTokenFactor(getCurSDLoc(), Pending);
DAG.setRoot(Root);
Pending.clear();
return Root;
}
SDValue SelectionDAGBuilder::getMemoryRoot() {
return updateRoot(PendingLoads);
}
SDValue SelectionDAGBuilder::getRoot() {
// Chain up all pending constrained intrinsics together with all
// pending loads, by simply appending them to PendingLoads and
// then calling getMemoryRoot().
PendingLoads.reserve(PendingLoads.size() +
PendingConstrainedFP.size() +
PendingConstrainedFPStrict.size());
PendingLoads.append(PendingConstrainedFP.begin(),
PendingConstrainedFP.end());
PendingLoads.append(PendingConstrainedFPStrict.begin(),
PendingConstrainedFPStrict.end());
PendingConstrainedFP.clear();
PendingConstrainedFPStrict.clear();
return getMemoryRoot();
}
SDValue SelectionDAGBuilder::getControlRoot() {
// We need to emit pending fpexcept.strict constrained intrinsics,
// so append them to the PendingExports list.
PendingExports.append(PendingConstrainedFPStrict.begin(),
PendingConstrainedFPStrict.end());
PendingConstrainedFPStrict.clear();
return updateRoot(PendingExports);
}
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
if (I.isTerminator()) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
// Increase the SDNodeOrder if dealing with a non-debug instruction.
if (!isa<DbgInfoIntrinsic>(I))
++SDNodeOrder;
CurInst = &I;
visit(I.getOpcode(), I);
if (!I.isTerminator() && !HasTailCall &&
!isa<GCStatepointInst>(I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
CurInst = nullptr;
}
void SelectionDAGBuilder::visitPHI(const PHINode &) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}
void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
// Note: this doesn't use InstVisitor, because it has to work with
// ConstantExpr's in addition to instructions.
switch (Opcode) {
default: llvm_unreachable("Unknown instruction type encountered!");
// Build the switch statement using the Instruction.def file.
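// Each HANDLE_INST expands to a case such as:
// case Instruction::Add: visitAdd((const BinaryOperator&)I); break;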
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
}
}
void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
DebugLoc DL, unsigned Order) {
// We treat variadic dbg_values differently at this stage.
if (DI->hasArgList()) {
// For variadic dbg_values we will now insert an undef.
// FIXME: We can potentially recover these!
SmallVector<SDDbgOperand, 2> Locs;
for (const Value *V : DI->getValues()) {
auto Undef = UndefValue::get(V->getType());
Locs.push_back(SDDbgOperand::fromConst(Undef));
}
SDDbgValue *SDV = DAG.getDbgValueList(
DI->getVariable(), DI->getExpression(), Locs, {},
/*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
} else {
// TODO: Dangling debug info will eventually either be resolved or produce
// an Undef DBG_VALUE. However, in the resolution case, a gap may appear
// between the original dbg.value location and its resolved DBG_VALUE,
// which we should ideally fill with an extra Undef DBG_VALUE.
assert(DI->getNumVariableLocationOps() == 1 &&
"DbgValueInst without an ArgList should have a single location "
"operand.");
DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order);
}
}
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr) {
auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
const DbgValueInst *DI = DDI.getDI();
DIVariable *DanglingVariable = DI->getVariable();
DIExpression *DanglingExpr = DI->getExpression();
if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
return true;
}
return false;
};
for (auto &DDIMI : DanglingDebugInfoMap) {
DanglingDebugInfoVector &DDIV = DDIMI.second;
// If debug info is to be dropped, run it through final checks to see
// whether it can be salvaged.
for (auto &DDI : DDIV)
if (isMatchingDbgValue(DDI))
salvageUnresolvedDbgValue(DDI);
erase_if(DDIV, isMatchingDbgValue);
}
}
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
SDValue Val) {
auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
return;
DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
for (auto &DDI : DDIV) {
const DbgValueInst *DI = DDI.getDI();
assert(DI && "Ill-formed DanglingDebugInfo");
assert(!DI->hasArgList() && "Not implemented for variadic dbg_values");
DebugLoc dl = DDI.getdl();
unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
DILocalVariable *Variable = DI->getVariable();
DIExpression *Expr = DI->getExpression();
assert(Variable->isValidLocationForIntrinsic(dl) &&
"Expected inlined-at fields to agree");
SDDbgValue *SDV;
if (Val.getNode()) {
// FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
// FuncArgumentDbgValue (it would be hoisted to the function entry, and if
// we couldn't resolve it directly when examining the DbgValue intrinsic
// in the first place we should not be more successful here). Unless we
// have some test case that proves this to be correct, we should avoid
// calling EmitFuncArgumentDbgValue here.
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl,
FuncArgumentDbgValueKind::Value, Val)) {
LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
<< DbgSDNodeOrder << "] for:\n " << *DI << "\n");
LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
// Increase the SDNodeOrder for the DbgValue here to make sure it is
// inserted after the definition of Val when emitting the instructions
// after ISel. An alternative could be to teach
// ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
<< "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
<< ValSDNodeOrder << "\n");
SDV = getDbgValue(Val, Variable, Expr, dl,
std::max(DbgSDNodeOrder, ValSDNodeOrder));
DAG.AddDbgValue(SDV, false);
} else
LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
<< "in EmitFuncArgumentDbgValue\n");
} else {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
auto SDV =
DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, false);
}
}
DDIV.clear();
}
void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// TODO: For the variadic implementation, instead of only checking the fail
// state of `handleDebugValue`, we need to know specifically which values were
// invalid, so that we attempt to salvage only those values when processing
// a DIArgList.
assert(!DDI.getDI()->hasArgList() &&
"Not implemented for variadic dbg_values");
Value *V = DDI.getDI()->getValue(0);
DILocalVariable *Var = DDI.getDI()->getVariable();
DIExpression *Expr = DDI.getDI()->getExpression();
DebugLoc DL = DDI.getdl();
DebugLoc InstDL = DDI.getDI()->getDebugLoc();
unsigned SDOrder = DDI.getSDNodeOrder();
// Currently we consider only dbg.value intrinsics -- we tell the salvager
// that DW_OP_stack_value is desired.
assert(isa<DbgValueInst>(DDI.getDI()));
bool StackValue = true;
// Can this Value be encoded without any further work?
if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false))
return;
// Attempt to salvage back through as many instructions as possible. Bail if
// a non-instruction is seen, such as a constant expression or global
// variable. FIXME: Further work could recover those too.
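// For example, if V is "%a = add i64 %b, 1", the salvager can rewrite the
// expression in terms of %b by appending DW_OP_plus_uconst 1, and we then
// retry with %b as the location operand.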
while (isa<Instruction>(V)) {
Instruction &VAsInst = *cast<Instruction>(V);
// Temporary "0", awaiting real implementation.
SmallVector<uint64_t, 16> Ops;
SmallVector<Value *, 4> AdditionalValues;
V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
if (!V)
break;
// TODO: If AdditionalValues isn't empty, then the salvage can only be
// represented with a DBG_VALUE_LIST, so we give up. When we have support
// here for variadic dbg_values, remove that condition.
if (!AdditionalValues.empty())
break;
// New value and expr now represent this debuginfo.
Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
/*IsVariadic=*/false)) {
LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
<< *DDI.getDI() << "\nBy stripping back to:\n " << *V);
return;
}
}
// This was the final opportunity to salvage this debug information, and it
// couldn't be done. Place an undef DBG_VALUE at this location to terminate
// any earlier variable location.
auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << *DDI.getDI()
<< "\n");
LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
<< "\n");
}
bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
DILocalVariable *Var,
DIExpression *Expr, DebugLoc dl,
DebugLoc InstDL, unsigned Order,
bool IsVariadic) {
if (Values.empty())
return true;
SmallVector<SDDbgOperand> LocationOps;
SmallVector<SDNode *> Dependencies;
for (const Value *V : Values) {
// Constant value.
if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
isa<ConstantPointerNull>(V)) {
LocationOps.emplace_back(SDDbgOperand::fromConst(V));
continue;
}
// If the Value is a frame index, we can create a FrameIndex debug value
// without relying on the DAG at all.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
auto SI = FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(SI->second));
continue;
}
}
// Do not use getValue() in here; we don't want to generate code at
// this point if it hasn't been done yet.
SDValue N = NodeMap[V];
if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
// Only emit func arg dbg value for non-variadic dbg.values for now.
if (!IsVariadic &&
EmitFuncArgumentDbgValue(V, Var, Expr, dl,
FuncArgumentDbgValueKind::Value, N))
return true;
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
// describe stack slot locations.
//
// Consider "int x = 0; int *px = &x;". There are two kinds of
// interesting debug values here after optimization:
//
// dbg.value(i32* %px, !"int *px", !DIExpression()), and
// dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
//
// Both describe the direct values of their associated variables.
Dependencies.push_back(N.getNode());
LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(FISDN->getIndex()));
continue;
}
LocationOps.emplace_back(
SDDbgOperand::fromNode(N.getNode(), N.getResNo()));
continue;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Special rules apply for the first dbg.values of parameter variables in a
// function. Identify them by the fact that they reference Argument Values,
// that the variables they describe are parameters, and that those parameters
// belong to the current function. We need to let them dangle until they get
// an SDNode.
bool IsParamOfFunc =
isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt();
if (IsParamOfFunc)
return false;
// The value is not used in this block yet (or it would have an SDNode).
// We still want the value to appear for the user if possible -- if it has
// an associated VReg, we can refer to that instead.
auto VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
unsigned Reg = VMI->second;
// If this is a PHI node, it may be split up into several MI PHI nodes
// (in FunctionLoweringInfo::set).
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
// FIXME: We could potentially support variadic dbg_values here.
if (IsVariadic)
return false;
unsigned Offset = 0;
unsigned BitsToDescribe = 0;
if (auto VarSize = Var->getSizeInBits())
BitsToDescribe = *VarSize;
if (auto Fragment = Expr->getFragmentInfo())
BitsToDescribe = Fragment->SizeInBits;
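// For example, a 64-bit variable living in two 32-bit registers is emitted
// below as two DBG_VALUEs with fragment expressions covering bits [0, 32)
// and [32, 64).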
for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
// Bail out if all bits are described already.
if (Offset >= BitsToDescribe)
break;
// TODO: handle scalable vectors.
unsigned RegisterSize = RegAndSize.second;
unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
? BitsToDescribe - Offset
: RegisterSize;
auto FragmentExpr = DIExpression::createFragmentExpression(
Expr, Offset, FragmentSize);
if (!FragmentExpr)
continue;
SDDbgValue *SDV = DAG.getVRegDbgValue(
Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
Offset += RegisterSize;
}
return true;
}
// We can use simple vreg locations for variadic dbg_values as well.
LocationOps.emplace_back(SDDbgOperand::fromVReg(Reg));
continue;
}
// We failed to create a SDDbgOperand for V.
return false;
}
// We have created a SDDbgOperand for each Value in Values.
// TODO: Should this use Order instead of SDNodeOrder?
assert(!LocationOps.empty());
SDDbgValue *SDV =
DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
/*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
return true;
}
void SelectionDAGBuilder::resolveOrClearDbgInfo() {
// Try to fixup any remaining dangling debug info -- and drop it if we can't.
for (auto &Pair : DanglingDebugInfoMap)
for (auto &DDI : Pair.second)
salvageUnresolvedDbgValue(DDI);
clearDanglingDebugInfo();
}
/// getCopyFromRegs - If there was virtual register allocated for the value V
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V);
SDValue Result;
if (It != FuncInfo.ValueMap.end()) {
Register InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), InReg, Ty,
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
resolveDanglingDebugInfo(V, Result);
}
return Result;
}
/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
// to do this first, so that we don't create a CopyFromReg if we already
// have a regular SDValue.
SDValue &N = NodeMap[V];
if (N.getNode()) return N;
// If there's a virtual register allocated and initialized for this
// value, use it.
if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
return copyFromReg;
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
resolveDanglingDebugInfo(V, Val);
return Val;
}
/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
// If we already have an SDValue for this value, use it.
SDValue &N = NodeMap[V];
if (N.getNode()) {
if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
// Remove the debug location from the node as the node is about to be used
// in a location which may differ from the original debug location. This
// is relevant to Constant and ConstantFP nodes because they can appear
// as constant expressions inside PHI nodes.
N->setDebugLoc(DebugLoc());
}
return N;
}
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
resolveDanglingDebugInfo(V, Val);
return Val;
}
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const Constant *C = dyn_cast<Constant>(V)) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
return DAG.getConstant(*CI, getCurSDLoc(), VT);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
return DAG.getConstant(0, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout(), AS));
}
if (match(C, m_VScale(DAG.getDataLayout())))
return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
return DAG.getUNDEF(VT);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
visit(CE->getOpcode(), *CE);
SDValue N1 = NodeMap[V];
assert(N1.getNode() && "visit didn't populate the NodeMap!");
return N1;
}
if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
SmallVector<SDValue, 4> Constants;
for (const Use &U : C->operands()) {
SDNode *Val = getValue(U).getNode();
// If the operand is an empty aggregate, there are no values.
if (!Val) continue;
// Add each leaf value from the operand to the Constants list
// to form a flattened list of all the values.
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
Constants.push_back(SDValue(Val, i));
}
return DAG.getMergeValues(Constants, getCurSDLoc());
}
if (const ConstantDataSequential *CDS =
dyn_cast<ConstantDataSequential>(C)) {
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
// Add each leaf value from the operand to the Constants list
// to form a flattened list of all the values.
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
Ops.push_back(SDValue(Val, i));
}
if (isa<ArrayType>(CDS->getType()))
return DAG.getMergeValues(Ops, getCurSDLoc());
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
"Unknown struct or array constant!");
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
unsigned NumElts = ValueVTs.size();
if (NumElts == 0)
return SDValue(); // empty struct
SmallVector<SDValue, 4> Constants(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
EVT EltVT = ValueVTs[i];
if (isa<UndefValue>(C))
Constants[i] = DAG.getUNDEF(EltVT);
else if (EltVT.isFloatingPoint())
Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
}
return DAG.getMergeValues(Constants, getCurSDLoc());
}
if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
return DAG.getBlockAddress(BA, VT);
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
return getValue(Equiv->getGlobalValue());
if (const auto *NC = dyn_cast<NoCFIValue>(C))
return getValue(NC->getGlobalValue());
VectorType *VecTy = cast<VectorType>(V->getType());
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
SmallVector<SDValue, 16> Ops;
unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
for (unsigned i = 0; i != NumElements; ++i)
Ops.push_back(getValue(CV->getOperand(i)));
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
if (isa<ConstantAggregateZero>(C)) {
EVT EltVT =
TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
SDValue Op;
if (EltVT.isFloatingPoint())
Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
if (isa<ScalableVectorType>(VecTy))
return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
SmallVector<SDValue, 16> Ops;
Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
llvm_unreachable("Unknown vector constant");
}
// If this is a static alloca, generate it as the frameindex instead of
// an address computation.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
return DAG.getFrameIndex(SI->second,
TLI.getFrameIndexTy(DAG.getDataLayout()));
}
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
Inst->getType(), None);
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V))
return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
if (const auto *BB = dyn_cast<BasicBlock>(V))
return DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
llvm_unreachable("Can't get register for value!");
}
void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
bool IsSEH = isAsynchronousEHPersonality(Pers);
MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
if (!IsSEH)
CatchPadMBB->setIsEHScopeEntry();
// In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
CatchPadMBB->setIsEHFuncletEntry();
}
void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
// Update machine-CFG edge.
MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
FuncInfo.MBB->addSuccessor(TargetMBB);
TargetMBB->setIsEHCatchretTarget(true);
DAG.getMachineFunction().setHasEHCatchret(true);
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsSEH = isAsynchronousEHPersonality(Pers);
if (IsSEH) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
if (TargetMBB != NextBlock(FuncInfo.MBB) ||
TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(TargetMBB)));
return;
}
// Figure out the funclet membership for the catchret's successor.
// This will be used by the FuncletLayout pass to determine how to order the
// BBs.
// A 'catchret' returns to the outer scope's color.
Value *ParentPad = I.getCatchSwitchParentPad();
const BasicBlock *SuccessorColor;
if (isa<ConstantTokenNone>(ParentPad))
SuccessorColor = &FuncInfo.Fn->getEntryBlock();
else
SuccessorColor = cast<Instruction>(ParentPad)->getParent();
assert(SuccessorColor && "No parent funclet for catchret!");
MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
// Create the terminator node.
SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(TargetMBB),
DAG.getBasicBlock(SuccessorColorMBB));
DAG.setRoot(Ret);
}
void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
// Don't emit any special code for the cleanuppad instruction. It just marks
// the start of an EH scope/funclet.
FuncInfo.MBB->setIsEHScopeEntry();
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
if (Pers != EHPersonality::Wasm_CXX) {
FuncInfo.MBB->setIsEHFuncletEntry();
FuncInfo.MBB->setIsCleanupFuncletEntry();
}
}
// In wasm EH, even though a catchpad may not catch an exception if a tag does
// not match, it is OK to add only the first unwind destination catchpad to the
// successors, because there will be at least one invoke instruction within the
// catch scope that points to the next unwind destination, if one exists, so
// CFGSort cannot mess up the BB sorting order.
// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
// call within them, and catchpads only consisting of 'catch (...)' have a
// '__cxa_end_catch' call within them, both of which generate invokes in case
// the next unwind destination exists, i.e., the next unwind destination is not
// the caller.)
//
// Having at most one EH pad successor is also simpler and helps later
// transformations.
//
// For example,
// current:
// invoke void @foo to ... unwind label %catch.dispatch
// catch.dispatch:
// %0 = catchswitch within ... [label %catch.start] unwind label %next
// catch.start:
// ...
// ... in this BB or some other child BB dominated by this BB there will be an
// invoke that points to 'next' BB as an unwind destination
//
// next: ; We don't need to add this to 'current' BB's successor
// ...
static void findWasmUnwindDestinations(
FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
BranchProbability Prob,
SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
&UnwindDests) {
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
if (isa<CleanupPadInst>(Pad)) {
// Stop on cleanup pads.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
break;
} else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations. We don't
// continue to the unwind destination of the catchswitch for wasm.
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
}
break;
} else {
continue;
}
}
}
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
static void findUnwindDestinations(
FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
BranchProbability Prob,
SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
&UnwindDests) {
EHPersonality Personality =
classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
bool IsSEH = isAsynchronousEHPersonality(Personality);
if (IsWasmCXX) {
findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
assert(UnwindDests.size() <= 1 &&
"There should be at most one unwind destination for wasm");
return;
}
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
BasicBlock *NewEHPadBB = nullptr;
if (isa<LandingPadInst>(Pad)) {
// Stop on landingpads. They are not funclets.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
break;
} else if (isa<CleanupPadInst>(Pad)) {
// Stop on cleanup pads. Cleanups are always funclet entries for all known
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations.
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
// For MSVC++ and the CLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
UnwindDests.back().first->setIsEHFuncletEntry();
if (!IsSEH)
UnwindDests.back().first->setIsEHScopeEntry();
}
NewEHPadBB = CatchSwitch->getUnwindDest();
} else {
continue;
}
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (BPI && NewEHPadBB)
Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
EHPadBB = NewEHPadBB;
}
}
void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
// Update successor info.
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
auto UnwindDest = I.getUnwindDest();
BranchProbabilityInfo *BPI = FuncInfo.BPI;
BranchProbability UnwindDestProb =
(BPI && UnwindDest)
? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
: BranchProbability::getZero();
findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
for (auto &UnwindDest : UnwindDests) {
UnwindDest.first->setIsEHPad();
addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
}
FuncInfo.MBB->normalizeSuccProbs();
// Create the terminator node.
SDValue Ret =
DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
DAG.setRoot(Ret);
}
void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
report_fatal_error("visitCatchSwitch not yet implemented!");
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
SmallVector<SDValue, 8> OutVals;
// Calls to @llvm.experimental.deoptimize don't generate a return value, so
// lower
//
// %val = call <ty> @llvm.experimental.deoptimize()
// ret <ty> %val
//
// differently.
if (I.getParent()->getTerminatingDeoptimizeCall()) {
LowerDeoptimizingReturn();
return;
}
if (!FuncInfo.CanLowerReturn) {
unsigned DemoteReg = FuncInfo.DemoteRegister;
const Function *F = I.getParent()->getParent();
// Emit a store of the return value through the virtual register.
// Leave Outs empty so that LowerReturn won't try to load return
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
ComputeValueVTs(TLI, DL,
F->getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
SDValue RetPtr =
DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
&Offsets);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
for (unsigned i = 0; i != NumValues; ++i) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
TypeSize::Fixed(Offsets[i]));
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
Chains[i] = DAG.getStore(
Chain, getCurSDLoc(), Val,
// FIXME: better loc info would be nice.
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
commonAlignment(BaseAlign, Offsets[i]));
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
MVT::Other, Chains);
} else if (I.getNumOperands() != 0) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
const Function *F = I.getParent()->getParent();
bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
I.getOperand(0)->getType(), F->getCallingConv(),
/*IsVarArg*/ false, DL);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (F->getAttributes().hasRetAttr(Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
CallingConv::ID CC = F->getCallingConv();
unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
&Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
if (RetInReg)
Flags.setInReg();
if (I.getOperand(0)->getType()->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
}
if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
if (j == NumValues - 1)
Flags.setInConsecutiveRegsLast();
}
// Propagate extension type if any
if (ExtendKind == ISD::SIGN_EXTEND)
Flags.setSExt();
else if (ExtendKind == ISD::ZERO_EXTEND)
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags,
Parts[i].getValueType().getSimpleVT(),
VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
}
}
}
// Push in swifterror virtual register as the last element of Outs. This makes
// sure swifterror virtual register will be returned in the swifterror
// physical register.
const Function *F = I.getParent()->getParent();
if (TLI.supportSwiftError() &&
F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
assert(SwiftError.getFunctionArg() && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
Outs.push_back(ISD::OutputArg(
Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
/*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
// Create SDNode for the swifterror virtual register.
OutVals.push_back(
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
&I, FuncInfo.MBB, SwiftError.getFunctionArg()),
EVT(TLI.getPointerTy(DL))));
}
bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
CallingConv::ID CallConv =
DAG.getMachineFunction().getFunction().getCallingConv();
Chain = DAG.getTargetLoweringInfo().LowerReturn(
Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
// Verify that the target's LowerReturn behaved as expected.
assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
"LowerReturn didn't return a valid chain!");
// Update the DAG with the new chain value resulting from return lowering.
DAG.setRoot(Chain);
}
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
/// created for it, emit nodes to copy the value into the virtual
/// registers.
void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
// Skip empty types
if (V->getType()->isEmptyTy())
return;
DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
assert(!V->use_empty() && "Unused value assigned virtual registers!");
CopyValueToVirtualRegister(V, VMI->second);
}
}
/// ExportFromCurrentBlock - If this condition isn't known to be exported from
/// the current basic block, add it to ValueMap now so that we'll get a
/// CopyTo/FromReg.
void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
// No need to export constants.
if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
// Already exported?
if (FuncInfo.isExportedInst(V)) return;
unsigned Reg = FuncInfo.InitializeRegForValue(V);
CopyValueToVirtualRegister(V, Reg);
}
bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
const BasicBlock *FromBB) {
// The operands of the setcc have to be in this block. We don't know
// how to export them from some other block.
if (const Instruction *VI = dyn_cast<Instruction>(V)) {
// Can export from current BB.
if (VI->getParent() == FromBB)
return true;
// Is already exported, noop.
return FuncInfo.isExportedInst(V);
}
// If this is an argument, we can export it if the BB is the entry block or
// if it is already exported.
if (isa<Argument>(V)) {
if (FromBB->isEntryBlock())
return true;
// Otherwise, can only export this if it is already exported.
return FuncInfo.isExportedInst(V);
}
// Otherwise, constants can always be exported.
return true;
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
BranchProbability
SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
const BasicBlock *SrcBB = Src->getBasicBlock();
const BasicBlock *DstBB = Dst->getBasicBlock();
if (!BPI) {
// If BPI is not available, set the default probability to 1 / N, where N is
// the number of successors.
auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
return BranchProbability(1, SuccSize);
}
return BPI->getEdgeProbability(SrcBB, DstBB);
}
void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
MachineBasicBlock *Dst,
BranchProbability Prob) {
if (!FuncInfo.BPI)
Src->addSuccessorWithoutProb(Dst);
else {
if (Prob.isUnknown())
Prob = getEdgeProbability(Src, Dst);
Src->addSuccessor(Dst, Prob);
}
}
static bool InBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
return true;
}
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
void
SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
BranchProbability TProb,
BranchProbability FProb,
bool InvertCond) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
// the caseblock.
if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
// The operands of the cmp have to be in this block. We don't know
// how to export them from some other block. If this is the first block
// of the sequence, no exporting is needed.
if (CurBB == SwitchBB ||
(isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
ICmpInst::Predicate Pred =
InvertCond ? IC->getInversePredicate() : IC->getPredicate();
Condition = getICmpCondCode(Pred);
} else {
const FCmpInst *FC = cast<FCmpInst>(Cond);
FCmpInst::Predicate Pred =
InvertCond ? FC->getInversePredicate() : FC->getPredicate();
Condition = getFCmpCondCode(Pred);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
}
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SL->SwitchCases.push_back(CB);
return;
}
}
// Create a CaseBlock record representing this branch.
ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SL->SwitchCases.push_back(CB);
}
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc,
BranchProbability TProb,
BranchProbability FProb,
bool InvertCond) {
// Skip over NOTs that are not part of the tree, and remember to invert the op
// and operands at the next level.
Value *NotCond;
if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
InBlock(NotCond, CurBB->getBasicBlock())) {
FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
!InvertCond);
return;
}
const Instruction *BOp = dyn_cast<Instruction>(Cond);
const Value *BOpOp0, *BOpOp1;
// Compute the effective opcode for Cond, taking into account whether it needs
// to be inverted, e.g.
// and (not (or A, B)), C
// gets lowered as
// and (and (not A, not B), C)
Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
if (BOp) {
BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
? Instruction::And
: (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
? Instruction::Or
: (Instruction::BinaryOps)0);
if (InvertCond) {
if (BOpc == Instruction::And)
BOpc = Instruction::Or;
else if (BOpc == Instruction::Or)
BOpc = Instruction::And;
}
}
// If this node is not part of the or/and tree, emit it as a branch.
// Note that all nodes in the tree should have the same opcode.
bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOpOp0, CurBB->getBasicBlock()) ||
!InBlock(BOpOp1, CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
TProb, FProb, InvertCond);
return;
}
// Create TmpBB after CurBB.
MachineFunction::iterator BBI(CurBB);
MachineFunction &MF = DAG.getMachineFunction();
MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
CurBB->getParent()->insert(++BBI, TmpBB);
if (Opc == Instruction::Or) {
// Codegen X | Y as:
// BB1:
// jmp_if_X TBB
// jmp TmpBB
// TmpBB:
// jmp_if_Y TBB
// jmp FBB
//
// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
// The requirement is that
// TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
// = TrueProb for original BB.
// Assuming the original probabilities are A and B, one choice is to set
// BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
// A/(1+B) and 2B/(1+B). This choice assumes that
// TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
// Another choice is to assume TrueProb for BB1 equals to TrueProb for
// TmpBB, but the math is more complicated.
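// As an illustrative worked example (numbers not from the source): with
// TProb = A = 3/4 and FProb = B = 1/4, BB1 gets {A/2, A/2 + B} = {3/8, 5/8},
// and TmpBB's raw pair {A/2, B} = {3/8, 1/4} normalizes to {3/5, 2/5}, i.e.
// A/(1+B) and 2B/(1+B). Check: 3/8 + 5/8 * 3/5 = 3/4 = A, as required.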
auto NewTrueProb = TProb / 2;
auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
NewFalseProb, InvertCond);
// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
Probs[1], InvertCond);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
// BB1:
// jmp_if_X TmpBB
// jmp FBB
// TmpBB:
// jmp_if_Y TBB
// jmp FBB
//
// This requires creation of TmpBB after CurBB.
// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
// The requirement is that
// FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
// = FalseProb for original BB.
// Assuming the original probabilities are A and B, one choice is to set
// BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
// 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
// TrueProb for BB1 * FalseProb for TmpBB.
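// As an illustrative worked example (numbers not from the source): with
// TProb = A = 1/2 and FProb = B = 1/2, BB1 gets {A + B/2, B/2} = {3/4, 1/4},
// and TmpBB's raw pair {A, B/2} = {1/2, 1/4} normalizes to {2/3, 1/3}, i.e.
// 2A/(1+A) and B/(1+A). Check: 1/4 + 3/4 * 1/3 = 1/2 = B, as required.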
auto NewTrueProb = TProb + FProb / 2;
auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
NewFalseProb, InvertCond);
// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
Probs[1], InvertCond);
}
}
/// If the set of cases should be emitted as a series of branches, return true.
/// If we should emit this as a bunch of and/or'd together conditions, return
/// false.
bool
SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
if (Cases.size() != 2) return true;
// If this is two comparisons of the same values or'd or and'd together, they
// will get folded into a single comparison, so don't emit two blocks.
if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
Cases[0].CmpRHS == Cases[1].CmpRHS) ||
(Cases[0].CmpRHS == Cases[1].CmpLHS &&
Cases[0].CmpLHS == Cases[1].CmpRHS)) {
return false;
}
// Handle: (X != null) | (Y != null) --> (X|Y) != 0
// Handle: (X == null) & (Y == null) --> (X|Y) == 0
if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
Cases[0].CC == Cases[1].CC &&
isa<Constant>(Cases[0].CmpRHS) &&
cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
return false;
if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
return false;
}
return true;
}
void SelectionDAGBuilder::visitBr(const BranchInst &I) {
MachineBasicBlock *BrMBB = FuncInfo.MBB;
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
if (I.isUnconditional()) {
// Update machine-CFG edges.
BrMBB->addSuccessor(Succ0MBB);
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Succ0MBB)));
return;
}
// If this condition is one of the special cases we handle, lower it now.
const Value *CondVal = I.getCondition();
MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
// As long as jumps are not expensive (exceptions for multi-use logic ops,
// unpredictable branches, and vector extracts because those jumps are likely
// expensive for any target), this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
// cmp D, E
// F = setle
// or C, F
// jnz foo
// Emit:
// cmp A, B
// je foo
// cmp D, E
// jle foo
const Instruction *BOp = dyn_cast<Instruction>(CondVal);
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) {
Value *Vec;
const Value *BOp0, *BOp1;
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::And;
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;
if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
/*InvertCond=*/false);
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
// Allow some cases to be rejected.
if (ShouldEmitAsBranches(SL->SwitchCases)) {
for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
}
// Emit the branch for this block.
visitSwitchCase(SL->SwitchCases[0], BrMBB);
SL->SwitchCases.erase(SL->SwitchCases.begin());
return;
}
// Okay, we decided not to do this, remove any inserted MBB's and clear
// SwitchCases.
for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
SL->SwitchCases.clear();
}
}
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
// Use visitSwitchCase to actually insert the fast branch sequence for this
// cond branch.
visitSwitchCase(CB, BrMBB);
}
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB) {
SDValue Cond;
SDValue CondLHS = getValue(CB.CmpLHS);
SDLoc dl = CB.DL;
if (CB.CC == ISD::SETTRUE) {
// Branch or fall through to TrueBB.
addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
SwitchBB->normalizeSuccProbs();
if (CB.TrueBB != NextBlock(SwitchBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
DAG.getBasicBlock(CB.TrueBB)));
}
return;
}
auto &TLI = DAG.getTargetLoweringInfo();
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
// Build the setcc now.
if (!CB.CmpMHS) {
// Fold "(X == true)" to X and "(X == false)" to !X to
// handle common cases produced by branch lowering.
if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
CB.CC == ISD::SETEQ)
Cond = CondLHS;
else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
CB.CC == ISD::SETEQ) {
SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
} else {
SDValue CondRHS = getValue(CB.CmpRHS);
// If a pointer's DAG type is larger than its memory type then the DAG
// values are zero-extended. This breaks signed comparisons so truncate
// back to the underlying type before doing the compare.
if (CondLHS.getValueType() != MemVT) {
CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
}
Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
}
} else {
assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
ISD::SETLE);
} else {
SDValue SUB = DAG.getNode(ISD::SUB, dl,
VT, CmpOp, DAG.getConstant(Low, dl, VT));
Cond = DAG.getSetCC(dl, MVT::i1, SUB,
DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
}
}
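// The subtraction above is the usual unsigned range-check trick: for
// constants Low <= High, "Low <= X && X <= High" holds exactly when
// "(X - Low) u<= (High - Low)". As a hypothetical example, the range check
// 5 <= X <= 12 becomes the single unsigned compare (X - 5) u<= 7.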
// Update successor info
addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
// TrueBB and FalseBB are always different unless the incoming IR is
// degenerate. This only happens when running llc on weird IR.
if (CB.TrueBB != CB.FalseBB)
addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
SwitchBB->normalizeSuccProbs();
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
if (CB.TrueBB == NextBlock(SwitchBB)) {
std::swap(CB.TrueBB, CB.FalseBB);
SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
}
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
// Insert the false branch. Do this even if it's a fall through branch,
// this makes it easier to do DAG optimizations which require inverting
// the branch condition.
BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
DAG.getBasicBlock(CB.FalseBB));
DAG.setRoot(BrCond);
}
/// visitJumpTable - Emit the JumpTable node in the current MBB.
void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
MVT::Other, Index.getValue(1),
Table, Index);
DAG.setRoot(BrJumpTable);
}
/// visitJumpTableHeader - This function emits the code needed to produce the
/// jump table index from the switch case value.
void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
// Subtract the lowest switch case value from the value being switched on.
SDValue SwitchOp = getValue(JTH.SValue);
EVT VT = SwitchOp.getValueType();
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
DAG.getConstant(JTH.First, dl, VT));
// The SDNode we just created, which holds the value being switched on minus
// the smallest case value, needs to be copied to a virtual register so it
// can be used as an index into the jump table in a subsequent basic block.
// This value may be smaller or larger than the target's pointer type, and
// therefore may require extension or truncation.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
unsigned JumpTableReg =
FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
if (!JTH.FallthroughUnreachable) {
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the
// largest case in the switch.
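// As a hypothetical example: for case values {10..14}, Sub = X - 10 and the
// check is Sub u> 4; any X < 10 also branches to the default block because
// the subtraction wraps around to a large unsigned value.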
SDValue CMP = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
Sub.getValueType()),
Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, CMP,
DAG.getBasicBlock(JT.Default));
// Avoid emitting unnecessary branches to the next block.
if (JT.MBB != NextBlock(SwitchBB))
BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
DAG.getBasicBlock(JT.MBB));
DAG.setRoot(BrCond);
} else {
// Avoid emitting unnecessary branches to the next block.
if (JT.MBB != NextBlock(SwitchBB))
DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
DAG.getBasicBlock(JT.MBB)));
else
DAG.setRoot(CopyTo);
}
}
/// Create a LOAD_STACK_GUARD node, and let it carry the target-specific global
/// variable if one exists.
static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
SDValue &Chain) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
MachineSDNode *Node =
DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
if (Global) {
MachinePointerInfo MPInfo(Global);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
MachineMemOperand *MemRef = MF.getMachineMemOperand(
MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy));
DAG.setNodeMemRefs(Node, {MemRef});
}
if (PtrTy != PtrMemTy)
return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
return SDValue(Node, 0);
}
/// Codegen a new tail for a stack protector check ParentMBB which has had its
/// tail spliced into a stack protector check success BB.
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineBasicBlock *ParentBB) {
// First create the loads to the guard/stack slot for the comparison.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI.getStackProtectorIndex();
SDValue Guard;
SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
Align Align =
DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
MachineMemOperand::MOVolatile);
if (TLI.useStackGuardXorFP())
GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
// Retrieve guard check function, nullptr if instrumentation is inlined.
if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
// The target provides a guard check function to validate the guard value.
// Generate a call to that function with the content of the guard slot as
// argument.
FunctionType *FnTy = GuardCheckFn->getFunctionType();
assert(FnTy->getNumParams() == 1 && "Invalid function signature");
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(DAG.getEntryNode())
.setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
getValue(GuardCheckFn), std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return;
}
// If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
// Otherwise, emit a volatile load to retrieve the stack guard value.
SDValue Chain = DAG.getEntryNode();
if (TLI.useLoadStackGuardNode()) {
Guard = getLoadStackGuard(DAG, dl, Chain);
} else {
const Value *IRGuard = TLI.getSDagStackGuard(M);
SDValue GuardPtr = getValue(IRGuard);
Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
MachinePointerInfo(IRGuard, 0), Align,
MachineMemOperand::MOVolatile);
}
// Perform the comparison via a setcc node.
SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(),
Guard.getValueType()),
Guard, GuardVal, ISD::SETNE);
// If the guard and stack slot values are not equal, branch to the failure MBB.
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, GuardVal.getOperand(0),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
// Otherwise branch to success MBB.
SDValue Br = DAG.getNode(ISD::BR, dl,
MVT::Other, BrCond,
DAG.getBasicBlock(SPD.getSuccessMBB()));
DAG.setRoot(Br);
}
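// In sketch form (a summary of this function, not additional emitted code),
// the inline (non-libcall) instrumentation built above is:
// ParentBB:
// guard = <stack guard value>
// slotval = volatile load <stack protector slot>
// brcond (setne guard, slotval), FailureMBB
// br SuccessMBB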
/// Codegen the failure basic block for a stack protector check.
///
/// A failure stack protector machine basic block consists simply of a call to
/// __stack_chk_fail().
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setDiscardResult(true);
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
None, CallOptions, getCurSDLoc()).second;
// On PS4/PS5, the "return address" must still be within the calling
// function, even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
if (TM.getTargetTriple().isPS())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
// WebAssembly needs an unreachable instruction after a non-returning call,
// because the function return type can be different from __stack_chk_fail's
// return type (void).
if (TM.getTargetTriple().isWasm())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
DAG.setRoot(Chain);
}
/// visitBitTestHeader - This function emits the code needed to produce a value
/// suitable for "bit tests".
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
// Subtract the minimum value.
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
SDValue RangeSub =
DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
// Determine the type of the test operands.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool UsePtrType = false;
if (!TLI.isTypeLegal(VT)) {
UsePtrType = true;
} else {
for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
// Switch table case ranges are encoded into a series of masks.
// Just use the pointer type; it's guaranteed to fit.
UsePtrType = true;
break;
}
}
SDValue Sub = RangeSub;
if (UsePtrType) {
VT = TLI.getPointerTy(DAG.getDataLayout());
Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
}
B.RegVT = VT.getSimpleVT();
B.Reg = FuncInfo.CreateReg(B.RegVT);
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
if (!B.FallthroughUnreachable)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
SDValue Root = CopyTo;
if (!B.FallthroughUnreachable) {
// Conditional branch to the default block.
SDValue RangeCmp = DAG.getSetCC(dl,
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
RangeSub.getValueType()),
RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
ISD::SETUGT);
Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
DAG.getBasicBlock(B.Default));
}
// Avoid emitting unnecessary branches to the next block.
if (MBB != NextBlock(SwitchBB))
Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
DAG.setRoot(Root);
}
/// visitBitTestCase - This function produces one "bit test".
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
BranchProbability BranchProbToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
MVT VT = BB.RegVT;
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
SDValue Cmp;
unsigned PopCount = countPopulation(B.Mask);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
ISD::SETEQ);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
DAG.getConstant(1, dl, VT), ShiftOp);
// Emit bit tests and jumps
SDValue AndOp = DAG.getNode(ISD::AND, dl,
VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
}
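// Taken together, the general case above tests "((1 << x) & Mask) != 0".
// Hypothetical example: Mask = 0b100101 covers the case values {0, 2, 5},
// so three switch cases share one shift, one AND and one compare instead
// of three separate compares.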
// The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
// The branch probability from SwitchBB to NextMBB is BranchProbToNext.
addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
// It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
// one as they are relative probabilities (and thus work more like weights),
// and hence we need to normalize them to let the sum of them become one.
SwitchBB->normalizeSuccProbs();
SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(),
Cmp, DAG.getBasicBlock(B.TargetBB));
// Avoid emitting unnecessary branches to the next block.
if (NextMBB != NextBlock(SwitchBB))
BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
DAG.getBasicBlock(NextMBB));
DAG.setRoot(BrAnd);
}
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
// Retrieve successors. Look through artificial IR level blocks like
// catchswitch for successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
const BasicBlock *EHPadBB = I.getSuccessor(1);
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget,
LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower invokes with arbitrary operand bundles yet!");
const Value *Callee(I.getCalledOperand());
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
visitInlineAsm(I, EHPadBB);
else if (Fn && Fn->isIntrinsic()) {
switch (Fn->getIntrinsicID()) {
default:
llvm_unreachable("Cannot invoke this intrinsic");
case Intrinsic::donothing:
// Ignore invokes to @llvm.donothing: jump directly to the next BB.
case Intrinsic::seh_try_begin:
case Intrinsic::seh_scope_begin:
case Intrinsic::seh_try_end:
case Intrinsic::seh_scope_end:
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
break;
case Intrinsic::wasm_rethrow: {
// This is usually done in visitTargetIntrinsic, but this intrinsic is
// special because it can be invoked, so we manually lower it to a DAG
// node here.
SmallVector<SDValue, 8> Ops;
Ops.push_back(getRoot()); // inchain
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Ops.push_back(
DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
break;
}
}
} else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
// Currently we do not lower any intrinsic calls with deopt operand bundles.
// Eventually we will support lowering the @llvm.experimental.deoptimize
// intrinsic, and right now there are no plans to support other intrinsics
// with deopt state.
LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
} else {
LowerCallTo(I, getValue(Callee), false, false, EHPadBB);
}
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
// We already took care of the exported value for the statepoint instruction
// during the call to LowerStatepoint.
if (!isa<GCStatepointInst>(I)) {
CopyToExportRegsIfNeeded(&I);
}
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
BranchProbability EHPadBBProb =
BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
: BranchProbability::getZero();
findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
// Update successor info.
addSuccessorWithProb(InvokeMBB, Return);
for (auto &UnwindDest : UnwindDests) {
UnwindDest.first->setIsEHPad();
addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
}
InvokeMBB->normalizeSuccProbs();
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
DAG.getBasicBlock(Return)));
}
void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
"Cannot lower callbrs with arbitrary operand bundles yet!");
assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
visitInlineAsm(I);
CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
SmallPtrSet<BasicBlock *, 8> Dests;
Dests.insert(I.getDefaultDest());
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
BasicBlock *Dest = I.getIndirectDest(i);
MachineBasicBlock *Target = FuncInfo.MBBMap[Dest];
Target->setIsInlineAsmBrIndirectTarget();
Target->setHasAddressTaken();
// Don't add duplicate machine successors.
if (Dests.insert(Dest).second)
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
}
CallBrMBB->normalizeSuccProbs();
// Drop into default successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Return)));
}
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
return;
// If the landingpad's return type is a token type, we don't create DAG nodes
// for its exception pointer and selector value. The extraction of exception
// pointer or selector value from token type landingpads is not currently
// supported.
if (LP.getType()->isTokenTy())
return;
SmallVector<EVT, 2> ValueVTs;
SDLoc dl = getCurSDLoc();
ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
// Get the two live-in registers as SDValues. The physregs have already been
// copied into virtual registers.
SDValue Ops[2];
if (FuncInfo.ExceptionPointerVirtReg) {
Ops[0] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), dl,
FuncInfo.ExceptionPointerVirtReg,
TLI.getPointerTy(DAG.getDataLayout())),
dl, ValueVTs[0]);
} else {
Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
}
Ops[1] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), dl,
FuncInfo.ExceptionSelectorVirtReg,
TLI.getPointerTy(DAG.getDataLayout())),
dl, ValueVTs[1]);
// Merge into one.
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
DAG.getVTList(ValueVTs), Ops);
setValue(&LP, Res);
}
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
MachineBasicBlock *Last) {
// Update JTCases.
for (JumpTableBlock &JTB : SL->JTCases)
if (JTB.first.HeaderBB == First)
JTB.first.HeaderBB = Last;
// Update BitTestCases.
for (BitTestBlock &BTB : SL->BitTestCases)
if (BTB.Parent == First)
BTB.Parent = Last;
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
// Update machine-CFG edges with unique successors.
SmallSet<BasicBlock*, 32> Done;
for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
BasicBlock *BB = I.getSuccessor(i);
bool Inserted = Done.insert(BB).second;
if (!Inserted)
continue;
MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
addSuccessorWithProb(IndirectBrMBB, Succ);
}
IndirectBrMBB->normalizeSuccProbs();
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
MVT::Other, getControlRoot(),
getValue(I.getAddress())));
}
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
if (!DAG.getTarget().Options.TrapUnreachable)
return;
// We may be able to ignore an unreachable that follows a noreturn call.
if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
const BasicBlock &BB = *I.getParent();
if (&I != &BB.front()) {
BasicBlock::const_iterator PredI =
std::prev(BasicBlock::const_iterator(&I));
if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
if (Call->doesNotReturn())
return;
}
}
}
DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
SDValue Op = getValue(I.getOperand(0));
SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
Op, Flags);
setValue(&I, UnNodeValue);
}
void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
}
if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
Flags.setExact(ExactOp->isExact());
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, Flags);
setValue(&I, BinNodeValue);
}
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
Op1.getValueType(), DAG.getDataLayout());
// Coerce the shift amount to the right type if we can. This exposes the
// truncate or zext to optimization early.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) &&
"Unexpected shift type");
Op2 = DAG.getZExtOrTrunc(Op2, getCurSDLoc(), ShiftTy);
}
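// Hypothetical example: for "shl i64 %x, i64 %n" on a target whose preferred
// shift-amount type is i32, %n is truncated to i32 here, exposing the
// truncate to DAG combines rather than introducing it during legalization.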
bool nuw = false;
bool nsw = false;
bool exact = false;
if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
if (const OverflowingBinaryOperator *OFBinOp =
dyn_cast<const OverflowingBinaryOperator>(&I)) {
nuw = OFBinOp->hasNoUnsignedWrap();
nsw = OFBinOp->hasNoSignedWrap();
}
if (const PossiblyExactOperator *ExactOp =
dyn_cast<const PossiblyExactOperator>(&I))
exact = ExactOp->isExact();
}
SDNodeFlags Flags;
Flags.setExact(exact);
Flags.setNoSignedWrap(nsw);
Flags.setNoUnsignedWrap(nuw);
SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
Flags);
setValue(&I, Res);
}
void SelectionDAGBuilder::visitSDiv(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
SDNodeFlags Flags;
Flags.setExact(isa<PossiblyExactOperator>(&I) &&
cast<PossiblyExactOperator>(&I)->isExact());
setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
Op2, Flags));
}
void SelectionDAGBuilder::visitICmp(const User &I) {
ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
predicate = IC->getPredicate();
else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
predicate = ICmpInst::Predicate(IC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
auto &TLI = DAG.getTargetLoweringInfo();
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
// If a pointer's DAG type is larger than its memory type then the DAG values
// are zero-extended. This breaks signed comparisons so truncate back to the
// underlying type before doing the compare.
if (Op1.getValueType() != MemVT) {
Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
}
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
void SelectionDAGBuilder::visitFCmp(const User &I) {
FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
predicate = FC->getPredicate();
else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
predicate = FCmpInst::Predicate(FC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
auto *FPMO = cast<FPMathOperator>(&I);
if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
SDNodeFlags Flags;
Flags.copyFMF(*FPMO);
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
}
// Check if every user of the select's condition is itself a select; this holds
// when the condition has a single use or is shared only by other selects.
static bool hasOnlySelectUsers(const Value *Cond) {
return llvm::all_of(Cond->users(), [](const Value *V) {
return isa<SelectInst>(V);
});
}
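// Illustrative IR (not from the source) for the min/max matching performed
// in visitSelect below:
// %c = icmp slt i32 %a, %b
// %m = select i1 %c, i32 %a, i32 %b
// matchSelectPattern recognizes this as SPF_SMIN, so the select can be
// emitted as a single ISD::SMIN when that operation is legal or custom.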
void SelectionDAGBuilder::visitSelect(const User &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
SmallVector<SDValue, 4> Values(NumValues);
SDValue Cond = getValue(I.getOperand(0));
SDValue LHSVal = getValue(I.getOperand(1));
SDValue RHSVal = getValue(I.getOperand(2));
SmallVector<SDValue, 1> BaseOps(1, Cond);
ISD::NodeType OpCode =
Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
bool IsUnaryAbs = false;
bool Negate = false;
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
// Min/max matching is only viable if all output VTs are the same.
if (is_splat(ValueVTs)) {
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
// We care about the legality of the operation after it has been type
// legalized.
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
VT = TLI.getTypeToTransformTo(Ctx, VT);
// If the vselect is legal, assume we want to leave this as a vector setcc +
// vselect. Otherwise, if this is going to be scalarized, we want to see if
// min/max is legal on the scalar type.
bool UseScalarMinMax = VT.isVector() &&
!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
Value *LHS, *RHS;
auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
ISD::NodeType Opc = ISD::DELETED_NODE;
switch (SPR.Flavor) {
case SPF_UMAX: Opc = ISD::UMAX; break;
case SPF_UMIN: Opc = ISD::UMIN; break;
case SPF_SMAX: Opc = ISD::SMAX; break;
case SPF_SMIN: Opc = ISD::SMIN; break;
case SPF_FMINNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
case SPNB_RETURNS_ANY: {
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
Opc = ISD::FMINNUM;
else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
Opc = ISD::FMINIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
ISD::FMINNUM : ISD::FMINIMUM;
break;
}
}
break;
case SPF_FMAXNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
Opc = ISD::FMAXNUM;
else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
Opc = ISD::FMAXIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
ISD::FMAXNUM : ISD::FMAXIMUM;
break;
}
break;
case SPF_NABS:
Negate = true;
LLVM_FALLTHROUGH;
case SPF_ABS:
IsUnaryAbs = true;
Opc = ISD::ABS;
break;
default: break;
}
if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
(TLI.isOperationLegalOrCustom(Opc, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
// If the underlying comparison instruction is used by any other instruction,
// it will not be removed by the conversion, so it is not profitable to
// convert to a min/max.
hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
OpCode = Opc;
LHSVal = getValue(LHS);
RHSVal = getValue(RHS);
BaseOps.clear();
}
if (IsUnaryAbs) {
OpCode = Opc;
LHSVal = getValue(LHS);
BaseOps.clear();
}
}
if (IsUnaryAbs) {
for (unsigned i = 0; i != NumValues; ++i) {
SDLoc dl = getCurSDLoc();
EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i);
Values[i] =
DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
if (Negate)
Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
Values[i]);
}
} else {
for (unsigned i = 0; i != NumValues; ++i) {
SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
Values[i] = DAG.getNode(
OpCode, getCurSDLoc(),
LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags);
}
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitTrunc(const User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitZExt(const User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// ZExt also can't be a cast to bool for the same reason, so nothing much to do.
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSExt(const User &I) {
// SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// SExt also can't be a cast to bool for the same reason, so nothing much to do.
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
DAG.getTargetConstant(
0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
// FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToUI(const User &I) {
// FPToUI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToSI(const User &I) {
// FPToSI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitUIToFP(const User &I) {
// UIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSIToFP(const User &I) {
// SIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
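// Hypothetical example: on a 32-bit target, "ptrtoint i8* %p to i64"
// zero-extends, "ptrtoint i8* %p to i16" truncates, and "ptrtoint i8* %p to
// i32" is a no-op.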
SDValue N = getValue(I.getOperand(0));
auto &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
EVT PtrMemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
setValue(&I, N);
}
void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
auto &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
setValue(&I, N);
}
void SelectionDAGBuilder::visitBitCast(const User &I) {
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
// BitCast assures us that source and destination are the same size, so this is
// either a BITCAST or a no-op.
if (DestVT != N.getValueType())
setValue(&I, DAG.getNode(ISD::BITCAST, dl,
DestVT, N)); // convert types.
// Check if the original LLVM IR Operand was a ConstantInt, because getValue()
// might fold any kind of constant expression to an integer constant and that
// is not what we are looking for. Only recognize a bitcast of a genuine
// constant integer as an opaque constant.
else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
/*isOpaque*/true));
else
setValue(&I, N); // noop cast.
}
void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
SDValue N = getValue(SV);
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned SrcAS = SV->getType()->getPointerAddressSpace();
unsigned DestAS = I.getType()->getPointerAddressSpace();
if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
setValue(&I, N);
}
void SelectionDAGBuilder::visitInsertElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
InVec, InVal, InIdx));
}
void SelectionDAGBuilder::visitExtractElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
InVec, InIdx));
}
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
ArrayRef<int> Mask;
if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
Mask = SVI->getShuffleMask();
else
Mask = cast<ConstantExpr>(I).getShuffleMask();
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
if (all_of(Mask, [](int Elem) { return Elem == 0; }) &&
VT.isScalableVector()) {
// Canonical splat form of first element of first input vector.
SDValue FirstElt =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1,
DAG.getVectorIdxConstant(0, DL));
setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
return;
}
// For now, we only handle splats for scalable vectors.
// The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
// for targets that support a SPLAT_VECTOR for non-scalable vector types.
assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
unsigned SrcNumElts = SrcVT.getVectorNumElements();
unsigned MaskNumElts = Mask.size();
if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
return;
}
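// Illustrative example (not from the source): shuffling two <4 x i32>
// sources with mask <0,1,2,3,4,5,6,7> is recognized below as a single
// CONCAT_VECTORS of Src1 and Src2, and mask <4,5,6,7,0,1,2,3> likewise
// becomes a concatenation with the two sources swapped.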
// Normalize the shuffle vector since mask and vector length don't match.
if (SrcNumElts < MaskNumElts) {
// Mask is longer than the source vectors. We can use CONCAT_VECTORS to
// make the mask and vector lengths match.
if (MaskNumElts % SrcNumElts == 0) {
// Mask length is a multiple of the source vector length.
// Check if the shuffle is some kind of concatenation of the input
// vectors.
unsigned NumConcat = MaskNumElts / SrcNumElts;
bool IsConcat = true;
SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx < 0)
continue;
// Ensure the indices in each SrcVT sized piece are sequential and that
// the same source is used for the whole piece.
if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
(ConcatSrcs[i / SrcNumElts] >= 0 &&
ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
IsConcat = false;
break;
}
// Remember which source this index came from.
ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
}
// The shuffle is concatenating multiple vectors together. Just emit
// a CONCAT_VECTORS operation.
if (IsConcat) {
SmallVector<SDValue, 8> ConcatOps;
for (auto Src : ConcatSrcs) {
if (Src < 0)
ConcatOps.push_back(DAG.getUNDEF(SrcVT));
else if (Src == 0)
ConcatOps.push_back(Src1);
else
ConcatOps.push_back(Src2);
}
setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
return;
}
}
unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
PaddedMaskNumElts);
// Pad both vectors with undefs to make them the same length as the mask.
SDValue UndefVal = DAG.getUNDEF(SrcVT);
SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
MOps1[0] = Src1;
MOps2[0] = Src2;
Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
// Readjust mask for new input vector length.
SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts - PaddedMaskNumElts;
MappedOps[i] = Idx;
}
SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
// If the concatenated vector was padded, extract a subvector with the
// correct number of elements.
if (MaskNumElts != PaddedMaskNumElts)
Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
DAG.getVectorIdxConstant(0, DL));
setValue(&I, Result);
return;
}
if (SrcNumElts > MaskNumElts) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle.
int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
bool CanExtract = true;
for (int Idx : Mask) {
unsigned Input = 0;
if (Idx < 0)
continue;
if (Idx >= (int)SrcNumElts) {
Input = 1;
Idx -= SrcNumElts;
}
// If all the indices come from the same MaskNumElts sized portion of
// the sources we can use extract. Also make sure the extract wouldn't
// extract past the end of the source.
int NewStartIdx = alignDown(Idx, MaskNumElts);
if (NewStartIdx + MaskNumElts > SrcNumElts ||
(StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
CanExtract = false;
// Make sure we always update StartIdx as we use it to track if all
// elements are undef.
StartIdx[Input] = NewStartIdx;
}
if (StartIdx[0] < 0 && StartIdx[1] < 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
if (CanExtract) {
// Extract appropriate subvector and generate a vector shuffle
for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
DAG.getVectorIdxConstant(StartIdx[Input], DL));
}
}
// Calculate new mask.
SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
for (int &Idx : MappedOps) {
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
else if (Idx >= 0)
Idx -= StartIdx[0];
}
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
return;
}
}
// We can't use either concat vectors or extract subvectors, so fall back to
// replacing the shuffle with extract-element and build-vector.
EVT EltVT = VT.getVectorElementType();
SmallVector<SDValue,8> Ops;
for (int Idx : Mask) {
SDValue Res;
if (Idx < 0) {
Res = DAG.getUNDEF(EltVT);
} else {
SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src,
DAG.getVectorIdxConstant(Idx, DL));
}
Ops.push_back(Res);
}
setValue(&I, DAG.getBuildVector(VT, DL, Ops));
}
void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
ArrayRef<unsigned> Indices = I.getIndices();
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
Type *AggTy = I.getType();
Type *ValTy = Op1->getType();
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
unsigned NumAggValues = AggValueVTs.size();
unsigned NumValValues = ValValueVTs.size();
SmallVector<SDValue, 4> Values(NumAggValues);
// Ignore an insertvalue that produces an empty object
if (!NumAggValues) {
setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
return;
}
SDValue Agg = getValue(Op0);
unsigned i = 0;
// Copy the beginning value(s) from the original aggregate.
for (; i != LinearIndex; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
// Copy values from the inserted value(s).
if (NumValValues) {
SDValue Val = getValue(Op1);
for (; i != LinearIndex + NumValValues; ++i)
Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
}
// Copy remaining value(s) from the original aggregate.
for (; i != NumAggValues; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(AggValueVTs), Values));
}
void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
ArrayRef<unsigned> Indices = I.getIndices();
const Value *Op0 = I.getOperand(0);
Type *AggTy = Op0->getType();
Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
unsigned NumValValues = ValValueVTs.size();
// Ignore an extractvalue that produces an empty object
if (!NumValValues) {
setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
return;
}
SmallVector<SDValue, 4> Values(NumValValues);
SDValue Agg = getValue(Op0);
// Copy out the selected value(s).
for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
Values[i - LinearIndex] =
OutOfUndef ?
DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValValueVTs), Values));
}
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Value *Op0 = I.getOperand(0);
// Note that the pointer operand may be a vector of pointers, in which case we
// take the address space from its scalar element type.
unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
SDValue N = getValue(Op0);
SDLoc dl = getCurSDLoc();
auto &TLI = DAG.getTargetLoweringInfo();
// Normalize Vector GEP - all scalar operands should be converted to
// splat vectors.
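// For example, in
//   %gep = getelementptr i32, i32* %p, <4 x i32> %idx
// the scalar base %p is splatted to a <4 x i32*> value so that all operands
// of the address arithmetic have matching vector types.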
bool IsVectorGEP = I.getType()->isVectorTy();
ElementCount VectorElementCount =
IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
: ElementCount::getFixed(0);
if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
if (VectorElementCount.isScalable())
N = DAG.getSplatVector(VT, dl, N);
else
N = DAG.getSplatBuildVector(VT, dl, N);
}
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
GTI != E; ++GTI) {
const Value *Idx = GTI.getOperand();
if (StructType *StTy = GTI.getStructTypeOrNull()) {
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
uint64_t Offset =
DAG.getDataLayout().getStructLayout(StTy)->getElementOffset(Field);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
// IdxSize is the width of the arithmetic according to IR semantics.
// In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
// (and fix up the result later).
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
TypeSize ElementSize =
DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
bool ElementScalable = ElementSize.isScalable();
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
const auto *C = dyn_cast<Constant>(Idx);
if (C && isa<VectorType>(C->getType()))
C = C->getSplatValue();
const auto *CI = dyn_cast_or_null<ConstantInt>(C);
if (CI && CI->isZero())
continue;
if (CI && !ElementScalable) {
APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
LLVMContext &Context = *DAG.getContext();
SDValue OffsVal;
if (IsVectorGEP)
OffsVal = DAG.getConstant(
Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
else
OffsVal = DAG.getConstant(Offs, dl, IdxTy);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
continue;
}
// N = N + Idx * ElementMul;
SDValue IdxN = getValue(Idx);
if (!IdxN.getValueType().isVector() && IsVectorGEP) {
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
VectorElementCount);
if (VectorElementCount.isScalable())
IdxN = DAG.getSplatVector(VT, dl, IdxN);
else
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
if (ElementScalable) {
EVT VScaleTy = N.getValueType().getScalarType();
SDValue VScale = DAG.getNode(
ISD::VSCALE, dl, VScaleTy,
DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
if (IsVectorGEP)
VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
} else {
// If this is a multiply by a power of two, turn it into a shl
// immediately. This is a very common case.
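// For example, indexing an array of i32 scales the index by
// ElementMul == 4, which is emitted as 'shl Idx, 2' rather than a multiply.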
if (ElementMul != 1) {
if (ElementMul.isPowerOf2()) {
unsigned Amt = ElementMul.logBase2();
IdxN = DAG.getNode(ISD::SHL, dl,
N.getValueType(), IdxN,
DAG.getConstant(Amt, dl, IdxN.getValueType()));
} else {
SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
IdxN.getValueType());
IdxN = DAG.getNode(ISD::MUL, dl,
N.getValueType(), IdxN, Scale);
}
}
}
N = DAG.getNode(ISD::ADD, dl,
N.getValueType(), N, IdxN);
}
}
MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
if (IsVectorGEP) {
PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount);
PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount);
}
if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
setValue(&I, N);
}
void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// If this is a fixed-size alloca in the entry block of the function,
// allocate it statically on the stack.
if (FuncInfo.StaticAllocaMap.count(&I))
return; // getValue will auto-populate this.
SDLoc dl = getCurSDLoc();
Type *Ty = I.getAllocatedType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
TypeSize TySize = DL.getTypeAllocSize(Ty);
MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
SDValue AllocSize = getValue(I.getArraySize());
EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
if (TySize.isScalable())
AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
DAG.getVScale(dl, IntPtr,
APInt(IntPtr.getScalarSizeInBits(),
TySize.getKnownMinValue())));
else
AllocSize =
DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
DAG.getConstant(TySize.getFixedValue(), dl, IntPtr));
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If it is greater than the stack
// alignment, we note this in the DYNAMIC_STACKALLOC node.
Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
if (*Alignment <= StackAlign)
Alignment = None;
const uint64_t StackAlignMask = StackAlign.value() - 1U;
// Round the size of the allocation up to the stack alignment size
// by adding SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
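// For example, with a 16-byte stack alignment, a 20-byte allocation is
// rounded as (20 + 15) & ~15 == 32.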
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
DAG.getConstant(StackAlignMask, dl, IntPtr), Flags);
// Mask out the low bits for alignment purposes.
AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
DAG.getConstant(~StackAlignMask, dl, IntPtr));
SDValue Ops[] = {
getRoot(), AllocSize,
DAG.getConstant(Alignment ? Alignment->value() : 0, dl, IntPtr)};
SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
setValue(&I, DSA);
DAG.setRoot(DSA.getValue(1));
assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (I.isAtomic())
return visitAtomicLoad(I);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(SV)) {
if (Arg->hasSwiftErrorAttr())
return visitLoadFromSwiftError(I);
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
if (Alloca->isSwiftError())
return visitLoadFromSwiftError(I);
}
}
SDValue Ptr = getValue(SV);
Type *Ty = I.getType();
Align Alignment = I.getAlign();
AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
bool isVolatile = I.isVolatile();
MachineMemOperand::Flags MMOFlags =
TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
SDValue Root;
bool ConstantMemory = false;
if (isVolatile)
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (NumValues > MaxParallelChains)
Root = getMemoryRoot();
else if (AA &&
AA->pointsToConstantMemory(MemoryLocation(
SV,
LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
MMOFlags |= MachineMemOperand::MOInvariant;
// FIXME: pointsToConstantMemory probably does not imply dereferenceable,
// but the previous usage implied it did. Probably should check
// isDereferenceableAndAlignedPointer.
MMOFlags |= MachineMemOperand::MODereferenceable;
} else {
// Do not serialize non-volatile loads against each other.
Root = DAG.getRoot();
}
SDLoc dl = getCurSDLoc();
if (isVolatile)
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
// An aggregate load cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// Serializing loads here may result in excessive register pressure, and
// TokenFactor places arbitrary choke points on the scheduler. SD scheduling
// could recover a bit by hoisting nodes upward in the chain by recognizing
// they are side-effect free or do not alias. The optimizer should really
// avoid this case by converting large object/array copies to llvm.memcpy
// (MaxParallelChains should always remain as a failsafe).
if (ChainI == MaxParallelChains) {
assert(PendingLoads.empty() && "PendingLoads must be serialized first");
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
SDValue A = DAG.getNode(ISD::ADD, dl,
PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT),
Flags);
SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
MMOFlags, AAInfo, Ranges);
Chains[ChainI] = L.getValue(1);
if (MemVTs[i] != ValueVTs[i])
L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
Values[i] = L;
}
if (!ConstantMemory) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
if (isVolatile)
DAG.setRoot(Chain);
else
PendingLoads.push_back(Chain);
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
"call visitStoreToSwiftError when backend supports swifterror");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
const Value *SrcV = I.getOperand(0);
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
SrcV->getType(), ValueVTs, &Offsets);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
Register VReg =
SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
SDValue(Src.getNode(), Src.getResNo()));
DAG.setRoot(CopyNode);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
"call visitLoadFromSwiftError when backend supports swifterror");
assert(!I.isVolatile() &&
!I.hasMetadata(LLVMContext::MD_nontemporal) &&
!I.hasMetadata(LLVMContext::MD_invariant_load) &&
"Support volatile, non temporal, invariant for load_from_swift_error");
const Value *SV = I.getOperand(0);
Type *Ty = I.getType();
assert(
(!AA ||
!AA->pointsToConstantMemory(MemoryLocation(
SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
I.getAAMetadata()))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
ValueVTs, &Offsets);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
// Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
SDValue L = DAG.getCopyFromReg(
getRoot(), getCurSDLoc(),
SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
setValue(&I, L);
}
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
if (I.isAtomic())
return visitAtomicStore(I);
const Value *SrcV = I.getOperand(0);
const Value *PtrV = I.getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
if (Arg->hasSwiftErrorAttr())
return visitStoreToSwiftError(I);
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
if (Alloca->isSwiftError())
return visitStoreToSwiftError(I);
}
}
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
// Get the lowered operands. Note that we do this after
// checking if NumValues is zero, because with zero values
// the operands won't have values in the map.
SDValue Src = getValue(SrcV);
SDValue Ptr = getValue(PtrV);
SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
Align Alignment = I.getAlign();
AAMDNodes AAInfo = I.getAAMetadata();
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
// An aggregate store cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
if (ChainI == MaxParallelChains) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
SDValue Add =
DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags);
SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
SDValue St =
DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
Alignment, MMOFlags, AAInfo);
Chains[ChainI] = St;
}
SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
DAG.setRoot(StoreNode);
}
void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
bool IsCompressing) {
SDLoc sdl = getCurSDLoc();
auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
Mask = I.getArgOperand(3);
};
auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// llvm.masked.compressstore.*(Src0, Ptr, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
Alignment = None;
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
MaybeAlign Alignment;
if (IsCompressing)
getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
// Get a uniform base for the Gather/Scatter intrinsic.
// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
// We try to represent it as a base pointer + vector of indices.
// Usually, the vector of pointers comes from a 'getelementptr' instruction.
// The first operand of the GEP may be a single pointer or a vector of pointers.
// Example:
// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
// or
// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
//
// When the first GEP operand is a single pointer, it is the uniform base we
// are looking for. If the first operand of the GEP is a splat vector, we
// extract the splat value and use it as the uniform base.
// In all other cases the function returns 'false'.
static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
ISD::MemIndexType &IndexType, SDValue &Scale,
SelectionDAGBuilder *SDB, const BasicBlock *CurBB,
uint64_t ElemSize) {
SelectionDAG& DAG = SDB->DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
// Handle splat constant pointer.
if (auto *C = dyn_cast<Constant>(Ptr)) {
C = C->getSplatValue();
if (!C)
return false;
Base = SDB->getValue(C);
ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP || GEP->getParent() != CurBB)
return false;
if (GEP->getNumOperands() != 2)
return false;
const Value *BasePtr = GEP->getPointerOperand();
const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1);
// Make sure the base is scalar and the index is a vector.
if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
return false;
Base = SDB->getValue(BasePtr);
Index = SDB->getValue(IndexVal);
IndexType = ISD::SIGNED_SCALED;
// MGATHER/MSCATTER are only required to support scaling by one or by the
// element size. Other scales may be produced using target-specific DAG
// combines.
uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
if (ScaleVal != ElemSize && ScaleVal != 1)
return false;
Scale =
DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
EVT VT = Src0.getValueType();
Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
->getMaybeAlignValue()
.value_or(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
I.getParent(), VT.getScalarStoreSize());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
}
SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
Ops, MMO, IndexType, false);
DAG.setRoot(Scatter);
setValue(&I, Scatter);
}
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();
auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
Mask = I.getArgOperand(2);
Src0 = I.getArgOperand(3);
};
auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = None;
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
MaybeAlign Alignment;
if (IsExpanding)
getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
SDValue Load =
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
if (AddToChain)
PendingLoads.push_back(Load.getValue(1));
setValue(&I, Load);
}
void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
const Value *Ptr = I.getArgOperand(0);
SDValue Src0 = getValue(I.getArgOperand(3));
SDValue Mask = getValue(I.getArgOperand(2));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
->getMaybeAlignValue()
.value_or(DAG.getEVTAlign(VT.getScalarType()));
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
I.getParent(), VT.getScalarStoreSize());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
}
SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
Ops, MMO, IndexType, ISD::NON_EXTLOAD);
PendingLoads.push_back(Gather.getValue(1));
setValue(&I, Gather);
}
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
AtomicOrdering FailureOrdering = I.getFailureOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering,
FailureOrdering);
SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
dl, MemVT, VTs, InChain,
getValue(I.getPointerOperand()),
getValue(I.getCompareOperand()),
getValue(I.getNewValOperand()), MMO);
SDValue OutChain = L.getValue(2);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
SDLoc dl = getCurSDLoc();
ISD::NodeType NT;
switch (I.getOperation()) {
default: llvm_unreachable("Unknown atomicrmw operation");
case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering);
SDValue L =
DAG.getAtomic(NT, dl, MemVT, InChain,
getValue(I.getPointerOperand()), getValue(I.getValOperand()),
MMO);
SDValue OutChain = L.getValue(1);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
SDLoc dl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Order = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
if (!TLI.supportsUnalignedAtomics() &&
I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue Ptr = getValue(I.getPointerOperand());
if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for loads to prevent future divergence.
SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
if (MemVT != VT)
L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
SDValue OutChain = L.getValue(1);
if (!I.isUnordered())
DAG.setRoot(OutChain);
else
PendingLoads.push_back(OutChain);
return;
}
SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
Ptr, MMO);
SDValue OutChain = L.getValue(1);
if (MemVT != VT)
L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
if (I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering);
SDValue Val = getValue(I.getValueOperand());
if (Val.getValueType() != MemVT)
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
SDValue Ptr = getValue(I.getPointerOperand());
if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for stores to prevent future divergence.
SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
DAG.setRoot(S);
return;
}
SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
Ptr, Val, MMO);
DAG.setRoot(OutChain);
}
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
unsigned Intrinsic) {
// Ignore the callsite's attributes. A specific call site may be marked with
// readnone, but the lowering code will expect the chain based on the
// definition.
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
bool OnlyLoad = HasChain && F->onlyReadsMemory();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
if (HasChain) { // If this intrinsic has side-effects, chainify it.
if (OnlyLoad) {
// We don't need to serialize loads against other loads.
Ops.push_back(DAG.getRoot());
} else {
Ops.push_back(getRoot());
}
}
// Info is set by getTgtMemIntrinsic
TargetLowering::IntrinsicInfo Info;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
DAG.getMachineFunction(),
Intrinsic);
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
const Value *Arg = I.getArgOperand(i);
if (!I.paramHasAttr(i, Attribute::ImmArg)) {
Ops.push_back(getValue(Arg));
continue;
}
// Use TargetConstant instead of a regular constant for immarg.
EVT VT = TLI.getValueType(DAG.getDataLayout(), Arg->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
assert(CI->getBitWidth() <= 64 &&
"large intrinsic immediates not handled");
Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
} else {
Ops.push_back(
DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
}
}
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
SDVTList VTs = DAG.getVTList(ValueVTs);
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPMO);
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
// Create the node.
SDValue Result;
if (IsTgtIntrinsic) {
// This is a target intrinsic that touches memory.
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.flags, Info.size,
I.getAAMetadata());
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
} else {
Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
}
if (HasChain) {
SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
if (OnlyLoad)
PendingLoads.push_back(Chain);
else
DAG.setRoot(Chain);
}
if (!I.getType()->isVoidTy()) {
if (!isa<VectorType>(I.getType()))
Result = lowerRangeToAssertZExt(DAG, I, Result);
MaybeAlign Alignment = I.getRetAlign();
if (!Alignment)
Alignment = F->getAttributes().getRetAlignment();
// Insert `assertalign` node if there's an alignment.
if (InsertAssertAlign && Alignment) {
Result =
DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
}
setValue(&I, Result);
}
}
/// GetSignificand - Get the significand and build it into a floating-point
/// number with exponent of 1:
///
/// Op = (Op & 0x007fffff) | 0x3f800000;
///
/// where Op is the i32 bit representation of the floating-point value.
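///
/// For example, for Op == 0x40490fdb (3.14159274f) this produces
/// 0x3fc90fdb (1.57079637f): the stored significand with the exponent
/// forced to 0 (biased 127).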
static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x007fffff, dl, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
DAG.getConstant(0x3f800000, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
/// GetExponent - Get the exponent:
///
/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
/// where Op is the i32 bit representation of the floating-point value.
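///
/// For example, for Op == 0x41000000 (8.0f) this computes
/// ((0x41000000 & 0x7f800000) >> 23) - 127 == 130 - 127, giving 3.0f.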
static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
const TargetLowering &TLI, const SDLoc &dl) {
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x7f800000, dl, MVT::i32));
SDValue t1 = DAG.getNode(
ISD::SRL, dl, MVT::i32, t0,
DAG.getConstant(23, dl,
TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
DAG.getConstant(127, dl, MVT::i32));
return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
}
/// getF32Constant - Get 32-bit floating point constant.
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
const SDLoc &dl) {
return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
MVT::f32);
}
static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
SelectionDAG &DAG) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
// IntegerPartOfX = (int32_t)t0;
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
// FractionalPartOfX = t0 - (float)IntegerPartOfX;
SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
// IntegerPartOfX <<= 23;
IntegerPartOfX =
DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, dl,
DAG.getTargetLoweringInfo().getShiftAmountTy(
MVT::i32, DAG.getDataLayout())));
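// Shifting IntegerPartOfX into the exponent field means the integer add at
// the end of this function effectively multiplies the polynomial result by
// 2^IntegerPartOfX: adding k to the biased exponent of an IEEE-754 float
// scales it by 2^k.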
SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// TwoToFractionalPartOfX =
// 0.997535578f +
// (0.735607626f + 0.252464424f * x) * x;
//
// error 0.0144103317, which is 6 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3e814304, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
// 0.999892986f +
// (0.696457318f +
// (0.224338339f + 0.792043434e-1f * x) * x) * x;
//
// error 0.000107046256, which is 13 to 14 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3da235e3, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3e65b8f3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
// 0.999999982f +
// (0.693148872f +
// (0.240227044f +
// (0.554906021e-1f +
// (0.961591928e-2f +
// (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
// error 2.47208000*10^(-7), which is better than 18 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3924b03e, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3ab24b87, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3c1d8c17, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3d634a1d, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x3e75fe14, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234, dl));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000, dl));
}
// Add the exponent into the result in integer domain.
SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
}
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// t0 = Op * log2(e)
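// (using the identity exp(x) == 2^(x * log2(e)))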
// TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
}
/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2).
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent =
DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
SDValue LogOfMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// LogOfMantissa =
// -1.1609546f +
// (1.4034025f - 0.23903021f * x) * x;
//
// error 0.0034276066, which is better than 8 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbe74c456, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3fb3a2b1, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f949a29, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// LogOfMantissa =
// -1.7417939f +
// (2.8212026f +
// (-1.4699568f +
// (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
//
// error 0.000061011436, which is 14 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbd67b6d6, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3ee4f4b8, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fbc278b, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40348e95, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fdef31a, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// LogOfMantissa =
// -2.1072184f +
// (4.2372794f +
// (-3.7029485f +
// (2.2781945f +
// (-0.87823314f +
// (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
//
// error 0.0000023660568, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbc91e5ac, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e4350aa, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f60d3e3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x4011cdf0, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x406cfd1c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x408797cb, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
getF32Constant(DAG, 0x4006dcab, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags);
}
/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
// Different possible minimax approximations of the significand in
// floating-point for various degrees of accuracy over [1,2].
SDValue Log2ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
//
// error 0.0049451742, which is more than 7 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbeb08fe0, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x40019463, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fd6633d, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log2ofMantissa =
// -2.51285454f +
// (4.07009056f +
// (-2.12067489f +
// (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
//
// error 0.0000876136000, which is better than 13 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbda7262e, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f25280b, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x4007b923, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40823e2f, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x4020d29c, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log2ofMantissa =
// -3.0400495f +
// (6.1129976f +
// (-5.3420409f +
// (3.2865683f +
// (-1.2669343f +
// (0.27515199f -
// 0.25691327e-1f * x) * x) * x) * x) * x) * x;
//
// error 0.0000018516, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbcd2769e, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e8ce0b9, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fa22ae7, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40525723, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x40aaf200, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x40c39dad, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
getF32Constant(DAG, 0x4042902c, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags);
}
/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
getF32Constant(DAG, 0x3e9a209a, dl));
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
SDValue Log10ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// Log10ofMantissa =
// -0.50419619f +
// (0.60948995f - 0.10380950f * x) * x;
//
// error 0.0014886165, which is 6 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbdd49a13, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f1c0789, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f011300, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log10ofMantissa =
// -0.64831180f +
// (0.91751397f +
// (-0.31664806f + 0.47637168e-1f * x) * x) * x;
//
// error 0.00019228036, which is better than 12 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3d431f31, dl));
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3ea21fb2, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f6ae232, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f25f7c3, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log10ofMantissa =
// -0.84299375f +
// (1.5327582f +
// (-1.0688956f +
// (0.49102474f +
// (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
//
// error 0.0000037995730, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3c5d51ce, dl));
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e00685a, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3efb6798, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f88d192, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fc4316c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
getF32Constant(DAG, 0x3f57ce70, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags);
}
/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
return getLimitedPrecisionExp2(Op, dl, DAG);
// No special expansion.
return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags);
}
/// expandPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode when the base is exactly 10.0f.
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const TargetLowering &TLI,
SDNodeFlags Flags) {
bool IsExp10 = false;
if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
APFloat Ten(10.0f);
IsExp10 = LHSC->isExactlyValue(Ten);
}
}
// TODO: What fast-math-flags should be set on the FMUL node?
if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// #define LOG2OF10 3.3219281f
// t0 = Op * LOG2OF10;
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
getF32Constant(DAG, 0x40549a78, dl));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags);
}
/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
SelectionDAG &DAG) {
// If RHS is a constant, we can expand this out to a multiplication tree if
// it's beneficial on the target, otherwise we end up lowering to a call to
// __powidf2 (for example).
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
unsigned Val = RHSC->getSExtValue();
// powi(x, 0) -> 1.0
if (Val == 0)
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI(
Val, DAG.shouldOptForSize())) {
// Get the exponent as a positive value.
if ((int)Val < 0)
Val = -Val;
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
// powi(x,15) generates one more multiply than it should), but this has
// the benefit of being both really simple and much better than a libcall.
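// For example, powi(x, 13): 13 == 0b1101, so the loop multiplies together
// x, x^4, and x^8 (obtained by repeated squaring of x).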
SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
// TODO: Intrinsics should have fast-math-flags that propagate to these
// nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
Res =
DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
else
Res = CurSquare; // 1.0*CurSquare.
}
CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
CurSquare, CurSquare);
Val >>= 1;
}
// If the original was negative, invert the result, producing 1/(x*x*x).
if (RHSC->getSExtValue() < 0)
Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
return Res;
}
}
// Otherwise, expand to a libcall.
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
SDValue LHS, SDValue RHS, SDValue Scale,
SelectionDAG &DAG, const TargetLowering &TLI) {
EVT VT = LHS.getValueType();
bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
LLVMContext &Ctx = *DAG.getContext();
// If the type is legal but the operation isn't, this node might survive all
// the way to operation legalization. If we end up there and we do not have
// the ability to widen the type (if VT*2 is not legal), we cannot expand the
// node.
// Coax the legalizer into expanding the node during type legalization instead
// by bumping the size by one bit. This will force it to Promote, enabling the
// early expansion and avoiding the need to expand later.
// We don't have to do this if Scale is 0; that can always be expanded, unless
// it's a saturating signed operation. Those can experience true integer
// division overflow, a case which we must avoid.
// FIXME: We wouldn't have to do this (or any of the early
// expansion/promotion) if it was possible to expand a libcall of an
// illegal type during operation legalization. But it's not, so things
// get a bit hacky.
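// For example, an i32 sdiv.fix.sat whose operation action is not Legal or
// Custom is emitted below as an i33 node; i33 is illegal, so type
// legalization promotes it and the operation is expanded early as intended.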
unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
if ((ScaleInt > 0 || (Saturating && Signed)) &&
(TLI.isTypeLegal(VT) ||
(VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
Opcode, VT, ScaleInt);
if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
EVT PromVT;
if (VT.isScalarInteger())
PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1);
else if (VT.isVector()) {
PromVT = VT.getVectorElementType();
PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1);
PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
} else
llvm_unreachable("Wrong VT for DIVFIX?");
if (Signed) {
LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT);
} else {
LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
}
EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
// For saturating operations, we need to shift up the LHS to get the
// proper saturation width, and then shift down again afterwards.
if (Saturating)
LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
DAG.getConstant(1, DL, ShiftTy));
SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
if (Saturating)
Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
DAG.getConstant(1, DL, ShiftTy));
return DAG.getZExtOrTrunc(Res, DL, VT);
}
}
return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale);
}
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>.
static void
getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
const SDValue &N) {
switch (N.getOpcode()) {
case ISD::CopyFromReg: {
SDValue Op = N.getOperand(1);
Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
Op.getValueType().getSizeInBits());
return;
}
case ISD::BITCAST:
case ISD::AssertZext:
case ISD::AssertSext:
case ISD::TRUNCATE:
getUnderlyingArgRegs(Regs, N.getOperand(0));
return;
case ISD::BUILD_PAIR:
case ISD::BUILD_VECTOR:
case ISD::CONCAT_VECTORS:
for (SDValue Op : N->op_values())
getUnderlyingArgRegs(Regs, Op);
return;
default:
return;
}
}
/// If the DbgValueInst is a dbg_value of a function argument, create the
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
/// We don't currently support this for variadic dbg_values, as they shouldn't
/// appear for function arguments or in the prologue.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
// we've been asked to pursue.
auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
bool Indirect) {
if (Reg.isVirtual() && MF.useDebugInstrRef()) {
// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
// pointing at the VReg, which will be patched up later.
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
auto MIB = BuildMI(MF, DL, Inst);
MIB.addReg(Reg);
MIB.addImm(0);
MIB.addMetadata(Variable);
auto *NewDIExpr = FragExpr;
// We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
// the DIExpression.
if (Indirect)
NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore);
MIB.addMetadata(NewDIExpr);
return MIB;
} else {
// Create a completely standard DBG_VALUE.
auto &Inst = TII->get(TargetOpcode::DBG_VALUE);
return BuildMI(MF, DL, Inst, Indirect, Reg, Variable, FragExpr);
}
};
if (Kind == FuncArgumentDbgValueKind::Value) {
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic is found in
// the entry block.
bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
if (!IsInEntryBlock)
return false;
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic describes a
// variable that also is a param.
//
    // However, if we are at the top of the entry block already, we can still
// emit using ArgDbgValue. This might catch some situations when the
// dbg.value refers to an argument that isn't used in the entry block, so
// any CopyToReg node would be optimized out and the only way to express
// this DBG_VALUE is by using the physical reg (or FI) as done in this
    // method.
bool VariableIsFunctionInputArg = Variable->isParameter() &&
!DL->getInlinedAt();
bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
if (!IsInPrologue && !VariableIsFunctionInputArg)
return false;
    // Here we assume that a function argument at the IR level can only be
    // used to describe one input parameter at the source level. For example,
    // given source code like this
//
// struct A { long x, y; };
// void foo(struct A a, long b) {
// ...
// b = a.x;
// ...
// }
//
// and IR like this
//
// define void @foo(i32 %a1, i32 %a2, i32 %b) {
// entry:
// call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
// call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
// call void @llvm.dbg.value(metadata i32 %b, "b",
// ...
// call void @llvm.dbg.value(metadata i32 %a1, "b"
// ...
//
    // then the last dbg.value is describing a parameter "b" using a value
    // that is an argument. But since we have already used %a1 to describe a
    // parameter, we should not handle that last dbg.value here (that would
    // result in an incorrect hoisting of the DBG_VALUE to the function
    // entry).
    // Notice that we allow one dbg.value per IR-level argument to accommodate
    // the situation with fragments above.
if (VariableIsFunctionInputArg) {
unsigned ArgNo = Arg->getArgNo();
if (ArgNo >= FuncInfo.DescribedArgs.size())
FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
return false;
FuncInfo.DescribedArgs.set(ArgNo);
}
}
bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
if (!Op && N.getNode()) {
getUnderlyingArgRegs(ArgRegsAndSizes, N);
Register Reg;
if (ArgRegsAndSizes.size() == 1)
Reg = ArgRegsAndSizes.front().first;
if (Reg && Reg.isVirtual()) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
Register PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
Reg = PR;
}
if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
}
}
if (!Op && N.getNode()) {
// Check if frame index is available.
SDValue LCandidate = peekThroughBitcasts(N);
if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
if (FrameIndexSDNode *FINode =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
Op = MachineOperand::CreateFI(FINode->getIndex());
}
if (!Op) {
    // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg.
auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
SplitRegs) {
unsigned Offset = 0;
for (const auto &RegAndSize : SplitRegs) {
// If the expression is already a fragment, the current register
// offset+size might extend beyond the fragment. In this case, only
// the register bits that are inside the fragment are relevant.
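        // For example, a 48-bit fragment covered by two 32-bit registers:
        // the second register starts at offset 32 and contributes only its
        // low 48 - 32 = 16 bits.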
int RegFragmentSizeInBits = RegAndSize.second;
if (auto ExprFragmentInfo = Expr->getFragmentInfo()) {
uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits;
// The register is entirely outside the expression fragment,
// so is irrelevant for debug info.
if (Offset >= ExprFragmentSizeInBits)
break;
// The register is partially outside the expression fragment, only
// the low bits within the fragment are relevant for debug info.
if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) {
RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset;
}
}
auto FragmentExpr = DIExpression::createFragmentExpression(
Expr, Offset, RegFragmentSizeInBits);
Offset += RegAndSize.second;
// If a valid fragment expression cannot be created, the variable's
// correct value cannot be determined and so it is set as Undef.
if (!FragmentExpr) {
SDDbgValue *SDV = DAG.getConstantDbgValue(
Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
continue;
}
MachineInstr *NewMI =
MakeVRegDbgValue(RegAndSize.first, *FragmentExpr,
Kind != FuncArgumentDbgValueKind::Value);
FuncInfo.ArgDbgValues.push_back(NewMI);
}
};
// Check if ValueMap has reg number.
DenseMap<const Value *, Register>::const_iterator
VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
splitMultiRegDbgValue(RFV.getRegsAndSizes());
return true;
}
Op = MachineOperand::CreateReg(VMI->second, false);
IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
} else if (ArgRegsAndSizes.size() > 1) {
// This was split due to the calling convention, and no virtual register
// mapping exists for the value.
splitMultiRegDbgValue(ArgRegsAndSizes);
return true;
}
}
if (!Op)
return false;
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
MachineInstr *NewMI = nullptr;
if (Op->isReg())
NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect);
else
NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op,
Variable, Expr);
// Otherwise, use ArgDbgValues.
FuncInfo.ArgDbgValues.push_back(NewMI);
return true;
}
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DILocalVariable *Variable,
DIExpression *Expr,
const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
// stack slot locations.
//
// Consider "int x = 0; int *px = &x;". There are two kinds of interesting
// debug values here after optimization:
//
// dbg.value(i32* %px, !"int *px", !DIExpression()), and
// dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
//
// Both describe the direct values of their associated variables.
return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
/*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
/*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
switch (Intrinsic) {
case Intrinsic::smul_fix:
return ISD::SMULFIX;
case Intrinsic::umul_fix:
return ISD::UMULFIX;
case Intrinsic::smul_fix_sat:
return ISD::SMULFIXSAT;
case Intrinsic::umul_fix_sat:
return ISD::UMULFIXSAT;
case Intrinsic::sdiv_fix:
return ISD::SDIVFIX;
case Intrinsic::udiv_fix:
return ISD::UDIVFIX;
case Intrinsic::sdiv_fix_sat:
return ISD::SDIVFIXSAT;
case Intrinsic::udiv_fix_sat:
return ISD::UDIVFIXSAT;
default:
llvm_unreachable("Unhandled fixed point intrinsic");
}
}
void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
const char *FunctionName) {
assert(FunctionName && "FunctionName must not be nullptr");
SDValue Callee = DAG.getExternalSymbol(
FunctionName,
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
/// Given a @llvm.call.preallocated.setup, return the corresponding
/// preallocated call.
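/// Illustrative IR (names hypothetical):
///   %t = call token @llvm.call.preallocated.setup(i32 1)
///   %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0)
///   call void @foo(i8* preallocated(i32) %a) ["preallocated"(token %t)]
/// Here the call to @foo is the one returned.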
static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
assert(cast<CallBase>(PreallocatedSetup)
->getCalledFunction()
->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
"expected call_preallocated_setup Value");
for (const auto *U : PreallocatedSetup->users()) {
auto *UseCall = cast<CallBase>(U);
const Function *Fn = UseCall->getCalledFunction();
if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
return UseCall;
}
}
llvm_unreachable("expected corresponding call to preallocated setup/arg");
}
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc sdl = getCurSDLoc();
DebugLoc dl = getCurDebugLoc();
SDValue Res;
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
switch (Intrinsic) {
default:
// By default, turn this into a target intrinsic node.
visitTargetIntrinsic(I, Intrinsic);
return;
case Intrinsic::vscale: {
match(&I, m_VScale(DAG.getDataLayout()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1)));
return;
}
case Intrinsic::vastart: visitVAStart(I); return;
case Intrinsic::vaend: visitVAEnd(I); return;
case Intrinsic::vacopy: visitVACopy(I); return;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::addressofreturnaddress:
setValue(&I,
DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::sponentry:
setValue(&I,
DAG.getNode(ISD::SPONENTRY, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getFrameIndexTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::read_volatile_register:
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Res = DAG.getNode(ISD::READ_REGISTER, sdl,
DAG.getVTList(VT, MVT::Other), Chain, RegName);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::write_register: {
Value *Reg = I.getArgOperand(0);
Value *RegValue = I.getArgOperand(1);
SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
RegName, getValue(RegValue)));
return;
}
case Intrinsic::memcpy: {
const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memcpy defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MC = DAG.getMemcpy(
Root, sdl, Op1, Op2, Op3, Alignment, isVol,
/* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memcpy_inline: {
const auto &MCI = cast<MemCpyInlineInst>(I);
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
// @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue MC = DAG.getMemcpy(
getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
/* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memset: {
const auto &MSI = cast<MemSetInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memset defines 0 and 1 to both mean no alignment.
Align Alignment = MSI.getDestAlign().valueOrOne();
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MS = DAG.getMemset(
Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false,
isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
updateDAGForMaybeTailCall(MS);
return;
}
case Intrinsic::memset_inline: {
const auto &MSII = cast<MemSetInlineInst>(I);
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Value = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size");
// @llvm.memset defines 0 and 1 to both mean no alignment.
Align DstAlign = MSII.getDestAlign().valueOrOne();
bool isVol = MSII.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol,
/* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memmove: {
const auto &MMI = cast<MemMoveInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memmove defines 0 and 1 to both mean no alignment.
Align DstAlign = MMI.getDestAlign().valueOrOne();
Align SrcAlign = MMI.getSourceAlign().valueOrOne();
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MMI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)),
I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MM);
return;
}
case Intrinsic::memcpy_element_unordered_atomic: {
const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC =
DAG.getAtomicMemcpy(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz,
isTC, MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memmove_element_unordered_atomic: {
auto &MI = cast<AtomicMemMoveInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC =
DAG.getAtomicMemmove(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz,
isTC, MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memset_element_unordered_atomic: {
auto &MI = cast<AtomicMemSetInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC =
DAG.getAtomicMemset(getRoot(), sdl, Dst, Val, Length, LengthTy, ElemSz,
isTC, MachinePointerInfo(MI.getRawDest()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::call_preallocated_setup: {
const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
getRoot(), SrcValue);
setValue(&I, Res);
DAG.setRoot(Res);
return;
}
case Intrinsic::call_preallocated_arg: {
const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = SrcValue;
Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
MVT::i32); // arg index
SDValue Res = DAG.getNode(
ISD::PREALLOCATED_ARG, sdl,
DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
    // Assume dbg.addr and dbg.declare cannot currently use DIArgList, i.e.
    // they are non-variadic.
const auto &DI = cast<DbgVariableIntrinsic>(I);
assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
assert(Variable && "Missing variable");
LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
<< "\n");
// Check if address has undef value.
const Value *Address = DI.getVariableLocationOp(0);
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
<< " (bad/undef/unused-arg address)\n");
return;
}
bool isParameter = Variable->isParameter() || isa<Argument>(Address);
// Check if this variable can be described by a frame index, typically
// either as a static alloca or a byval parameter.
int FI = std::numeric_limits<int>::max();
if (const auto *AI =
dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
if (AI->isStaticAlloca()) {
auto I = FuncInfo.StaticAllocaMap.find(AI);
if (I != FuncInfo.StaticAllocaMap.end())
FI = I->second;
}
} else if (const auto *Arg = dyn_cast<Argument>(
Address->stripInBoundsConstantOffsets())) {
FI = FuncInfo.getArgumentFrameIndex(Arg);
}
// llvm.dbg.addr is control dependent and always generates indirect
// DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
// the MachineFunction variable table.
if (FI != std::numeric_limits<int>::max()) {
if (Intrinsic == Intrinsic::dbg_addr) {
SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
Variable, Expression, FI, getRoot().getNode(), /*IsIndirect*/ true,
dl, SDNodeOrder);
DAG.AddDbgValue(SDV, isParameter);
} else {
LLVM_DEBUG(dbgs() << "Skipping " << DI
<< " (variable info stashed in MF side table)\n");
}
return;
}
SDValue &N = NodeMap[Address];
if (!N.getNode() && isa<Argument>(Address))
// Check unused arguments map.
N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (isParameter && FINode) {
// Byval parameter. We have a frame index at this point.
SDV =
DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
/*IsIndirect*/ true, dl, SDNodeOrder);
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
FuncArgumentDbgValueKind::Declare, N);
return;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, dl, SDNodeOrder);
}
DAG.AddDbgValue(SDV, isParameter);
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
FuncArgumentDbgValueKind::Declare, N)) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
<< " (could not emit func-arg dbg_value)\n");
}
}
return;
}
case Intrinsic::dbg_label: {
const DbgLabelInst &DI = cast<DbgLabelInst>(I);
DILabel *Label = DI.getLabel();
assert(Label && "Missing label");
SDDbgLabel *SDV;
SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
DAG.AddDbgLabel(SDV);
return;
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
SmallVector<Value *, 4> Values(DI.getValues());
if (Values.empty())
return;
if (llvm::is_contained(Values, nullptr))
return;
bool IsVariadic = DI.hasArgList();
if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(),
SDNodeOrder, IsVariadic))
addDanglingDebugInfo(&DI, dl, SDNodeOrder);
return;
}
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, sdl, MVT::i32);
setValue(&I, Res);
return;
}
case Intrinsic::eh_return_i32:
case Intrinsic::eh_return_i64:
DAG.getMachineFunction().setCallsEHReturn(true);
DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
MVT::Other,
getControlRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
return;
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().setCallsUnwindInit(true);
return;
case Intrinsic::eh_dwarf_cfa:
setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(0));
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
MMI.setCurrentCallSite(CI->getZExtValue());
return;
}
case Intrinsic::eh_sjlj_functioncontext: {
// Get and store the index of the function context.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
AllocaInst *FnCtx =
cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
int FI = FuncInfo.StaticAllocaMap[FnCtx];
MFI.setFunctionContextIndex(FI);
return;
}
case Intrinsic::eh_sjlj_setjmp: {
SDValue Ops[2];
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
DAG.getVTList(MVT::i32, MVT::Other), Ops);
setValue(&I, Op.getValue(0));
DAG.setRoot(Op.getValue(1));
return;
}
case Intrinsic::eh_sjlj_longjmp:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
getRoot(), getValue(I.getArgOperand(0))));
return;
case Intrinsic::eh_sjlj_setup_dispatch:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
getRoot()));
return;
case Intrinsic::masked_gather:
visitMaskedGather(I);
return;
case Intrinsic::masked_load:
visitMaskedLoad(I);
return;
case Intrinsic::masked_scatter:
visitMaskedScatter(I);
return;
case Intrinsic::masked_store:
visitMaskedStore(I);
return;
case Intrinsic::masked_expandload:
visitMaskedLoad(I, true /* IsExpanding */);
return;
case Intrinsic::masked_compressstore:
visitMaskedStore(I, true /* IsCompressing */);
return;
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
return;
case Intrinsic::log:
setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log2:
setValue(&I,
expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log10:
setValue(&I,
expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp:
setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp2:
setValue(&I,
expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG, TLI, Flags));
return;
case Intrinsic::sqrt:
case Intrinsic::fabs:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::canonicalize: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::fabs: Opcode = ISD::FABS; break;
case Intrinsic::sin: Opcode = ISD::FSIN; break;
case Intrinsic::cos: Opcode = ISD::FCOS; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
case Intrinsic::round: Opcode = ISD::FROUND; break;
case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
}
setValue(&I, DAG.getNode(Opcode, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::lround:
case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::lround: Opcode = ISD::LROUND; break;
case Intrinsic::llround: Opcode = ISD::LLROUND; break;
case Intrinsic::lrint: Opcode = ISD::LRINT; break;
case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
}
EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
getValue(I.getArgOperand(0))));
return;
}
case Intrinsic::minnum:
setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maxnum:
setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::minimum:
setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maximum:
setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::arithmetic_fence: {
setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::fma:
setValue(&I, DAG.getNode(
ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
return;
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#include "llvm/IR/VPIntrinsics.def"
visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
return;
case Intrinsic::fptrunc_round: {
    // Get the last argument, the rounding-mode metadata, and convert it to an
    // integer constant for the node.
Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
Optional<RoundingMode> RoundMode =
convertStrToRoundingMode(cast<MDString>(MD)->getString());
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
Flags.copyFMF(*cast<FPMathOperator>(&I));
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
SDValue Result;
Result = DAG.getNode(
ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)),
DAG.getTargetConstant((int)*RoundMode, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
setValue(&I, Result);
return;
}
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
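    // Lower to a single FMA node when fusion is allowed and profitable on
    // this target; otherwise fall back to separate FMUL + FADD, which the
    // fmuladd semantics permit.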
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
} else {
// TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(
ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags);
SDValue Add = DAG.getNode(ISD::FADD, sdl,
getValue(I.getArgOperand(0)).getValueType(),
Mul, getValue(I.getArgOperand(2)), Flags);
setValue(&I, Add);
}
return;
}
case Intrinsic::convert_to_fp16:
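    // llvm.convert.to.fp16 rounds to half and returns the raw bits, hence
    // the FP_ROUND to f16 followed by a BITCAST to i16.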
setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
getValue(I.getArgOperand(0)),
DAG.getTargetConstant(0, sdl,
MVT::i32))));
return;
case Intrinsic::convert_from_fp16:
setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType()),
DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
getValue(I.getArgOperand(0)))));
return;
case Intrinsic::fptosi_sat: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT,
getValue(I.getArgOperand(0)),
DAG.getValueType(VT.getScalarType())));
return;
}
case Intrinsic::fptoui_sat: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, VT,
getValue(I.getArgOperand(0)),
DAG.getValueType(VT.getScalarType())));
return;
}
case Intrinsic::set_rounding:
Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
{getRoot(), getValue(I.getArgOperand(0))});
setValue(&I, Res);
DAG.setRoot(Res.getValue(0));
return;
case Intrinsic::is_fpclass: {
const DataLayout DLayout = DAG.getDataLayout();
EVT DestVT = TLI.getValueType(DLayout, I.getType());
EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
unsigned Test = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
MachineFunction &MF = DAG.getMachineFunction();
const Function &F = MF.getFunction();
SDValue Op = getValue(I.getArgOperand(0));
SDNodeFlags Flags;
Flags.setNoFPExcept(
!F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
    // If ISD::IS_FPCLASS should be expanded, do it right now, because the
    // expansion can use illegal types. Expanding early allows these types to
    // be legalized prior to selection.
if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
setValue(&I, Result);
return;
}
SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
setValue(&I, V);
return;
}
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
return;
}
case Intrinsic::readcyclecounter: {
SDValue Op = getRoot();
Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
DAG.getVTList(MVT::i64, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::bitreverse:
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
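    // The second operand is the i1 "is zero poison" flag; when it is set, a
    // zero input is poison, so the cheaper CTTZ_ZERO_UNDEF form can be used.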
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
sdl, Ty, Arg));
return;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
sdl, Ty, Arg));
return;
}
case Intrinsic::ctpop: {
SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
return;
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
bool IsFSHL = Intrinsic == Intrinsic::fshl;
SDValue X = getValue(I.getArgOperand(0));
SDValue Y = getValue(I.getArgOperand(1));
SDValue Z = getValue(I.getArgOperand(2));
EVT VT = X.getValueType();
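    // A funnel shift of a value with itself is a rotate:
    // fshl(x, x, z) == rotl(x, z) and fshr(x, x, z) == rotr(x, z).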
if (X == Y) {
auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
} else {
auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
}
return;
}
case Intrinsic::sadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::uadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::ssub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::usub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::sshl_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::ushl_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::smul_fix:
case Intrinsic::umul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
Op1.getValueType(), Op1, Op2, Op3));
return;
}
case Intrinsic::sdiv_fix:
case Intrinsic::udiv_fix:
case Intrinsic::sdiv_fix_sat:
case Intrinsic::udiv_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
Op1, Op2, Op3, DAG, TLI));
return;
}
case Intrinsic::smax: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::smin: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::umax: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::umin: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::abs: {
// TODO: Preserve "int min is poison" arg in SDAG?
SDValue Op1 = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1));
return;
}
case Intrinsic::stacksave: {
SDValue Op = getRoot();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::stackrestore:
Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
return;
case Intrinsic::get_dynamic_area_offset: {
SDValue Op = getRoot();
EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
    // The result type of @llvm.get.dynamic.area.offset must not be wider
    // than the target's pointer type.
if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
" intrinsic!");
Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
Op);
DAG.setRoot(Op);
setValue(&I, Res);
return;
}
case Intrinsic::stackguard: {
MachineFunction &MF = DAG.getMachineFunction();
const Module &M = *MF.getFunction().getParent();
SDValue Chain = getRoot();
if (TLI.useLoadStackGuardNode()) {
Res = getLoadStackGuard(DAG, sdl, Chain);
} else {
EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
MachinePointerInfo(Global, 0), Align,
MachineMemOperand::MOVolatile);
}
if (TLI.useStackGuardXorFP())
Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
DAG.setRoot(Chain);
setValue(&I, Res);
return;
}
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
SDValue Src, Chain = getRoot();
if (TLI.useLoadStackGuardNode())
Src = getLoadStackGuard(DAG, sdl, Chain);
else
Src = getValue(I.getArgOperand(0)); // The guard's value.
AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
int FI = FuncInfo.StaticAllocaMap[Slot];
MFI.setStackProtectorIndex(FI);
EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
Res = DAG.getStore(
Chain, sdl, Src, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MaybeAlign(), MachineMemOperand::MOVolatile);
setValue(&I, Res);
DAG.setRoot(Res);
return;
}
case Intrinsic::objectsize:
llvm_unreachable("llvm.objectsize.* should have been lowered already");
case Intrinsic::is_constant:
llvm_unreachable("llvm.is.constant.* should have been lowered already");
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return;
case Intrinsic::assume:
case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
// Discard annotate attributes, noalias scope declarations, assumptions, and
// artificial side-effects.
return;
case Intrinsic::codeview_annotation: {
// Emit a label associated with this metadata.
MachineFunction &MF = DAG.getMachineFunction();
MCSymbol *Label =
MF.getMMI().getContext().createTempSymbol("annotation", true);
Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
DAG.setRoot(Res);
return;
}
case Intrinsic::init_trampoline: {
const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
SDValue Ops[6];
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
Ops[5] = DAG.getSrcValue(F);
Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
DAG.setRoot(Res);
return;
}
case Intrinsic::adjust_trampoline:
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::gcroot: {
assert(DAG.getMachineFunction().getFunction().hasGC() &&
"only valid in functions with gc specified, enforced by Verifier");
assert(GFI && "implied by previous");
const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
GFI->addStackRoot(FI->getIndex(), TypeMap);
return;
}
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds:
Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
case Intrinsic::expect:
// Just replace __builtin_expect(exp, c) with EXP.
setValue(&I, getValue(I.getArgOperand(0)));
return;
case Intrinsic::ubsantrap:
case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName =
I.getAttributes().getFnAttr("trap-func-name").getValueAsString();
if (TrapFuncName.empty()) {
switch (Intrinsic) {
case Intrinsic::trap:
DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
break;
case Intrinsic::debugtrap:
DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
break;
case Intrinsic::ubsantrap:
DAG.setRoot(DAG.getNode(
ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
DAG.getTargetConstant(
cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
MVT::i32)));
break;
default: llvm_unreachable("unknown trap intrinsic");
}
return;
}
TargetLowering::ArgListTy Args;
if (Intrinsic == Intrinsic::ubsantrap) {
Args.push_back(TargetLoweringBase::ArgListEntry());
Args[0].Val = I.getArgOperand(0);
Args[0].Node = getValue(Args[0].Val);
Args[0].Ty = Args[0].Val->getType();
}
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(),
TLI.getPointerTy(DAG.getDataLayout())),
std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return;
}
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow: {
ISD::NodeType Op;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
}
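    // These nodes produce a pair {result, overflow flag}; for vector inputs
    // the overflow flag is an i1 vector with the same element count.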
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
EVT ResultVT = Op1.getValueType();
EVT OverflowVT = MVT::i1;
if (ResultVT.isVector())
OverflowVT = EVT::getVectorVT(
*Context, OverflowVT, ResultVT.getVectorElementCount());
SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
return;
}
case Intrinsic::prefetch: {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    auto Flags = rw == 0 ? MachineMemOperand::MOLoad
                         : MachineMemOperand::MOStore;
Ops[0] = DAG.getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = getValue(I.getArgOperand(3));
SDValue Result = DAG.getMemIntrinsicNode(
ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
/* align */ None, Flags);
    // Chain the prefetch in parallel with any pending loads, to stay out of
    // the way of later optimizations.
PendingLoads.push_back(Result);
Result = getRoot();
DAG.setRoot(Result);
return;
}
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
// Stack coloring is not enabled in O0, discard region information.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
const int64_t ObjectSize =
cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
Value *const ObjectPtr = I.getArgOperand(1);
SmallVector<const Value *, 4> Allocas;
getUnderlyingObjects(ObjectPtr, Allocas);
for (const Value *Alloca : Allocas) {
const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Alloca);
// Could not find an Alloca.
if (!LifetimeObject)
continue;
// First check that the Alloca is static, otherwise it won't have a
// valid frame index.
auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
if (SI == FuncInfo.StaticAllocaMap.end())
return;
const int FrameIndex = SI->second;
int64_t Offset;
if (GetPointerBaseWithConstantOffset(
ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
Offset = -1; // Cannot determine offset from alloca to lifetime object.
Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
Offset);
DAG.setRoot(Res);
}
return;
}
case Intrinsic::pseudoprobe: {
auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr);
DAG.setRoot(Res);
return;
}
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I,
DAG.getUNDEF(TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::invariant_end:
// Discard region information.
return;
case Intrinsic::clear_cache:
    // FunctionName may be null.
if (const char *FunctionName = TLI.getClearCacheBuiltinName())
lowerCallToExternalSymbol(I, FunctionName);
return;
case Intrinsic::donothing:
case Intrinsic::seh_try_begin:
case Intrinsic::seh_scope_begin:
case Intrinsic::seh_try_end:
case Intrinsic::seh_scope_end:
// ignore
return;
case Intrinsic::experimental_stackmap:
visitStackmap(I);
return;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(I);
return;
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(cast<GCStatepointInst>(I));
return;
case Intrinsic::experimental_gc_result:
visitGCResult(cast<GCResultInst>(I));
return;
case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
return;
case Intrinsic::instrprof_cover:
llvm_unreachable("instrprof failed to lower a cover");
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:
llvm_unreachable("instrprof failed to lower a value profiling call");
case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
AllocaInst *Slot = cast<AllocaInst>(Arg);
assert(FuncInfo.StaticAllocaMap.count(Slot) &&
"can only escape static allocas");
int FI = FuncInfo.StaticAllocaMap[Slot];
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
TII->get(TargetOpcode::LOCAL_ESCAPE))
.addSym(FrameAllocSym)
.addFrameIndex(FI);
}
return;
}
case Intrinsic::localrecover: {
// i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
MachineFunction &MF = DAG.getMachineFunction();
// Get the symbol that defines the frame offset.
auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
unsigned IdxVal =
unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
Value *FP = I.getArgOperand(1);
SDValue FPVal = getValue(FP);
EVT PtrVT = FPVal.getValueType();
// Create a MCSymbol for the label to avoid any target lowering
// that would make this PC relative.
SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
SDValue OffsetVal =
DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
// Add the offset to the FP.
SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
setValue(&I, Add);
return;
}
case Intrinsic::eh_exceptionpointer:
case Intrinsic::eh_exceptioncode: {
// Get the exception pointer vreg, copy from it, and resize it to fit.
const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT);
if (Intrinsic == Intrinsic::eh_exceptioncode)
N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
setValue(&I, N);
return;
}
case Intrinsic::xray_customevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
// specific calling convention, and only for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64)
return;
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
SDValue LogEntryVal = getValue(I.getArgOperand(0));
SDValue StrSizeVal = getValue(I.getArgOperand(1));
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Chain = getRoot();
Ops.push_back(LogEntryVal);
Ops.push_back(StrSizeVal);
Ops.push_back(Chain);
    // We need to enforce the calling convention for the callsite, so that
    // argument ordering is enforced correctly, and so that register
    // allocation can see that some registers may be assumed clobbered and
    // must be preserved across calls to the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
sdl, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
return;
}
case Intrinsic::xray_typedevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
// specific calling convention, and only for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64)
return;
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
// It's unclear to me how manipulating the selection DAG here forces callers
// to provide arguments in registers instead of on the stack.
SDValue LogTypeId = getValue(I.getArgOperand(0));
SDValue LogEntryVal = getValue(I.getArgOperand(1));
SDValue StrSizeVal = getValue(I.getArgOperand(2));
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Chain = getRoot();
Ops.push_back(LogTypeId);
Ops.push_back(LogEntryVal);
Ops.push_back(StrSizeVal);
Ops.push_back(Chain);
    // We need to enforce the calling convention for the callsite, so that
    // argument ordering is enforced correctly, and so that register
    // allocation can see that some registers may be assumed clobbered and
    // must be preserved across calls to the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(
TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
return;
}
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
return;
case Intrinsic::experimental_stepvector:
visitStepVector(I);
return;
case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul:
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
return;
case Intrinsic::icall_branch_funnel: {
SmallVector<SDValue, 16> Ops;
Ops.push_back(getValue(I.getArgOperand(0)));
int64_t Offset;
auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(1), Offset, DAG.getDataLayout()));
if (!Base)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0));
struct BranchFunnelTarget {
int64_t Offset;
SDValue Target;
};
SmallVector<BranchFunnelTarget, 8> Targets;
for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(Op), Offset, DAG.getDataLayout()));
if (ElemBase != Base)
report_fatal_error("all llvm.icall.branch.funnel operands must refer "
"to the same GlobalValue");
SDValue Val = getValue(I.getArgOperand(Op + 1));
auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
if (!GA)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Targets.push_back({Offset, DAG.getTargetGlobalAddress(
GA->getGlobal(), sdl, Val.getValueType(),
GA->getOffset())});
}
llvm::sort(Targets,
[](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
return T1.Offset < T2.Offset;
});
for (auto &T : Targets) {
Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32));
Ops.push_back(T.Target);
}
Ops.push_back(DAG.getRoot()); // Chain
SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl,
MVT::Other, Ops),
0);
DAG.setRoot(N);
setValue(&I, N);
HasTailCall = true;
return;
}
case Intrinsic::wasm_landingpad_index:
    // The information this intrinsic contained has been transferred to
    // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
    // delete it now.
return;
case Intrinsic::aarch64_settag:
case Intrinsic::aarch64_settag_zero: {
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
SDValue Val = TSI.EmitTargetCodeForSetTag(
DAG, sdl, getRoot(), getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
ZeroMemory);
DAG.setRoot(Val);
setValue(&I, Val);
return;
}
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
SDValue Const = getValue(I.getOperand(1));
EVT PtrVT = Ptr.getValueType();
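    // llvm.ptrmask lowers to a plain AND; the mask is zero-extended or
    // truncated to the pointer width first.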
setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
return;
}
case Intrinsic::get_active_lane_mask: {
EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Index = getValue(I.getOperand(0));
EVT ElementVT = Index.getValueType();
if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) {
visitTargetIntrinsic(I, Intrinsic);
return;
}
SDValue TripCount = getValue(I.getOperand(1));
auto VecTy = CCVT.changeVectorElementType(ElementVT);
SDValue VectorIndex, VectorTripCount;
if (VecTy.isScalableVector()) {
VectorIndex = DAG.getSplatVector(VecTy, sdl, Index);
VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount);
} else {
VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index);
VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount);
}
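// Compute lanemask[i] = (Index + i) u< TripCount. The saturating add
// clamps at the type's maximum instead of wrapping, so the unsigned
// compare still yields false for lanes past the trip count.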
SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
SDValue VectorInduction = DAG.getNode(
ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep);
SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction,
VectorTripCount, ISD::CondCode::SETULT);
setValue(&I, SetCC);
return;
}
case Intrinsic::vector_insert: {
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
SDValue Index = getValue(I.getOperand(2));
// The intrinsic's index type is i64, but the SDNode requires an index type
// suitable for the target. Convert the index as required.
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec,
Index));
return;
}
case Intrinsic::vector_extract: {
SDValue Vec = getValue(I.getOperand(0));
SDValue Index = getValue(I.getOperand(1));
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
// The intrinsic's index type is i64, but the SDNode requires an index type
// suitable for the target. Convert the index as required.
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
setValue(&I,
DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
return;
}
case Intrinsic::experimental_vector_reverse:
visitVectorReverse(I);
return;
case Intrinsic::experimental_vector_splice:
visitVectorSplice(I);
return;
}
}
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI) {
SDLoc sdl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
ValueVTs.push_back(MVT::Other); // Out chain
// We do not need to serialize constrained FP intrinsics against
// each other or against (nonvolatile) loads, so they can be
// chained like loads.
SDValue Chain = DAG.getRoot();
SmallVector<SDValue, 4> Opers;
Opers.push_back(Chain);
if (FPI.isUnaryOp()) {
Opers.push_back(getValue(FPI.getArgOperand(0)));
} else if (FPI.isTernaryOp()) {
Opers.push_back(getValue(FPI.getArgOperand(0)));
Opers.push_back(getValue(FPI.getArgOperand(1)));
Opers.push_back(getValue(FPI.getArgOperand(2)));
} else {
Opers.push_back(getValue(FPI.getArgOperand(0)));
Opers.push_back(getValue(FPI.getArgOperand(1)));
}
auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
assert(Result.getNode()->getNumValues() == 2);
// Push node to the appropriate list so that future instructions can be
// chained up correctly.
SDValue OutChain = Result.getValue(1);
switch (EB) {
case fp::ExceptionBehavior::ebIgnore:
// The only reason why ebIgnore nodes still need to be chained is that
// they might depend on the current rounding mode, and therefore must
// not be moved across instructions that may change that mode.
LLVM_FALLTHROUGH;
case fp::ExceptionBehavior::ebMayTrap:
// These must not be moved across calls or instructions that may change
// floating-point exception masks.
PendingConstrainedFP.push_back(OutChain);
break;
case fp::ExceptionBehavior::ebStrict:
// These must not be moved across calls or instructions that may change
// floating-point exception masks or read floating-point exception flags.
// In addition, they cannot be optimized out even if unused.
PendingConstrainedFPStrict.push_back(OutChain);
break;
}
};
SDVTList VTs = DAG.getVTList(ValueVTs);
fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
SDNodeFlags Flags;
if (EB == fp::ExceptionBehavior::ebIgnore)
Flags.setNoFPExcept(true);
if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI))
Flags.copyFMF(*FPOp);
unsigned Opcode;
switch (FPI.getIntrinsicID()) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case Intrinsic::INTRINSIC: \
Opcode = ISD::STRICT_##DAGN; \
break;
#include "llvm/IR/ConstrainedOps.def"
case Intrinsic::experimental_constrained_fmuladd: {
Opcode = ISD::STRICT_FMA;
// Break fmuladd into fmul and fadd.
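// If strict fusion rules forbid it, or an FMA would not be faster than a
// separate multiply and add, emit a STRICT_FMUL and feed the product into
// a STRICT_FADD, threading the FMUL's output chain into the FADD.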
if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
!TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
ValueVTs[0])) {
Opers.pop_back();
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
pushOutChain(Mul, EB);
Opcode = ISD::STRICT_FADD;
Opers.clear();
Opers.push_back(Mul.getValue(1));
Opers.push_back(Mul.getValue(0));
Opers.push_back(getValue(FPI.getArgOperand(2)));
}
break;
}
}
// A few strict DAG nodes carry additional operands that are not
// set up by the default code above.
switch (Opcode) {
default: break;
case ISD::STRICT_FP_ROUND:
Opers.push_back(
DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
break;
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
Opers.push_back(DAG.getCondCode(Condition));
break;
}
}
SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
pushOutChain(Result, EB);
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
}
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
Optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
case Intrinsic::VPID: \
ResOPC = ISD::VPSD; \
break;
#include "llvm/IR/VPIntrinsics.def"
}
if (!ResOPC)
llvm_unreachable(
"Inconsistency: no SDNode available for this VPIntrinsic!");
if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
*ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
if (VPIntrin.getFastMathFlags().allowReassoc())
return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
: ISD::VP_REDUCE_FMUL;
}
return *ResOPC;
}
void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
SmallVector<SDValue, 7> &OpValues,
bool IsGather) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
SDValue LD;
bool AddToChain = true;
if (!IsGather) {
// Do not serialize variable-length loads of constant memory with
// anything.
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
MMO, false /*IsExpanding */);
} else {
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
this, VPIntrin.getParent(),
VT.getScalarStoreSize());
if (!UniformBase) {
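// No single scalar base pointer was found: gather straight through the
// pointer vector, using a zero base, the pointers themselves as indices,
// and a unit scale.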
Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(PtrOperand);
IndexType = ISD::SIGNED_SCALED;
Scale =
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
}
LD = DAG.getGatherVP(
DAG.getVTList(VT, MVT::Other), VT, DL,
{DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
IndexType);
}
if (AddToChain)
PendingLoads.push_back(LD.getValue(1));
setValue(&VPIntrin, LD);
}
void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
SmallVector<SDValue, 7> &OpValues,
bool IsScatter) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
SDValue ST;
if (!IsScatter) {
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
SDValue Ptr = OpValues[1];
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, *Alignment, AAInfo);
ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
/* IsTruncating */ false, /*IsCompressing*/ false);
} else {
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, *Alignment, AAInfo);
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
this, VPIntrin.getParent(),
VT.getScalarStoreSize());
if (!UniformBase) {
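// As with the gather case above: no single scalar base pointer, so
// scatter through the pointer vector with a zero base and unit scale.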
Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(PtrOperand);
IndexType = ISD::SIGNED_SCALED;
Scale =
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
}
ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
{getMemoryRoot(), OpValues[0], Base, Index, Scale,
OpValues[2], OpValues[3]},
MMO, IndexType);
}
DAG.setRoot(ST);
setValue(&VPIntrin, ST);
}
void SelectionDAGBuilder::visitVPStridedLoad(
const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
OpValues[2], OpValues[3], MMO,
false /*IsExpanding*/);
if (AddToChain)
PendingLoads.push_back(LD.getValue(1));
setValue(&VPIntrin, LD);
}
void SelectionDAGBuilder::visitVPStridedStore(
const VPIntrinsic &VPIntrin, SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, *Alignment, AAInfo);
SDValue ST = DAG.getStridedStoreVP(
getMemoryRoot(), DL, OpValues[0], OpValues[1],
DAG.getUNDEF(OpValues[1].getValueType()), OpValues[2], OpValues[3],
OpValues[4], VT, MMO, ISD::UNINDEXED, /*IsTruncating*/ false,
/*IsCompressing*/ false);
DAG.setRoot(ST);
setValue(&VPIntrin, ST);
}
void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL = getCurSDLoc();
ISD::CondCode Condition;
CmpInst::Predicate CondCode = VPIntrin.getPredicate();
bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy();
if (IsFP) {
// FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan)
// flags, but calls that don't return floating-point types can't be
// FPMathOperators, like vp.fcmp. This affects constrained fcmp too.
Condition = getFCmpCondCode(CondCode);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = getICmpCondCode(CondCode);
}
SDValue Op1 = getValue(VPIntrin.getOperand(0));
SDValue Op2 = getValue(VPIntrin.getOperand(1));
// Operand #2 is the condition code (consumed above via getPredicate()).
SDValue MaskOp = getValue(VPIntrin.getOperand(3));
SDValue EVL = getValue(VPIntrin.getOperand(4));
MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
"Unexpected target EVL type");
EVL = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, EVL);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
VPIntrin.getType());
setValue(&VPIntrin,
DAG.getSetCCVP(DL, DestVT, Op1, Op2, Condition, MaskOp, EVL));
}
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
SDLoc DL = getCurSDLoc();
unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
auto IID = VPIntrin.getIntrinsicID();
if (const auto *CmpI = dyn_cast<VPCmpIntrinsic>(&VPIntrin))
return visitVPCmp(*CmpI);
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
SDVTList VTs = DAG.getVTList(ValueVTs);
auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IID);
MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
"Unexpected target EVL type");
// Request operands.
SmallVector<SDValue, 7> OpValues;
for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
auto Op = getValue(VPIntrin.getArgOperand(I));
if (I == EVLParamPos)
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
OpValues.push_back(Op);
}
switch (Opcode) {
default: {
SDNodeFlags SDFlags;
if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin))
SDFlags.copyFMF(*FPMO);
SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues, SDFlags);
setValue(&VPIntrin, Result);
break;
}
case ISD::VP_LOAD:
case ISD::VP_GATHER:
visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
Opcode == ISD::VP_GATHER);
break;
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues);
break;
case ISD::VP_STORE:
case ISD::VP_SCATTER:
visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
break;
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
visitVPStridedStore(VPIntrin, OpValues);
break;
}
}
SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
const BasicBlock *EHPadBB,
MCSymbol *&BeginLabel) {
MachineFunction &MF = DAG.getMachineFunction();
MachineModuleInfo &MMI = MF.getMMI();
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
BeginLabel = MMI.getContext().createTempSymbol();
// For SjLj, keep track of which landing pads go with which invokes
// so as to maintain the ordering of pads in the LSDA.
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
// Now that the call site is handled, stop tracking it.
MMI.setCurrentCallSite(0);
}
return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel);
}
SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB,
MCSymbol *BeginLabel) {
assert(BeginLabel && "BeginLabel should've been set");
MachineFunction &MF = DAG.getMachineFunction();
MachineModuleInfo &MMI = MF.getMMI();
// Insert a label at the end of the invoke call to mark the try range. This
// can be used to detect deletion of the invoke via the MachineModuleInfo.
MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel);
// Inform MachineModuleInfo of range.
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
// Some platforms (e.g. wasm) use funclet-style IR but do not actually use
// outlined funclets or their style of LSDA info.
if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
assert(II && "II should've been set");
WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
EHInfo->addIPToStateRange(II, BeginLabel, EndLabel);
} else if (!isScopedEHPersonality(Pers)) {
assert(EHPadBB);
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
return Chain;
}
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
MCSymbol *BeginLabel = nullptr;
if (EHPadBB) {
// Both PendingLoads and PendingExports must be flushed here;
// this call might not return.
(void)getRoot();
DAG.setRoot(lowerStartEH(getControlRoot(), EHPadBB, BeginLabel));
CLI.setChain(getRoot());
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
assert((CLI.IsTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
"Null value expected with tail call!");
if (!Result.second.getNode()) {
// As a special case, a null chain means that a tail call has been emitted
// and the DAG root is already updated.
HasTailCall = true;
// Since there's no actual continuation from this block, nothing can rely
// on us setting vregs for any of its values.
PendingExports.clear();
} else {
DAG.setRoot(Result.second);
}
if (EHPadBB) {
DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB,
BeginLabel));
}
return Result;
}
void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
bool isTailCall,
bool isMustTailCall,
const BasicBlock *EHPadBB) {
auto &DL = DAG.getDataLayout();
FunctionType *FTy = CB.getFunctionType();
Type *RetTy = CB.getType();
TargetLowering::ArgListTy Args;
Args.reserve(CB.arg_size());
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (isTailCall) {
// Avoid emitting tail calls in functions with the disable-tail-calls
// attribute.
auto *Caller = CB.getParent()->getParent();
if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
"true" && !isMustTailCall)
isTailCall = false;
// We can't tail call inside a function with a swifterror argument. Lowering
// does not support this yet; the value would have to be moved into the
// swifterror register before the call.
if (TLI.supportSwiftError() &&
Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
isTailCall = false;
}
for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
TargetLowering::ArgListEntry Entry;
const Value *V = *I;
// Skip empty types
if (V->getType()->isEmptyTy())
continue;
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
Entry.setAttributes(&CB, I - CB.arg_begin());
// Use swifterror virtual register as input to the call.
if (Entry.IsSwiftError && TLI.supportSwiftError()) {
SwiftErrorVal = V;
// Find the virtual register for the actual swifterror argument and use
// it, rather than the Value itself, as the call input.
Entry.Node =
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
// If we have an explicit sret argument that is an Instruction (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
if (Entry.IsSRet && isa<Instruction>(V))
isTailCall = false;
}
// If call site has a cfguardtarget operand bundle, create and add an
// additional ArgListEntry.
if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
TargetLowering::ArgListEntry Entry;
Value *V = Bundle->Inputs[0];
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode;
Entry.Ty = V->getType();
Entry.IsCFGuardTarget = true;
Args.push_back(Entry);
}
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.
if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
isTailCall = false;
// Disable tail calls if there is a swifterror argument. Targets have not
// been updated to support tail calls.
if (TLI.supportSwiftError() && SwiftErrorVal)
isTailCall = false;
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
.setCallee(RetTy, FTy, Callee, std::move(Args), CB)
.setTailCall(isTailCall)
.setConvergent(CB.isConvergent())
.setIsPreallocated(
CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
setValue(&CB, Result.first);
}
// The last element of CLI.InVals has the SDValue for swifterror return.
// Here we copy it to a virtual register and update SwiftErrorMap for
// book-keeping.
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
Register VReg =
SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
DAG.setRoot(CopyNode);
}
}
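/// Load a value of type \p LoadVT from \p PtrVal for use in a memcmp
/// expansion, constant-folding the load when the source is known-constant
/// memory such as a string literal.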
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
// Cast pointer to the type we really want to load.
Type *LoadTy =
Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
if (LoadVT.isVector())
LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements());
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
if (const Constant *LoadCst =
ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
LoadTy, Builder.DAG.getDataLayout()))
return Builder.getValue(LoadCst);
}
// Otherwise, we have to emit the load. If the pointer is to unfoldable but
// still constant memory, the input chain can be the entry node.
SDValue Root;
bool ConstantMemory = false;
// Do not serialize (non-volatile) loads of constant memory with anything.
if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
Root = Builder.DAG.getEntryNode();
ConstantMemory = true;
} else {
// Do not serialize non-volatile loads against each other.
Root = Builder.DAG.getRoot();
}
SDValue Ptr = Builder.getValue(PtrVal);
SDValue LoadVal =
Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr,
MachinePointerInfo(PtrVal), Align(1));
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
return LoadVal;
}
/// Record the value for an instruction that produces an integer result,
/// converting the type where necessary.
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
if (IsSigned)
Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
else
Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
setValue(&I, Value);
}
/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
const Value *Size = I.getArgOperand(2);
const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(getValue(Size));
if (CSize && CSize->getZExtValue() == 0) {
EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
return true;
}
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
return true;
}
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
return false;
// If the target has a fast compare for the given size, it will return a
// preferred load type for that size. Require that the load VT is legal and
// that the target supports unaligned loads of that type. Otherwise, return
// INVALID.
auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT LVT = TLI.hasFastEqualityCompare(NumBits);
if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
// TODO: Check alignment of src and dest ptrs.
unsigned DstAS = LHS->getType()->getPointerAddressSpace();
unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
if (!TLI.isTypeLegal(LVT) ||
!TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
!TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
}
return LVT;
};
// This turns into unaligned loads. We only do this if the target natively
// supports the MVT we'll be loading or if it is small enough (<= 4) that
// we'll only produce a small number of byte loads.
MVT LoadVT;
unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
switch (NumBitsToCompare) {
default:
return false;
case 16:
LoadVT = MVT::i16;
break;
case 32:
LoadVT = MVT::i32;
break;
case 64:
case 128:
case 256:
LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
break;
}
if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
// Bitcast to a wide integer type if the loads are vectors.
if (LoadVT.isVector()) {
EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
LoadL = DAG.getBitcast(CmpVT, LoadL);
LoadR = DAG.getBitcast(CmpVT, LoadR);
}
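// The i1 SETNE result is widened (zero-extended) to the call's return type
// by processIntegerCallValue; discarding memcmp's ordering information is
// safe because this path is only taken for zero-equality comparisons,
// which was checked above.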
SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
processIntegerCallValue(I, Cmp, false);
return true;
}
/// See if we can lower a memchr call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
const Value *Src = I.getArgOperand(0);
const Value *Char = I.getArgOperand(1);
const Value *Length = I.getArgOperand(2);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Src), getValue(Char), getValue(Length),
MachinePointerInfo(Src));
if (Res.first.getNode()) {
setValue(&I, Res.first);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a mempcpy call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne();
Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne();
// DAG::getMemcpy needs Alignment to be defined.
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = false;
SDLoc sdl = getCurSDLoc();
// In the mempcpy context we need to pass in a false value for isTailCall
// because the return pointer needs to be adjusted by the size of
// the copied memory.
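// That is, mempcpy(dst, src, n) behaves like memcpy but returns dst + n,
// which is materialized below once the copy has been emitted.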
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)),
I.getAAMetadata());
assert(MC.getNode() != nullptr &&
"** memcpy should not be lowered as TailCall in mempcpy context **");
DAG.setRoot(MC);
// Check if Size needs to be truncated or extended.
Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
// Adjust return pointer to point just past the last dst byte.
SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
Dst, Size);
setValue(&I, DstPlusSize);
return true;
}
/// See if we can lower a strcpy call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0),
MachinePointerInfo(Arg1), isStpcpy);
if (Res.first.getNode()) {
setValue(&I, Res.first);
DAG.setRoot(Res.second);
return true;
}
return false;
}
/// See if we can lower a strcmp call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0),
MachinePointerInfo(Arg1));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a strlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), MachinePointerInfo(Arg0));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, false);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a strnlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, false);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a unary floating-point operation into an SDNode with
/// the specified Opcode. If so, return true and lower it, otherwise return
/// false and it will be lowered like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
unsigned Opcode) {
// We already checked this call's prototype; verify it doesn't modify errno.
if (!I.onlyReadsMemory())
return false;
SDNodeFlags Flags;
Flags.copyFMF(cast<FPMathOperator>(I));
SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I,
DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags));
return true;
}
/// See if we can lower a binary floating-point operation into an SDNode with
/// the specified Opcode. If so, return true and lower it. Otherwise return
/// false, and it will be lowered like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
unsigned Opcode) {
// We already checked this call's prototype; verify it doesn't modify errno.
if (!I.onlyReadsMemory())
return false;
SDNodeFlags Flags;
Flags.copyFMF(cast<FPMathOperator>(I));
SDValue Tmp0 = getValue(I.getArgOperand(0));
SDValue Tmp1 = getValue(I.getArgOperand(1));
EVT VT = Tmp0.getValueType();
setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags));
return true;
}
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
if (I.isInlineAsm()) {
visitInlineAsm(I);
return;
}
if (Function *F = I.getCalledFunction()) {
diagnoseDontCall(I);
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID();
if (!IID)
if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
IID = II->getIntrinsicID(F);
if (IID) {
visitIntrinsicCall(I, IID);
return;
}
}
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if marked as nobuiltin for
// some reason or the call site requires strict floating point semantics.
LibFunc Func;
if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
F->hasName() && LibInfo->getLibFunc(*F, Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
case LibFunc_bcmp:
if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_copysign:
case LibFunc_copysignf:
case LibFunc_copysignl:
// We already checked this call's prototype; verify it doesn't modify
// errno.
if (I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
LHS.getValueType(), LHS, RHS));
return;
}
break;
case LibFunc_fabs:
case LibFunc_fabsf:
case LibFunc_fabsl:
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
case LibFunc_fmin:
case LibFunc_fminf:
case LibFunc_fminl:
if (visitBinaryFloatCall(I, ISD::FMINNUM))
return;
break;
case LibFunc_fmax:
case LibFunc_fmaxf:
case LibFunc_fmaxl:
if (visitBinaryFloatCall(I, ISD::FMAXNUM))
return;
break;
case LibFunc_sin:
case LibFunc_sinf:
case LibFunc_sinl:
if (visitUnaryFloatCall(I, ISD::FSIN))
return;
break;
case LibFunc_cos:
case LibFunc_cosf:
case LibFunc_cosl:
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
case LibFunc_sqrt:
case LibFunc_sqrtf:
case LibFunc_sqrtl:
case LibFunc_sqrt_finite:
case LibFunc_sqrtf_finite:
case LibFunc_sqrtl_finite:
if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
break;
case LibFunc_floor:
case LibFunc_floorf:
case LibFunc_floorl:
if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
break;
case LibFunc_nearbyint:
case LibFunc_nearbyintf:
case LibFunc_nearbyintl:
if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
break;
case LibFunc_ceil:
case LibFunc_ceilf:
case LibFunc_ceill:
if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
break;
case LibFunc_rint:
case LibFunc_rintf:
case LibFunc_rintl:
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
case LibFunc_round:
case LibFunc_roundf:
case LibFunc_roundl:
if (visitUnaryFloatCall(I, ISD::FROUND))
return;
break;
case LibFunc_trunc:
case LibFunc_truncf:
case LibFunc_truncl:
if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
break;
case LibFunc_log2:
case LibFunc_log2f:
case LibFunc_log2l:
if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
break;
case LibFunc_exp2:
case LibFunc_exp2f:
case LibFunc_exp2l:
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
case LibFunc_memcmp:
if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_mempcpy:
if (visitMemPCpyCall(I))
return;
break;
case LibFunc_memchr:
if (visitMemChrCall(I))
return;
break;
case LibFunc_strcpy:
if (visitStrCpyCall(I, false))
return;
break;
case LibFunc_stpcpy:
if (visitStrCpyCall(I, true))
return;
break;
case LibFunc_strcmp:
if (visitStrCmpCall(I))
return;
break;
case LibFunc_strlen:
if (visitStrLenCall(I))
return;
break;
case LibFunc_strnlen:
if (visitStrNLenCall(I))
return;
break;
}
}
}
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
// CFGuardTarget bundles are lowered in LowerCallTo.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
else
// Check if we can potentially perform a tail call. More detailed checking
// is done within LowerCallTo, after more information about the call is
// known.
LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
namespace {
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
public:
/// CallOperand - If this is the result output operand or a clobber
/// this is null, otherwise it is the incoming operand to the CallInst.
/// This gets modified as the asm is processed.
SDValue CallOperand;
/// AssignedRegs - If this is a register or register class operand, this
/// contains the set of registers corresponding to the operand.
RegsForValue AssignedRegs;
explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
: TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
}
/// Whether or not this operand accesses memory
bool hasMemory(const TargetLowering &TLI) const {
// Indirect operand accesses access memory.
if (isIndirect)
return true;
for (const auto &Code : Codes)
if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
return true;
return false;
}
};
} // end anonymous namespace
/// Make sure that the output operand \p OpInfo and its corresponding input
/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
/// out).
static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &MatchingOpInfo,
SelectionDAG &DAG) {
if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
return;
const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
const auto &TLI = DAG.getTargetLoweringInfo();
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
std::pair<unsigned, const TargetRegisterClass *> InputRC =
TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
MatchingOpInfo.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
MatchingOpInfo.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
// FIXME: error out in a more elegant fashion
report_fatal_error("Unsupported asm: input constraint"
" with a matching output constraint of"
" incompatible type!");
}
MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
}
/// Get a direct memory input to behave well as an indirect operand.
/// This may introduce stores, hence the need for a \p Chain.
/// \return The (possibly updated) chain.
static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
SDISelAsmOperandInfo &OpInfo,
SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we don't have an indirect input, put it in the constpool if we can,
// otherwise spill it to a stack slot.
// TODO: This isn't quite right. We need to handle these according to
// the addressing mode that the constraint wants. Also, this may take
// an additional register for the computation and we don't want that
// either.
// If the operand is a float, integer, or vector constant, spill to a
// constant pool entry to get its address.
const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(
cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
return Chain;
}
// Otherwise, create a stack slot and emit a store to it before the asm.
Type *Ty = OpVal->getType();
auto &DL = DAG.getDataLayout();
uint64_t TySize = DL.getTypeAllocSize(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo().CreateStackObject(
TySize, DL.getPrefTypeAlign(Ty), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI),
TLI.getMemValueType(DL, Ty));
OpInfo.CallOperand = StackSlot;
return Chain;
}
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
/// register allocator to handle the assignment process. However, if the asm
/// uses features that we can't model on machineinstrs, we have SDISel do the
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
static llvm::Optional<unsigned>
getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// No work to do for memory/address operands.
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
OpInfo.ConstraintType == TargetLowering::C_Address)
return None;
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
unsigned AssignedReg;
const TargetRegisterClass *RC;
std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
// RC is unset only on failure. Return immediately.
if (!RC)
return None;
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
// remember that AX is actually i16 to get the right extension.
const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
// If this is an FP operand in an integer register (or vice versa), or more
// generally if the operand value disagrees with the register class we plan
// to stick it in, fix the operand type.
//
// If this is an input value, the bitcast to the new type is done now.
// Bitcast for output value is done at the end of visitInlineAsm().
if ((OpInfo.Type == InlineAsm::isOutput ||
OpInfo.Type == InlineAsm::isInput) &&
!TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types). Note: output bitcast is done at the end of
// visitInlineAsm().
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
// Exclude indirect inputs while they are unsupported because the code
// to perform the load is missing and thus OpInfo.CallOperand still
// refers to the input address rather than the pointed-to value.
if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
// If the operand is an FP value and we want it in integer registers,
// use the corresponding integer type. This turns an f64 value into
// i64, which can be passed with two i32 values on a 32-bit machine.
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
if (OpInfo.Type == InlineAsm::isInput)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
OpInfo.ConstraintVT = VT;
}
}
}
// No need to allocate a matching input constraint since the constraint it's
// matching to has already been allocated.
if (OpInfo.isMatchingInputConstraint())
return None;
EVT ValueVT = OpInfo.ConstraintVT;
if (OpInfo.ConstraintVT == MVT::Other)
ValueVT = RegVT;
// Initialize NumRegs.
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other)
NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT);
// If this is a constraint for a specific physical register, like {r17},
// assign it now: initialize the iterator to point at that register. If the
// operand will instead live in virtual registers, make sure we can create
// enough of them.
TargetRegisterClass::iterator I = RC->begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
// Do not check for single registers.
if (AssignedReg) {
I = std::find(I, RC->end(), AssignedReg);
if (I == RC->end()) {
// RC does not contain the selected register, which indicates a
// mismatch between the register and the required type/bitwidth.
return {AssignedReg};
}
}
for (; NumRegs; --NumRegs, ++I) {
assert(I != RC->end() && "Ran out of registers to allocate!");
Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
Regs.push_back(R);
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
return None;
}
static unsigned
findMatchingInlineAsmOperand(unsigned OperandNo,
const std::vector<SDValue> &AsmNodeOperands) {
// Scan until we find the definition we already emitted of this operand.
unsigned CurOp = InlineAsm::Op_FirstOperand;
for (; OperandNo; --OperandNo) {
// Advance to the next operand.
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
assert((InlineAsm::isRegDefKind(OpFlag) ||
InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
InlineAsm::isMemKind(OpFlag)) &&
"Skipped past definitions?");
CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
}
return CurOp;
}
namespace {
class ExtraFlags {
unsigned Flags = 0;
public:
explicit ExtraFlags(const CallBase &Call) {
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
if (IA->hasSideEffects())
Flags |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
Flags |= InlineAsm::Extra_IsAlignStack;
if (Call.isConvergent())
Flags |= InlineAsm::Extra_IsConvergent;
Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
}
void update(const TargetLowering::AsmOperandInfo &OpInfo) {
// Ideally, we would only check against memory constraints. However, the
// meaning of an Other constraint can be target-specific and we can't easily
// reason about it. Therefore, be conservative and set MayLoad/MayStore
// for Other constraints as well.
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
OpInfo.ConstraintType == TargetLowering::C_Other) {
if (OpInfo.Type == InlineAsm::isInput)
Flags |= InlineAsm::Extra_MayLoad;
else if (OpInfo.Type == InlineAsm::isOutput)
Flags |= InlineAsm::Extra_MayStore;
else if (OpInfo.Type == InlineAsm::isClobber)
Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
}
}
unsigned get() const { return Flags; }
};
} // end anonymous namespace
/// visitInlineAsm - Handle a call to an InlineAsm object.
void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
const BasicBlock *EHPadBB) {
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
/// ConstraintOperands - Information about all of the constraints.
SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call);
// First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
// AsmDialect, MayLoad, MayStore).
bool HasSideEffect = IA->hasSideEffects();
ExtraFlags ExtraInfo(Call);
for (auto &T : TargetConstraints) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
if (OpInfo.CallOperandVal)
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
if (!HasSideEffect)
HasSideEffect = OpInfo.hasMemory(TLI);
// Determine if this InlineAsm MayLoad or MayStore based on the constraints.
// FIXME: Could we compute this on OpInfo rather than T?
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(T, SDValue());
if (T.ConstraintType == TargetLowering::C_Immediate &&
OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
// We've delayed emitting a diagnostic like the "n" constraint because
// inlining could cause an integer showing up.
return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) +
"' expects an integer constant "
"expression");
ExtraInfo.update(T);
}
// We won't need to flush pending loads if this asm doesn't touch
// memory and is nonvolatile.
SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow();
if (EmitEHLabels) {
assert(EHPadBB && "InvokeInst must have an EHPadBB");
}
bool IsCallBr = isa<CallBrInst>(Call);
if (IsCallBr || EmitEHLabels) {
// If this is a callbr or invoke we need to flush pending exports since
// inlineasm_br and invoke are terminators.
// We need to do this before nodes are glued to the inlineasm_br node.
Chain = getControlRoot();
}
MCSymbol *BeginLabel = nullptr;
if (EmitEHLabels) {
Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
}
// Second pass over the constraints: compute which constraint option to use.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
// error.
if (OpInfo.hasMatchingInput()) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
patchMatchingInput(OpInfo, Input, DAG);
}
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
if ((OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.Type == InlineAsm::isClobber) ||
OpInfo.ConstraintType == TargetLowering::C_Address)
continue;
// If this is a memory input, and if the operand is not indirect, do what we
// need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
!OpInfo.isIndirect) {
assert((OpInfo.isMultipleAlternative ||
(OpInfo.Type == InlineAsm::isInput)) &&
"Can only indirectify direct input operands!");
// Memory operands really want the address of the value.
Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
// There is no longer a Value* corresponding to this operand.
OpInfo.CallOperandVal = nullptr;
// It is now an indirect operand.
OpInfo.isIndirect = true;
}
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
std::vector<SDValue> AsmNodeOperands;
AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout())));
// If we have a !srcloc metadata node associated with it, we want to attach
// this to the ultimately generated inline asm machineinstr. To do this, we
// pass in the third operand as this (potentially null) inline asm MDNode.
const MDNode *SrcLoc = Call.getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
// Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
// bits as operand 3.
AsmNodeOperands.push_back(DAG.getTargetConstant(
ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
// Third pass: Loop over operands to prepare DAG-level operands. As part of
// this, assign virtual and physical registers for inputs and outputs.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// Assign Registers.
SDISelAsmOperandInfo &RefOpInfo =
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
: OpInfo;
const auto RegError =
getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
if (RegError) {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const char *RegName = TRI.getName(RegError.value());
emitInlineAsmError(Call, "register '" + Twine(RegName) +
"' allocated for constraint '" +
Twine(OpInfo.ConstraintCode) +
"' does not match required type");
return;
}
auto DetectWriteToReservedRegister = [&]() {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
if (Register::isPhysicalRegister(Reg) &&
TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
const char *RegName = TRI.getName(Reg);
emitInlineAsmError(Call, "write to reserved register '" +
Twine(RegName) + "'");
return true;
}
}
return false;
};
assert((OpInfo.ConstraintType != TargetLowering::C_Address ||
(OpInfo.Type == InlineAsm::isInput &&
!OpInfo.isMatchingInputConstraint())) &&
"Only address as input operand is allowed.");
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this output.
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
} else {
// Otherwise, this outputs to a register (directly for C_Register /
// C_RegisterClass, and a target-defined fashion for
// C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
Call, "couldn't allocate output register for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
if (DetectWriteToReservedRegister())
return;
// Add information to the INLINEASM node to know that this register is
// set.
OpInfo.AssignedRegs.AddInlineAsmOperands(
OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
: InlineAsm::Kind_RegDef,
false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
}
break;
case InlineAsm::isInput:
case InlineAsm::isLabel: {
SDValue InOperandVal = OpInfo.CallOperand;
if (OpInfo.isMatchingInputConstraint()) {
// If this is required to match an output register we have already set,
// just use its register.
auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
AsmNodeOperands);
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
if (InlineAsm::isRegDefKind(OpFlag) ||
InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
if (OpInfo.isIndirect) {
// This happens on gcc/testsuite/gcc.dg/pr8788-1.c
emitInlineAsmError(Call, "inline asm not supported yet: "
"don't know how to handle tied "
"indirect register inputs");
return;
}
SmallVector<unsigned, 4> Regs;
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
Register TiedReg = R->getReg();
MVT RegVT = R->getSimpleValueType(0);
const TargetRegisterClass *RC =
TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
: RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
: TRI.getMinimalPhysRegClass(TiedReg);
unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(MRI.createVirtualRegister(RC));
RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to copy the input value into the
// newly-created virtual registers tied to the matched output.
MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call);
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
break;
}
assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
"Unexpected number of operands");
// Add information to the INLINEASM node to know about this input.
// See InlineAsm.h isUseOperandTiedToDef.
OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
OpInfo.getMatchedOperand());
AsmNodeOperands.push_back(DAG.getTargetConstant(
OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
break;
}
// Treat indirect 'X' constraint as memory.
if (OpInfo.ConstraintType == TargetLowering::C_Other &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
if (isa<ConstantSDNode>(InOperandVal)) {
emitInlineAsmError(Call, "value out of range for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
emitInlineAsmError(Call,
"invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType =
InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(
ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
llvm::append_range(AsmNodeOperands, Ops);
break;
}
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
OpInfo.ConstraintType == TargetLowering::C_Address) {
assert((OpInfo.isIndirect ||
OpInfo.ConstraintType != TargetLowering::C_Memory) &&
"Operand must be indirect to be a mem!");
assert(InOperandVal.getValueType() ==
TLI.getPointerTy(DAG.getDataLayout()) &&
"Memory operands expect pointer values");
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(InOperandVal);
break;
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
"Unknown constraint type!");
// TODO: Support this.
if (OpInfo.isIndirect) {
emitInlineAsmError(
Call, "Don't know how to handle indirect register inputs yet "
"for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(Call,
"couldn't allocate input reg for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
if (DetectWriteToReservedRegister())
return;
SDLoc dl = getCurSDLoc();
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
&Call);
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
dl, DAG, AsmNodeOperands);
break;
}
case InlineAsm::isClobber:
// Add the clobbered value to the operand list, so that the register
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
false, 0, getCurSDLoc(), DAG,
AsmNodeOperands);
break;
}
}
// Finish up input operands. Set the input chain and add the flag last.
AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
Flag = Chain.getValue(1);
// Do additional work to generate outputs.
SmallVector<EVT, 1> ResultVTs;
SmallVector<SDValue, 1> ResultValues;
SmallVector<SDValue, 8> OutChains;
llvm::Type *CallResultType = Call.getType();
ArrayRef<Type *> ResultTypes;
if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
ResultTypes = StructResult->elements();
else if (!CallResultType->isVoidTy())
ResultTypes = makeArrayRef(CallResultType);
auto CurResultType = ResultTypes.begin();
auto handleRegAssign = [&](SDValue V) {
assert(CurResultType != ResultTypes.end() && "Unexpected value");
assert((*CurResultType)->isSized() && "Unexpected unsized type");
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
++CurResultType;
// If the type of the inline asm call site return value is different from,
// but has the same size as, the type of the asm output, bitcast it. One
// example of this is vectors with a different width / number of elements.
// This can happen for register classes that can contain multiple different
// value types: the preg or vreg allocated may not have the same VT as was
// expected.
//
// This can also happen for a return value that disagrees with the register
// class it is put in, e.g. a double in a general-purpose register on a
// 32-bit machine.
if (ResultVT != V.getValueType() &&
ResultVT.getSizeInBits() == V.getValueSizeInBits())
V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
V.getValueType().isInteger()) {
// If a result value was tied to an input value, the computed result
// may have a wider width than the expected result. Extract the
// relevant portion.
V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
}
assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
ResultVTs.push_back(ResultVT);
ResultValues.push_back(V);
};
// Deal with output operands.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
if (OpInfo.Type == InlineAsm::isOutput) {
SDValue Val;
// Skip trivial output operands.
if (OpInfo.AssignedRegs.Regs.empty())
continue;
switch (OpInfo.ConstraintType) {
case TargetLowering::C_Register:
case TargetLowering::C_RegisterClass:
Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
Chain, &Flag, &Call);
break;
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
OpInfo, DAG);
break;
case TargetLowering::C_Memory:
break; // Already handled.
case TargetLowering::C_Address:
break; // Silence warning.
case TargetLowering::C_Unknown:
assert(false && "Unexpected unknown constraint");
}
// Indirect outputs manifest as stores. Record the output chains.
if (OpInfo.isIndirect) {
const Value *Ptr = OpInfo.CallOperandVal;
assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
MachinePointerInfo(Ptr));
OutChains.push_back(Store);
} else {
// Generate CopyFromRegs to the associated registers.
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (Val.getOpcode() == ISD::MERGE_VALUES) {
for (const SDValue &V : Val->op_values())
handleRegAssign(V);
} else
handleRegAssign(Val);
}
}
}
// Set results.
if (!ResultValues.empty()) {
assert(CurResultType == ResultTypes.end() &&
"Mismatch in number of ResultTypes");
assert(ResultValues.size() == ResultTypes.size() &&
"Mismatch in number of output operands in asm result");
SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ResultVTs), ResultValues);
setValue(&Call, V);
}
// Collect store chains.
if (!OutChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
if (EmitEHLabels) {
Chain = lowerEndEH(Chain, cast<InvokeInst>(&Call), EHPadBB, BeginLabel);
}
// Only update the root if the inline assembly has a memory effect.
if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr ||
EmitEHLabels)
DAG.setRoot(Chain);
}
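// Illustrative example of the inline asm constraint lowering above (made-up
// IR, not part of this change):
//   %sum = call i32 asm "add $0, $1, $2", "=r,r,r"(i32 %a, i32 %b)
// "=r" becomes a Kind_RegDef output operand and each "r" a Kind_RegUse
// input; a matching constraint such as "=r,0" would instead tie the input
// to the output's registers with a Kind_RegUse marked as matching.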
void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
const Twine &Message) {
LLVMContext &Ctx = *DAG.getContext();
Ctx.emitError(&Call, Message);
// Make sure we leave the DAG in a valid state
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs);
if (ValueVTs.empty())
return;
SmallVector<SDValue, 1> Ops;
for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
}
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
SDValue V = DAG.getVAArg(
TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
DL.getABITypeAlign(I.getType()).value());
DAG.setRoot(V.getValue(1));
if (I.getType()->isPointerTy())
V = DAG.getPtrExtOrTrunc(
V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
setValue(&I, V);
}
void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
DAG.getSrcValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(1))));
}
SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
const Instruction &I,
SDValue Op) {
const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
if (!Range)
return Op;
ConstantRange CR = getConstantRangeFromMetadata(*Range);
if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
return Op;
APInt Lo = CR.getUnsignedMin();
if (!Lo.isMinValue())
return Op;
APInt Hi = CR.getUnsignedMax();
unsigned Bits = std::max(Hi.getActiveBits(),
static_cast<unsigned>(IntegerType::MIN_INT_BITS));
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
SDLoc SL = getCurSDLoc();
SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
DAG.getValueType(SmallVT));
unsigned NumVals = Op.getNode()->getNumValues();
if (NumVals == 1)
return ZExt;
SmallVector<SDValue, 4> Ops;
Ops.push_back(ZExt);
for (unsigned I = 1; I != NumVals; ++I)
Ops.push_back(Op.getValue(I));
return DAG.getMergeValues(Ops, SL);
}
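// Illustrative effect of lowerRangeToAssertZExt above (made-up IR): given
//   %v = call i32 @f(), !range !0    ; !0 = !{i32 0, i32 256}
// the range [0, 256) starts at zero and its unsigned max (255) fits in 8
// bits, so %v is wrapped in (AssertZext i32 %v, i8), letting later combines
// remove redundant zero-extensions.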
/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
/// the call being lowered.
///
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
// Populate the argument list.
// Attributes for args start at offset 1, after the return attribute.
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
ArgI != ArgE; ++ArgI) {
const Value *V = Call->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
Entry.setAttributes(Call, ArgI);
Args.push_back(Entry);
}
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
.setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
.setDiscardResult(Call->use_empty())
.setIsPatchPoint(IsPatchPoint)
.setIsPreallocated(
Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
}
/// Add a stack map intrinsic call's live variable operands to a stackmap
/// or patchpoint target node's operand list.
///
/// Constants are converted to TargetConstants purely as an optimization to
/// avoid constant materialization and register allocation.
///
/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
/// generate address computation nodes, and so FinalizeISel can convert the
/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
/// address materialization and register allocation, but may also be required
/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
/// alloca in the entry block, then the runtime may assume that the alloca's
/// StackMap location can be read immediately after compilation and that the
/// location is valid at any point during execution (this is similar to the
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
SelectionDAG &DAG = Builder.DAG;
for (unsigned I = StartIdx; I < Call.arg_size(); I++) {
SDValue Op = Builder.getValue(Call.getArgOperand(I));
// Things on the stack are pointer-typed, meaning that they are already
// legal and can be emitted directly to target nodes.
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
Ops.push_back(DAG.getTargetFrameIndex(FI->getIndex(), Op.getValueType()));
} else {
// Otherwise emit a target independent node to be legalised.
Ops.push_back(Builder.getValue(Call.getArgOperand(I)));
}
}
}
/// Lower llvm.experimental.stackmap.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
// [live variables...])
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
SDValue Chain, InFlag, Callee, NullPtr;
SmallVector<SDValue, 32> Ops;
SDLoc DL = getCurSDLoc();
Callee = getValue(CI.getCalledOperand());
NullPtr = DAG.getIntPtrConstant(0, DL, true);
// The stackmap intrinsic only records the live variables (the arguments
// passed to it) and emits NOPs (if requested). Unlike the patchpoint
// intrinsic, this won't be lowered to a function call. This means we don't
// have to worry about calling conventions and target specific lowering code.
// Instead we perform the call lowering right here.
//
// chain, flag = CALLSEQ_START(chain, 0, 0)
// chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
// chain, flag = CALLSEQ_END(chain, 0, 0, flag)
//
Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
InFlag = Chain.getValue(1);
// Add the STACKMAP operands, starting with DAG house-keeping.
Ops.push_back(Chain);
Ops.push_back(InFlag);
// Add the <id>, <numShadowBytes> operands.
//
// These do not require legalisation, and can be emitted directly to target
// constant nodes.
SDValue ID = getValue(CI.getArgOperand(0));
assert(ID.getValueType() == MVT::i64);
SDValue IDConst = DAG.getTargetConstant(
cast<ConstantSDNode>(ID)->getZExtValue(), DL, ID.getValueType());
Ops.push_back(IDConst);
SDValue Shad = getValue(CI.getArgOperand(1));
assert(Shad.getValueType() == MVT::i32);
SDValue ShadConst = DAG.getTargetConstant(
cast<ConstantSDNode>(Shad)->getZExtValue(), DL, Shad.getValueType());
Ops.push_back(ShadConst);
// Add the live variables.
addStackMapLiveVars(CI, 2, DL, Ops, *this);
// Create the STACKMAP node.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
// Stackmaps don't generate values, so nothing goes into the NodeMap.
// Set the root to the target-lowered call chain.
DAG.setRoot(Chain);
// Inform the Frame Information that we have a stackmap in this function.
FuncInfo.MF->getFrameInfo().setHasStackMap();
}
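// Illustrative call site for visitStackmap above (operand values are made
// up):
//   call void (i64, i32, ...) @llvm.experimental.stackmap(i64 42, i32 8,
//                                                         i64 %obj)
// <id> = 42 and <numShadowBytes> = 8 become TargetConstants, and %obj is
// recorded as a live variable via addStackMapLiveVars.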
/// Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
// i32 <numArgs>,
// [Args...],
// [live variables...])
CallingConv::ID CC = CB.getCallingConv();
bool IsAnyRegCC = CC == CallingConv::AnyReg;
bool HasDef = !CB.getType()->isVoidTy();
SDLoc dl = getCurSDLoc();
SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos));
// Handle immediate and symbolic callees.
if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
/*isTarget=*/true);
else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
SDLoc(SymbolicCallee),
SymbolicCallee->getValueType(0));
// Get the real number of arguments participating in the call <numArgs>
SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos));
unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// Intrinsics include all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType();
TargetLowering::CallLoweringInfo CLI(DAG);
populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
ReturnTy, true);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
CallEnd = CallEnd->getOperand(0).getNode();
/// Get a call instruction from the call sequence chain.
/// Tail calls are not allowed.
assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
"Expected a callseq node.");
SDNode *Call = CallEnd->getOperand(0).getNode();
bool HasGlue = Call->getGluedNode();
// Replace the target specific call node with the patchable intrinsic.
SmallVector<SDValue, 8> Ops;
// Push the chain.
Ops.push_back(*(Call->op_begin()));
// Optionally, push the glue (if any).
if (HasGlue)
Ops.push_back(*(Call->op_end() - 1));
// Push the register mask info.
if (HasGlue)
Ops.push_back(*(Call->op_end() - 2));
else
Ops.push_back(*(Call->op_end() - 1));
// Add the <id> and <numBytes> constants.
SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
MVT::i32));
// Add the callee.
Ops.push_back(Callee);
// Adjust <numArgs> to account for any arguments that have been passed on the
// stack instead.
// Call Node: Chain, Target, {Args}, RegMask, [Glue]
unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
// Add the calling convention
Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
// Add the arguments we omitted previously. The register allocator should
// place these in any free register.
if (IsAnyRegCC)
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
Ops.push_back(getValue(CB.getArgOperand(i)));
// Push the arguments from the call instruction.
SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
Ops.append(Call->op_begin() + 2, e);
// Push live variables for the stack map.
addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this);
SDVTList NodeTys;
if (IsAnyRegCC && HasDef) {
// Create the return types based on the intrinsic definition
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 3> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs);
assert(ValueVTs.size() == 1 && "Expected only one return value type.");
// There is always a chain and a glue type at the end
ValueVTs.push_back(MVT::Other);
ValueVTs.push_back(MVT::Glue);
NodeTys = DAG.getVTList(ValueVTs);
} else
NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// Replace the target specific call node with a PATCHPOINT node.
SDValue PPV = DAG.getNode(ISD::PATCHPOINT, dl, NodeTys, Ops);
// Update the NodeMap.
if (HasDef) {
if (IsAnyRegCC)
setValue(&CB, SDValue(PPV.getNode(), 0));
else
setValue(&CB, Result.first);
}
// Fixup the consumers of the intrinsic. The chain and glue may be used in the
// call sequence. Furthermore the location of the chain and glue can change
// when the AnyReg calling convention is used and the intrinsic returns a
// value.
if (IsAnyRegCC && HasDef) {
SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
SDValue To[] = {PPV.getValue(1), PPV.getValue(2)};
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
} else
DAG.ReplaceAllUsesWith(Call, PPV.getNode());
DAG.DeleteNode(Call);
// Inform the Frame Information that we have a patchpoint in this function.
FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
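// Illustrative call site for visitPatchpoint above (operand values are made
// up):
//   %r = call i64 (i64, i32, i8*, i32, ...)
//            @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %target,
//                                              i32 2, i64 %a, i64 %b)
// This reserves 15 bytes of encoding space at ID 7 and calls %target with
// two arguments; any further operands would be recorded as stack map live
// variables.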
void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2;
if (I.arg_size() > 1)
Op2 = getValue(I.getArgOperand(1));
SDLoc dl = getCurSDLoc();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Res;
SDNodeFlags SDFlags;
if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
SDFlags.copyFMF(*FPMO);
switch (Intrinsic) {
case Intrinsic::vector_reduce_fadd:
if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags),
SDFlags);
else
Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags);
break;
case Intrinsic::vector_reduce_fmul:
if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags),
SDFlags);
else
Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags);
break;
case Intrinsic::vector_reduce_add:
Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_mul:
Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_and:
Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_or:
Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_xor:
Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_smax:
Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_smin:
Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_umax:
Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_umin:
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_fmax:
Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
break;
case Intrinsic::vector_reduce_fmin:
Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
}
setValue(&I, Res);
}
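// Illustrative mapping for the fadd case in visitVectorReduce above
// (made-up IR): with the 'reassoc' fast-math flag,
//   %r = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %acc,
//                                                          <4 x float> %v)
// lowers to FADD(%acc, VECREDUCE_FADD(%v)); without 'reassoc' the strict
// ordering is kept and it lowers to VECREDUCE_SEQ_FADD(%acc, %v).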
/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
if (CLI.RetZExt)
Attrs.push_back(Attribute::ZExt);
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
Attrs);
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Handle the incoming return values from the call.
CLI.Ins.clear();
Type *OrigRetTy = CLI.RetTy;
SmallVector<EVT, 4> RetTys;
SmallVector<uint64_t, 4> Offsets;
auto &DL = CLI.DAG.getDataLayout();
ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
if (CLI.IsPostTypeLegalization) {
// If we are lowering a libcall after legalization, split the return type.
SmallVector<EVT, 4> OldRetTys;
SmallVector<uint64_t, 4> OldOffsets;
RetTys.swap(OldRetTys);
Offsets.swap(OldOffsets);
for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
EVT RetVT = OldRetTys[i];
uint64_t Offset = OldOffsets[i];
MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
RetTys.append(NumRegs, RegisterVT);
for (unsigned j = 0; j != NumRegs; ++j)
Offsets.push_back(Offset + j * RegisterVTByteSZ);
}
}
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
bool CanLowerReturn =
this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
CLI.IsVarArg, Outs, CLI.RetTy->getContext());
SDValue DemoteStackSlot;
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
// FIXME: equivalent assert?
// assert(!CS.hasInAllocaArgument() &&
// "sret demotion is incompatible with inalloca");
uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
Align Alignment = DL.getPrefTypeAlign(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
DemoteStackIdx =
MF.getFrameInfo().CreateStackObject(TySize, Alignment, false);
Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
DL.getAllocaAddrSpace());
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
Entry.IsSExt = false;
Entry.IsZExt = false;
Entry.IsInReg = false;
Entry.IsSRet = true;
Entry.IsNest = false;
Entry.IsByVal = false;
Entry.IsByRef = false;
Entry.IsReturned = false;
Entry.IsSwiftSelf = false;
Entry.IsSwiftAsync = false;
Entry.IsSwiftError = false;
Entry.IsCFGuardTarget = false;
Entry.Alignment = Alignment;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.NumFixedArgs += 1;
CLI.getArgs()[0].IndirectType = CLI.RetTy;
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
// sret demotion isn't compatible with tail-calls, since the sret argument
// points into the callers stack frame.
CLI.IsTailCall = false;
} else {
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
ISD::ArgFlagsTy Flags;
if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
if (I == RetTys.size() - 1)
Flags.setInConsecutiveRegsLast();
}
EVT VT = RetTys[I];
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.Flags = Flags;
MyFlags.VT = RegisterVT;
MyFlags.ArgVT = VT;
MyFlags.Used = CLI.IsReturnValueUsed;
if (CLI.RetTy->isPointerTy()) {
MyFlags.Flags.setPointer();
MyFlags.Flags.setPointerAddrSpace(
cast<PointerType>(CLI.RetTy)->getAddressSpace());
}
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
if (CLI.RetZExt)
MyFlags.Flags.setZExt();
if (CLI.IsInReg)
MyFlags.Flags.setInReg();
CLI.Ins.push_back(MyFlags);
}
}
}
// We push in swifterror return as the last element of CLI.Ins.
ArgListTy &Args = CLI.getArgs();
if (supportSwiftError()) {
for (const ArgListEntry &Arg : Args) {
if (Arg.IsSwiftError) {
ISD::InputArg MyFlags;
MyFlags.VT = getPointerTy(DL);
MyFlags.ArgVT = EVT(getPointerTy(DL));
MyFlags.Flags.setSwiftError();
CLI.Ins.push_back(MyFlags);
}
}
}
// Handle all of the outgoing arguments.
CLI.Outs.clear();
CLI.OutVals.clear();
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
// FIXME: Split arguments if CLI.IsPostTypeLegalization
Type *FinalType = Args[i].Ty;
if (Args[i].IsByVal)
FinalType = Args[i].IndirectType;
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
FinalType, CLI.CallConv, CLI.IsVarArg, DL);
for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
Flags.setOrigAlign(OriginalAlignment);
if (Args[i].Ty->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(Args[i].Ty)->getAddressSpace());
}
if (Args[i].IsZExt)
Flags.setZExt();
if (Args[i].IsSExt)
Flags.setSExt();
if (Args[i].IsInReg) {
// If we are using the vectorcall calling convention, a structure that is
// passed InReg is surely an HVA (homogeneous vector aggregate).
if (CLI.CallConv == CallingConv::X86_VectorCall &&
isa<StructType>(FinalType)) {
// The first value of a structure is marked as the HVA start.
if (0 == Value)
Flags.setHvaStart();
Flags.setHva();
}
// Set InReg Flag
Flags.setInReg();
}
if (Args[i].IsSRet)
Flags.setSRet();
if (Args[i].IsSwiftSelf)
Flags.setSwiftSelf();
if (Args[i].IsSwiftAsync)
Flags.setSwiftAsync();
if (Args[i].IsSwiftError)
Flags.setSwiftError();
if (Args[i].IsCFGuardTarget)
Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
if (Args[i].IsByRef)
Flags.setByRef();
if (Args[i].IsPreallocated) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If
// we port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
// and how many bytes a callee cleanup function will pop. If we port
// inalloca to more targets, we'll have to add custom inalloca handling
// in the various CC lowering callbacks.
Flags.setByVal();
}
Align MemAlign;
if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
unsigned FrameSize = DL.getTypeAllocSize(Args[i].IndirectType);
Flags.setByValSize(FrameSize);
// For ByVal arguments, size and alignment should be passed from the FE; the
// BE will guess if this info is not there, but there are cases it cannot
// get right.
if (auto MA = Args[i].Alignment)
MemAlign = *MA;
else
MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL));
} else if (auto MA = Args[i].Alignment) {
MemAlign = *MA;
} else {
MemAlign = OriginalAlignment;
}
Flags.setMemAlign(MemAlign);
if (Args[i].IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (Args[i].IsSExt)
ExtendKind = ISD::SIGN_EXTEND;
else if (Args[i].IsZExt)
ExtendKind = ISD::ZERO_EXTEND;
// Conservatively only handle 'returned' on non-vectors that can be lowered,
// for now.
if (Args[i].IsReturned && !Op.getValueType().isVector() &&
CanLowerReturn) {
assert((CLI.RetTy == Args[i].Ty ||
(CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
CLI.RetTy->getPointerAddressSpace() ==
Args[i].Ty->getPointerAddressSpace())) &&
RetTys.size() == NumValues && "unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
// either the register MVT and the actual EVT are the same size or that
// the return value and argument are extended in the same way; in these
// cases it's safe to pass the argument register value unchanged as the
// return register value (although it's at the target's option whether
// to do so)
// TODO: allow code generation to take advantage of partially preserved
// registers rather than clobbering the entire register when the
// parameter extension method is not compatible with the return
// extension method
if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
(ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
CLI.RetZExt == Args[i].IsZExt))
Flags.setReturned();
}
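// Illustrative use of 'returned' (made-up declaration): in
//   declare i8* @memset(i8* returned, i32, i64)
// the first argument is also the return value, so when the size or
// extension checks above hold, the argument register may be reused
// unchanged as the return register.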
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB,
CLI.CallConv, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// If it isn't the first piece, the alignment must be 1.
// For scalable vectors the scalable part is currently handled
// by individual targets, so we just use the known minimum size here.
ISD::OutputArg MyFlags(
Flags, Parts[j].getValueType().getSimpleVT(), VT,
i < CLI.NumFixedArgs, i,
j * Parts[j].getValueType().getStoreSize().getKnownMinSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
MyFlags.Flags.setOrigAlign(Align(1));
if (j == NumParts - 1)
MyFlags.Flags.setSplitEnd();
}
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
if (NeedsRegBlock && Value == NumValues - 1)
CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
}
}
SmallVector<SDValue, 4> InVals;
CLI.Chain = LowerCall(CLI, InVals);
// Update CLI.InVals to use outside of this function.
CLI.InVals = InVals;
// Verify that the target's LowerCall behaved as expected.
assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
"LowerCall didn't return a valid chain!");
assert((!CLI.IsTailCall || InVals.empty()) &&
"LowerCall emitted a return value for a tail call!");
assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
"LowerCall didn't emit the correct number of values!");
// For a tail call, the return value is merely live-out and there aren't
// any nodes in the DAG representing it. Return a special value to
// indicate that a tail call has been emitted and no more Instructions
// should be processed in the current block.
if (CLI.IsTailCall) {
CLI.DAG.setRoot(CLI.Chain);
return std::make_pair(SDValue(), SDValue());
}
#ifndef NDEBUG
for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() && "LowerCall emitted a null value!");
assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
}
#endif
SmallVector<SDValue, 4> ReturnValues;
if (!CanLowerReturn) {
// The instruction result is the result of loading from the
// hidden sret parameter.
SmallVector<EVT, 1> PVTs;
Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
EVT PtrVT = PVTs[0];
unsigned NumValues = RetTys.size();
ReturnValues.resize(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
MachineFunction &MF = CLI.DAG.getMachineFunction();
Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
PtrVT), Flags);
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
DemoteStackIdx, Offsets[i]),
HiddenSRetAlign);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
} else {
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
Optional<ISD::NodeType> AssertOp;
if (CLI.RetSExt)
AssertOp = ISD::AssertSext;
else if (CLI.RetZExt)
AssertOp = ISD::AssertZext;
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT, nullptr,
CLI.CallConv, AssertOp));
CurReg += NumRegs;
}
// For a function returning void, there is no return value. We can't create
// such a node, so we just return a null return value in that case; nothing
// will actually look at the value.
if (ReturnValues.empty())
return std::make_pair(SDValue(), CLI.Chain);
}
SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
CLI.DAG.getVTList(RetTys), ReturnValues);
return std::make_pair(Res, CLI.Chain);
}
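// Illustrative sketch of the sret demotion performed by LowerCallTo, at the
// C level (assuming a target that cannot return struct S in registers):
//   struct S f(int x);             // original signature
//   void f(struct S *sret, int x); // demoted: the callee stores through
//                                  // the hidden pointer and the caller
//                                  // reloads the parts from DemoteStackSlot.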
/// Places new result values for the node in Results (their number
/// and types must exactly match those of the original return values of
/// the node), or leaves Results empty, which indicates that the node is not
/// to be custom lowered after all.
void TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res = LowerOperation(SDValue(N, 0), DAG);
if (!Res.getNode())
return;
// If the original node has one result, take the return value from
// LowerOperation as is. It might not be result number 0.
if (N->getNumValues() == 1) {
Results.push_back(Res);
return;
}
// If the original node has multiple results, then the return node should
// have the same number of results.
assert((N->getNumValues() == Res->getNumValues()) &&
"Lowering returned the wrong number of results!");
// Place the new result values based on N's result numbers.
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
Results.push_back(Res.getValue(I));
}
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
}
void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
unsigned Reg,
ISD::NodeType ExtendType) {
SDValue Op = getNonRegisterValue(V);
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
"Copy from a reg to the same reg!");
assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is an InlineAsm we have to match the registers required, not the
// notional registers required by the type.
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
if (ExtendType == ISD::ANY_EXTEND) {
auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
ExtendType = PreferredExtendIt->second;
}
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
#include "llvm/CodeGen/SelectionDAGISel.h"
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
if (FastISel)
return A->use_empty();
const BasicBlock &Entry = A->getParent()->front();
for (const User *U : A->users())
if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
return true;
}
using ArgCopyElisionMapTy =
DenseMap<const Argument *,
std::pair<const AllocaInst *, const StoreInst *>>;
/// Scan the entry block of the function in FuncInfo for arguments that look
/// like copies into a local alloca. Record any copied arguments in
/// ArgCopyElisionCandidates.
static void
findArgumentCopyElisionCandidates(const DataLayout &DL,
FunctionLoweringInfo *FuncInfo,
ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
// Record the state of every static alloca used in the entry block. Argument
// allocas are all used in the entry block, so we need approximately as many
// entries as we have arguments.
enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
unsigned NumArgs = FuncInfo->Fn->arg_size();
StaticAllocas.reserve(NumArgs * 2);
auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
if (!V)
return nullptr;
V = V->stripPointerCasts();
const auto *AI = dyn_cast<AllocaInst>(V);
if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
return nullptr;
auto Iter = StaticAllocas.insert({AI, Unknown});
return &Iter.first->second;
};
// Look for stores of arguments to static allocas. Look through bitcasts and
// GEPs to handle type coercions, as long as the alloca is fully initialized
// by the store. Any non-store use of an alloca escapes it and any subsequent
// unanalyzed store might write it.
// FIXME: Handle structs initialized with multiple stores.
for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
// Look for stores, and handle non-store uses conservatively.
const auto *SI = dyn_cast<StoreInst>(&I);
if (!SI) {
// We will look through cast uses, so ignore them completely.
if (I.isCast())
continue;
// Ignore debug info and pseudo op intrinsics, they don't escape or store
// to allocas.
if (I.isDebugOrPseudoInst())
continue;
// This is an unknown instruction. Assume it escapes or writes to all
// static alloca operands.
for (const Use &U : I.operands()) {
if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
*Info = StaticAllocaInfo::Clobbered;
}
continue;
}
// If the stored value is a static alloca, mark it as escaped.
if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
*Info = StaticAllocaInfo::Clobbered;
// Check if the destination is a static alloca.
const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
if (!Info)
continue;
const AllocaInst *AI = cast<AllocaInst>(Dst);
// Skip allocas that have been initialized or clobbered.
if (*Info != StaticAllocaInfo::Unknown)
continue;
// Check if the stored value is an argument, and that this store fully
// initializes the alloca.
// If the argument type has padding bits we can't directly forward a pointer
// as the upper bits may contain garbage.
// Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
!DL.typeSizeEqualsStoreSize(Arg->getType()) ||
ArgCopyElisionCandidates.count(Arg)) {
*Info = StaticAllocaInfo::Clobbered;
continue;
}
LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
<< '\n');
// Mark this alloca and store for argument copy elision.
*Info = StaticAllocaInfo::Elidable;
ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
// Stop scanning if we've seen all arguments. This will happen early in -O0
// builds, which is useful, because -O0 builds have large entry blocks and
// many allocas.
if (ArgCopyElisionCandidates.size() == NumArgs)
break;
}
}
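// Illustrative entry-block pattern that qualifies for elision (made-up IR):
//   define void @f(i64 %x) {
//   entry:
//     %x.addr = alloca i64
//     store i64 %x, i64* %x.addr
// The single store fully initializes the alloca, so if %x arrives in a
// fixed stack object the alloca can reuse it instead of copying.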
/// Try to elide argument copies from memory into a local alloca. Succeeds if
/// ArgVal is a load from a suitable fixed stack object.
static void tryToElideArgumentCopy(
FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
SDValue ArgVal, bool &ArgHasUses) {
// Check if this is a load from a fixed stack object.
auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
if (!LNode)
return;
auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
if (!FINode)
return;
// Check that the fixed stack object is the right size and alignment.
// Look at the alignment that the user wrote on the alloca instead of looking
// at the stack object.
auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
assert(ArgCopyIter != ArgCopyElisionCandidates.end());
const AllocaInst *AI = ArgCopyIter->second.first;
int FixedIndex = FINode->getIndex();
int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
int OldIndex = AllocaIndex;
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
LLVM_DEBUG(
dbgs() << " argument copy elision failed due to bad fixed stack "
"object size\n");
return;
}
Align RequiredAlignment = AI->getAlign();
if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
"greater than stack argument alignment ("
<< DebugStr(RequiredAlignment) << " vs "
<< DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
return;
}
// Perform the elision. Delete the old stack object and replace its only use
// in the variable info map. Mark the stack object as mutable.
LLVM_DEBUG({
dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
<< " Replacing frame index " << OldIndex << " with " << FixedIndex
<< '\n';
});
MFI.RemoveStackObject(OldIndex);
MFI.setIsImmutableObjectIndex(FixedIndex, false);
AllocaIndex = FixedIndex;
ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
Chains.push_back(ArgVal.getValue(1));
// Avoid emitting code for the store implementing the copy.
const StoreInst *SI = ArgCopyIter->second.second;
ElidedArgCopyInstrs.insert(SI);
// Check for uses of the argument again so that we can avoid exporting ArgVal
// if it isn't used by anything other than the store.
for (const Value *U : Arg.users()) {
if (U != SI) {
ArgHasUses = true;
break;
}
}
}
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
const DataLayout &DL = DAG.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
// In Naked functions we aren't going to save any registers.
if (F.hasFnAttribute(Attribute::Naked))
return;
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
F.getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
// NOTE: Assuming that a pointer will never break down to more than one VT
// or one register.
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
ISD::InputArg::NoArgIndex, 0);
Ins.push_back(RetArg);
}
// Look for stores of arguments to static allocas. Mark such arguments with a
// flag to ask the target to give us the memory location of that argument if
// available.
ArgCopyElisionMapTy ArgCopyElisionCandidates;
findArgumentCopyElisionCandidates(DL, FuncInfo.get(),
ArgCopyElisionCandidates);
// Set up the incoming argument description vector.
for (const Argument &Arg : F.args()) {
unsigned ArgNo = Arg.getArgNo();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
bool isArgValueUsed = !Arg.use_empty();
unsigned PartBase = 0;
Type *FinalType = Arg.getType();
if (Arg.hasAttribute(Attribute::ByVal))
FinalType = Arg.getParamByValType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
FinalType, F.getCallingConv(), F.isVarArg(), DL);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
if (Arg.getType()->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(Arg.getType())->getAddressSpace());
}
if (Arg.hasAttribute(Attribute::ZExt))
Flags.setZExt();
if (Arg.hasAttribute(Attribute::SExt))
Flags.setSExt();
if (Arg.hasAttribute(Attribute::InReg)) {
// If we are using the vectorcall calling convention, a structure that is
// passed InReg is surely an HVA (homogeneous vector aggregate).
if (F.getCallingConv() == CallingConv::X86_VectorCall &&
isa<StructType>(Arg.getType())) {
// The first value of a structure is marked as the HVA start.
if (0 == Value)
Flags.setHvaStart();
Flags.setHva();
}
// Set InReg Flag
Flags.setInReg();
}
if (Arg.hasAttribute(Attribute::StructRet))
Flags.setSRet();
if (Arg.hasAttribute(Attribute::SwiftSelf))
Flags.setSwiftSelf();
if (Arg.hasAttribute(Attribute::SwiftAsync))
Flags.setSwiftAsync();
if (Arg.hasAttribute(Attribute::SwiftError))
Flags.setSwiftError();
if (Arg.hasAttribute(Attribute::ByVal))
Flags.setByVal();
if (Arg.hasAttribute(Attribute::ByRef))
Flags.setByRef();
if (Arg.hasAttribute(Attribute::InAlloca)) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
// and how many bytes a callee cleanup function will pop. If we port
// inalloca to more targets, we'll have to add custom inalloca handling
// in the various CC lowering callbacks.
Flags.setByVal();
}
if (Arg.hasAttribute(Attribute::Preallocated)) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If
// we port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
const Align OriginalAlignment(
TLI->getABIAlignmentForCallingConv(ArgTy, DL));
Flags.setOrigAlign(OriginalAlignment);
Align MemAlign;
Type *ArgMemTy = nullptr;
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
Flags.isByRef()) {
if (!ArgMemTy)
ArgMemTy = Arg.getPointeeInMemoryValueType();
uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy);
// For in-memory arguments, size and alignment should be passed from FE.
// BE will guess if this info is not there but there are cases it cannot
// get right.
if (auto ParamAlign = Arg.getParamStackAlign())
MemAlign = *ParamAlign;
else if ((ParamAlign = Arg.getParamAlign()))
MemAlign = *ParamAlign;
else
MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
if (Flags.isByRef())
Flags.setByRefSize(MemSize);
else
Flags.setByValSize(MemSize);
} else if (auto ParamAlign = Arg.getParamStackAlign()) {
MemAlign = *ParamAlign;
} else {
MemAlign = OriginalAlignment;
}
Flags.setMemAlign(MemAlign);
if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
if (Arg.hasAttribute(Attribute::Returned))
Flags.setReturned();
MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
unsigned NumRegs = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
// For scalable vectors, use the minimum size; individual targets
// are responsible for handling scalable vector arguments and
// return values.
ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// If it isn't the first piece, the alignment must be 1.
else if (i > 0) {
MyFlags.Flags.setOrigAlign(Align(1));
if (i == NumRegs - 1)
MyFlags.Flags.setSplitEnd();
}
Ins.push_back(MyFlags);
}
if (NeedsRegBlock && Value == NumValues - 1)
Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
PartBase += VT.getStoreSize().getKnownMinSize();
}
}
// Call the target to set up the argument values.
SmallVector<SDValue, 8> InVals;
SDValue NewRoot = TLI->LowerFormalArguments(
DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
// Verify that the target's LowerFormalArguments behaved as expected.
assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
"LowerFormalArguments didn't return a valid chain!");
assert(InVals.size() == Ins.size() &&
"LowerFormalArguments didn't emit the correct number of values!");
LLVM_DEBUG({
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerFormalArguments emitted a null value!");
assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerFormalArguments emitted a value with the wrong type!");
}
});
// Update the DAG with the new chain value resulting from argument lowering.
DAG.setRoot(NewRoot);
// Set up the argument values.
unsigned i = 0;
if (!FuncInfo->CanLowerReturn) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
F.getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
Optional<ISD::NodeType> AssertOp = None;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
nullptr, F.getCallingConv(), AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
Register SRetReg =
RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
FuncInfo->DemoteRegister = SRetReg;
NewRoot =
SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
DAG.setRoot(NewRoot);
// i indexes lowered arguments. Bump it past the hidden sret argument.
++i;
}
SmallVector<SDValue, 4> Chains;
DenseMap<int, int> ArgCopyElisionFrameIndexMap;
for (const Argument &Arg : F.args()) {
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
continue;
bool ArgHasUses = !Arg.use_empty();
// Elide the copying store if the target loaded this argument from a
// suitable fixed stack object.
if (Ins[i].Flags.isCopyElisionCandidate()) {
tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
InVals[i], ArgHasUses);
}
// If this argument is unused then remember its value. It is used to generate
// debugging information.
bool isSwiftErrorArg =
TLI->supportSwiftError() &&
Arg.hasAttribute(Attribute::SwiftError);
if (!ArgHasUses && !isSwiftErrorArg) {
SDB->setUnusedArgValue(&Arg, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
EVT VT = ValueVTs[Val];
MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
F.getCallingConv(), VT);
unsigned NumParts = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
// Even an apparent 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
// function.
if (ArgHasUses || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (Arg.hasAttribute(Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
AssertOp = ISD::AssertZext;
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr,
F.getCallingConv(), AssertOp));
}
i += NumParts;
}
// We don't need to do anything else for unused arguments.
if (ArgValues.empty())
continue;
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
SDB->setValue(&Arg, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
// We want to associate the argument with the frame index, among
// involved operands, that corresponds to the lowest address. The
// getCopyFromParts function, called earlier, is swapping the order of
// the operands to BUILD_PAIR depending on endianness. The result of
// that swapping is that the least significant bits of the argument will
// be in the first operand of the BUILD_PAIR node, and the most
// significant bits will be in the second operand.
unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
// Analyses past this point are naive and don't expect an assertion.
if (Res.getOpcode() == ISD::AssertZext)
Res = Res.getOperand(0);
// Update the SwiftErrorVRegDefMap.
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg))
SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
Reg);
}
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
if (Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg)) {
FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
}
if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(&Arg);
SDB->CopyToExportRegsIfNeeded(&Arg);
}
}
if (!Chains.empty()) {
Chains.push_back(NewRoot);
NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
DAG.setRoot(NewRoot);
assert(i == InVals.size() && "Argument register count mismatch!");
// If any argument copy elisions occurred and we have debug info, update the
// stale frame indices used in the dbg.declare variable info table.
MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
if (I != ArgCopyElisionFrameIndexMap.end())
VI.Slot = I->second;
}
}
// Finally, if the target has anything special to do, allow it to do so.
emitFunctionEntryCode();
}
/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
/// ensure constants are generated when needed. Remember the virtual registers
/// that need to be added to the Machine PHI nodes as input. We cannot just
/// directly add them, because expansion might result in multiple MBB's for one
/// BB. As such, the start of the BB might correspond to a different MBB than
/// the end.
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
// Check PHI nodes in successors that expect a value to be available from this
// block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
if (!isa<PHINode>(SuccBB->begin())) continue;
MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
if (!SuccsHandled.insert(SuccMBB).second)
continue;
MachineBasicBlock::iterator MBBI = SuccMBB->begin();
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
for (const PHINode &PN : SuccBB->phis()) {
// Ignore dead phi's.
if (PN.use_empty())
continue;
// Skip empty types
if (PN.getType()->isEmptyTy())
continue;
unsigned Reg;
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
RegOut = FuncInfo.CreateRegs(C);
// We need to zero/sign extend ConstantInt phi operands to match
// assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo.
ISD::NodeType ExtendType = ISD::ANY_EXTEND;
if (auto *CI = dyn_cast<ConstantInt>(C))
ExtendType = TLI.signExtendConstant(CI) ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND;
CopyValueToVirtualRegister(C, RegOut, ExtendType);
}
Reg = RegOut;
} else {
DenseMap<const Value *, Register>::iterator I =
FuncInfo.ValueMap.find(PHIOp);
if (I != FuncInfo.ValueMap.end())
Reg = I->second;
else {
assert(isa<AllocaInst>(PHIOp) &&
FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
Reg = FuncInfo.CreateRegs(PHIOp);
CopyValueToVirtualRegister(PHIOp, Reg);
}
}
// Remember that this register needs to be added to the machine PHI node as
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
for (unsigned i = 0, e = NumRegisters; i != e; ++i)
FuncInfo.PHINodesToUpdate.push_back(
std::make_pair(&*MBBI++, Reg + i));
Reg += NumRegisters;
}
}
}
ConstantsOut.clear();
}
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
return nullptr;
return &*I;
}
/// During lowering new call nodes can be created (such as memset, etc.).
/// Those will become new roots of the current DAG, but complications arise
/// when they are tail calls. In such cases, the call lowering will update
/// the root, but the builder still needs to know that a tail call has been
/// lowered in order to avoid generating an additional return.
void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
// If the node is null, we do have a tail call.
if (MaybeTC.getNode() != nullptr)
DAG.setRoot(MaybeTC);
else
HasTailCall = true;
}
void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB) {
MachineFunction *CurMF = FuncInfo.MF;
MachineBasicBlock *NextMBB = nullptr;
MachineFunction::iterator BBI(W.MBB);
if (++BBI != FuncInfo.MF->end())
NextMBB = &*BBI;
unsigned Size = W.LastCluster - W.FirstCluster + 1;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (Size == 2 && W.MBB == SwitchMBB) {
// If any two of the cases have the same destination, and if one value
// is the same as the other, but has one bit unset that the other has set,
// use bit manipulation to do two compares at once. For example:
// "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
// TODO: This could be extended to merge any 2 cases in switches with 3
// cases.
// TODO: Handle cases where W.CaseBB != SwitchBB.
CaseCluster &Small = *W.FirstCluster;
CaseCluster &Big = *W.LastCluster;
if (Small.Low == Small.High && Big.Low == Big.High &&
Small.MBB == Big.MBB) {
const APInt &SmallValue = Small.Low->getValue();
const APInt &BigValue = Big.Low->getValue();
// Check that there is only one bit different.
APInt CommonBit = BigValue ^ SmallValue;
if (CommonBit.isPowerOf2()) {
SDValue CondLHS = getValue(Cond);
EVT VT = CondLHS.getValueType();
SDLoc DL = getCurSDLoc();
SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
DAG.getConstant(CommonBit, DL, VT));
SDValue Cond = DAG.getSetCC(
DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
ISD::SETEQ);
// Update successor info.
// Both Small and Big will jump to Small.BB, so we sum up the
// probabilities.
addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
if (BPI)
addSuccessorWithProb(
SwitchMBB, DefaultMBB,
// The default destination is the first successor in IR.
BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
else
addSuccessorWithProb(SwitchMBB, DefaultMBB);
// Insert the true branch.
SDValue BrCond =
DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(Small.MBB));
// Insert the false branch.
BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
DAG.getBasicBlock(DefaultMBB));
DAG.setRoot(BrCond);
return;
}
}
}
if (TM.getOptLevel() != CodeGenOpt::None) {
// Here, we order cases by probability so the most likely case will be
// checked first. However, two clusters can have the same probability in
// which case their relative ordering is non-deterministic. So we use Low
// as a tie-breaker as clusters are guaranteed to never overlap.
llvm::sort(W.FirstCluster, W.LastCluster + 1,
[](const CaseCluster &a, const CaseCluster &b) {
return a.Prob != b.Prob ?
a.Prob > b.Prob :
a.Low->getValue().slt(b.Low->getValue());
});
// Rearrange the case blocks so that the last one falls through if possible
// without changing the order of probabilities.
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
--I;
if (I->Prob > W.LastCluster->Prob)
break;
if (I->Kind == CC_Range && I->MBB == NextMBB) {
std::swap(*I, *W.LastCluster);
break;
}
}
}
// Compute total probability.
BranchProbability DefaultProb = W.DefaultProb;
BranchProbability UnhandledProbs = DefaultProb;
for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
UnhandledProbs += I->Prob;
MachineBasicBlock *CurMBB = W.MBB;
for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
bool FallthroughUnreachable = false;
MachineBasicBlock *Fallthrough;
if (I == W.LastCluster) {
// For the last cluster, fall through to the default destination.
Fallthrough = DefaultMBB;
FallthroughUnreachable = isa<UnreachableInst>(
DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
} else {
Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
CurMF->insert(BBI, Fallthrough);
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
UnhandledProbs -= I->Prob;
switch (I->Kind) {
case CC_JumpTable: {
// FIXME: Optimize away range check based on pivot comparisons.
JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
// The jump block hasn't been inserted yet; insert it here.
MachineBasicBlock *JumpMBB = JT->MBB;
CurMF->insert(BBI, JumpMBB);
auto JumpProb = I->Prob;
auto FallthroughProb = UnhandledProbs;
// If the default statement is a target of the jump table, we evenly
// distribute the default probability to successors of CurMBB. Also
// update the probability on the edge from JumpMBB to Fallthrough.
for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
SE = JumpMBB->succ_end();
SI != SE; ++SI) {
if (*SI == DefaultMBB) {
JumpProb += DefaultProb / 2;
FallthroughProb -= DefaultProb / 2;
JumpMBB->setSuccProbability(SI, DefaultProb / 2);
JumpMBB->normalizeSuccProbs();
break;
}
}
if (FallthroughUnreachable)
JTH->FallthroughUnreachable = true;
if (!JTH->FallthroughUnreachable)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
// The jump table header will be inserted in our current block, do the
// range check, and fall through to our fallthrough block.
JTH->HeaderBB = CurMBB;
JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
// If we're in the right place, emit the jump table header right now.
if (CurMBB == SwitchMBB) {
visitJumpTableHeader(*JT, *JTH, SwitchMBB);
JTH->Emitted = true;
}
break;
}
case CC_BitTests: {
// FIXME: Optimize away range check based on pivot comparisons.
BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
// The bit test blocks haven't been inserted yet; insert them here.
for (BitTestCase &BTC : BTB->Cases)
CurMF->insert(BBI, BTC.ThisBB);
// Fill in fields of the BitTestBlock.
BTB->Parent = CurMBB;
BTB->Default = Fallthrough;
BTB->DefaultProb = UnhandledProbs;
// If the cases in the bit test don't form a contiguous range, we evenly
// distribute the probability on the edge to Fallthrough to two
// successors of CurMBB.
if (!BTB->ContiguousRange) {
BTB->Prob += DefaultProb / 2;
BTB->DefaultProb -= DefaultProb / 2;
}
if (FallthroughUnreachable)
BTB->FallthroughUnreachable = true;
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
BTB->Emitted = true;
}
break;
}
case CC_Range: {
const Value *RHS, *LHS, *MHS;
ISD::CondCode CC;
if (I->Low == I->High) {
// Check Cond == I->Low.
CC = ISD::SETEQ;
LHS = Cond;
RHS = I->Low;
MHS = nullptr;
} else {
// Check I->Low <= Cond <= I->High.
CC = ISD::SETLE;
LHS = I->Low;
MHS = Cond;
RHS = I->High;
}
// If Fallthrough is unreachable, fold away the comparison.
if (FallthroughUnreachable)
CC = ISD::SETTRUE;
// The false probability is the sum of all unhandled cases.
CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
getCurSDLoc(), I->Prob, UnhandledProbs);
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
SL->SwitchCases.push_back(CB);
break;
}
}
CurMBB = Fallthrough;
}
}
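// Return how many clusters in [First, Last] would be ordered before CC, i.e.
// CC's rank when the range is sorted by decreasing probability with the low
// case value as the tie-breaker (the same ordering used in lowerWorkItem).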
unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
CaseClusterIt First,
CaseClusterIt Last) {
return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
if (X.Prob != CC.Prob)
return X.Prob > CC.Prob;
// Ties are broken by comparing the case value.
return X.Low->getValue().slt(CC.Low->getValue());
});
}
void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
const SwitchWorkListItem &W,
Value *Cond,
MachineBasicBlock *SwitchMBB) {
assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
"Clusters not sorted?");
assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
// Balance the tree based on branch probabilities to create a near-optimal (in
// terms of search time given key frequency) binary search tree. See e.g. Kurt
// Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
CaseClusterIt LastLeft = W.FirstCluster;
CaseClusterIt FirstRight = W.LastCluster;
auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
// Move LastLeft and FirstRight towards each other from opposite directions to
// find a partitioning of the clusters which balances the probability on both
// sides. If LeftProb and RightProb are equal, alternate which side is
// taken to ensure 0-probability nodes are distributed evenly.
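// A small worked example: given cluster probabilities {0.1, 0.4, 0.1, 0.4},
// the loop settles on a {0.1, 0.4} | {0.1, 0.4} partition, with 0.5 (plus the
// shared DefaultProb/2 term) of probability on each side.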
unsigned I = 0;
while (LastLeft + 1 < FirstRight) {
if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
LeftProb += (++LastLeft)->Prob;
else
RightProb += (--FirstRight)->Prob;
I++;
}
while (true) {
// Our binary search tree differs from a typical BST in that ours can have up
// to three values in each leaf. The pivot selection above doesn't take that
// into account, which means the tree might require more nodes and be less
// efficient. We compensate for this here.
unsigned NumLeft = LastLeft - W.FirstCluster + 1;
unsigned NumRight = W.LastCluster - FirstRight + 1;
if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
// If one side has less than 3 clusters, and the other has more than 3,
// consider taking a cluster from the other side.
if (NumLeft < NumRight) {
// Consider moving the first cluster on the right to the left side.
CaseCluster &CC = *FirstRight;
unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
if (LeftSideRank <= RightSideRank) {
// Moving the cluster to the left does not demote it.
++LastLeft;
++FirstRight;
continue;
}
} else {
assert(NumRight < NumLeft);
// Consider moving the last element on the left to the right side.
CaseCluster &CC = *LastLeft;
unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
if (RightSideRank <= LeftSideRank) {
// Moving the cluster to the right does not demote it.
--LastLeft;
--FirstRight;
continue;
}
}
}
break;
}
assert(LastLeft + 1 == FirstRight);
assert(LastLeft >= W.FirstCluster);
assert(FirstRight <= W.LastCluster);
// Use the first element on the right as pivot since we will make less-than
// comparisons against it.
CaseClusterIt PivotCluster = FirstRight;
assert(PivotCluster > W.FirstCluster);
assert(PivotCluster <= W.LastCluster);
CaseClusterIt FirstLeft = W.FirstCluster;
CaseClusterIt LastRight = W.LastCluster;
const ConstantInt *Pivot = PivotCluster->Low;
// New blocks will be inserted immediately after the current one.
MachineFunction::iterator BBI(W.MBB);
++BBI;
// We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
// we can branch to its destination directly if it's squeezed exactly in
// between the known lower bound and Pivot - 1.
MachineBasicBlock *LeftMBB;
if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
FirstLeft->Low == W.GE &&
(FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
LeftMBB = FirstLeft->MBB;
} else {
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, LeftMBB);
WorkList.push_back(
{LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
// single cluster, RHS.Low == Pivot, and we can branch to its destination
// directly if RHS.High equals the current upper bound.
MachineBasicBlock *RightMBB;
if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
RightMBB = FirstRight->MBB;
} else {
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, RightMBB);
WorkList.push_back(
{RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Create the CaseBlock record that will be used to lower the branch.
CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
getCurSDLoc(), LeftProb, RightProb);
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
SL->SwitchCases.push_back(CB);
}
// Scale CaseProb after peeling a case with the probability of PeeledCaseProb
// from the switch statement.
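// For example, after peeling a case of probability 1/2, a remaining case of
// probability 1/4 is rescaled to (1/4) / (1/2) = 1/2 of the residual switch.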
static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
BranchProbability PeeledCaseProb) {
if (PeeledCaseProb == BranchProbability::getOne())
return BranchProbability::getZero();
BranchProbability SwitchProb = PeeledCaseProb.getCompl();
uint32_t Numerator = CaseProb.getNumerator();
uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
return BranchProbability(Numerator, std::max(Numerator, Denominator));
}
// Try to peel the top probability case if it exceeds the threshold.
// Return current MachineBasicBlock for the switch statement if the peeling
// does not occur.
// If the peeling is performed, return the newly created MachineBasicBlock
// for the peeled switch statement. Also update Clusters to remove the peeled
// case. PeeledCaseProb is the BranchProbability for the peeled case.
MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
const SwitchInst &SI, CaseClusterVector &Clusters,
BranchProbability &PeeledCaseProb) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
// Don't perform if there is only one cluster or optimizing for size.
if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
TM.getOptLevel() == CodeGenOpt::None ||
SwitchMBB->getParent()->getFunction().hasMinSize())
return SwitchMBB;
BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
unsigned PeeledCaseIndex = 0;
bool SwitchPeeled = false;
for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
CaseCluster &CC = Clusters[Index];
if (CC.Prob < TopCaseProb)
continue;
TopCaseProb = CC.Prob;
PeeledCaseIndex = Index;
SwitchPeeled = true;
}
if (!SwitchPeeled)
return SwitchMBB;
LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
<< TopCaseProb << "\n");
// Record the MBB for the peeled switch statement.
MachineFunction::iterator BBI(SwitchMBB);
++BBI;
MachineBasicBlock *PeeledSwitchMBB =
FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
ExportFromCurrentBlock(SI.getCondition());
auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
nullptr, nullptr, TopCaseProb.getCompl()};
lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
Clusters.erase(PeeledCaseIt);
for (CaseCluster &CC : Clusters) {
LLVM_DEBUG(
dbgs() << "Scale the probablity for one cluster, before scaling: "
<< CC.Prob << "\n");
CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
}
PeeledCaseProb = TopCaseProb;
return PeeledSwitchMBB;
}
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Extract cases from the switch.
BranchProbabilityInfo *BPI = FuncInfo.BPI;
CaseClusterVector Clusters;
Clusters.reserve(SI.getNumCases());
for (auto I : SI.cases()) {
MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
const ConstantInt *CaseVal = I.getCaseValue();
BranchProbability Prob =
BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
: BranchProbability(1, SI.getNumCases() + 1);
Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
}
MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
// Cluster adjacent cases with the same destination. We do this at all
// optimization levels because it's cheap to do and will make codegen faster
// if there are many clusters.
sortAndRangeify(Clusters);
// The branch probability of the peeled case.
BranchProbability PeeledCaseProb = BranchProbability::getZero();
MachineBasicBlock *PeeledSwitchMBB =
peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
// If there is only the default destination, jump there directly.
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
if (Clusters.empty()) {
assert(PeeledSwitchMBB == SwitchMBB);
SwitchMBB->addSuccessor(DefaultMBB);
if (DefaultMBB != NextBlock(SwitchMBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
}
return;
}
SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
for (const CaseCluster &C : Clusters) {
if (C.Kind == CC_JumpTable)
dbgs() << "JT:";
if (C.Kind == CC_BitTests)
dbgs() << "BT:";
C.Low->getValue().print(dbgs(), true);
if (C.Low != C.High) {
dbgs() << '-';
C.High->getValue().print(dbgs(), true);
}
dbgs() << ' ';
}
dbgs() << '\n';
});
assert(!Clusters.empty());
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
// Scale the branch probability for DefaultMBB if peeling occurs and
// DefaultMBB is not replaced.
if (PeeledCaseProb != BranchProbability::getZero() &&
DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
WorkList.push_back(
{PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.pop_back_val();
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
!DefaultMBB->getParent()->getFunction().hasMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
continue;
}
lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
}
}
void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto DL = getCurSDLoc();
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getStepVector(DL, ResultVT));
}
void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDLoc DL = getCurSDLoc();
SDValue V = getValue(I.getOperand(0));
assert(VT == V.getValueType() && "Malformed vector.reverse!");
if (VT.isScalableVector()) {
setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
return;
}
// Use VECTOR_SHUFFLE for the fixed-length vector
// to maintain existing behavior.
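// For example, reversing a <4 x i32> vector uses the shuffle mask <3, 2, 1, 0>.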
SmallVector<int, 8> Mask;
unsigned NumElts = VT.getVectorMinNumElements();
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(NumElts - 1 - i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
}
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
SmallVector<SDValue, 4> Values(NumValues);
SDValue Op = getValue(I.getOperand(0));
for (unsigned i = 0; i != NumValues; ++i)
Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i],
SDValue(Op.getNode(), Op.getResNo() + i));
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDLoc DL = getCurSDLoc();
SDValue V1 = getValue(I.getOperand(0));
SDValue V2 = getValue(I.getOperand(1));
int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue();
// VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
if (VT.isScalableVector()) {
MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
DAG.getConstant(Imm, DL, IdxVT)));
return;
}
unsigned NumElts = VT.getVectorNumElements();
uint64_t Idx = (NumElts + Imm) % NumElts;
// Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
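// For example, splicing two <4 x i32> vectors with Imm = 1 gives Idx = 1 and
// the mask <1, 2, 3, 4>: the last three elements of V1 followed by the first
// element of V2.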
SmallVector<int, 8> Mask;
for (unsigned i = 0; i < NumElts; ++i)
Mask.push_back(Idx + i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
}
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index 8dc8d381ad16..a63118067139 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -1,954 +1,960 @@
//===----- TypePromotion.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This is an opcode based type promotion pass for small types that would
/// otherwise be promoted during legalisation. This works around the limitations
/// of selection dag for cyclic regions. The search begins from the operands
/// of icmp instructions, where a tree consisting of non-wrapping or
/// safe-wrapping instructions is built, checked and promoted if possible.
///
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "type-promotion"
#define PASS_NAME "Type Promotion"
using namespace llvm;
static cl::opt<bool> DisablePromotion("disable-type-promotion", cl::Hidden,
cl::init(false),
cl::desc("Disable type promotion pass"));
// The goal of this pass is to enable more efficient code generation for
// operations on narrow types (i.e. types with < 32-bits) and this is a
// motivating IR code example:
//
// define hidden i32 @cmp(i8 zeroext) {
// %2 = add i8 %0, -49
// %3 = icmp ult i8 %2, 3
// ..
// }
//
// The issue here is that i8 is type-legalized to i32 because i8 is not a
// legal type. Thus, arithmetic is done in integer-precision, but then the
// byte value is masked out as follows:
//
// t19: i32 = add t4, Constant:i32<-49>
// t24: i32 = and t19, Constant:i32<255>
//
// Consequently, we generate code like this:
//
// subs r0, #49
// uxtb r1, r0
// cmp r1, #3
//
// This shows that masking out the byte value results in generation of
// the UXTB instruction. This is not optimal as r0 already contains the byte
// value we need, and so instead we can just generate:
//
// sub.w r1, r0, #49
// cmp r1, #3
//
// We achieve this by type promoting the IR to i32 like so for this example:
//
// define i32 @cmp(i8 zeroext %c) {
// %0 = zext i8 %c to i32
// %c.off = add i32 %0, -49
// %1 = icmp ult i32 %c.off, 3
// ..
// }
//
// For this to be valid and legal, we need to prove that the i32 add is
// producing the same value as the i8 addition, and that e.g. no overflow
// happens.
//
// A brief sketch of the algorithm and some terminology.
// We pattern match interesting IR patterns:
// - which have "sources": instructions producing narrow values (i8, i16), and
// - they have "sinks": instructions consuming these narrow values.
//
// We collect all instructions connecting sources and sinks in a worklist, so
// that we can mutate these instructions and perform type promotion when it is
// legal to do so.
namespace {
class IRPromoter {
LLVMContext &Ctx;
unsigned PromotedWidth = 0;
SetVector<Value *> &Visited;
SetVector<Value *> &Sources;
SetVector<Instruction *> &Sinks;
SmallPtrSetImpl<Instruction *> &SafeWrap;
IntegerType *ExtTy = nullptr;
SmallPtrSet<Value *, 8> NewInsts;
SmallPtrSet<Instruction *, 4> InstsToRemove;
DenseMap<Value *, SmallVector<Type *, 4>> TruncTysMap;
SmallPtrSet<Value *, 8> Promoted;
void ReplaceAllUsersOfWith(Value *From, Value *To);
void ExtendSources();
void ConvertTruncs();
void PromoteTree();
void TruncateSinks();
void Cleanup();
public:
IRPromoter(LLVMContext &C, unsigned Width,
SetVector<Value *> &visited, SetVector<Value *> &sources,
SetVector<Instruction *> &sinks,
SmallPtrSetImpl<Instruction *> &wrap)
: Ctx(C), PromotedWidth(Width), Visited(visited),
Sources(sources), Sinks(sinks), SafeWrap(wrap) {
ExtTy = IntegerType::get(Ctx, PromotedWidth);
}
void Mutate();
};
class TypePromotion : public FunctionPass {
unsigned TypeSize = 0;
LLVMContext *Ctx = nullptr;
unsigned RegisterBitWidth = 0;
SmallPtrSet<Value *, 16> AllVisited;
SmallPtrSet<Instruction *, 8> SafeToPromote;
SmallPtrSet<Instruction *, 4> SafeWrap;
// Does V have the same size result type as TypeSize.
bool EqualTypeSize(Value *V);
// Does V have the same size, or narrower, result type as TypeSize.
bool LessOrEqualTypeSize(Value *V);
// Does V have a result type that is wider than TypeSize.
bool GreaterThanTypeSize(Value *V);
// Does V have a result type that is narrower than TypeSize.
bool LessThanTypeSize(Value *V);
// Should V be a leaf in the promote tree?
bool isSource(Value *V);
// Should V be a root in the promotion tree?
bool isSink(Value *V);
// Should we change the result type of V? It will result in the users of V
// being visited.
bool shouldPromote(Value *V);
// Is I an add or a sub, which isn't marked as nuw, but where a wrapping
// result won't affect the computation?
bool isSafeWrap(Instruction *I);
// Can V have its integer type promoted, or can the type be ignored.
bool isSupportedType(Value *V);
// Is V an instruction with a supported opcode or another value that we can
// handle, such as constants and basic blocks.
bool isSupportedValue(Value *V);
// Is V an instruction whose result can be trivially promoted, or that has
// safe wrapping.
bool isLegalToPromote(Value *V);
bool TryToPromote(Value *V, unsigned PromotedWidth);
public:
static char ID;
TypePromotion() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
AU.setPreservesCFG();
}
StringRef getPassName() const override { return PASS_NAME; }
bool runOnFunction(Function &F) override;
};
} // namespace
static bool GenerateSignBits(Instruction *I) {
unsigned Opc = I->getOpcode();
return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
Opc == Instruction::SRem || Opc == Instruction::SExt;
}
bool TypePromotion::EqualTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() == TypeSize;
}
bool TypePromotion::LessOrEqualTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() <= TypeSize;
}
bool TypePromotion::GreaterThanTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() > TypeSize;
}
bool TypePromotion::LessThanTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() < TypeSize;
}
/// Return true if the given value is a source in the use-def chain, producing
/// a narrow 'TypeSize' value. These values will be zext to start the promotion
/// of the tree to i32. We guarantee that these won't populate the upper bits
/// of the register. ZExt on the loads will be free, and the same for call
/// return values because we only accept ones that guarantee a zeroext ret val.
/// Many arguments will have the zeroext attribute too, so those would be free
/// too.
bool TypePromotion::isSource(Value *V) {
if (!isa<IntegerType>(V->getType()))
return false;
// TODO Allow zext to be sources.
if (isa<Argument>(V))
return true;
else if (isa<LoadInst>(V))
return true;
else if (isa<BitCastInst>(V))
return true;
else if (auto *Call = dyn_cast<CallInst>(V))
return Call->hasRetAttr(Attribute::AttrKind::ZExt);
else if (auto *Trunc = dyn_cast<TruncInst>(V))
return EqualTypeSize(Trunc);
return false;
}
/// Return true if V will require any promoted values to be truncated for
/// the IR to remain valid. We can't mutate the value type of these
/// instructions.
bool TypePromotion::isSink(Value *V) {
// TODO The truncate also isn't actually necessary because we would have
// already proved that the data value is kept within the range of the original
// data type. We currently remove any truncs inserted for handling zext sinks.
// Sinks are:
// - points where the value in the register is being observed, such as an
// icmp, switch or store.
// - points where value types have to match, such as calls and returns.
// - zext are included to ease the transformation and are generally removed
// later on.
if (auto *Store = dyn_cast<StoreInst>(V))
return LessOrEqualTypeSize(Store->getValueOperand());
if (auto *Return = dyn_cast<ReturnInst>(V))
return LessOrEqualTypeSize(Return->getReturnValue());
if (auto *ZExt = dyn_cast<ZExtInst>(V))
return GreaterThanTypeSize(ZExt);
if (auto *Switch = dyn_cast<SwitchInst>(V))
return LessThanTypeSize(Switch->getCondition());
if (auto *ICmp = dyn_cast<ICmpInst>(V))
return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
return isa<CallInst>(V);
}
/// Return whether this instruction can safely wrap.
bool TypePromotion::isSafeWrap(Instruction *I) {
// We can support a potentially wrapping instruction (I) if:
// - It is only used by an unsigned icmp.
// - The icmp uses a constant.
// - The wrapping value (I) is decreasing, i.e. would underflow - wrapping
// around zero to become a larger number than before.
// - The wrapping instruction (I) also uses a constant.
//
// We can then use the two constants to calculate whether the result would
// wrap with respect to itself in the original bitwidth. If it doesn't wrap
// but just underflows the range, the icmp would give the same result whether the
// result has been truncated or not. We calculate this by:
// - Zero extending both constants, if needed, to RegisterBitWidth.
// - Take the absolute value of I's constant, adding this to the icmp const.
// - Check that this value is not out of range for the small type. If it is, it
// means that it has underflowed enough to wrap around the icmp constant.
//
// For example:
//
// %sub = sub i8 %a, 2
// %cmp = icmp ule i8 %sub, 254
//
// If %a = 0, %sub = -2 == FE == 254
// But if this is evaluated as an i32
// %sub = -2 == FF FF FF FE == 4294967294
// So the unsigned compares (i8 and i32) would not yield the same result.
//
// Another way to look at it is:
// %a - 2 <= 254
// %a + 2 <= 254 + 2
// %a <= 256
// And we can't represent 256 in the i8 format, so we don't support it.
//
// Whereas:
//
// %sub = sub i8 %a, 1
// %cmp = icmp ule i8 %sub, 254
//
// If %a = 0, %sub = -1 == FF == 255
// As i32:
// %sub = -1 == FF FF FF FF == 4294967295
//
// In this case, the unsigned compare results would be the same and this
// would also be true for ult, uge and ugt:
// - (255 < 254) == (0xFFFFFFFF < 254) == false
// - (255 <= 254) == (0xFFFFFFFF <= 254) == false
// - (255 > 254) == (0xFFFFFFFF > 254) == true
// - (255 >= 254) == (0xFFFFFFFF >= 254) == true
//
// To demonstrate why we can't handle increasing values:
//
// %add = add i8 %a, 2
// %cmp = icmp ult i8 %add, 127
//
// If %a = 254, %add = 256 == (i8 1)
// As i32:
// %add = 256
//
// (1 < 127) != (256 < 127)
unsigned Opc = I->getOpcode();
if (Opc != Instruction::Add && Opc != Instruction::Sub)
return false;
if (!I->hasOneUse() || !isa<ICmpInst>(*I->user_begin()) ||
!isa<ConstantInt>(I->getOperand(1)))
return false;
// Don't support an icmp that deals with sign bits.
auto *CI = cast<ICmpInst>(*I->user_begin());
if (CI->isSigned() || CI->isEquality())
return false;
ConstantInt *ICmpConstant = nullptr;
if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
ICmpConstant = Const;
else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
ICmpConstant = Const;
else
return false;
const APInt &ICmpConst = ICmpConstant->getValue();
APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
if (Opc == Instruction::Sub)
OverflowConst = -OverflowConst;
if (!OverflowConst.isNonPositive())
return false;
// Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
// zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
// zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
if (OverflowConst.sgt(ICmpConst)) {
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
<< "const of " << *I << "\n");
SafeWrap.insert(I);
return true;
} else {
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
<< "const of " << *I << " and " << *CI << "\n");
SafeWrap.insert(I);
SafeWrap.insert(CI);
return true;
}
return false;
}
bool TypePromotion::shouldPromote(Value *V) {
if (!isa<IntegerType>(V->getType()) || isSink(V))
return false;
if (isSource(V))
return true;
auto *I = dyn_cast<Instruction>(V);
if (!I)
return false;
if (isa<ICmpInst>(I))
return false;
return true;
}
/// Return whether we can safely mutate V's type to ExtTy without having to be
/// concerned with zero extending or truncation.
static bool isPromotedResultSafe(Instruction *I) {
if (GenerateSignBits(I))
return false;
if (!isa<OverflowingBinaryOperator>(I))
return true;
return I->hasNoUnsignedWrap();
}
void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
SmallVector<Instruction *, 4> Users;
Instruction *InstTo = dyn_cast<Instruction>(To);
bool ReplacedAll = true;
LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
<< "\n");
for (Use &U : From->uses()) {
auto *User = cast<Instruction>(U.getUser());
if (InstTo && User->isIdenticalTo(InstTo)) {
ReplacedAll = false;
continue;
}
Users.push_back(User);
}
for (auto *U : Users)
U->replaceUsesOfWith(From, To);
if (ReplacedAll)
if (auto *I = dyn_cast<Instruction>(From))
InstsToRemove.insert(I);
}
void IRPromoter::ExtendSources() {
IRBuilder<> Builder{Ctx};
auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
assert(V->getType() != ExtTy && "zext already extends to i32");
LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n");
Builder.SetInsertPoint(InsertPt);
if (auto *I = dyn_cast<Instruction>(V))
Builder.SetCurrentDebugLocation(I->getDebugLoc());
Value *ZExt = Builder.CreateZExt(V, ExtTy);
if (auto *I = dyn_cast<Instruction>(ZExt)) {
if (isa<Argument>(V))
I->moveBefore(InsertPt);
else
I->moveAfter(InsertPt);
NewInsts.insert(I);
}
ReplaceAllUsersOfWith(V, ZExt);
};
// Now, insert extending instructions between the sources and their users.
LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
for (auto *V : Sources) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
else if (auto *Arg = dyn_cast<Argument>(V)) {
BasicBlock &BB = Arg->getParent()->front();
InsertZExt(Arg, &*BB.getFirstInsertionPt());
} else {
llvm_unreachable("unhandled source that needs extending");
}
Promoted.insert(V);
}
}
void IRPromoter::PromoteTree() {
LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
// Mutate the types of the instructions within the tree. Here we handle
// constant operands.
for (auto *V : Visited) {
if (Sources.count(V))
continue;
auto *I = cast<Instruction>(V);
if (Sinks.count(I))
continue;
for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
Value *Op = I->getOperand(i);
if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
continue;
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
// For subtract, we don't need to sext the constant. We only put it in
// SafeWrap because SafeWrap.size() is used elsewhere.
// For cmp, we need to sign extend a constant appearing in either
// operand. For add, we should only sign extend the RHS.
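// E.g. for the safe-wrap pattern "%sub = sub i8 %a, 2; %cmp = icmp ule i8
// %sub, 254", the icmp constant 254 (0xFE) sign extends to i32 -2
// (0xFFFFFFFE), while the sub's constant operand is simply zero extended.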
Constant *NewConst = (SafeWrap.contains(I) &&
(I->getOpcode() == Instruction::ICmp || i == 1) &&
I->getOpcode() != Instruction::Sub)
? ConstantExpr::getSExt(Const, ExtTy)
: ConstantExpr::getZExt(Const, ExtTy);
I->setOperand(i, NewConst);
} else if (isa<UndefValue>(Op))
I->setOperand(i, ConstantInt::get(ExtTy, 0));
}
// Mutate the result type, unless this is an icmp or switch.
if (!isa<ICmpInst>(I) && !isa<SwitchInst>(I)) {
I->mutateType(ExtTy);
Promoted.insert(I);
}
}
}
void IRPromoter::TruncateSinks() {
LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n");
IRBuilder<> Builder{Ctx};
auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction * {
if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
return nullptr;
if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources.count(V))
return nullptr;
LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for "
<< *V << "\n");
Builder.SetInsertPoint(cast<Instruction>(V));
auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
if (Trunc)
NewInsts.insert(Trunc);
return Trunc;
};
// Fix up any stores or returns that use the results of the promoted
// chain.
for (auto *I : Sinks) {
LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n");
// Handle calls separately as we need to iterate over arg operands.
if (auto *Call = dyn_cast<CallInst>(I)) {
for (unsigned i = 0; i < Call->arg_size(); ++i) {
Value *Arg = Call->getArgOperand(i);
Type *Ty = TruncTysMap[Call][i];
if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
Trunc->moveBefore(Call);
Call->setArgOperand(i, Trunc);
}
}
continue;
}
// Special case switches because we need to truncate the condition.
if (auto *Switch = dyn_cast<SwitchInst>(I)) {
Type *Ty = TruncTysMap[Switch][0];
if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
Trunc->moveBefore(Switch);
Switch->setCondition(Trunc);
}
continue;
}
// Don't insert a trunc for a zext which can still legally promote.
if (auto ZExt = dyn_cast<ZExtInst>(I))
if (ZExt->getType()->getScalarSizeInBits() > PromotedWidth)
continue;
// Now handle the others.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
Type *Ty = TruncTysMap[I][i];
if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
Trunc->moveBefore(I);
I->setOperand(i, Trunc);
}
}
}
}
void IRPromoter::Cleanup() {
LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
// Some zexts will now have become redundant, along with their trunc
- // operands, so remove them
+ // operands, so remove them.
+ // Some zexts need to be replaced with truncate if src bitwidth is larger.
for (auto *V : Visited) {
if (!isa<ZExtInst>(V))
continue;
auto ZExt = cast<ZExtInst>(V);
if (ZExt->getDestTy() != ExtTy)
continue;
Value *Src = ZExt->getOperand(0);
if (ZExt->getSrcTy() == ZExt->getDestTy()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
<< "\n");
ReplaceAllUsersOfWith(ZExt, Src);
continue;
+ } else if (ZExt->getSrcTy()->getScalarSizeInBits() > PromotedWidth) {
+ IRBuilder<> Builder{ZExt};
+ Value *Trunc = Builder.CreateTrunc(Src, ZExt->getDestTy());
+ ReplaceAllUsersOfWith(ZExt, Trunc);
+ continue;
}
// We've inserted a trunc for a zext sink, but we already know that the
// input is in range, negating the need for the trunc.
if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
auto *Trunc = cast<TruncInst>(Src);
assert(Trunc->getOperand(0)->getType() == ExtTy &&
"expected inserted trunc to be operating on i32");
ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
}
}
for (auto *I : InstsToRemove) {
LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n");
I->dropAllReferences();
I->eraseFromParent();
}
}
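// Convert the remaining (non-source) truncs into AND masks so the value stays
// in the promoted register width. A sketch of the rewrite, assuming the
// promoted width is i32 and the original trunc destination type was i8:
//
//   %t = trunc i32 %x to i8  -->  %m = and i32 %x, 255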
void IRPromoter::ConvertTruncs() {
LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n");
IRBuilder<> Builder{Ctx};
for (auto *V : Visited) {
if (!isa<TruncInst>(V) || Sources.count(V))
continue;
auto *Trunc = cast<TruncInst>(V);
Builder.SetInsertPoint(Trunc);
IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
unsigned NumBits = DestTy->getScalarSizeInBits();
ConstantInt *Mask =
ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
if (auto *I = dyn_cast<Instruction>(Masked))
NewInsts.insert(I);
ReplaceAllUsersOfWith(Trunc, Masked);
}
}
void IRPromoter::Mutate() {
LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to "
<< PromotedWidth << "-bits\n");
// Cache original types of the values that will likely need truncating
for (auto *I : Sinks) {
if (auto *Call = dyn_cast<CallInst>(I)) {
for (Value *Arg : Call->args())
TruncTysMap[Call].push_back(Arg->getType());
} else if (auto *Switch = dyn_cast<SwitchInst>(I))
TruncTysMap[I].push_back(Switch->getCondition()->getType());
else {
for (unsigned i = 0; i < I->getNumOperands(); ++i)
TruncTysMap[I].push_back(I->getOperand(i)->getType());
}
}
for (auto *V : Visited) {
if (!isa<TruncInst>(V) || Sources.count(V))
continue;
auto *Trunc = cast<TruncInst>(V);
TruncTysMap[Trunc].push_back(Trunc->getDestTy());
}
// Insert zext instructions between sources and their users.
ExtendSources();
// Promote visited instructions, mutating their types in place.
PromoteTree();
// Convert any truncs, that aren't sources, into AND masks.
ConvertTruncs();
// Insert trunc instructions for use by calls, stores etc...
TruncateSinks();
// Finally, remove unnecessary zexts and truncs, delete old instructions and
// clear the data structures.
Cleanup();
LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n");
}
/// We disallow booleans to make life easier when dealing with icmps but allow
/// any other integer that fits in a scalar register. Void types are accepted
/// so we can handle switches.
bool TypePromotion::isSupportedType(Value *V) {
Type *Ty = V->getType();
// Allow voids and pointers, these won't be promoted.
if (Ty->isVoidTy() || Ty->isPointerTy())
return true;
if (!isa<IntegerType>(Ty) || cast<IntegerType>(Ty)->getBitWidth() == 1 ||
cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth)
return false;
return LessOrEqualTypeSize(V);
}
/// We accept most instructions, as well as Arguments and ConstantInts. We
/// disallow casts other than zext and truncs and only allow calls if their
/// return value is zeroext. We don't allow opcodes that can introduce sign
/// bits.
bool TypePromotion::isSupportedValue(Value *V) {
if (auto *I = dyn_cast<Instruction>(V)) {
switch (I->getOpcode()) {
default:
return isa<BinaryOperator>(I) && isSupportedType(I) &&
!GenerateSignBits(I);
case Instruction::GetElementPtr:
case Instruction::Store:
case Instruction::Br:
case Instruction::Switch:
return true;
case Instruction::PHI:
case Instruction::Select:
case Instruction::Ret:
case Instruction::Load:
case Instruction::Trunc:
case Instruction::BitCast:
return isSupportedType(I);
case Instruction::ZExt:
return isSupportedType(I->getOperand(0));
case Instruction::ICmp:
// Now that we allow smaller types than TypeSize, only allow icmps of
// TypeSize, because smaller ones would require a trunc to be legalised.
// TODO: Allow icmp of smaller types, and calculate at the end
// whether the transform would be beneficial.
if (isa<PointerType>(I->getOperand(0)->getType()))
return true;
return EqualTypeSize(I->getOperand(0));
case Instruction::Call: {
// Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they
// can still be sinks.
auto *Call = cast<CallInst>(I);
return isSupportedType(Call) &&
Call->hasRetAttr(Attribute::AttrKind::ZExt);
}
}
} else if (isa<Constant>(V) && !isa<ConstantExpr>(V)) {
return isSupportedType(V);
} else if (isa<Argument>(V))
return isSupportedType(V);
return isa<BasicBlock>(V);
}
/// Check that the type of V would be promoted and that the original type is
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
bool TypePromotion::isLegalToPromote(Value *V) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
return true;
if (SafeToPromote.count(I))
return true;
if (isPromotedResultSafe(I) || isSafeWrap(I)) {
SafeToPromote.insert(I);
return true;
}
return false;
}
bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
Type *OrigTy = V->getType();
TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize();
SafeToPromote.clear();
SafeWrap.clear();
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
<< TypeSize << " bits to " << PromotedWidth << "\n");
SetVector<Value *> WorkList;
SetVector<Value *> Sources;
SetVector<Instruction *> Sinks;
SetVector<Value *> CurrentVisited;
WorkList.insert(V);
// Return true if V was added to the worklist as a supported instruction,
// if it was already visited, or if we don't need to explore it (e.g.
// pointer values and GEPs), and false otherwise.
auto AddLegalInst = [&](Value *V) {
if (CurrentVisited.count(V))
return true;
// Ignore GEPs because they don't need promoting and the constant indices
// will prevent the transformation.
if (isa<GetElementPtrInst>(V))
return true;
if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n");
return false;
}
WorkList.insert(V);
return true;
};
// Iterate through, and add to, a tree of operands and users in the use-def.
while (!WorkList.empty()) {
Value *V = WorkList.pop_back_val();
if (CurrentVisited.count(V))
continue;
// Ignore non-instructions, other than arguments.
if (!isa<Instruction>(V) && !isSource(V))
continue;
// If we've already visited this value from somewhere, bail now because
// the tree has already been explored.
// TODO: This could limit the transform, i.e. if we try to promote something
// from an i8 and fail first, before trying an i16.
if (AllVisited.count(V))
return false;
CurrentVisited.insert(V);
AllVisited.insert(V);
// Calls can be both sources and sinks.
if (isSink(V))
Sinks.insert(cast<Instruction>(V));
if (isSource(V))
Sources.insert(V);
if (!isSink(V) && !isSource(V)) {
if (auto *I = dyn_cast<Instruction>(V)) {
// Visit operands of any instruction visited.
for (auto &U : I->operands()) {
if (!AddLegalInst(U))
return false;
}
}
}
// Don't visit users of a node which isn't going to be mutated unless it's a
// source.
if (isSource(V) || shouldPromote(V)) {
for (Use &U : V->uses()) {
if (!AddLegalInst(U.getUser()))
return false;
}
}
}
LLVM_DEBUG({
dbgs() << "IR Promotion: Visited nodes:\n";
for (auto *I : CurrentVisited)
I->dump();
});
unsigned ToPromote = 0;
unsigned NonFreeArgs = 0;
SmallPtrSet<BasicBlock *, 4> Blocks;
for (auto *V : CurrentVisited) {
if (auto *I = dyn_cast<Instruction>(V))
Blocks.insert(I->getParent());
if (Sources.count(V)) {
if (auto *Arg = dyn_cast<Argument>(V))
if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr())
++NonFreeArgs;
continue;
}
if (Sinks.count(cast<Instruction>(V)))
continue;
++ToPromote;
}
// DAG optimizations should be able to handle these cases better, especially
// for function arguments.
if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
return false;
IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks,
SafeWrap);
Promoter.Mutate();
return true;
}
bool TypePromotion::runOnFunction(Function &F) {
if (skipFunction(F) || DisablePromotion)
return false;
LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n");
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
return false;
AllVisited.clear();
SafeToPromote.clear();
SafeWrap.clear();
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
const TargetMachine &TM = TPC->getTM<TargetMachine>();
const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F);
const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
const TargetTransformInfo &TII =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
RegisterBitWidth =
TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize();
Ctx = &F.getParent()->getContext();
// Search up from icmps to try to promote their operands.
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (AllVisited.count(&I))
continue;
if (!isa<ICmpInst>(&I))
continue;
auto *ICmp = cast<ICmpInst>(&I);
// Skip signed or pointer compares
if (ICmp->isSigned() || !isa<IntegerType>(ICmp->getOperand(0)->getType()))
continue;
LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
for (auto &Op : ICmp->operands()) {
if (auto *I = dyn_cast<Instruction>(Op)) {
EVT SrcVT = TLI->getValueType(DL, I->getType());
if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
break;
if (TLI->getTypeAction(*Ctx, SrcVT) !=
TargetLowering::TypePromoteInteger)
break;
EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
<< "for promoted type\n");
break;
}
MadeChange |= TryToPromote(I, PromotedVT.getFixedSizeInBits());
break;
}
}
}
}
AllVisited.clear();
SafeToPromote.clear();
SafeWrap.clear();
return MadeChange;
}
INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
char TypePromotion::ID = 0;
FunctionPass *llvm::createTypePromotionPass() { return new TypePromotion(); }
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 75594f90c926..b9962da1d302 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1,4706 +1,4706 @@
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
Function *&NewFn) {
// Check whether this is an old version of the function, which received
// v4f32 arguments.
Type *Arg0Type = F->getFunctionType()->getParamType(0);
if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
return false;
// Yes, it's old, replace it with new version.
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
Function *&NewFn) {
// Check that the last argument is an i32.
Type *LastArgType = F->getFunctionType()->getParamType(
F->getFunctionType()->getNumParams() - 1);
if (!LastArgType->isIntegerTy(32))
return false;
// Move this function aside and map down.
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
Function *&NewFn) {
// Check if the return type is a vector.
if (F->getReturnType()->isVectorTy())
return false;
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// All of the intrinsic matches below should be marked with the LLVM version
// that started autoupgrading them. At some point in the future we would like
// to use this information to remove upgrade code for some older intrinsics.
// It is currently undecided how we will determine that future point.
if (Name == "addcarryx.u32" || // Added in 8.0
Name == "addcarryx.u64" || // Added in 8.0
Name == "addcarry.u32" || // Added in 8.0
Name == "addcarry.u64" || // Added in 8.0
Name == "subborrow.u32" || // Added in 8.0
Name == "subborrow.u64" || // Added in 8.0
Name.startswith("sse2.padds.") || // Added in 8.0
Name.startswith("sse2.psubs.") || // Added in 8.0
Name.startswith("sse2.paddus.") || // Added in 8.0
Name.startswith("sse2.psubus.") || // Added in 8.0
Name.startswith("avx2.padds.") || // Added in 8.0
Name.startswith("avx2.psubs.") || // Added in 8.0
Name.startswith("avx2.paddus.") || // Added in 8.0
Name.startswith("avx2.psubus.") || // Added in 8.0
Name.startswith("avx512.padds.") || // Added in 8.0
Name.startswith("avx512.psubs.") || // Added in 8.0
Name.startswith("avx512.mask.padds.") || // Added in 8.0
Name.startswith("avx512.mask.psubs.") || // Added in 8.0
Name.startswith("avx512.mask.paddus.") || // Added in 8.0
Name.startswith("avx512.mask.psubus.") || // Added in 8.0
Name == "ssse3.pabs.b.128" || // Added in 6.0
Name == "ssse3.pabs.w.128" || // Added in 6.0
Name == "ssse3.pabs.d.128" || // Added in 6.0
Name.startswith("fma4.vfmadd.s") || // Added in 7.0
Name.startswith("fma.vfmadd.") || // Added in 7.0
Name.startswith("fma.vfmsub.") || // Added in 7.0
Name.startswith("fma.vfmsubadd.") || // Added in 7.0
Name.startswith("fma.vfnmadd.") || // Added in 7.0
Name.startswith("fma.vfnmsub.") || // Added in 7.0
Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
Name.startswith("avx512.kunpck") || // Added in 6.0
Name.startswith("avx2.pabs.") || // Added in 6.0
Name.startswith("avx512.mask.pabs.") || // Added in 6.0
Name.startswith("avx512.broadcastm") || // Added in 6.0
Name == "sse.sqrt.ss" || // Added in 7.0
Name == "sse2.sqrt.sd" || // Added in 7.0
Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
Name.startswith("avx.sqrt.p") || // Added in 7.0
Name.startswith("sse2.sqrt.p") || // Added in 7.0
Name.startswith("sse.sqrt.p") || // Added in 7.0
Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
Name.startswith("sse2.pcmpeq.") || // Added in 3.1
Name.startswith("sse2.pcmpgt.") || // Added in 3.1
Name.startswith("avx2.pcmpeq.") || // Added in 3.1
Name.startswith("avx2.pcmpgt.") || // Added in 3.1
Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
Name.startswith("avx.vperm2f128.") || // Added in 6.0
Name == "avx2.vperm2i128" || // Added in 6.0
Name == "sse.add.ss" || // Added in 4.0
Name == "sse2.add.sd" || // Added in 4.0
Name == "sse.sub.ss" || // Added in 4.0
Name == "sse2.sub.sd" || // Added in 4.0
Name == "sse.mul.ss" || // Added in 4.0
Name == "sse2.mul.sd" || // Added in 4.0
Name == "sse.div.ss" || // Added in 4.0
Name == "sse2.div.sd" || // Added in 4.0
Name == "sse41.pmaxsb" || // Added in 3.9
Name == "sse2.pmaxs.w" || // Added in 3.9
Name == "sse41.pmaxsd" || // Added in 3.9
Name == "sse2.pmaxu.b" || // Added in 3.9
Name == "sse41.pmaxuw" || // Added in 3.9
Name == "sse41.pmaxud" || // Added in 3.9
Name == "sse41.pminsb" || // Added in 3.9
Name == "sse2.pmins.w" || // Added in 3.9
Name == "sse41.pminsd" || // Added in 3.9
Name == "sse2.pminu.b" || // Added in 3.9
Name == "sse41.pminuw" || // Added in 3.9
Name == "sse41.pminud" || // Added in 3.9
Name == "avx512.kand.w" || // Added in 7.0
Name == "avx512.kandn.w" || // Added in 7.0
Name == "avx512.knot.w" || // Added in 7.0
Name == "avx512.kor.w" || // Added in 7.0
Name == "avx512.kxor.w" || // Added in 7.0
Name == "avx512.kxnor.w" || // Added in 7.0
Name == "avx512.kortestc.w" || // Added in 7.0
Name == "avx512.kortestz.w" || // Added in 7.0
Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
Name.startswith("avx2.pmax") || // Added in 3.9
Name.startswith("avx2.pmin") || // Added in 3.9
Name.startswith("avx512.mask.pmax") || // Added in 4.0
Name.startswith("avx512.mask.pmin") || // Added in 4.0
Name.startswith("avx2.vbroadcast") || // Added in 3.8
Name.startswith("avx2.pbroadcast") || // Added in 3.8
Name.startswith("avx.vpermil.") || // Added in 3.1
Name.startswith("sse2.pshuf") || // Added in 3.9
Name.startswith("avx512.pbroadcast") || // Added in 3.9
Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
Name.startswith("avx512.mask.movddup") || // Added in 3.9
Name.startswith("avx512.mask.movshdup") || // Added in 3.9
Name.startswith("avx512.mask.movsldup") || // Added in 3.9
Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
Name.startswith("avx512.mask.punpckl") || // Added in 3.9
Name.startswith("avx512.mask.punpckh") || // Added in 3.9
Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
Name.startswith("avx512.mask.pand.") || // Added in 3.9
Name.startswith("avx512.mask.pandn.") || // Added in 3.9
Name.startswith("avx512.mask.por.") || // Added in 3.9
Name.startswith("avx512.mask.pxor.") || // Added in 3.9
Name.startswith("avx512.mask.and.") || // Added in 3.9
Name.startswith("avx512.mask.andn.") || // Added in 3.9
Name.startswith("avx512.mask.or.") || // Added in 3.9
Name.startswith("avx512.mask.xor.") || // Added in 3.9
Name.startswith("avx512.mask.padd.") || // Added in 4.0
Name.startswith("avx512.mask.psub.") || // Added in 4.0
Name.startswith("avx512.mask.pmull.") || // Added in 4.0
Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
Name == "avx512.cvtusi2sd" || // Added in 7.0
Name.startswith("avx512.mask.permvar.") || // Added in 7.0
Name == "sse2.pmulu.dq" || // Added in 7.0
Name == "sse41.pmuldq" || // Added in 7.0
Name == "avx2.pmulu.dq" || // Added in 7.0
Name == "avx2.pmul.dq" || // Added in 7.0
Name == "avx512.pmulu.dq.512" || // Added in 7.0
Name == "avx512.pmul.dq.512" || // Added in 7.0
Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
Name.startswith("avx512.cmp.p") || // Added in 12.0
Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
Name.startswith("avx512.mask.psll.d") || // Added in 4.0
Name.startswith("avx512.mask.psll.q") || // Added in 4.0
Name.startswith("avx512.mask.psll.w") || // Added in 4.0
Name.startswith("avx512.mask.psra.d") || // Added in 4.0
Name.startswith("avx512.mask.psra.q") || // Added in 4.0
Name.startswith("avx512.mask.psra.w") || // Added in 4.0
Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
Name.startswith("avx512.mask.pslli") || // Added in 4.0
Name.startswith("avx512.mask.psrai") || // Added in 4.0
Name.startswith("avx512.mask.psrli") || // Added in 4.0
Name.startswith("avx512.mask.psllv") || // Added in 4.0
Name.startswith("avx512.mask.psrav") || // Added in 4.0
Name.startswith("avx512.mask.psrlv") || // Added in 4.0
Name.startswith("sse41.pmovsx") || // Added in 3.8
Name.startswith("sse41.pmovzx") || // Added in 3.9
Name.startswith("avx2.pmovsx") || // Added in 3.9
Name.startswith("avx2.pmovzx") || // Added in 3.9
Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
Name.startswith("avx512.vpshld.") || // Added in 8.0
Name.startswith("avx512.vpshrd.") || // Added in 8.0
Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
Name.startswith("avx512.mask.conflict.") || // Added in 9.0
Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
Name == "sse2.cvtsi642sd" || // Added in 7.0
Name == "sse2.cvtss2sd" || // Added in 7.0
Name == "sse2.cvtdq2pd" || // Added in 3.9
Name == "sse2.cvtdq2ps" || // Added in 7.0
Name == "sse2.cvtps2pd" || // Added in 3.9
Name == "avx.cvtdq2.pd.256" || // Added in 3.9
Name == "avx.cvtdq2.ps.256" || // Added in 7.0
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
Name.startswith("vcvtph2ps.") || // Added in 11.0
Name.startswith("avx.vinsertf128.") || // Added in 3.7
Name == "avx2.vinserti128" || // Added in 3.7
Name.startswith("avx512.mask.insert") || // Added in 4.0
Name.startswith("avx.vextractf128.") || // Added in 3.7
Name == "avx2.vextracti128" || // Added in 3.7
Name.startswith("avx512.mask.vextract") || // Added in 4.0
Name.startswith("sse4a.movnt.") || // Added in 3.9
Name.startswith("avx.movnt.") || // Added in 3.2
Name.startswith("avx512.storent.") || // Added in 3.9
Name == "sse41.movntdqa" || // Added in 5.0
Name == "avx2.movntdqa" || // Added in 5.0
Name == "avx512.movntdqa" || // Added in 5.0
Name == "sse2.storel.dq" || // Added in 3.9
Name.startswith("sse.storeu.") || // Added in 3.9
Name.startswith("sse2.storeu.") || // Added in 3.9
Name.startswith("avx.storeu.") || // Added in 3.9
Name.startswith("avx512.mask.storeu.") || // Added in 3.9
Name.startswith("avx512.mask.store.p") || // Added in 3.9
Name.startswith("avx512.mask.store.b.") || // Added in 3.9
Name.startswith("avx512.mask.store.w.") || // Added in 3.9
Name.startswith("avx512.mask.store.d.") || // Added in 3.9
Name.startswith("avx512.mask.store.q.") || // Added in 3.9
Name == "avx512.mask.store.ss" || // Added in 7.0
Name.startswith("avx512.mask.loadu.") || // Added in 3.9
Name.startswith("avx512.mask.load.") || // Added in 3.9
Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
Name.startswith("avx512.mask.expand.b") || // Added in 9.0
Name.startswith("avx512.mask.expand.w") || // Added in 9.0
Name.startswith("avx512.mask.expand.d") || // Added in 9.0
Name.startswith("avx512.mask.expand.q") || // Added in 9.0
Name.startswith("avx512.mask.expand.p") || // Added in 9.0
Name.startswith("avx512.mask.compress.b") || // Added in 9.0
Name.startswith("avx512.mask.compress.w") || // Added in 9.0
Name.startswith("avx512.mask.compress.d") || // Added in 9.0
Name.startswith("avx512.mask.compress.q") || // Added in 9.0
Name.startswith("avx512.mask.compress.p") || // Added in 9.0
Name == "sse42.crc32.64.8" || // Added in 3.4
Name.startswith("avx.vbroadcast.s") || // Added in 3.5
Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
Name.startswith("avx512.mask.palignr.") || // Added in 3.9
Name.startswith("avx512.mask.valign.") || // Added in 4.0
Name.startswith("sse2.psll.dq") || // Added in 3.7
Name.startswith("sse2.psrl.dq") || // Added in 3.7
Name.startswith("avx2.psll.dq") || // Added in 3.7
Name.startswith("avx2.psrl.dq") || // Added in 3.7
Name.startswith("avx512.psll.dq") || // Added in 3.9
Name.startswith("avx512.psrl.dq") || // Added in 3.9
Name == "sse41.pblendw" || // Added in 3.7
Name.startswith("sse41.blendp") || // Added in 3.7
Name.startswith("avx.blend.p") || // Added in 3.7
Name == "avx2.pblendw" || // Added in 3.7
Name.startswith("avx2.pblendd.") || // Added in 3.7
Name.startswith("avx.vbroadcastf128") || // Added in 4.0
Name == "avx2.vbroadcasti128" || // Added in 3.7
Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
Name == "xop.vpcmov" || // Added in 3.8
Name == "xop.vpcmov.256" || // Added in 5.0
Name.startswith("avx512.mask.move.s") || // Added in 4.0
Name.startswith("avx512.cvtmask2") || // Added in 5.0
Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
Name.startswith("xop.vprot") || // Added in 8.0
Name.startswith("avx512.prol") || // Added in 8.0
Name.startswith("avx512.pror") || // Added in 8.0
Name.startswith("avx512.mask.prorv.") || // Added in 8.0
Name.startswith("avx512.mask.pror.") || // Added in 8.0
Name.startswith("avx512.mask.prolv.") || // Added in 8.0
Name.startswith("avx512.mask.prol.") || // Added in 8.0
Name.startswith("avx512.ptestm") || // Added in 6.0
Name.startswith("avx512.ptestnm") || // Added in 6.0
Name.startswith("avx512.mask.pavg")) // Added in 6.0
return true;
return false;
}
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
Function *&NewFn) {
// Only handle intrinsics that start with "x86.".
if (!Name.startswith("x86."))
return false;
// Remove "x86." prefix.
Name = Name.substr(4);
if (ShouldUpgradeX86Intrinsic(F, Name)) {
NewFn = nullptr;
return true;
}
if (Name == "rdtscp") { // Added in 8.0
// If this intrinsic has 0 operands, it's the new version.
if (F->getFunctionType()->getNumParams() == 0)
return false;
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_rdtscp);
return true;
}
// SSE4.1 ptest functions may have an old signature.
if (Name.startswith("sse41.ptest")) { // Added in 3.2
if (Name.substr(11) == "c")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
if (Name.substr(11) == "z")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
if (Name.substr(11) == "nzc")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
}
// Several blend and other instructions with masks used the wrong number of
// bits.
if (Name == "sse41.insertps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
NewFn);
if (Name == "sse41.dppd") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
NewFn);
if (Name == "sse41.dpps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
NewFn);
if (Name == "sse41.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
NewFn);
if (Name == "avx.dp.ps.256") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
NewFn);
if (Name == "avx2.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
NewFn);
if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
NewFn);
if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
NewFn);
if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
NewFn);
if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
NewFn);
if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
NewFn);
// frcz.ss/sd may need to have an argument dropped. Added in 3.2
if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_ss);
return true;
}
if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_sd);
return true;
}
// Upgrade any XOP PERMIL2 index operand still using a float/double vector.
if (Name.startswith("xop.vpermil2")) { // Added in 3.9
auto Idx = F->getFunctionType()->getParamType(2);
if (Idx->isFPOrFPVectorTy()) {
rename(F);
unsigned IdxSize = Idx->getPrimitiveSizeInBits();
unsigned EltSize = Idx->getScalarSizeInBits();
Intrinsic::ID Permil2ID;
if (EltSize == 64 && IdxSize == 128)
Permil2ID = Intrinsic::x86_xop_vpermil2pd;
else if (EltSize == 32 && IdxSize == 128)
Permil2ID = Intrinsic::x86_xop_vpermil2ps;
else if (EltSize == 64 && IdxSize == 256)
Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
else
Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
return true;
}
}
if (Name == "seh.recoverfp") {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
return true;
}
return false;
}
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
// Quickly eliminate it, if it's not a candidate.
StringRef Name = F->getName();
if (Name.size() <= 8 || !Name.startswith("llvm."))
return false;
Name = Name.substr(5); // Strip off "llvm."
switch (Name[0]) {
default: break;
case 'a': {
if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.frintn")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.rbit")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vclz")) {
Type* args[2] = {
F->arg_begin()->getType(),
Type::getInt1Ty(F->getContext())
};
// Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
// the end of the name. Change name from llvm.arm.neon.vclz.* to
// llvm.ctlz.*
FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
"llvm.ctlz." + Name.substr(14), F->getParent());
return true;
}
if (Name.startswith("arm.neon.vcnt")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
F->arg_begin()->getType());
return true;
}
static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vstRegex.match(Name)) {
static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
static const Intrinsic::ID StoreLaneInts[] = {
Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
Intrinsic::arm_neon_vst4lane
};
auto fArgs = F->getFunctionType()->params();
Type *Tys[] = {fArgs[0], fArgs[1]};
if (!Name.contains("lane"))
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreInts[fArgs.size() - 3], Tys);
else
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreLaneInts[fArgs.size() - 5], Tys);
return true;
}
if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
if (Name.startswith("arm.neon.vqadds.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vqaddu.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vqsubs.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vqsubu.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.addp")) {
if (F->arg_size() != 2)
break; // Invalid IR.
VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
if (Ty && Ty->getElementType()->isFloatingPointTy()) {
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::aarch64_neon_faddp, Ty);
return true;
}
}
// Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
// v16i8, respectively.
if ((Name.startswith("arm.neon.bfdot.") ||
Name.startswith("aarch64.neon.bfdot.")) &&
Name.endswith("i8")) {
Intrinsic::ID IID =
StringSwitch<Intrinsic::ID>(Name)
.Cases("arm.neon.bfdot.v2f32.v8i8",
"arm.neon.bfdot.v4f32.v16i8",
Intrinsic::arm_neon_bfdot)
.Cases("aarch64.neon.bfdot.v2f32.v8i8",
"aarch64.neon.bfdot.v4f32.v16i8",
Intrinsic::aarch64_neon_bfdot)
.Default(Intrinsic::not_intrinsic);
if (IID == Intrinsic::not_intrinsic)
break;
size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
assert((OperandWidth == 64 || OperandWidth == 128) &&
"Unexpected operand width");
LLVMContext &Ctx = F->getParent()->getContext();
std::array<Type *, 2> Tys {{
F->getReturnType(),
FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
}};
NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
return true;
}
// Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
// and accept v8bf16 instead of v16i8
if ((Name.startswith("arm.neon.bfm") ||
Name.startswith("aarch64.neon.bfm")) &&
Name.endswith(".v4f32.v16i8")) {
Intrinsic::ID IID =
StringSwitch<Intrinsic::ID>(Name)
.Case("arm.neon.bfmmla.v4f32.v16i8",
Intrinsic::arm_neon_bfmmla)
.Case("arm.neon.bfmlalb.v4f32.v16i8",
Intrinsic::arm_neon_bfmlalb)
.Case("arm.neon.bfmlalt.v4f32.v16i8",
Intrinsic::arm_neon_bfmlalt)
.Case("aarch64.neon.bfmmla.v4f32.v16i8",
Intrinsic::aarch64_neon_bfmmla)
.Case("aarch64.neon.bfmlalb.v4f32.v16i8",
Intrinsic::aarch64_neon_bfmlalb)
.Case("aarch64.neon.bfmlalt.v4f32.v16i8",
Intrinsic::aarch64_neon_bfmlalt)
.Default(Intrinsic::not_intrinsic);
if (IID == Intrinsic::not_intrinsic)
break;
std::array<Type *, 0> Tys;
NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
return true;
}
if (Name == "arm.mve.vctp64" &&
cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
// A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
// function and deal with it below in UpgradeIntrinsicCall.
rename(F);
return true;
}
// These too are changed to accept a v2i1 instead of the old v4i1.
if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
return true;
if (Name == "amdgcn.alignbit") {
// Target-specific intrinsic became redundant.
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
{F->getReturnType()});
return true;
}
break;
}
case 'c': {
if (Name.startswith("ctlz.") && F->arg_size() == 1) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("cttz.") && F->arg_size() == 1) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
F->arg_begin()->getType());
return true;
}
break;
}
case 'd': {
if (Name == "dbg.value" && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
return true;
}
break;
}
case 'e': {
if (Name.startswith("experimental.vector.extract.")) {
rename(F);
Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::vector_extract, Tys);
return true;
}
if (Name.startswith("experimental.vector.insert.")) {
rename(F);
auto Args = F->getFunctionType()->params();
Type *Tys[] = {Args[0], Args[1]};
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::vector_insert, Tys);
return true;
}
SmallVector<StringRef, 2> Groups;
static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
if (R.match(Name, &Groups)) {
Intrinsic::ID ID;
ID = StringSwitch<Intrinsic::ID>(Groups[1])
.Case("add", Intrinsic::vector_reduce_add)
.Case("mul", Intrinsic::vector_reduce_mul)
.Case("and", Intrinsic::vector_reduce_and)
.Case("or", Intrinsic::vector_reduce_or)
.Case("xor", Intrinsic::vector_reduce_xor)
.Case("smax", Intrinsic::vector_reduce_smax)
.Case("smin", Intrinsic::vector_reduce_smin)
.Case("umax", Intrinsic::vector_reduce_umax)
.Case("umin", Intrinsic::vector_reduce_umin)
.Case("fmax", Intrinsic::vector_reduce_fmax)
.Case("fmin", Intrinsic::vector_reduce_fmin)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
rename(F);
auto Args = F->getFunctionType()->params();
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
return true;
}
}
static const Regex R2(
"^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
Groups.clear();
if (R2.match(Name, &Groups)) {
Intrinsic::ID ID = Intrinsic::not_intrinsic;
if (Groups[1] == "fadd")
ID = Intrinsic::vector_reduce_fadd;
if (Groups[1] == "fmul")
ID = Intrinsic::vector_reduce_fmul;
if (ID != Intrinsic::not_intrinsic) {
rename(F);
auto Args = F->getFunctionType()->params();
Type *Tys[] = {Args[1]};
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
return true;
}
}
break;
}
case 'i':
case 'l': {
bool IsLifetimeStart = Name.startswith("lifetime.start");
if (IsLifetimeStart || Name.startswith("invariant.start")) {
Intrinsic::ID ID = IsLifetimeStart ?
Intrinsic::lifetime_start : Intrinsic::invariant_start;
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[1]};
if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
bool IsLifetimeEnd = Name.startswith("lifetime.end");
if (IsLifetimeEnd || Name.startswith("invariant.end")) {
Intrinsic::ID ID = IsLifetimeEnd ?
Intrinsic::lifetime_end : Intrinsic::invariant_end;
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
if (Name.startswith("invariant.group.barrier")) {
// Rename invariant.group.barrier to launder.invariant.group
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[0]};
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::launder_invariant_group, ObjectPtr);
return true;
}
break;
}
case 'm': {
if (Name.startswith("masked.load.")) {
Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->getName() !=
Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_load,
Tys);
return true;
}
}
if (Name.startswith("masked.store.")) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = { Args[0], Args[1] };
if (F->getName() !=
Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_store,
Tys);
return true;
}
}
// Rename gather/scatter intrinsics with no address space overloading to the
// new overload, which includes an address space.
if (Name.startswith("masked.gather.")) {
Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
if (F->getName() !=
Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_gather, Tys);
return true;
}
}
if (Name.startswith("masked.scatter.")) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = {Args[0], Args[1]};
if (F->getName() !=
Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_scatter, Tys);
return true;
}
}
// Update the memory intrinsics (memcpy/memmove/memset) that have an
// alignment parameter to instead embed the alignment as an attribute of
// their pointer args.
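// For example, an old-style declaration such as
//   declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
// (with an explicit i32 alignment operand) is mapped to the four-operand
// form, with the alignment expressed as align attributes on the pointer
// arguments instead.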
if (Name.startswith("memcpy.") && F->arg_size() == 5) {
rename(F);
// Get the types of dest, src, and len
ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
ParamTypes);
return true;
}
if (Name.startswith("memmove.") && F->arg_size() == 5) {
rename(F);
// Get the types of dest, src, and len
ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
ParamTypes);
return true;
}
if (Name.startswith("memset.") && F->arg_size() == 5) {
rename(F);
// Get the types of dest, and len
const auto *FT = F->getFunctionType();
Type *ParamTypes[2] = {
FT->getParamType(0), // Dest
FT->getParamType(2) // len
};
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
ParamTypes);
return true;
}
break;
}
case 'n': {
if (Name.startswith("nvvm.")) {
Name = Name.substr(5);
// The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
.Cases("brev32", "brev64", Intrinsic::bitreverse)
.Case("clz.i", Intrinsic::ctlz)
.Case("popc.i", Intrinsic::ctpop)
.Default(Intrinsic::not_intrinsic);
if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
{F->getReturnType()});
return true;
}
// The following nvvm intrinsics correspond exactly to an LLVM idiom, but
// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
//
// TODO: We could add lohi.i2d.
bool Expand = StringSwitch<bool>(Name)
.Cases("abs.i", "abs.ll", true)
.Cases("clz.ll", "popc.ll", "h2f", true)
.Cases("max.i", "max.ll", "max.ui", "max.ull", true)
.Cases("min.i", "min.ll", "min.ui", "min.ull", true)
.StartsWith("atomic.load.add.f32.p", true)
.StartsWith("atomic.load.add.f64.p", true)
.Default(false);
if (Expand) {
NewFn = nullptr;
return true;
}
}
break;
}
case 'o':
// We only need to change the name to match the mangling including the
// address space.
if (Name.startswith("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->arg_size() == 2 || F->arg_size() == 3 ||
F->getName() !=
Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
Tys);
return true;
}
}
break;
case 'p':
if (Name == "prefetch") {
// Handle address space overloading.
Type *Tys[] = {F->arg_begin()->getType()};
if (F->getName() !=
Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
rename(F);
NewFn =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
return true;
}
} else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::ptr_annotation,
F->arg_begin()->getType());
return true;
}
break;
case 's':
if (Name == "stackprotectorcheck") {
NewFn = nullptr;
return true;
}
break;
case 'v': {
if (Name == "var.annotation" && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::var_annotation);
return true;
}
break;
}
case 'x':
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
}
auto *ST = dyn_cast<StructType>(F->getReturnType());
if (ST && (!ST->isLiteral() || ST->isPacked())) {
// Replace return type with literal non-packed struct. Only do this for
// intrinsics declared to return a struct, not for intrinsics with
// overloaded return type, in which case the exact struct type will be
// mangled into the name.
SmallVector<Intrinsic::IITDescriptor> Desc;
Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
auto *FT = F->getFunctionType();
auto *NewST = StructType::get(ST->getContext(), ST->elements());
auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
std::string Name = F->getName().str();
rename(F);
NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
Name, F->getParent());
// The new function may also need remangling.
- if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
+ if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
NewFn = *Result;
return true;
}
}
// Remangle our intrinsic since we upgraded the mangling.
auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
if (Result != None) {
NewFn = *Result;
return true;
}
// This may not belong here. This function is effectively being overloaded
// to both detect an intrinsic which needs upgrading, and to provide the
// upgraded form of the intrinsic. We should perhaps have two separate
// functions for this.
return false;
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
NewFn = nullptr;
bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
assert(F != NewFn && "Intrinsic function upgraded to the same function");
// Upgrade intrinsic attributes. This does not change the function.
if (NewFn)
F = NewFn;
if (Intrinsic::ID id = F->getIntrinsicID())
F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
return Upgraded;
}
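// Upgrade two-element { i32, void ()* } entries of llvm.global_ctors /
// llvm.global_dtors to the three-element form by appending a null i8*
// "associated data" field to each entry.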
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
GV->getName() == "llvm.global_dtors")) ||
!GV->hasInitializer())
return nullptr;
ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
if (!ATy)
return nullptr;
StructType *STy = dyn_cast<StructType>(ATy->getElementType());
if (!STy || STy->getNumElements() != 2)
return nullptr;
LLVMContext &C = GV->getContext();
IRBuilder<> IRB(C);
auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
IRB.getInt8PtrTy());
Constant *Init = GV->getInitializer();
unsigned N = Init->getNumOperands();
std::vector<Constant *> NewCtors(N);
for (unsigned i = 0; i != N; ++i) {
auto Ctor = cast<Constant>(Init->getOperand(i));
NewCtors[i] = ConstantStruct::get(
EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
Constant::getNullValue(IRB.getInt8PtrTy()));
}
Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
NewInit, GV->getName());
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
Value *Op, unsigned Shift) {
auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
Op = Builder.CreateBitCast(Op, VecTy, "cast");
// We'll be shuffling in zeroes.
Value *Res = Constant::getNullValue(VecTy);
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
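// For example, for a single 128-bit lane (NumElts == 16) and Shift == 4 the
// mask selects four zero bytes followed by source bytes 0..11, i.e. a left
// shift of the lane by four bytes.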
if (Shift < 16) {
int Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = NumElts + i - Shift;
if (Idx < NumElts)
Idx -= NumElts - 16; // end of lane, switch operand.
Idxs[l + i] = Idx + l;
}
Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
unsigned Shift) {
auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
Op = Builder.CreateBitCast(Op, VecTy, "cast");
// We'll be shuffling in zeroes.
Value *Res = Constant::getNullValue(VecTy);
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
int Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = i + Shift;
if (Idx >= 16)
Idx += NumElts - 16; // end of lane, switch operand.
Idxs[l + i] = Idx + l;
}
Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
unsigned NumElts) {
assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
llvm::VectorType *MaskTy = FixedVectorType::get(
Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
// If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
// i8 and we need to extract down to the right number of elements.
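// For example, an i8 mask with value 0b00000101 and NumElts == 4 becomes
// <i1 1, i1 0, i1 1, i1 0>, since bit i of the mask maps to element i.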
if (NumElts <= 4) {
int Indices[4];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
Mask = Builder.CreateShuffleVector(
Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
}
return Mask;
}
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
Value *Op0, Value *Op1) {
// If the mask is all ones just emit the first operation.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Op0;
Mask = getX86MaskVec(Builder, Mask,
cast<FixedVectorType>(Op0->getType())->getNumElements());
return Builder.CreateSelect(Mask, Op0, Op1);
}
static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
Value *Op0, Value *Op1) {
// If the mask is all ones just emit the first operation.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Op0;
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
Mask->getType()->getIntegerBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
return Builder.CreateSelect(Mask, Op0, Op1);
}
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting, while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
Value *Op1, Value *Shift,
Value *Passthru, Value *Mask,
bool IsVALIGN) {
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
// Mask the immediate for VALIGN.
if (IsVALIGN)
ShiftVal &= (NumElts - 1);
// If palignr is shifting the pair of vectors more than the size of two
// lanes, emit zero.
if (ShiftVal >= 32)
return llvm::Constant::getNullValue(Op0->getType());
// If palignr is shifting the pair of input vectors more than one lane,
// but less than two lanes, convert to shifting in zeroes.
if (ShiftVal > 16) {
ShiftVal -= 16;
Op1 = Op0;
Op0 = llvm::Constant::getNullValue(Op0->getType());
}
int Indices[64];
// 256-bit palignr operates on 128-bit lanes so we need to handle that
for (unsigned l = 0; l < NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = ShiftVal + i;
if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
Idx += NumElts - 16; // End of lane, switch operand.
Indices[l + i] = Idx + l;
}
}
Value *Align = Builder.CreateShuffleVector(Op1, Op0,
makeArrayRef(Indices, NumElts),
"palignr");
return EmitX86Select(Builder, Mask, Align, Passthru);
}
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
bool ZeroMask, bool IndexForm) {
Type *Ty = CI.getType();
unsigned VecWidth = Ty->getPrimitiveSizeInBits();
unsigned EltWidth = Ty->getScalarSizeInBits();
bool IsFloat = Ty->isFPOrFPVectorTy();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_128;
else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_128;
else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_256;
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_256;
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_512;
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_512;
else if (VecWidth == 128 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
else if (VecWidth == 256 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
else if (VecWidth == 512 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
else if (VecWidth == 128 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
else if (VecWidth == 256 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
else if (VecWidth == 512 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
CI.getArgOperand(2) };
// If this isn't index form we need to swap operand 0 and 1.
if (!IndexForm)
std::swap(Args[0], Args[1]);
Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
: Builder.CreateBitCast(CI.getArgOperand(1),
Ty);
return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
Intrinsic::ID IID) {
Type *Ty = CI.getType();
Value *Op0 = CI.getOperand(0);
Value *Op1 = CI.getOperand(1);
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
if (CI.arg_size() == 4) { // For masked intrinsics.
Value *VecSrc = CI.getOperand(2);
Value *Mask = CI.getOperand(3);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
}
return Res;
}
static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
bool IsRotateRight) {
Type *Ty = CI.getType();
Value *Src = CI.getArgOperand(0);
Value *Amt = CI.getArgOperand(1);
// The amount may be a scalar immediate, in which case create a splat vector.
// Funnel shift amounts are treated as modulo, and the types are all powers of
// 2, so we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);
}
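// A rotate is a funnel shift with both vector operands equal, so fshl/fshr
// with Src repeated implements rotl/rotr.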
Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
if (CI.arg_size() == 4) { // For masked intrinsics.
Value *VecSrc = CI.getOperand(2);
Value *Mask = CI.getOperand(3);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
}
return Res;
}
static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
bool IsSigned) {
Type *Ty = CI.getType();
Value *LHS = CI.getArgOperand(0);
Value *RHS = CI.getArgOperand(1);
CmpInst::Predicate Pred;
switch (Imm) {
case 0x0:
Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
break;
case 0x1:
Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
break;
case 0x2:
Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
break;
case 0x3:
Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
break;
case 0x4:
Pred = ICmpInst::ICMP_EQ;
break;
case 0x5:
Pred = ICmpInst::ICMP_NE;
break;
case 0x6:
return Constant::getNullValue(Ty); // FALSE
case 0x7:
return Constant::getAllOnesValue(Ty); // TRUE
default:
llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
}
Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
Value *Ext = Builder.CreateSExt(Cmp, Ty);
return Ext;
}
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
bool IsShiftRight, bool ZeroMask) {
Type *Ty = CI.getType();
Value *Op0 = CI.getArgOperand(0);
Value *Op1 = CI.getArgOperand(1);
Value *Amt = CI.getArgOperand(2);
if (IsShiftRight)
std::swap(Op0, Op1);
// The amount may be a scalar immediate, in which case create a splat vector.
// Funnel shift amounts are treated as modulo, and the types are all powers of
// 2, so we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);
}
Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
unsigned NumArgs = CI.arg_size();
if (NumArgs >= 4) { // For masked intrinsics.
Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
CI.getArgOperand(0);
Value *Mask = CI.getOperand(NumArgs - 1);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
}
return Res;
}
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
Value *Ptr, Value *Data, Value *Mask,
bool Aligned) {
// Cast the pointer to the right type.
Ptr = Builder.CreateBitCast(Ptr,
llvm::PointerType::getUnqual(Data->getType()));
const Align Alignment =
Aligned
? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
: Align(1);
// If the mask is all ones just emit a regular store.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Builder.CreateAlignedStore(Data, Ptr, Alignment);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
Value *Ptr, Value *Passthru, Value *Mask,
bool Aligned) {
Type *ValTy = Passthru->getType();
// Cast the pointer to the right type.
Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
const Align Alignment =
Aligned
? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
8)
: Align(1);
// If the mask is all ones just emit a regular load.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}
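// Upgrade X86 pabs intrinsics to the generic llvm.abs intrinsic. The i1
// false operand means INT_MIN is not treated as poison; the masked forms
// select the result against the passthru operand.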
static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
Type *Ty = CI.getType();
Value *Op0 = CI.getArgOperand(0);
Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
if (CI.arg_size() == 3)
Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
return Res;
}
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
Type *Ty = CI.getType();
// Arguments have a vXi32 type so cast to vXi64.
Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
if (IsSigned) {
// Shift left then arithmetic shift right.
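// Shifting each 64-bit lane left by 32 and then arithmetic-shifting it back
// sign-extends the low 32 bits of the lane.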
Constant *ShiftAmt = ConstantInt::get(Ty, 32);
LHS = Builder.CreateShl(LHS, ShiftAmt);
LHS = Builder.CreateAShr(LHS, ShiftAmt);
RHS = Builder.CreateShl(RHS, ShiftAmt);
RHS = Builder.CreateAShr(RHS, ShiftAmt);
} else {
// Clear the upper bits.
Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
LHS = Builder.CreateAnd(LHS, Mask);
RHS = Builder.CreateAnd(RHS, Mask);
}
Value *Res = Builder.CreateMul(LHS, RHS);
if (CI.arg_size() == 4)
Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
return Res;
}
// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
Value *Mask) {
unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
if (Mask) {
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
}
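// Pad i1 vectors narrower than 8 elements with zero lanes so the bitcast
// below produces at least an i8.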
if (NumElts < 8) {
int Indices[8];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
for (unsigned i = NumElts; i != 8; ++i)
Indices[i] = NumElts + i % NumElts;
Vec = Builder.CreateShuffleVector(Vec,
Constant::getNullValue(Vec->getType()),
Indices);
}
return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
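// Upgrade masked integer compare intrinsics. CC selects the predicate
// (3 = always false, 7 = always true); the i1 result is masked and widened to
// at least 8 bits.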
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
Value *Cmp;
if (CC == 3) {
Cmp = Constant::getNullValue(
FixedVectorType::get(Builder.getInt1Ty(), NumElts));
} else if (CC == 7) {
Cmp = Constant::getAllOnesValue(
FixedVectorType::get(Builder.getInt1Ty(), NumElts));
} else {
ICmpInst::Predicate Pred;
switch (CC) {
default: llvm_unreachable("Unknown condition code");
case 0: Pred = ICmpInst::ICMP_EQ; break;
case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
case 4: Pred = ICmpInst::ICMP_NE; break;
case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
}
Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
}
Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
Intrinsic::ID IID) {
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
Value *Rep = Builder.CreateCall(Intrin,
{ CI.getArgOperand(0), CI.getArgOperand(1) });
return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
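// Upgrade a masked scalar move: select element 0 of B or Src based on the low
// bit of the mask and insert the result into element 0 of A.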
static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
Value* A = CI.getArgOperand(0);
Value* B = CI.getArgOperand(1);
Value* Src = CI.getArgOperand(2);
Value* Mask = CI.getArgOperand(3);
Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
Value* Cmp = Builder.CreateIsNotNull(AndNode);
Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
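// Upgrade vpmovm2* style intrinsics: expand the scalar mask to a vector of i1
// and sign-extend it to the result type.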
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
Value* Op = CI.getArgOperand(0);
Type* ReturnOp = CI.getType();
unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
Value *Mask = getX86MaskVec(Builder, Op, NumElts);
return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
CallBase &CI, Value *&Rep) {
Name = Name.substr(12); // Remove avx512.mask.
unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI.getType()->getScalarSizeInBits();
Intrinsic::ID IID;
if (Name.startswith("max.p")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_max_ps;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_sse2_max_pd;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_max_ps_256;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_max_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("min.p")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_min_ps;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_sse2_min_pd;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_min_ps_256;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_min_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pshuf.b.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pshuf_b_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pshuf_b;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pshuf_b_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmul.hr.sw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmul_hr_sw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmulh.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmulh_w;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmulh_w;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmulh_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmulhu.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmulhu_w;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmulhu_w;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmulhu_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmaddw.d.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmadd_wd;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmadd_wd;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmaddw_d_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmaddubs.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmadd_ub_sw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmaddubs_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packsswb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packsswb_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packsswb;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packsswb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packssdw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packssdw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packssdw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packssdw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packuswb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packuswb_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packuswb;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packuswb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packusdw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse41_packusdw;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packusdw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packusdw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("vpermilvar.")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx_vpermilvar_ps;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_avx_vpermilvar_pd;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_vpermilvar_ps_256;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_vpermilvar_pd_256;
else if (VecWidth == 512 && EltWidth == 32)
IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
else if (VecWidth == 512 && EltWidth == 64)
IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name == "cvtpd2dq.256") {
IID = Intrinsic::x86_avx_cvt_pd2dq_256;
} else if (Name == "cvtpd2ps.256") {
IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
} else if (Name == "cvttpd2dq.256") {
IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
} else if (Name == "cvttps2dq.128") {
IID = Intrinsic::x86_sse2_cvttps2dq;
} else if (Name == "cvttps2dq.256") {
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
} else if (Name.startswith("permvar.")) {
bool IsFloat = CI.getType()->isFPOrFPVectorTy();
if (VecWidth == 256 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx2_permps;
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx2_permd;
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_permvar_df_256;
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_permvar_di_256;
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_permvar_sf_512;
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_permvar_si_512;
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_permvar_df_512;
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_permvar_di_512;
else if (VecWidth == 128 && EltWidth == 16)
IID = Intrinsic::x86_avx512_permvar_hi_128;
else if (VecWidth == 256 && EltWidth == 16)
IID = Intrinsic::x86_avx512_permvar_hi_256;
else if (VecWidth == 512 && EltWidth == 16)
IID = Intrinsic::x86_avx512_permvar_hi_512;
else if (VecWidth == 128 && EltWidth == 8)
IID = Intrinsic::x86_avx512_permvar_qi_128;
else if (VecWidth == 256 && EltWidth == 8)
IID = Intrinsic::x86_avx512_permvar_qi_256;
else if (VecWidth == 512 && EltWidth == 8)
IID = Intrinsic::x86_avx512_permvar_qi_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("dbpsadbw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_avx512_dbpsadbw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx512_dbpsadbw_256;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_dbpsadbw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmultishift.qb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_avx512_pmultishift_qb_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx512_pmultishift_qb_256;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmultishift_qb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("conflict.")) {
if (Name[9] == 'd' && VecWidth == 128)
IID = Intrinsic::x86_avx512_conflict_d_128;
else if (Name[9] == 'd' && VecWidth == 256)
IID = Intrinsic::x86_avx512_conflict_d_256;
else if (Name[9] == 'd' && VecWidth == 512)
IID = Intrinsic::x86_avx512_conflict_d_512;
else if (Name[9] == 'q' && VecWidth == 128)
IID = Intrinsic::x86_avx512_conflict_q_128;
else if (Name[9] == 'q' && VecWidth == 256)
IID = Intrinsic::x86_avx512_conflict_q_256;
else if (Name[9] == 'q' && VecWidth == 512)
IID = Intrinsic::x86_avx512_conflict_q_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pavg.")) {
if (Name[5] == 'b' && VecWidth == 128)
IID = Intrinsic::x86_sse2_pavg_b;
else if (Name[5] == 'b' && VecWidth == 256)
IID = Intrinsic::x86_avx2_pavg_b;
else if (Name[5] == 'b' && VecWidth == 512)
IID = Intrinsic::x86_avx512_pavg_b_512;
else if (Name[5] == 'w' && VecWidth == 128)
IID = Intrinsic::x86_sse2_pavg_w;
else if (Name[5] == 'w' && VecWidth == 256)
IID = Intrinsic::x86_avx2_pavg_w;
else if (Name[5] == 'w' && VecWidth == 512)
IID = Intrinsic::x86_avx512_pavg_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else
return false;
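// The masked forms carry the passthru and mask as the last two operands; drop
// them, call the unmasked intrinsic, then re-apply the mask with a select.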
SmallVector<Value *, 4> Args(CI.args());
Args.pop_back();
Args.pop_back();
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
Args);
unsigned NumArgs = CI.arg_size();
Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
CI.getArgOperand(NumArgs - 2));
return true;
}
/// Upgrade the comment in a call to inline asm that represents an ObjC
/// retain/release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
size_t Pos;
if (AsmStr->find("mov\tfp") == 0 &&
AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
(Pos = AsmStr->find("# marker")) != std::string::npos) {
AsmStr->replace(Pos, 1, ";");
}
}
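// Upgrade old ARM MVE/CDE intrinsics that used v4i1 predicates for 64-bit
// element types to the new v2i1 forms, inserting predicate i2v/v2i conversions
// where needed.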
static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
IRBuilder<> &Builder) {
if (Name == "mve.vctp64.old") {
// Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
// correct type.
Value *VCTP = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
CI->getArgOperand(0), CI->getName());
Value *C1 = Builder.CreateCall(
Intrinsic::getDeclaration(
F->getParent(), Intrinsic::arm_mve_pred_v2i,
{VectorType::get(Builder.getInt1Ty(), 2, false)}),
VCTP);
return Builder.CreateCall(
Intrinsic::getDeclaration(
F->getParent(), Intrinsic::arm_mve_pred_i2v,
{VectorType::get(Builder.getInt1Ty(), 4, false)}),
C1);
} else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
std::vector<Type *> Tys;
unsigned ID = CI->getIntrinsicID();
Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
switch (ID) {
case Intrinsic::arm_mve_mull_int_predicated:
case Intrinsic::arm_mve_vqdmull_predicated:
case Intrinsic::arm_mve_vldr_gather_base_predicated:
Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
break;
case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
case Intrinsic::arm_mve_vstr_scatter_base_predicated:
case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
V2I1Ty};
break;
case Intrinsic::arm_mve_vldr_gather_offset_predicated:
Tys = {CI->getType(), CI->getOperand(0)->getType(),
CI->getOperand(1)->getType(), V2I1Ty};
break;
case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
CI->getOperand(2)->getType(), V2I1Ty};
break;
case Intrinsic::arm_cde_vcx1q_predicated:
case Intrinsic::arm_cde_vcx1qa_predicated:
case Intrinsic::arm_cde_vcx2q_predicated:
case Intrinsic::arm_cde_vcx2qa_predicated:
case Intrinsic::arm_cde_vcx3q_predicated:
case Intrinsic::arm_cde_vcx3qa_predicated:
Tys = {CI->getOperand(1)->getType(), V2I1Ty};
break;
default:
llvm_unreachable("Unhandled Intrinsic!");
}
std::vector<Value *> Ops;
for (Value *Op : CI->args()) {
Type *Ty = Op->getType();
if (Ty->getScalarSizeInBits() == 1) {
Value *C1 = Builder.CreateCall(
Intrinsic::getDeclaration(
F->getParent(), Intrinsic::arm_mve_pred_v2i,
{VectorType::get(Builder.getInt1Ty(), 4, false)}),
Op);
Op = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(),
Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
C1);
}
Ops.push_back(Op);
}
Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
return Builder.CreateCall(Fn, Ops, CI->getName());
}
llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided so that the replacement integrates seamlessly with the existing context.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();
IRBuilder<> Builder(C);
Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
assert(F && "Intrinsic call is not direct?");
if (!NewFn) {
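// No replacement declaration was provided, so rewrite the call in place based
// on the intrinsic's name; strip the "llvm." prefix and any target prefix to
// simplify the matching below.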
// Get the Function's name.
StringRef Name = F->getName();
assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
Name = Name.substr(5);
bool IsX86 = Name.startswith("x86.");
if (IsX86)
Name = Name.substr(4);
bool IsNVVM = Name.startswith("nvvm.");
if (IsNVVM)
Name = Name.substr(5);
bool IsARM = Name.startswith("arm.");
if (IsARM)
Name = Name.substr(4);
if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
MDNode *Node = MDNode::get(C, Elts);
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
// Nontemporal (unaligned) store of the 0th element of the float/double
// vector.
Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
Value *Extract =
Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("avx.movnt.") ||
Name.startswith("avx512.storent."))) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
MDNode *Node = MDNode::get(C, Elts);
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
// Convert the type of the pointer to a pointer to the stored type.
Value *BC = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Arg1->getType()),
"cast");
StoreInst *SI = Builder.CreateAlignedStore(
Arg1, BC,
Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && Name == "sse2.storel.dq") {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
Value *BC = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Elt->getType()),
"cast");
Builder.CreateAlignedStore(Elt, BC, Align(1));
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("sse.storeu.") ||
Name.startswith("sse2.storeu.") ||
Name.startswith("avx.storeu."))) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Arg0 = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Arg1->getType()),
"cast");
Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && Name == "avx512.mask.store.ss") {
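// Scalar store: only the low bit of the mask is meaningful.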
Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
Mask, false);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("avx512.mask.store"))) {
// "avx512.mask.storeu." or "avx512.mask.store."
bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), Aligned);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
Value *Rep;
// Upgrade packed integer vector compare intrinsics to compare instructions.
if (IsX86 && (Name.startswith("sse2.pcmp") ||
Name.startswith("avx2.pcmp"))) {
// "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
bool CmpEq = Name[9] == 'e';
Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
CI->getArgOperand(0), CI->getArgOperand(1));
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
Type *ExtTy = Type::getInt32Ty(C);
if (CI->getOperand(0)->getType()->isIntegerTy(8))
ExtTy = Type::getInt64Ty(C);
unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
ExtTy->getPrimitiveSizeInBits();
Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
Rep = Builder.CreateVectorSplat(NumElts, Rep);
} else if (IsX86 && (Name == "sse.sqrt.ss" ||
Name == "sse2.sqrt.sd")) {
Value *Vec = CI->getArgOperand(0);
Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
Function *Intr = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::sqrt, Elt0->getType());
Elt0 = Builder.CreateCall(Intr, Elt0);
Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
} else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
Name.startswith("sse2.sqrt.p") ||
Name.startswith("sse.sqrt.p"))) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::sqrt,
CI->getType()),
{CI->getArgOperand(0)});
} else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
if (CI->arg_size() == 4 &&
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
: Intrinsic::x86_avx512_sqrt_pd_512;
Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
IID), Args);
} else {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::sqrt,
CI->getType()),
{CI->getArgOperand(0)});
}
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.ptestm") ||
Name.startswith("avx512.ptestnm"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
Rep = Builder.CreateAnd(Op0, Op1);
llvm::Type *Ty = Op0->getType();
Value *Zero = llvm::Constant::getNullValue(Ty);
ICmpInst::Predicate Pred =
Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
Rep = Builder.CreateICmp(Pred, Rep, Zero);
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
} else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
->getNumElements();
Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
unsigned NumElts = CI->getType()->getScalarSizeInBits();
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
int Indices[64];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
// First extract half of each vector. This gives better codegen than
// doing it in a single shuffle.
LHS = Builder.CreateShuffleVector(LHS, LHS,
makeArrayRef(Indices, NumElts / 2));
RHS = Builder.CreateShuffleVector(RHS, RHS,
makeArrayRef(Indices, NumElts / 2));
// Concat the vectors.
// NOTE: Operands have to be swapped to match intrinsic definition.
Rep = Builder.CreateShuffleVector(RHS, LHS,
makeArrayRef(Indices, NumElts));
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kand.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateAnd(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kandn.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
LHS = Builder.CreateNot(LHS);
Rep = Builder.CreateAnd(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kor.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateOr(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kxor.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateXor(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kxnor.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
LHS = Builder.CreateNot(LHS);
Rep = Builder.CreateXor(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.knot.w") {
Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Rep = Builder.CreateNot(Rep);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 &&
(Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateOr(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
Value *C;
if (Name[14] == 'c')
C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
else
C = ConstantInt::getNullValue(Builder.getInt16Ty());
Rep = Builder.CreateICmpEQ(Rep, C);
Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
Name == "sse.div.ss" || Name == "sse2.div.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
Value *EltOp;
if (Name.contains(".add."))
EltOp = Builder.CreateFAdd(Elt0, Elt1);
else if (Name.contains(".sub."))
EltOp = Builder.CreateFSub(Elt0, Elt1);
else if (Name.contains(".mul."))
EltOp = Builder.CreateFMul(Elt0, Elt1);
else
EltOp = Builder.CreateFDiv(Elt0, Elt1);
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
bool CmpEq = Name[16] == 'e';
Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
} else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
Type *OpTy = CI->getArgOperand(0)->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
Intrinsic::ID IID;
switch (VecWidth) {
default: llvm_unreachable("Unexpected intrinsic");
case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
}
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getOperand(0), CI->getArgOperand(1) });
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
Type *OpTy = CI->getArgOperand(0)->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
unsigned EltWidth = OpTy->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx512_fpclass_ps_128;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx512_fpclass_ps_256;
else if (VecWidth == 512 && EltWidth == 32)
IID = Intrinsic::x86_avx512_fpclass_ps_512;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_avx512_fpclass_pd_128;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx512_fpclass_pd_256;
else if (VecWidth == 512 && EltWidth == 64)
IID = Intrinsic::x86_avx512_fpclass_pd_512;
else
llvm_unreachable("Unexpected intrinsic");
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getOperand(0), CI->getArgOperand(1) });
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.cmp.p")) {
SmallVector<Value *, 4> Args(CI->args());
Type *OpTy = Args[0]->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
unsigned EltWidth = OpTy->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
else if (VecWidth == 512 && EltWidth == 32)
IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
else if (VecWidth == 512 && EltWidth == 64)
IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
else
llvm_unreachable("Unexpected intrinsic");
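// The replacement mask.cmp intrinsics take an explicit all-ones mask; for the
// 512-bit forms the old trailing operand (SAE) must remain last, so the mask
// is swapped in ahead of it.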
Value *Mask = Constant::getAllOnesValue(CI->getType());
if (VecWidth == 512)
std::swap(Mask, Args.back());
Args.push_back(Mask);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Args);
} else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
// Integer compare intrinsics.
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
} else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
} else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
Name.startswith("avx512.cvtw2mask.") ||
Name.startswith("avx512.cvtd2mask.") ||
Name.startswith("avx512.cvtq2mask."))) {
Value *Op = CI->getArgOperand(0);
Value *Zero = llvm::Constant::getNullValue(Op->getType());
Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
} else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
Name == "ssse3.pabs.w.128" ||
Name == "ssse3.pabs.d.128" ||
Name.startswith("avx2.pabs") ||
Name.startswith("avx512.mask.pabs"))) {
Rep = upgradeAbs(Builder, *CI);
} else if (IsX86 && (Name == "sse41.pmaxsb" ||
Name == "sse2.pmaxs.w" ||
Name == "sse41.pmaxsd" ||
Name.startswith("avx2.pmaxs") ||
Name.startswith("avx512.mask.pmaxs"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
} else if (IsX86 && (Name == "sse2.pmaxu.b" ||
Name == "sse41.pmaxuw" ||
Name == "sse41.pmaxud" ||
Name.startswith("avx2.pmaxu") ||
Name.startswith("avx512.mask.pmaxu"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
} else if (IsX86 && (Name == "sse41.pminsb" ||
Name == "sse2.pmins.w" ||
Name == "sse41.pminsd" ||
Name.startswith("avx2.pmins") ||
Name.startswith("avx512.mask.pmins"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
} else if (IsX86 && (Name == "sse2.pminu.b" ||
Name == "sse41.pminuw" ||
Name == "sse41.pminud" ||
Name.startswith("avx2.pminu") ||
Name.startswith("avx512.mask.pminu"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
} else if (IsX86 && (Name == "sse2.pmulu.dq" ||
Name == "avx2.pmulu.dq" ||
Name == "avx512.pmulu.dq.512" ||
Name.startswith("avx512.mask.pmulu.dq."))) {
Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
} else if (IsX86 && (Name == "sse41.pmuldq" ||
Name == "avx2.pmul.dq" ||
Name == "avx512.pmul.dq.512" ||
Name.startswith("avx512.mask.pmul.dq."))) {
Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
} else if (IsX86 && (Name == "sse.cvtsi2ss" ||
Name == "sse2.cvtsi2sd" ||
Name == "sse.cvtsi642ss" ||
Name == "sse2.cvtsi642sd")) {
Rep = Builder.CreateSIToFP(
CI->getArgOperand(1),
cast<VectorType>(CI->getType())->getElementType());
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
} else if (IsX86 && Name == "avx512.cvtusi2sd") {
Rep = Builder.CreateUIToFP(
CI->getArgOperand(1),
cast<VectorType>(CI->getType())->getElementType());
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
} else if (IsX86 && Name == "sse2.cvtss2sd") {
Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
Rep = Builder.CreateFPExt(
Rep, cast<VectorType>(CI->getType())->getElementType());
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
} else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
Name == "sse2.cvtdq2ps" ||
Name == "avx.cvtdq2.pd.256" ||
Name == "avx.cvtdq2.ps.256" ||
Name.startswith("avx512.mask.cvtdq2pd.") ||
Name.startswith("avx512.mask.cvtudq2pd.") ||
Name.startswith("avx512.mask.cvtdq2ps.") ||
Name.startswith("avx512.mask.cvtudq2ps.") ||
Name.startswith("avx512.mask.cvtqq2pd.") ||
Name.startswith("avx512.mask.cvtuqq2pd.") ||
Name == "avx512.mask.cvtqq2ps.256" ||
Name == "avx512.mask.cvtqq2ps.512" ||
Name == "avx512.mask.cvtuqq2ps.256" ||
Name == "avx512.mask.cvtuqq2ps.512" ||
Name == "sse2.cvtps2pd" ||
Name == "avx.cvt.ps2.pd.256" ||
Name == "avx512.mask.cvtps2pd.128" ||
Name == "avx512.mask.cvtps2pd.256")) {
auto *DstTy = cast<FixedVectorType>(CI->getType());
Rep = CI->getArgOperand(0);
auto *SrcTy = cast<FixedVectorType>(Rep->getType());
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts < SrcTy->getNumElements()) {
assert(NumDstElts == 2 && "Unexpected vector size");
Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
}
bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
if (IsPS2PD)
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
else if (CI->arg_size() == 4 &&
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
: Intrinsic::x86_avx512_sitofp_round;
Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
{ DstTy, SrcTy });
Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
} else {
Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
: Builder.CreateSIToFP(Rep, DstTy, "cvt");
}
if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
Name.startswith("vcvtph2ps."))) {
auto *DstTy = cast<FixedVectorType>(CI->getType());
Rep = CI->getArgOperand(0);
auto *SrcTy = cast<FixedVectorType>(Rep->getType());
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts != SrcTy->getNumElements()) {
assert(NumDstElts == 4 && "Unexpected vector size");
Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
}
Rep = Builder.CreateBitCast(
Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && Name.startswith("avx512.mask.load")) {
// "avx512.mask.loadu." or "avx512.mask.load."
bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
Rep =
UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), Aligned);
} else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
auto *ResultTy = cast<FixedVectorType>(CI->getType());
Type *PtrTy = ResultTy->getElementType();
// Cast the pointer to element type.
Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
llvm::PointerType::getUnqual(PtrTy));
Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
ResultTy->getNumElements());
Function *ELd = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_expandload,
ResultTy);
Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
} else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
Type *PtrTy = ResultTy->getElementType();
// Cast the pointer to element type.
Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
llvm::PointerType::getUnqual(PtrTy));
Value *MaskVec =
getX86MaskVec(Builder, CI->getArgOperand(2),
cast<FixedVectorType>(ResultTy)->getNumElements());
Function *CSt = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_compressstore,
ResultTy);
Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
} else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
Name.startswith("avx512.mask.expand."))) {
auto *ResultTy = cast<FixedVectorType>(CI->getType());
Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
ResultTy->getNumElements());
bool IsCompress = Name[12] == 'c';
Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
: Intrinsic::x86_avx512_mask_expand;
Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
MaskVec });
} else if (IsX86 && Name.startswith("xop.vpcom")) {
bool IsSigned;
if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
Name.endswith("uq"))
IsSigned = false;
else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
Name.endswith("q"))
IsSigned = true;
else
llvm_unreachable("Unknown suffix");
unsigned Imm;
if (CI->arg_size() == 3) {
Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
} else {
Name = Name.substr(9); // strip off "xop.vpcom"
if (Name.startswith("lt"))
Imm = 0;
else if (Name.startswith("le"))
Imm = 1;
else if (Name.startswith("gt"))
Imm = 2;
else if (Name.startswith("ge"))
Imm = 3;
else if (Name.startswith("eq"))
Imm = 4;
else if (Name.startswith("ne"))
Imm = 5;
else if (Name.startswith("false"))
Imm = 6;
else if (Name.startswith("true"))
Imm = 7;
else
llvm_unreachable("Unknown condition");
}
Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
} else if (IsX86 && Name.startswith("xop.vpcmov")) {
Value *Sel = CI->getArgOperand(2);
Value *NotSel = Builder.CreateNot(Sel);
Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
} else if (IsX86 && (Name.startswith("xop.vprot") ||
Name.startswith("avx512.prol") ||
Name.startswith("avx512.mask.prol"))) {
Rep = upgradeX86Rotate(Builder, *CI, false);
} else if (IsX86 && (Name.startswith("avx512.pror") ||
Name.startswith("avx512.mask.pror"))) {
Rep = upgradeX86Rotate(Builder, *CI, true);
} else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
Name.startswith("avx512.mask.vpshld") ||
Name.startswith("avx512.maskz.vpshld"))) {
bool ZeroMask = Name[11] == 'z';
Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
} else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
Name.startswith("avx512.mask.vpshrd") ||
Name.startswith("avx512.maskz.vpshrd"))) {
bool ZeroMask = Name[11] == 'z';
Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
} else if (IsX86 && Name == "sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
} else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
Name.startswith("avx512.vbroadcast.s"))) {
// Replace broadcasts with a series of insertelements.
auto *VecTy = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecTy->getElementType();
unsigned EltNum = VecTy->getNumElements();
Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
EltTy->getPointerTo());
Value *Load = Builder.CreateLoad(EltTy, Cast);
Type *I32Ty = Type::getInt32Ty(C);
Rep = PoisonValue::get(VecTy);
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
} else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
Name.startswith("sse41.pmovzx") ||
Name.startswith("avx2.pmovsx") ||
Name.startswith("avx2.pmovzx") ||
Name.startswith("avx512.mask.pmovsx") ||
Name.startswith("avx512.mask.pmovzx"))) {
auto *DstTy = cast<FixedVectorType>(CI->getType());
unsigned NumDstElts = DstTy->getNumElements();
// Extract a subvector of the first NumDstElts lanes and sign/zero extend.
SmallVector<int, 8> ShuffleMask(NumDstElts);
for (unsigned i = 0; i != NumDstElts; ++i)
ShuffleMask[i] = i;
Value *SV =
Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
// If there are 3 arguments, it's a masked intrinsic so we need a select.
if (CI->arg_size() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (Name == "avx512.mask.pmov.qd.256" ||
Name == "avx512.mask.pmov.qd.512" ||
Name == "avx512.mask.pmov.wb.256" ||
Name == "avx512.mask.pmov.wb.512") {
Type *Ty = CI->getArgOperand(1)->getType();
Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
Name == "avx2.vbroadcasti128")) {
// Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
PointerType::getUnqual(VT));
Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
if (NumSrcElts == 2)
Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
else
Rep = Builder.CreateShuffleVector(
Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
} else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
Name.startswith("avx512.mask.shuf.f"))) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Type *VT = CI->getType();
unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
unsigned ControlBitsMask = NumLanes - 1;
unsigned NumControlBits = NumLanes / 2;
SmallVector<int, 8> ShuffleMask(0);
for (unsigned l = 0; l != NumLanes; ++l) {
unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
// We actually need the other source.
if (l >= NumLanes / 2)
LaneMask += NumLanes;
for (unsigned i = 0; i != NumElementsInLane; ++i)
ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
}
Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
CI->getArgOperand(1), ShuffleMask);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
Name.startswith("avx512.mask.broadcasti"))) {
unsigned NumSrcElts =
cast<FixedVectorType>(CI->getArgOperand(0)->getType())
->getNumElements();
unsigned NumDstElts =
cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 8> ShuffleMask(NumDstElts);
for (unsigned i = 0; i != NumDstElts; ++i)
ShuffleMask[i] = i % NumSrcElts;
Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
CI->getArgOperand(0),
ShuffleMask);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
Name.startswith("avx2.vbroadcast") ||
Name.startswith("avx512.pbroadcast") ||
Name.startswith("avx512.mask.broadcast.s"))) {
// Replace vp?broadcasts with a vector shuffle.
Value *Op = CI->getArgOperand(0);
ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
SmallVector<int, 8> M;
ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
Rep = Builder.CreateShuffleVector(Op, M);
if (CI->arg_size() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("sse2.padds.") ||
Name.startswith("avx2.padds.") ||
Name.startswith("avx512.padds.") ||
Name.startswith("avx512.mask.padds."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
} else if (IsX86 && (Name.startswith("sse2.psubs.") ||
Name.startswith("avx2.psubs.") ||
Name.startswith("avx512.psubs.") ||
Name.startswith("avx512.mask.psubs."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
} else if (IsX86 && (Name.startswith("sse2.paddus.") ||
Name.startswith("avx2.paddus.") ||
Name.startswith("avx512.mask.paddus."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
} else if (IsX86 && (Name.startswith("sse2.psubus.") ||
Name.startswith("avx2.psubus.") ||
Name.startswith("avx512.mask.psubus."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
CI->getArgOperand(2),
CI->getArgOperand(3),
CI->getArgOperand(4),
false);
} else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
CI->getArgOperand(2),
CI->getArgOperand(3),
CI->getArgOperand(4),
true);
} else if (IsX86 && (Name == "sse2.psll.dq" ||
Name == "avx2.psll.dq")) {
// 128/256-bit shift left specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
Shift / 8); // Shift is in bits.
} else if (IsX86 && (Name == "sse2.psrl.dq" ||
Name == "avx2.psrl.dq")) {
// 128/256-bit shift right specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
Shift / 8); // Shift is in bits.
} else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
Name == "avx2.psll.dq.bs" ||
Name == "avx512.psll.dq.512")) {
// 128/256/512-bit shift left specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
} else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
Name == "avx2.psrl.dq.bs" ||
Name == "avx512.psrl.dq.512")) {
// 128/256/512-bit shift right specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
} else if (IsX86 && (Name == "sse41.pblendw" ||
Name.startswith("sse41.blendp") ||
Name.startswith("avx.blend.p") ||
Name == "avx2.pblendw" ||
Name.startswith("avx2.pblendd."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
} else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
Name == "avx2.vinserti128" ||
Name.startswith("avx512.mask.insert"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
unsigned DstNumElts =
cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned SrcNumElts =
cast<FixedVectorType>(Op1->getType())->getNumElements();
unsigned Scale = DstNumElts / SrcNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm % Scale;
// Extend the second operand into a vector the size of the destination.
SmallVector<int, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
Idxs[i] = SrcNumElts;
Rep = Builder.CreateShuffleVector(Op1, Idxs);
// Insert the second operand into the first operand.
// Note that there is no guarantee that instruction lowering will actually
// produce a vinsertf128 instruction for the created shuffles. In
// particular, the 0 immediate case involves no lane changes, so it can
// be handled as a blend.
// Example of shuffle mask for 32-bit elements:
// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
// First fill with the identity mask.
for (unsigned i = 0; i != DstNumElts; ++i)
Idxs[i] = i;
// Then replace the elements where we need to insert.
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->arg_size() == 5)
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
Name == "avx2.vextracti128" ||
Name.startswith("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned DstNumElts =
cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned SrcNumElts =
cast<FixedVectorType>(Op0->getType())->getNumElements();
unsigned Scale = SrcNumElts / DstNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm % Scale;
// Get indexes for the subvector of the input vector.
SmallVector<int, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != DstNumElts; ++i) {
Idxs[i] = i + (Imm * DstNumElts);
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
Rep = nullptr;
} else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
Name.startswith("avx512.mask.perm.di."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<int, 8> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
Name == "avx2.vperm2i128")) {
// The immediate permute control byte looks like this:
// [1:0] - select 128 bits from sources for low half of destination
// [2] - ignore
// [3] - zero low half of destination
// [5:4] - select 128 bits from sources for high half of destination
// [6] - ignore
// [7] - zero high half of destination
uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned HalfSize = NumElts / 2;
SmallVector<int, 8> ShuffleMask(NumElts);
// Determine which operand(s) are actually in use for this instruction.
Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
// If needed, replace operands based on zero mask.
V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
// Permute low half of result.
unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
for (unsigned i = 0; i < HalfSize; ++i)
ShuffleMask[i] = StartIndex + i;
// Permute high half of result.
StartIndex = (Imm & 0x10) ? HalfSize : 0;
for (unsigned i = 0; i < HalfSize; ++i)
ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
} else if (IsX86 && (Name.startswith("avx.vpermil.") ||
Name == "sse2.pshuf.d" ||
Name.startswith("avx512.mask.vpermil.p") ||
Name.startswith("avx512.mask.pshuf.d."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
// Calculate the size of each index in the immediate.
unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
unsigned IdxMask = ((1 << IdxSize) - 1);
SmallVector<int, 8> Idxs(NumElts);
// Look up the bits for this element, wrapping around the immediate every
// 8 bits. Elements are grouped into sets of 2 or 4, so we need to offset
// by the first index of each group.
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufl.w" ||
Name.startswith("avx512.mask.pshufl.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
for (unsigned i = 4; i != 8; ++i)
Idxs[i + l] = i + l;
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufh.w" ||
Name.startswith("avx512.mask.pshufh.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l] = i + l;
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned HalfLaneElts = NumLaneElts / 2;
SmallVector<int, 16> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// Base index is the starting element of the lane.
Idxs[i] = i - (i % NumLaneElts);
// If we are halfway through the lane, switch to the other source.
if ((i % NumLaneElts) >= HalfLaneElts)
Idxs[i] += NumElts;
// Now select the specific element by adding HalfLaneElts bits from the
// immediate, wrapping around the immediate every 8 bits.
Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
}
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
Name.startswith("avx512.mask.movshdup") ||
Name.startswith("avx512.mask.movsldup"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned Offset = 0;
if (Name.startswith("avx512.mask.movshdup."))
Offset = 1;
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += NumLaneElts)
for (unsigned i = 0; i != NumLaneElts; i += 2) {
Idxs[i + l + 0] = i + l + Offset;
Idxs[i + l + 1] = i + l + Offset;
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
Name.startswith("avx512.mask.unpckl."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<int, 64> Idxs(NumElts);
for (int l = 0; l != NumElts; l += NumLaneElts)
for (int i = 0; i != NumLaneElts; ++i)
Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
Name.startswith("avx512.mask.unpckh."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<int, 64> Idxs(NumElts);
for (int l = 0; l != NumElts; l += NumLaneElts)
for (int i = 0; i != NumLaneElts; ++i)
Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
Name.startswith("avx512.mask.pand."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
Name.startswith("avx512.mask.pandn."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
Rep = Builder.CreateAnd(Rep,
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
Name.startswith("avx512.mask.por."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
Name.startswith("avx512.mask.pxor."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
if (Name.endswith(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_add_ps_512;
else
IID = Intrinsic::x86_avx512_add_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
} else {
Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
if (Name.endswith(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_div_ps_512;
else
IID = Intrinsic::x86_avx512_div_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
} else {
Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
if (Name.endswith(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_mul_ps_512;
else
IID = Intrinsic::x86_avx512_mul_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
} else {
Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
if (Name.endswith(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_sub_ps_512;
else
IID = Intrinsic::x86_avx512_sub_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
} else {
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
Name.startswith("avx512.mask.min.p")) &&
Name.drop_front(18) == ".512") {
bool IsDouble = Name[17] == 'd';
bool IsMin = Name[13] == 'i';
static const Intrinsic::ID MinMaxTbl[2][2] = {
{ Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
{ Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
};
Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::ctlz,
CI->getType()),
{ CI->getArgOperand(0), Builder.getInt1(false) });
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && Name.startswith("avx512.mask.psll")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
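// Extract the element size character ('d', 'q', 'w', 's', or 'h') that
// follows the variable-length "psll[i|v][N]" part of the name.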
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
IID = Intrinsic::x86_avx2_psllv_q;
else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
IID = Intrinsic::x86_avx2_psllv_q_256;
else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
IID = Intrinsic::x86_avx2_psllv_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
IID = Intrinsic::x86_avx2_psllv_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
IID = Intrinsic::x86_avx512_psllv_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
IID = Intrinsic::x86_avx512_psllv_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
IID = Intrinsic::x86_avx512_psllv_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
: Intrinsic::x86_sse2_psll_d;
else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
: Intrinsic::x86_sse2_psll_q;
else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
: Intrinsic::x86_sse2_psll_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
: Intrinsic::x86_avx2_psll_d;
else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
: Intrinsic::x86_avx2_psll_q;
else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
: Intrinsic::x86_avx2_psll_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
Intrinsic::x86_avx512_psll_d_512;
else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
Intrinsic::x86_avx512_psll_q_512;
else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
: Intrinsic::x86_avx512_psll_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
IID = Intrinsic::x86_avx2_psrlv_q;
else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
IID = Intrinsic::x86_avx2_psrlv_q_256;
else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
IID = Intrinsic::x86_avx2_psrlv_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
IID = Intrinsic::x86_avx2_psrlv_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
IID = Intrinsic::x86_avx512_psrlv_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
IID = Intrinsic::x86_avx512_psrlv_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
IID = Intrinsic::x86_avx512_psrlv_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
: Intrinsic::x86_sse2_psrl_d;
else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
: Intrinsic::x86_sse2_psrl_q;
else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
: Intrinsic::x86_sse2_psrl_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
: Intrinsic::x86_avx2_psrl_d;
else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
: Intrinsic::x86_avx2_psrl_q;
else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
: Intrinsic::x86_avx2_psrl_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
Intrinsic::x86_avx512_psrl_d_512;
else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
Intrinsic::x86_avx512_psrl_q_512;
else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
: Intrinsic::x86_avx512_psrl_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.psra")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
IID = Intrinsic::x86_avx2_psrav_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
IID = Intrinsic::x86_avx2_psrav_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
IID = Intrinsic::x86_avx512_psrav_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
IID = Intrinsic::x86_avx512_psrav_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
IID = Intrinsic::x86_avx512_psrav_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
: Intrinsic::x86_sse2_psra_d;
else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
Intrinsic::x86_avx512_psra_q_128;
else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
: Intrinsic::x86_sse2_psra_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
: Intrinsic::x86_avx2_psra_d;
else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
Intrinsic::x86_avx512_psra_q_256;
else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
: Intrinsic::x86_avx2_psra_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
Intrinsic::x86_avx512_psra_d_512;
else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
Intrinsic::x86_avx512_psra_q_512;
else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
: Intrinsic::x86_avx512_psra_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
Rep = upgradeMaskedMove(Builder, *CI);
} else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
Rep = UpgradeMaskToInt(Builder, *CI);
} else if (IsX86 && Name.endswith(".movntdqa")) {
Module *M = F->getParent();
MDNode *Node = MDNode::get(
C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
Value *Ptr = CI->getArgOperand(0);
// Convert the type of the pointer to a pointer to the stored type.
Value *BC = Builder.CreateBitCast(
Ptr, PointerType::getUnqual(CI->getType()), "cast");
LoadInst *LI = Builder.CreateAlignedLoad(
CI->getType(), BC,
Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
LI->setMetadata(M->getMDKindID("nontemporal"), Node);
Rep = LI;
} else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
Name.startswith("fma.vfmsub.") ||
Name.startswith("fma.vfnmadd.") ||
Name.startswith("fma.vfnmsub."))) {
bool NegMul = Name[6] == 'n';
bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
if (IsScalar) {
Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
}
if (NegMul && !IsScalar)
Ops[0] = Builder.CreateFNeg(Ops[0]);
if (NegMul && IsScalar)
Ops[1] = Builder.CreateFNeg(Ops[1]);
if (NegAcc)
Ops[2] = Builder.CreateFNeg(Ops[2]);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::fma,
Ops[0]->getType()),
Ops);
if (IsScalar)
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
(uint64_t)0);
} else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::fma,
Ops[0]->getType()),
Ops);
Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
Rep, (uint64_t)0);
} else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
Name.startswith("avx512.maskz.vfmadd.s") ||
Name.startswith("avx512.mask3.vfmadd.s") ||
Name.startswith("avx512.mask3.vfmsub.s") ||
Name.startswith("avx512.mask3.vfnmsub.s"))) {
bool IsMask3 = Name[11] == '3';
bool IsMaskZ = Name[11] == 'z';
// Drop the "avx512.mask." to make it easier.
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
bool NegMul = Name[2] == 'n';
bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
Value *A = CI->getArgOperand(0);
Value *B = CI->getArgOperand(1);
Value *C = CI->getArgOperand(2);
if (NegMul && (IsMask3 || IsMaskZ))
A = Builder.CreateFNeg(A);
if (NegMul && !(IsMask3 || IsMaskZ))
B = Builder.CreateFNeg(B);
if (NegAcc)
C = Builder.CreateFNeg(C);
A = Builder.CreateExtractElement(A, (uint64_t)0);
B = Builder.CreateExtractElement(B, (uint64_t)0);
C = Builder.CreateExtractElement(C, (uint64_t)0);
if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
Intrinsic::ID IID;
if (Name.back() == 'd')
IID = Intrinsic::x86_avx512_vfmadd_f64;
else
IID = Intrinsic::x86_avx512_vfmadd_f32;
Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
Rep = Builder.CreateCall(FMA, Ops);
} else {
Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::fma,
A->getType());
Rep = Builder.CreateCall(FMA, { A, B, C });
}
Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
IsMask3 ? C : A;
// For Mask3 with NegAcc, we need to create a new extractelement that
// avoids the negation above.
if (NegAcc && IsMask3)
PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
(uint64_t)0);
Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
Rep, PassThru);
Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
Rep, (uint64_t)0);
} else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
Name.startswith("avx512.mask.vfnmadd.p") ||
Name.startswith("avx512.mask.vfnmsub.p") ||
Name.startswith("avx512.mask3.vfmadd.p") ||
Name.startswith("avx512.mask3.vfmsub.p") ||
Name.startswith("avx512.mask3.vfnmsub.p") ||
Name.startswith("avx512.maskz.vfmadd.p"))) {
bool IsMask3 = Name[11] == '3';
bool IsMaskZ = Name[11] == 'z';
// Drop the "avx512.mask." to make it easier.
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
bool NegMul = Name[2] == 'n';
bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
Value *A = CI->getArgOperand(0);
Value *B = CI->getArgOperand(1);
Value *C = CI->getArgOperand(2);
if (NegMul && (IsMask3 || IsMaskZ))
A = Builder.CreateFNeg(A);
if (NegMul && !(IsMask3 || IsMaskZ))
B = Builder.CreateFNeg(B);
if (NegAcc)
C = Builder.CreateFNeg(C);
if (CI->arg_size() == 5 &&
(!isa<ConstantInt>(CI->getArgOperand(4)) ||
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
Intrinsic::ID IID;
// Check the character before ".512" in the string.
if (Name[Name.size()-5] == 's')
IID = Intrinsic::x86_avx512_vfmadd_ps_512;
else
IID = Intrinsic::x86_avx512_vfmadd_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ A, B, C, CI->getArgOperand(4) });
} else {
Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::fma,
A->getType());
Rep = Builder.CreateCall(FMA, { A, B, C });
}
Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
IsMask3 ? CI->getArgOperand(2) :
CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_fma_vfmaddsub_ps;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_fma_vfmaddsub_pd;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
Ops[2] = Builder.CreateFNeg(Ops[2]);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Ops);
} else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
Name.startswith("avx512.mask3.vfmaddsub.p") ||
Name.startswith("avx512.maskz.vfmaddsub.p") ||
Name.startswith("avx512.mask3.vfmsubadd.p"))) {
bool IsMask3 = Name[11] == '3';
bool IsMaskZ = Name[11] == 'z';
// Drop the "avx512.mask." to make it easier.
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
bool IsSubAdd = Name[3] == 's';
if (CI->arg_size() == 5) {
Intrinsic::ID IID;
// Check the character before ".512" in the string.
if (Name[Name.size()-5] == 's')
IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
else
IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), CI->getArgOperand(4) };
if (IsSubAdd)
Ops[2] = Builder.CreateFNeg(Ops[2]);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Ops);
} else {
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
Ops[0]->getType());
Value *Odd = Builder.CreateCall(FMA, Ops);
Ops[2] = Builder.CreateFNeg(Ops[2]);
Value *Even = Builder.CreateCall(FMA, Ops);
if (IsSubAdd)
std::swap(Even, Odd);
SmallVector<int, 32> Idxs(NumElts);
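// Interleave the two FMA results: even result elements come from Even and
// odd elements from Odd.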
for (int i = 0; i != NumElts; ++i)
Idxs[i] = i + (i % 2) * NumElts;
Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
}
Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
IsMask3 ? CI->getArgOperand(2) :
CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
Name.startswith("avx512.maskz.pternlog."))) {
bool ZeroMask = Name[11] == 'z';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx512_pternlog_d_128;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx512_pternlog_d_256;
else if (VecWidth == 512 && EltWidth == 32)
IID = Intrinsic::x86_avx512_pternlog_d_512;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_avx512_pternlog_q_128;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx512_pternlog_q_256;
else if (VecWidth == 512 && EltWidth == 64)
IID = Intrinsic::x86_avx512_pternlog_q_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
CI->getArgOperand(2), CI->getArgOperand(3) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
} else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
Name.startswith("avx512.maskz.vpmadd52"))) {
bool ZeroMask = Name[11] == 'z';
bool High = Name[20] == 'h' || Name[21] == 'h';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && !High)
IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
else if (VecWidth == 256 && !High)
IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
else if (VecWidth == 512 && !High)
IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
else if (VecWidth == 128 && High)
IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
else if (VecWidth == 256 && High)
IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
else if (VecWidth == 512 && High)
IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
CI->getArgOperand(2) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
Name.startswith("avx512.mask.vpermt2var.") ||
Name.startswith("avx512.maskz.vpermt2var."))) {
bool ZeroMask = Name[11] == 'z';
bool IndexForm = Name[17] == 'i';
Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
} else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
Name.startswith("avx512.maskz.vpdpbusd.") ||
Name.startswith("avx512.mask.vpdpbusds.") ||
Name.startswith("avx512.maskz.vpdpbusds."))) {
bool ZeroMask = Name[11] == 'z';
bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusd_128;
else if (VecWidth == 256 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusd_256;
else if (VecWidth == 512 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusd_512;
else if (VecWidth == 128 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusds_128;
else if (VecWidth == 256 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusds_256;
else if (VecWidth == 512 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusds_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
Name.startswith("avx512.maskz.vpdpwssd.") ||
Name.startswith("avx512.mask.vpdpwssds.") ||
Name.startswith("avx512.maskz.vpdpwssds."))) {
bool ZeroMask = Name[11] == 'z';
bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssd_128;
else if (VecWidth == 256 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssd_256;
else if (VecWidth == 512 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssd_512;
else if (VecWidth == 128 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssds_128;
else if (VecWidth == 256 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssds_256;
else if (VecWidth == 512 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssds_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
Name == "addcarry.u32" || Name == "addcarry.u64" ||
Name == "subborrow.u32" || Name == "subborrow.u64")) {
Intrinsic::ID IID;
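// The operand width is encoded in the name's final character: '2' ends the
// u32 variants and '4' ends the u64 variants.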
if (Name[0] == 'a' && Name.back() == '2')
IID = Intrinsic::x86_addcarry_32;
else if (Name[0] == 'a' && Name.back() == '4')
IID = Intrinsic::x86_addcarry_64;
else if (Name[0] == 's' && Name.back() == '2')
IID = Intrinsic::x86_subborrow_32;
else if (Name[0] == 's' && Name.back() == '4')
IID = Intrinsic::x86_subborrow_64;
else
llvm_unreachable("Unexpected intrinsic");
// Make a call with 3 operands.
Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2)};
Value *NewCall = Builder.CreateCall(
Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
// Extract the second result and store it.
Value *Data = Builder.CreateExtractValue(NewCall, 1);
// Cast the pointer to the right type.
Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
llvm::PointerType::getUnqual(Data->getType()));
Builder.CreateAlignedStore(Data, Ptr, Align(1));
// Replace the original call result with the first result of the new call.
Value *CF = Builder.CreateExtractValue(NewCall, 0);
CI->replaceAllUsesWith(CF);
Rep = nullptr;
} else if (IsX86 && Name.startswith("avx512.mask.") &&
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
// Rep will be updated by the call in the condition.
} else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
Value *Arg = CI->getArgOperand(0);
Value *Neg = Builder.CreateNeg(Arg, "neg");
Value *Cmp = Builder.CreateICmpSGE(
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
} else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
Name.startswith("atomic.load.add.f64.p"))) {
Value *Ptr = CI->getArgOperand(0);
Value *Val = CI->getArgOperand(1);
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
AtomicOrdering::SequentiallyConsistent);
} else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
Name == "max.ui" || Name == "max.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
} else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
Name == "min.ui" || Name == "min.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
} else if (IsNVVM && Name == "clz.ll") {
// llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
Value *Arg = CI->getArgOperand(0);
Value *Ctlz = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
{Arg->getType()}),
{Arg, Builder.getFalse()}, "ctlz");
Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
} else if (IsNVVM && Name == "popc.ll") {
// llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
Value *Arg = CI->getArgOperand(0);
Value *Popc = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
{Arg->getType()}),
Arg, "ctpop");
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
} else if (IsNVVM && Name == "h2f") {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(
F->getParent(), Intrinsic::convert_from_fp16,
{Builder.getFloatTy()}),
CI->getArgOperand(0), "h2f");
} else if (IsARM) {
Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
} else {
llvm_unreachable("Unknown function for CallBase upgrade.");
}
if (Rep)
CI->replaceAllUsesWith(Rep);
CI->eraseFromParent();
return;
}
const auto &DefaultCase = [&]() -> void {
if (CI->getFunctionType() == NewFn->getFunctionType()) {
// Handle generic mangling change.
assert(
(CI->getCalledFunction()->getName() != NewFn->getName()) &&
"Unknown function for CallBase upgrade and isn't just a name change");
CI->setCalledFunction(NewFn);
return;
}
// This must be an upgrade from a named to a literal struct.
auto *OldST = cast<StructType>(CI->getType());
assert(OldST != NewFn->getReturnType() && "Return type must have changed");
assert(OldST->getNumElements() ==
cast<StructType>(NewFn->getReturnType())->getNumElements() &&
"Must have same number of elements");
SmallVector<Value *> Args(CI->args());
Value *NewCI = Builder.CreateCall(NewFn, Args);
Value *Res = PoisonValue::get(OldST);
for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
Res = Builder.CreateInsertValue(Res, Elem, Idx);
}
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
return;
};
CallInst *NewCall = nullptr;
switch (NewFn->getIntrinsicID()) {
default: {
DefaultCase();
return;
}
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::arm_neon_bfdot:
case Intrinsic::arm_neon_bfmmla:
case Intrinsic::arm_neon_bfmlalb:
case Intrinsic::arm_neon_bfmlalt:
case Intrinsic::aarch64_neon_bfdot:
case Intrinsic::aarch64_neon_bfmmla:
case Intrinsic::aarch64_neon_bfmlalb:
case Intrinsic::aarch64_neon_bfmlalt: {
SmallVector<Value *, 3> Args;
assert(CI->arg_size() == 3 &&
"Mismatch between function args and call args");
size_t OperandWidth =
CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
assert((OperandWidth == 64 || OperandWidth == 128) &&
"Unexpected operand width");
Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
auto Iter = CI->args().begin();
Args.push_back(*Iter++);
Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::bitreverse:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->arg_size() == 1 &&
"Mismatch between function args and call args");
NewCall =
Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
break;
case Intrinsic::objectsize: {
Value *NullIsUnknownSize =
CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
Value *Dynamic =
CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
NewCall = Builder.CreateCall(
NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
break;
}
case Intrinsic::ctpop:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::convert_from_fp16:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::dbg_value:
// Upgrade from the old version that had an extra offset argument.
assert(CI->arg_size() == 4);
// Drop nonzero offsets instead of attempting to upgrade them.
if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
if (Offset->isZeroValue()) {
NewCall = Builder.CreateCall(
NewFn,
{CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
break;
}
CI->eraseFromParent();
return;
case Intrinsic::ptr_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
if (CI->arg_size() != 4) {
DefaultCase();
return;
}
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
NewFn,
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
NewCall->takeName(CI);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
return;
case Intrinsic::var_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
assert(CI->arg_size() == 4 &&
"Before LLVM 12.0 this intrinsic took four arguments");
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
NewFn,
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
CI->eraseFromParent();
return;
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
break;
case Intrinsic::x86_xop_vpermil2pd:
case Intrinsic::x86_xop_vpermil2ps:
case Intrinsic::x86_xop_vpermil2pd_256:
case Intrinsic::x86_xop_vpermil2ps_256: {
SmallVector<Value *, 4> Args(CI->args());
VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestnzc: {
// The arguments for these intrinsics used to be v4f32, and changed
// to v2i64. This is purely a nop, since those are bitwise intrinsics.
// So, the only thing required is a bitcast for both arguments.
// First, check the arguments have the old type.
Value *Arg0 = CI->getArgOperand(0);
if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
return;
// Old intrinsic, add bitcasts
Value *Arg1 = CI->getArgOperand(1);
auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
break;
}
case Intrinsic::x86_rdtscp: {
// This used to take 1 argument. If we have no arguments, it is already
// upgraded.
if (CI->getNumOperands() == 0)
return;
NewCall = Builder.CreateCall(NewFn);
// Extract the second result and store it.
Value *Data = Builder.CreateExtractValue(NewCall, 1);
// Cast the pointer to the right type.
Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
llvm::PointerType::getUnqual(Data->getType()));
Builder.CreateAlignedStore(Data, Ptr, Align(1));
// Replace the original call result with the first result of the new call.
Value *TSC = Builder.CreateExtractValue(NewCall, 0);
NewCall->takeName(CI);
CI->replaceAllUsesWith(TSC);
CI->eraseFromParent();
return;
}
case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
case Intrinsic::x86_sse41_mpsadbw:
case Intrinsic::x86_avx_dp_ps_256:
case Intrinsic::x86_avx2_mpsadbw: {
// Need to truncate the last argument from i32 to i8 -- this argument models
// an inherently 8-bit immediate operand to these x86 instructions.
SmallVector<Value *, 4> Args(CI->args());
// Replace the last argument with a trunc.
Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::x86_avx512_mask_cmp_pd_128:
case Intrinsic::x86_avx512_mask_cmp_pd_256:
case Intrinsic::x86_avx512_mask_cmp_pd_512:
case Intrinsic::x86_avx512_mask_cmp_ps_128:
case Intrinsic::x86_avx512_mask_cmp_ps_256:
case Intrinsic::x86_avx512_mask_cmp_ps_512: {
SmallVector<Value *, 4> Args(CI->args());
unsigned NumElts =
cast<FixedVectorType>(Args[0]->getType())->getNumElements();
Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
NewCall = Builder.CreateCall(NewFn, Args);
Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
NewCall->takeName(CI);
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
return;
}
case Intrinsic::thread_pointer: {
NewCall = Builder.CreateCall(NewFn, {});
break;
}
case Intrinsic::invariant_start:
case Intrinsic::invariant_end: {
SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::masked_load:
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter: {
SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
NewCall->copyMetadata(*CI);
break;
}
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset: {
// We have to make sure that the call signature is what we're expecting.
// We only want to change the old signatures by removing the alignment arg:
// @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
// -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
// @llvm.memset...(i8*, i8, i[32|64], i32, i1)
// -> @llvm.memset...(i8*, i8, i[32|64], i1)
// Note: i8*'s in the above can be any pointer type
if (CI->arg_size() != 5) {
DefaultCase();
return;
}
// Remove alignment argument (3), and add alignment attributes to the
// dest/src pointers.
Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), CI->getArgOperand(4)};
NewCall = Builder.CreateCall(NewFn, Args);
AttributeList OldAttrs = CI->getAttributes();
AttributeList NewAttrs = AttributeList::get(
C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
{OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
NewCall->setAttributes(NewAttrs);
auto *MemCI = cast<MemIntrinsic>(NewCall);
// All mem intrinsics support dest alignment.
const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
MemCI->setDestAlignment(Align->getMaybeAlignValue());
// Memcpy/Memmove also support source alignment.
if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
MTI->setSourceAlignment(Align->getMaybeAlignValue());
break;
}
}
assert(NewCall && "Should have either set this variable or returned through "
"the default case");
NewCall->takeName(CI);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
}
void llvm::UpgradeCallsToIntrinsic(Function *F) {
assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
// Check if this function should be upgraded and get the replacement function
// if there is one.
Function *NewFn;
if (UpgradeIntrinsicFunction(F, NewFn)) {
// Replace all users of the old function with the new function or new
// instructions. This is not a range loop because the call is deleted.
for (User *U : make_early_inc_range(F->users()))
if (CallBase *CB = dyn_cast<CallBase>(U))
UpgradeIntrinsicCall(CB, NewFn);
// Remove old function, no longer used, from the module.
F->eraseFromParent();
}
}
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
// Check if the tag uses struct-path aware TBAA format.
if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
return &MD;
auto &Context = MD.getContext();
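// This is an old-style scalar tag ({name, parent[, constness]}); wrap it
// into the struct-path access-tag form {base, access type, offset 0[, constness]}.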
if (MD.getNumOperands() == 3) {
Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
MDNode *ScalarType = MDNode::get(Context, Elts);
// Create a MDNode <ScalarType, ScalarType, offset 0, const>
Metadata *Elts2[] = {ScalarType, ScalarType,
ConstantAsMetadata::get(
Constant::getNullValue(Type::getInt64Ty(Context))),
MD.getOperand(2)};
return MDNode::get(Context, Elts2);
}
// Create a MDNode <MD, MD, offset 0>
Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
Type::getInt64Ty(Context)))};
return MDNode::get(Context, Elts);
}
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
Instruction *&Temp) {
if (Opc != Instruction::BitCast)
return nullptr;
Temp = nullptr;
Type *SrcTy = V->getType();
if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
LLVMContext &Context = V->getContext();
// We have no information about target data layout, so we assume that
// the maximum pointer size is 64 bits.
Type *MidTy = Type::getInt64Ty(Context);
Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
}
return nullptr;
}
Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
if (Opc != Instruction::BitCast)
return nullptr;
Type *SrcTy = C->getType();
if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
LLVMContext &Context = C->getContext();
// We have no information about target data layout, so we assume that
// the maximum pointer size is 64 bits.
Type *MidTy = Type::getInt64Ty(Context);
return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
DestTy);
}
return nullptr;
}
/// Check the debug info version number; if it is outdated, drop the debug
/// info. Return true if the module was modified.
bool llvm::UpgradeDebugInfo(Module &M) {
unsigned Version = getDebugMetadataVersionFromModule(M);
if (Version == DEBUG_METADATA_VERSION) {
bool BrokenDebugInfo = false;
if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
report_fatal_error("Broken module found, compilation aborted!");
if (!BrokenDebugInfo)
// Everything is ok.
return false;
else {
// Diagnose malformed debug info.
DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
M.getContext().diagnose(Diag);
}
}
bool Modified = StripDebugInfo(M);
if (Modified && Version != DEBUG_METADATA_VERSION) {
// Diagnose a version mismatch.
DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
M.getContext().diagnose(DiagVersion);
}
return Modified;
}
/// This checks for the objc retain/release marker which should be upgraded. It
/// returns true if the module was modified.
static bool UpgradeRetainReleaseMarker(Module &M) {
bool Changed = false;
const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
if (ModRetainReleaseMarker) {
MDNode *Op = ModRetainReleaseMarker->getOperand(0);
if (Op) {
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
if (ID) {
SmallVector<StringRef, 4> ValueComp;
ID->getString().split(ValueComp, "#");
if (ValueComp.size() == 2) {
std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
ID = MDString::get(M.getContext(), NewValue);
}
M.addModuleFlag(Module::Error, MarkerKey, ID);
M.eraseNamedMetadata(ModRetainReleaseMarker);
Changed = true;
}
}
}
return Changed;
}
void llvm::UpgradeARCRuntime(Module &M) {
// This lambda converts plain calls to ARC runtime functions into calls to
// the corresponding intrinsics.
auto UpgradeToIntrinsic = [&](const char *OldFunc,
llvm::Intrinsic::ID IntrinsicFunc) {
Function *Fn = M.getFunction(OldFunc);
if (!Fn)
return;
Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
for (User *U : make_early_inc_range(Fn->users())) {
CallInst *CI = dyn_cast<CallInst>(U);
if (!CI || CI->getCalledFunction() != Fn)
continue;
IRBuilder<> Builder(CI->getParent(), CI->getIterator());
FunctionType *NewFuncTy = NewFn->getFunctionType();
SmallVector<Value *, 2> Args;
// Don't upgrade the intrinsic if it's not valid to bitcast the return
// value to the return type of the old function.
if (NewFuncTy->getReturnType() != CI->getType() &&
!CastInst::castIsValid(Instruction::BitCast, CI,
NewFuncTy->getReturnType()))
continue;
bool InvalidCast = false;
for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
Value *Arg = CI->getArgOperand(I);
// Bitcast argument to the parameter type of the new function if it's
// not a variadic argument.
if (I < NewFuncTy->getNumParams()) {
// Don't upgrade the intrinsic if it's not valid to bitcast the argument
// to the parameter type of the new function.
if (!CastInst::castIsValid(Instruction::BitCast, Arg,
NewFuncTy->getParamType(I))) {
InvalidCast = true;
break;
}
Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
}
Args.push_back(Arg);
}
if (InvalidCast)
continue;
// Create a call instruction that calls the new function.
CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
NewCall->takeName(CI);
// Bitcast the return value back to the type of the old call.
Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
if (!CI->use_empty())
CI->replaceAllUsesWith(NewRetVal);
CI->eraseFromParent();
}
if (Fn->use_empty())
Fn->eraseFromParent();
};
// Unconditionally convert a call to "clang.arc.use" to a call to
// "llvm.objc.clang.arc.use".
UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
// Upgrade the retain/release marker. If there is no need to upgrade
// the marker, that means either the module is already new enough to contain
// the new intrinsics or it is not ARC, so there is no need to upgrade the runtime calls.
if (!UpgradeRetainReleaseMarker(M))
return;
std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
{"objc_autorelease", llvm::Intrinsic::objc_autorelease},
{"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
{"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
{"objc_autoreleaseReturnValue",
llvm::Intrinsic::objc_autoreleaseReturnValue},
{"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
{"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
{"objc_initWeak", llvm::Intrinsic::objc_initWeak},
{"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
{"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
{"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
{"objc_release", llvm::Intrinsic::objc_release},
{"objc_retain", llvm::Intrinsic::objc_retain},
{"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
{"objc_retainAutoreleaseReturnValue",
llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
{"objc_retainAutoreleasedReturnValue",
llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
{"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
{"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
{"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
{"objc_unsafeClaimAutoreleasedReturnValue",
llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
{"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
{"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
{"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
{"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
{"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
{"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
{"objc_arc_annotation_topdown_bbstart",
llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
{"objc_arc_annotation_topdown_bbend",
llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
{"objc_arc_annotation_bottomup_bbstart",
llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
{"objc_arc_annotation_bottomup_bbend",
llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
for (auto &I : RuntimeFuncs)
UpgradeToIntrinsic(I.first, I.second);
}
bool llvm::UpgradeModuleFlags(Module &M) {
NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
if (!ModFlags)
return false;
bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
bool HasSwiftVersionFlag = false;
uint8_t SwiftMajorVersion, SwiftMinorVersion;
uint32_t SwiftABIVersion;
auto Int8Ty = Type::getInt8Ty(M.getContext());
auto Int32Ty = Type::getInt32Ty(M.getContext());
for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
MDNode *Op = ModFlags->getOperand(I);
if (Op->getNumOperands() != 3)
continue;
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
if (!ID)
continue;
if (ID->getString() == "Objective-C Image Info Version")
HasObjCFlag = true;
if (ID->getString() == "Objective-C Class Properties")
HasClassProperties = true;
// Upgrade PIC/PIE Module Flags. The module flag behavior for these two
// fields was Error and is now Max.
if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
if (auto *Behavior =
mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
if (Behavior->getLimitedValue() == Module::Error) {
Type *Int32Ty = Type::getInt32Ty(M.getContext());
Metadata *Ops[3] = {
ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
MDString::get(M.getContext(), ID->getString()),
Op->getOperand(2)};
ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
Changed = true;
}
}
}
// Upgrade branch protection and return address signing module flags. The
// module flag behavior for these fields was Error and is now Min.
if (ID->getString() == "branch-target-enforcement" ||
ID->getString().startswith("sign-return-address")) {
if (auto *Behavior =
mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
if (Behavior->getLimitedValue() == Module::Error) {
Type *Int32Ty = Type::getInt32Ty(M.getContext());
Metadata *Ops[3] = {
ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
Op->getOperand(1), Op->getOperand(2)};
ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
Changed = true;
}
}
}
// Upgrade the Objective-C Image Info Section. Remove the whitespace in the
// section name so that llvm-lto will not complain about mismatching
// module flags that are functionally the same.
if (ID->getString() == "Objective-C Image Info Section") {
if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
SmallVector<StringRef, 4> ValueComp;
Value->getString().split(ValueComp, " ");
if (ValueComp.size() != 1) {
std::string NewValue;
for (auto &S : ValueComp)
NewValue += S.str();
Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
MDString::get(M.getContext(), NewValue)};
ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
Changed = true;
}
}
}
// IRUpgrader turns an i32-typed "Objective-C Garbage Collection" flag into an i8 value.
// If the higher bits are set, it adds a new module flag for the Swift info.
if (ID->getString() == "Objective-C Garbage Collection") {
auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
if (Md) {
assert(Md->getValue() && "Expected non-empty metadata");
auto Type = Md->getValue()->getType();
if (Type == Int8Ty)
continue;
unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
if ((Val & 0xff) != Val) {
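// The packed layout is: bits 31-24 Swift major version, bits 23-16 Swift
// minor version, bits 15-8 Swift ABI version, bits 7-0 the GC value itself.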
HasSwiftVersionFlag = true;
SwiftABIVersion = (Val & 0xff00) >> 8;
SwiftMajorVersion = (Val & 0xff000000) >> 24;
SwiftMinorVersion = (Val & 0xff0000) >> 16;
}
Metadata *Ops[3] = {
ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
Op->getOperand(1),
ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
Changed = true;
}
}
}
// "Objective-C Class Properties" is recently added for Objective-C. We
// upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
// flag of value 0, so we can correclty downgrade this flag when trying to
// link an ObjC bitcode without this module flag with an ObjC bitcode with
// this module flag.
if (HasObjCFlag && !HasClassProperties) {
M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
(uint32_t)0);
Changed = true;
}
if (HasSwiftVersionFlag) {
M.addModuleFlag(Module::Error, "Swift ABI Version",
SwiftABIVersion);
M.addModuleFlag(Module::Error, "Swift Major Version",
ConstantInt::get(Int8Ty, SwiftMajorVersion));
M.addModuleFlag(Module::Error, "Swift Minor Version",
ConstantInt::get(Int8Ty, SwiftMinorVersion));
Changed = true;
}
return Changed;
}
void llvm::UpgradeSectionAttributes(Module &M) {
auto TrimSpaces = [](StringRef Section) -> std::string {
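// Rejoin the comma-separated components with no surrounding whitespace,
// e.g. "__DATA, __objc_catlist" becomes "__DATA,__objc_catlist".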
SmallVector<StringRef, 5> Components;
Section.split(Components, ',');
SmallString<32> Buffer;
raw_svector_ostream OS(Buffer);
for (auto Component : Components)
OS << ',' << Component.trim();
return std::string(OS.str().substr(1));
};
for (auto &GV : M.globals()) {
if (!GV.hasSection())
continue;
StringRef Section = GV.getSection();
if (!Section.startswith("__DATA, __objc_catlist"))
continue;
// __DATA, __objc_catlist, regular, no_dead_strip
// __DATA,__objc_catlist,regular,no_dead_strip
GV.setSection(TrimSpaces(Section));
}
}
namespace {
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, if strict FP semantics are needed within a function, the
// function must have the strictfp attribute and all calls within the function
// must also have the strictfp attribute. This latter restriction is
// necessary to prevent unwanted libcall simplification when a function is
// being cloned (such as for inlining).
//
// The "dangling" strictfp attribute usage was only used to prevent constant
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
StrictFPUpgradeVisitor() = default;
void visitCallBase(CallBase &Call) {
if (!Call.isStrictFP())
return;
if (isa<ConstrainedFPIntrinsic>(&Call))
return;
// If we get here, the caller doesn't have the strictfp attribute
// but this callsite does. Replace the strictfp attribute with nobuiltin.
Call.removeFnAttr(Attribute::StrictFP);
Call.addFnAttr(Attribute::NoBuiltin);
}
};
} // namespace
void llvm::UpgradeFunctionAttributes(Function &F) {
// If a function definition doesn't have the strictfp attribute,
// convert any callsite strictfp attributes to nobuiltin.
if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
StrictFPUpgradeVisitor SFPV;
SFPV.visit(F);
}
// Remove all incompatible attributes from the function.
F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
for (auto &Arg : F.args())
Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
}
static bool isOldLoopArgument(Metadata *MD) {
auto *T = dyn_cast_or_null<MDTuple>(MD);
if (!T)
return false;
if (T->getNumOperands() < 1)
return false;
auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
if (!S)
return false;
return S->getString().startswith("llvm.vectorizer.");
}
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
StringRef OldPrefix = "llvm.vectorizer.";
assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
if (OldTag == "llvm.vectorizer.unroll")
return MDString::get(C, "llvm.loop.interleave.count");
return MDString::get(
C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
.str());
}
static Metadata *upgradeLoopArgument(Metadata *MD) {
auto *T = dyn_cast_or_null<MDTuple>(MD);
if (!T)
return MD;
if (T->getNumOperands() < 1)
return MD;
auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
if (!OldTag)
return MD;
if (!OldTag->getString().startswith("llvm.vectorizer."))
return MD;
// This has an old tag. Upgrade it.
SmallVector<Metadata *, 8> Ops;
Ops.reserve(T->getNumOperands());
Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
Ops.push_back(T->getOperand(I));
return MDTuple::get(T->getContext(), Ops);
}
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
auto *T = dyn_cast<MDTuple>(&N);
if (!T)
return &N;
if (none_of(T->operands(), isOldLoopArgument))
return &N;
SmallVector<Metadata *, 8> Ops;
Ops.reserve(T->getNumOperands());
for (Metadata *MD : T->operands())
Ops.push_back(upgradeLoopArgument(MD));
return MDTuple::get(T->getContext(), Ops);
}
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
Triple T(TT);
// For AMDGPU we upgrade older DataLayouts to include the default globals
// address space of 1.
if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}
std::string Res = DL.str();
if (!T.isX86())
return Res;
// If the datalayout matches the expected format, add pointer size address
// spaces to the datalayout.
std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
if (!DL.contains(AddrSpaces)) {
SmallVector<StringRef, 4> Groups;
Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
if (R.match(DL, &Groups))
Res = (Groups[1] + AddrSpaces + Groups[3]).str();
}
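// As a sketch of the rewrite above (layout string abbreviated): given
// "e-m:e-i64:64-f80:128-n8:16:32:64-S128", the regex captures "e-m:e" in
// Groups[1] and the result becomes
// "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128".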
// For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
// Raising the alignment is safe because Clang did not produce f80 values in
// the MSVC environment before this upgrade was added.
if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
StringRef Ref = Res;
auto I = Ref.find("-f80:32-");
if (I != StringRef::npos)
Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
}
return Res;
}
void llvm::UpgradeAttributes(AttrBuilder &B) {
StringRef FramePointer;
Attribute A = B.getAttribute("no-frame-pointer-elim");
if (A.isValid()) {
// The value can be "true" or "false".
FramePointer = A.getValueAsString() == "true" ? "all" : "none";
B.removeAttribute("no-frame-pointer-elim");
}
if (B.contains("no-frame-pointer-elim-non-leaf")) {
// The value is ignored. "no-frame-pointer-elim"="true" takes priority.
if (FramePointer != "all")
FramePointer = "non-leaf";
B.removeAttribute("no-frame-pointer-elim-non-leaf");
}
if (!FramePointer.empty())
B.addAttribute("frame-pointer", FramePointer);
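// Illustrative mapping of the legacy frame-pointer attributes handled above:
//   "no-frame-pointer-elim"="true"            -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"           -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" (alone)  -> "frame-pointer"="non-leaf"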
A = B.getAttribute("null-pointer-is-valid");
if (A.isValid()) {
// The value can be "true" or "false".
bool NullPointerIsValid = A.getValueAsString() == "true";
B.removeAttribute("null-pointer-is-valid");
if (NullPointerIsValid)
B.addAttribute(Attribute::NullPointerIsValid);
}
}
void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
// clang.arc.attachedcall bundles are now required to have an operand.
// If they don't, it's okay to drop them entirely: when there is an operand,
// the "attachedcall" is meaningful and required, but without an operand,
// it's just a marker NOP. Dropping it merely prevents an optimization.
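// For instance (illustrative IR), a bundle such as
//   [ "clang.arc.attachedcall"(i8* (i8*)* @objc_retainAutoreleasedReturnValue) ]
// is kept, while a bare [ "clang.arc.attachedcall"() ] bundle is removed.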
erase_if(Bundles, [&](OperandBundleDef &OBD) {
return OBD.getTag() == "clang.arc.attachedcall" &&
OBD.inputs().empty();
});
}
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index bf145bffe8bf..23ac012b9e00 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -1,663 +1,659 @@
//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some helpful functions for dealing with the possibility of
// Unix signals occurring while your program is running.
//
//===----------------------------------------------------------------------===//
//
// This file is extremely careful to only do signal-safe things while in a
// signal handler. In particular, memory allocation and acquiring a mutex
// while in a signal handler should never occur. ManagedStatic isn't usable from
// a signal handler for 2 reasons:
//
// 1. Creating a new one allocates.
// 2. The signal handler could fire while llvm_shutdown is being processed, in
// which case the ManagedStatic is in an unknown state because it could
// already have been destroyed, or be in the process of being destroyed.
//
// Modifying the behavior of the signal handlers (such as registering new ones)
// can acquire a mutex, but all this guarantees is that the signal handler
// behavior is only modified by one thread at a time. A signal handler can still
// fire while this occurs!
//
// Adding work to a signal handler requires lock-freedom (and we assume atomics
// are always lock-free) because the signal handler could fire while new work is
// being added.
//
//===----------------------------------------------------------------------===//
#include "Unix.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
#ifdef HAVE_BACKTRACE
# include BACKTRACE_HEADER // For backtrace().
#endif
#if HAVE_SIGNAL_H
#include <signal.h>
#endif
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#if HAVE_DLFCN_H
#include <dlfcn.h>
#endif
#if HAVE_MACH_MACH_H
#include <mach/mach.h>
#endif
#if HAVE_LINK_H
#include <link.h>
#endif
#ifdef HAVE__UNWIND_BACKTRACE
// FIXME: We should be able to use <unwind.h> for any target that has an
// _Unwind_Backtrace function, but on FreeBSD the configure test passes
// despite the function not existing, and on Android, <unwind.h> conflicts
// with <link.h>.
#ifdef __GLIBC__
#include <unwind.h>
#else
#undef HAVE__UNWIND_BACKTRACE
#endif
#endif
using namespace llvm;
static void SignalHandler(int Sig); // defined below.
static void InfoSignalHandler(int Sig); // defined below.
using SignalHandlerFunctionType = void (*)();
/// The function to call if ctrl-c is pressed.
static std::atomic<SignalHandlerFunctionType> InterruptFunction =
ATOMIC_VAR_INIT(nullptr);
static std::atomic<SignalHandlerFunctionType> InfoSignalFunction =
ATOMIC_VAR_INIT(nullptr);
/// The function to call on SIGPIPE (one-time use only).
static std::atomic<SignalHandlerFunctionType> OneShotPipeSignalFunction =
ATOMIC_VAR_INIT(nullptr);
namespace {
/// Signal-safe removal of files.
/// Inserting and erasing from the list isn't signal-safe, but removal of files
/// themselves is signal-safe. Memory is freed when the head is freed; deletion
/// is therefore not signal-safe either.
class FileToRemoveList {
std::atomic<char *> Filename = ATOMIC_VAR_INIT(nullptr);
std::atomic<FileToRemoveList *> Next = ATOMIC_VAR_INIT(nullptr);
FileToRemoveList() = default;
// Not signal-safe.
FileToRemoveList(const std::string &str) : Filename(strdup(str.c_str())) {}
public:
// Not signal-safe.
~FileToRemoveList() {
if (FileToRemoveList *N = Next.exchange(nullptr))
delete N;
if (char *F = Filename.exchange(nullptr))
free(F);
}
// Not signal-safe.
static void insert(std::atomic<FileToRemoveList *> &Head,
const std::string &Filename) {
// Insert the new file at the end of the list.
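// Lock-free append sketch: try to CAS the new node into the first null
// Next pointer; on failure, OldHead holds the node we lost the race to,
// so step to its Next field, reset the expected value, and retry.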
FileToRemoveList *NewHead = new FileToRemoveList(Filename);
std::atomic<FileToRemoveList *> *InsertionPoint = &Head;
FileToRemoveList *OldHead = nullptr;
while (!InsertionPoint->compare_exchange_strong(OldHead, NewHead)) {
InsertionPoint = &OldHead->Next;
OldHead = nullptr;
}
}
// Not signal-safe.
static void erase(std::atomic<FileToRemoveList *> &Head,
const std::string &Filename) {
// Use a lock to avoid concurrent erase: the comparison would access
// free'd memory.
static ManagedStatic<sys::SmartMutex<true>> Lock;
sys::SmartScopedLock<true> Writer(*Lock);
for (FileToRemoveList *Current = Head.load(); Current;
Current = Current->Next.load()) {
if (char *OldFilename = Current->Filename.load()) {
if (OldFilename != Filename)
continue;
// Leave an empty filename.
OldFilename = Current->Filename.exchange(nullptr);
// The filename might have become null between the time we
// compared it and the time we exchanged it.
if (OldFilename)
free(OldFilename);
}
}
}
// Signal-safe.
static void removeAllFiles(std::atomic<FileToRemoveList *> &Head) {
// If cleanup were to occur while we're removing files, we'd have a bad time.
// Make sure we're OK by preventing cleanup from doing anything while we're
// removing files. If cleanup races with us and we win we'll have a leak,
// but we won't crash.
FileToRemoveList *OldHead = Head.exchange(nullptr);
for (FileToRemoveList *currentFile = OldHead; currentFile;
currentFile = currentFile->Next.load()) {
// If erasing were occurring while we're trying to remove files, we'd look
// at free'd data. Take away the path and put it back when done.
if (char *path = currentFile->Filename.exchange(nullptr)) {
// Get the status so we can determine if it's a file or directory. If we
// can't stat the file, ignore it.
struct stat buf;
if (stat(path, &buf) != 0)
continue;
// If this is not a regular file, ignore it. We want to prevent removal
// of special files like /dev/null, even if the compiler is being run
// with the super-user permissions.
if (!S_ISREG(buf.st_mode))
continue;
// Otherwise, remove the file. We ignore any errors here as there is
// nothing else we can do.
unlink(path);
// We're done removing the file, erasing can safely proceed.
currentFile->Filename.exchange(path);
}
}
// We're done removing files, cleanup can safely proceed.
Head.exchange(OldHead);
}
};
static std::atomic<FileToRemoveList *> FilesToRemove = ATOMIC_VAR_INIT(nullptr);
/// Clean up the list in a signal-friendly manner.
/// Recall that signals can fire during llvm_shutdown. If this occurs we should
/// either clean something up or nothing at all, but we shouldn't crash!
struct FilesToRemoveCleanup {
// Not signal-safe.
~FilesToRemoveCleanup() {
FileToRemoveList *Head = FilesToRemove.exchange(nullptr);
if (Head)
delete Head;
}
};
} // namespace
static StringRef Argv0;
/// Signals that represent requested termination. There's no bug or failure, or
/// if there is, it's not our direct responsibility. For whatever reason, our
/// continued execution is no longer desirable.
static const int IntSigs[] = {
SIGHUP, SIGINT, SIGTERM, SIGUSR2
};
/// Signals that represent that we have a bug, and our prompt termination has
/// been ordered.
static const int KillSigs[] = {
SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGQUIT
#ifdef SIGSYS
, SIGSYS
#endif
#ifdef SIGXCPU
, SIGXCPU
#endif
#ifdef SIGXFSZ
, SIGXFSZ
#endif
#ifdef SIGEMT
, SIGEMT
#endif
};
/// Signals that represent requests for status.
static const int InfoSigs[] = {
SIGUSR1
#ifdef SIGINFO
, SIGINFO
#endif
};
static const size_t NumSigs =
array_lengthof(IntSigs) + array_lengthof(KillSigs) +
array_lengthof(InfoSigs) + 1 /* SIGPIPE */;
static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0);
static struct {
struct sigaction SA;
int SigNo;
} RegisteredSignalInfo[NumSigs];
#if defined(HAVE_SIGALTSTACK)
// Hold onto both the old and new alternate signal stack so that it's not
// reported as a leak. We don't make any attempt to remove our alt signal
// stack if we remove our signal handlers; that can't be done reliably if
// someone else is also trying to do the same thing.
static stack_t OldAltStack;
LLVM_ATTRIBUTE_USED static void *NewAltStackPointer;
static void CreateSigAltStack() {
const size_t AltStackSize = MINSIGSTKSZ + 64 * 1024;
// If we're executing on the alternate stack, or we already have an alternate
// signal stack that we're happy with, there's nothing for us to do. Don't
// reduce the size; some other part of the process might need a larger stack
// than we do.
if (sigaltstack(nullptr, &OldAltStack) != 0 ||
OldAltStack.ss_flags & SS_ONSTACK ||
(OldAltStack.ss_sp && OldAltStack.ss_size >= AltStackSize))
return;
stack_t AltStack = {};
AltStack.ss_sp = static_cast<char *>(safe_malloc(AltStackSize));
NewAltStackPointer = AltStack.ss_sp; // Save to avoid reporting a leak.
AltStack.ss_size = AltStackSize;
if (sigaltstack(&AltStack, &OldAltStack) != 0)
free(AltStack.ss_sp);
}
#else
static void CreateSigAltStack() {}
#endif
static void RegisterHandlers() { // Not signal-safe.
// The mutex prevents other threads from registering handlers while we're
// doing it. We also have to protect the handlers and their count because
// a signal handler could fire while we're registering handlers.
static ManagedStatic<sys::SmartMutex<true>> SignalHandlerRegistrationMutex;
sys::SmartScopedLock<true> Guard(*SignalHandlerRegistrationMutex);
// If the handlers are already registered, we're done.
if (NumRegisteredSignals.load() != 0)
return;
// Create an alternate stack for signal handling. This is necessary for us to
// be able to reliably handle signals due to stack overflow.
CreateSigAltStack();
enum class SignalKind { IsKill, IsInfo };
auto registerHandler = [&](int Signal, SignalKind Kind) {
unsigned Index = NumRegisteredSignals.load();
assert(Index < array_lengthof(RegisteredSignalInfo) &&
"Out of space for signal handlers!");
struct sigaction NewHandler;
switch (Kind) {
case SignalKind::IsKill:
NewHandler.sa_handler = SignalHandler;
NewHandler.sa_flags = SA_NODEFER | SA_RESETHAND | SA_ONSTACK;
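// SA_NODEFER leaves the signal unblocked inside its own handler,
// SA_RESETHAND restores the default disposition once the handler has
// fired, and SA_ONSTACK runs the handler on the alternate stack set up
// in CreateSigAltStack().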
break;
case SignalKind::IsInfo:
NewHandler.sa_handler = InfoSignalHandler;
NewHandler.sa_flags = SA_ONSTACK;
break;
}
sigemptyset(&NewHandler.sa_mask);
// Install the new handler, save the old one in RegisteredSignalInfo.
sigaction(Signal, &NewHandler, &RegisteredSignalInfo[Index].SA);
RegisteredSignalInfo[Index].SigNo = Signal;
++NumRegisteredSignals;
};
for (auto S : IntSigs)
registerHandler(S, SignalKind::IsKill);
for (auto S : KillSigs)
registerHandler(S, SignalKind::IsKill);
if (OneShotPipeSignalFunction)
registerHandler(SIGPIPE, SignalKind::IsKill);
for (auto S : InfoSigs)
registerHandler(S, SignalKind::IsInfo);
}
void sys::unregisterHandlers() {
// Restore all of the signal handlers to how they were before we showed up.
for (unsigned i = 0, e = NumRegisteredSignals.load(); i != e; ++i) {
sigaction(RegisteredSignalInfo[i].SigNo,
&RegisteredSignalInfo[i].SA, nullptr);
--NumRegisteredSignals;
}
}
/// Process the FilesToRemove list.
static void RemoveFilesToRemove() {
FileToRemoveList::removeAllFiles(FilesToRemove);
}
void sys::CleanupOnSignal(uintptr_t Context) {
int Sig = (int)Context;
if (llvm::is_contained(InfoSigs, Sig)) {
InfoSignalHandler(Sig);
return;
}
RemoveFilesToRemove();
if (llvm::is_contained(IntSigs, Sig) || Sig == SIGPIPE)
return;
llvm::sys::RunSignalHandlers();
}
// The signal handler that runs.
static void SignalHandler(int Sig) {
// Restore the signal behavior to default, so that the program actually
// crashes when we return and the signal reissues. This also ensures that if
// we crash in our signal handler, the program will terminate immediately
// instead of recursing in the signal handler.
sys::unregisterHandlers();
// Unmask all potentially blocked kill signals.
sigset_t SigMask;
sigfillset(&SigMask);
sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);
{
RemoveFilesToRemove();
if (Sig == SIGPIPE)
if (auto OldOneShotPipeFunction =
OneShotPipeSignalFunction.exchange(nullptr))
return OldOneShotPipeFunction();
bool IsIntSig = llvm::is_contained(IntSigs, Sig);
if (IsIntSig)
if (auto OldInterruptFunction = InterruptFunction.exchange(nullptr))
return OldInterruptFunction();
if (Sig == SIGPIPE || IsIntSig) {
raise(Sig); // Execute the default handler.
return;
}
}
// Otherwise if it is a fault (like SEGV) run any handler.
llvm::sys::RunSignalHandlers();
#ifdef __s390__
// On S/390, certain signals are delivered with PSW Address pointing to
// *after* the faulting instruction. Simply returning from the signal
// handler would continue execution after that point, instead of
// re-raising the signal. Raise the signal manually in those cases.
if (Sig == SIGILL || Sig == SIGFPE || Sig == SIGTRAP)
raise(Sig);
#endif
}
static void InfoSignalHandler(int Sig) {
SaveAndRestore<int> SaveErrnoDuringASignalHandler(errno);
if (SignalHandlerFunctionType CurrentInfoFunction = InfoSignalFunction)
CurrentInfoFunction();
}
void llvm::sys::RunInterruptHandlers() {
RemoveFilesToRemove();
}
void llvm::sys::SetInterruptFunction(void (*IF)()) {
InterruptFunction.exchange(IF);
RegisterHandlers();
}
void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
InfoSignalFunction.exchange(Handler);
RegisterHandlers();
}
void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) {
OneShotPipeSignalFunction.exchange(Handler);
RegisterHandlers();
}
void llvm::sys::DefaultOneShotPipeSignalHandler() {
- // UNIX03 conformance requires a non-zero exit code and an error message
- // to stderr when writing to a closed stdout fails.
- errs() << "error: write on a pipe with no reader\n";
-
// Send a special return code that drivers can check for, from sysexits.h.
exit(EX_IOERR);
}
// The public API
bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
std::string* ErrMsg) {
// Ensure that cleanup will occur as soon as one file is added.
static ManagedStatic<FilesToRemoveCleanup> FilesToRemoveCleanup;
*FilesToRemoveCleanup;
FileToRemoveList::insert(FilesToRemove, Filename.str());
RegisterHandlers();
return false;
}
// The public API
void llvm::sys::DontRemoveFileOnSignal(StringRef Filename) {
FileToRemoveList::erase(FilesToRemove, Filename.str());
}
/// Add a function to be called when a signal is delivered to the process. The
/// handler can have a cookie passed to it to identify what instance of the
/// handler it is.
void llvm::sys::AddSignalHandler(sys::SignalHandlerCallback FnPtr,
void *Cookie) { // Signal-safe.
insertSignalHandler(FnPtr, Cookie);
RegisterHandlers();
}
#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && HAVE_LINK_H && \
(defined(__linux__) || defined(__FreeBSD__) || \
defined(__FreeBSD_kernel__) || defined(__NetBSD__))
struct DlIteratePhdrData {
void **StackTrace;
int depth;
bool first;
const char **modules;
intptr_t *offsets;
const char *main_exec_name;
};
static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
DlIteratePhdrData *data = (DlIteratePhdrData*)arg;
const char *name = data->first ? data->main_exec_name : info->dlpi_name;
data->first = false;
for (int i = 0; i < info->dlpi_phnum; i++) {
const auto *phdr = &info->dlpi_phdr[i];
if (phdr->p_type != PT_LOAD)
continue;
intptr_t beg = info->dlpi_addr + phdr->p_vaddr;
intptr_t end = beg + phdr->p_memsz;
for (int j = 0; j < data->depth; j++) {
if (data->modules[j])
continue;
intptr_t addr = (intptr_t)data->StackTrace[j];
if (beg <= addr && addr < end) {
data->modules[j] = name;
data->offsets[j] = addr - info->dlpi_addr;
}
}
}
return 0;
}
/// If this is an ELF platform, we can find all loaded modules and their virtual
/// addresses with dl_iterate_phdr.
static bool findModulesAndOffsets(void **StackTrace, int Depth,
const char **Modules, intptr_t *Offsets,
const char *MainExecutableName,
StringSaver &StrPool) {
DlIteratePhdrData data = {StackTrace, Depth, true,
Modules, Offsets, MainExecutableName};
dl_iterate_phdr(dl_iterate_phdr_cb, &data);
return true;
}
#else
/// This platform does not have dl_iterate_phdr, so we do not yet know how to
/// find all loaded DSOs.
static bool findModulesAndOffsets(void **StackTrace, int Depth,
const char **Modules, intptr_t *Offsets,
const char *MainExecutableName,
StringSaver &StrPool) {
return false;
}
#endif // defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && ...
#if ENABLE_BACKTRACES && defined(HAVE__UNWIND_BACKTRACE)
static int unwindBacktrace(void **StackTrace, int MaxEntries) {
if (MaxEntries < 0)
return 0;
// Skip the first frame ('unwindBacktrace' itself).
int Entries = -1;
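// Starting at -1 means the first frame observed (this function itself) is
// never stored; slots are only written once Entries reaches zero.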
auto HandleFrame = [&](_Unwind_Context *Context) -> _Unwind_Reason_Code {
// Apparently we need to detect reaching the end of the stack ourselves.
void *IP = (void *)_Unwind_GetIP(Context);
if (!IP)
return _URC_END_OF_STACK;
assert(Entries < MaxEntries && "recursively called after END_OF_STACK?");
if (Entries >= 0)
StackTrace[Entries] = IP;
if (++Entries == MaxEntries)
return _URC_END_OF_STACK;
return _URC_NO_REASON;
};
_Unwind_Backtrace(
[](_Unwind_Context *Context, void *Handler) {
return (*static_cast<decltype(HandleFrame) *>(Handler))(Context);
},
static_cast<void *>(&HandleFrame));
return std::max(Entries, 0);
}
#endif
// In the case of a program crash or fault, print out a stack trace so that the
// user has an indication of why and where we died.
//
// On glibc systems we have the 'backtrace' function, which works nicely, but
// doesn't demangle symbols.
void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
#if ENABLE_BACKTRACES
static void *StackTrace[256];
int depth = 0;
#if defined(HAVE_BACKTRACE)
// Use backtrace() to output a backtrace on Linux systems with glibc.
if (!depth)
depth = backtrace(StackTrace, static_cast<int>(array_lengthof(StackTrace)));
#endif
#if defined(HAVE__UNWIND_BACKTRACE)
// Try _Unwind_Backtrace() if backtrace() failed.
if (!depth)
depth = unwindBacktrace(StackTrace,
static_cast<int>(array_lengthof(StackTrace)));
#endif
if (!depth)
return;
// If "Depth" is not provided by the caller, use the return value of
// backtrace() for printing a symbolized stack trace.
if (!Depth)
Depth = depth;
if (printSymbolizedStackTrace(Argv0, StackTrace, Depth, OS))
return;
OS << "Stack dump without symbol names (ensure you have llvm-symbolizer in "
"your PATH or set the environment var `LLVM_SYMBOLIZER_PATH` to point "
"to it):\n";
#if HAVE_DLFCN_H && HAVE_DLADDR
int width = 0;
for (int i = 0; i < depth; ++i) {
Dl_info dlinfo;
dladdr(StackTrace[i], &dlinfo);
const char* name = strrchr(dlinfo.dli_fname, '/');
int nwidth;
if (!name) nwidth = strlen(dlinfo.dli_fname);
else nwidth = strlen(name) - 1;
if (nwidth > width) width = nwidth;
}
for (int i = 0; i < depth; ++i) {
Dl_info dlinfo;
dladdr(StackTrace[i], &dlinfo);
OS << format("%-2d", i);
const char* name = strrchr(dlinfo.dli_fname, '/');
if (!name) OS << format(" %-*s", width, dlinfo.dli_fname);
else OS << format(" %-*s", width, name+1);
OS << format(" %#0*lx", (int)(sizeof(void*) * 2) + 2,
(unsigned long)StackTrace[i]);
if (dlinfo.dli_sname != nullptr) {
OS << ' ';
int res;
char* d = itaniumDemangle(dlinfo.dli_sname, nullptr, nullptr, &res);
if (!d) OS << dlinfo.dli_sname;
else OS << d;
free(d);
OS << format(" + %tu", (static_cast<const char*>(StackTrace[i])-
static_cast<const char*>(dlinfo.dli_saddr)));
}
OS << '\n';
}
#elif defined(HAVE_BACKTRACE)
backtrace_symbols_fd(StackTrace, Depth, STDERR_FILENO);
#endif
#endif
}
static void PrintStackTraceSignalHandler(void *) {
sys::PrintStackTrace(llvm::errs());
}
void llvm::sys::DisableSystemDialogsOnCrash() {}
/// When an error signal (such as SIGABRT or SIGSEGV) is delivered to the
/// process, print a stack trace and then exit.
void llvm::sys::PrintStackTraceOnErrorSignal(StringRef Argv0,
bool DisableCrashReporting) {
::Argv0 = Argv0;
AddSignalHandler(PrintStackTraceSignalHandler, nullptr);
#if defined(__APPLE__) && ENABLE_CRASH_OVERRIDES
// Environment variable to disable any kind of crash dialog.
if (DisableCrashReporting || getenv("LLVM_DISABLE_CRASH_REPORT")) {
mach_port_t self = mach_task_self();
exception_mask_t mask = EXC_MASK_CRASH;
kern_return_t ret = task_set_exception_ports(self,
mask,
MACH_PORT_NULL,
EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES,
THREAD_STATE_NONE);
(void)ret;
}
#endif
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c28216048d7c..06e21f90ebf1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1,21960 +1,21959 @@
//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64ExpandImm.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.
cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
"aarch64-elf-ldtls-generation", cl::Hidden,
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
static cl::opt<bool>
EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
cl::desc("Enable AArch64 logical imm instruction "
"optimization"),
cl::init(true));
// Temporary option added for the purpose of testing functionality added
// to DAGCombiner.cpp in D92230. It is expected that this can be removed
// in the future once both implementations are based on MGATHER rather
// than the GLD1 nodes added for the SVE gather load intrinsics.
static cl::opt<bool>
EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
cl::desc("Combine extends of AArch64 masked "
"gather intrinsics"),
cl::init(true));
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
static inline EVT getPackedSVEVectorVT(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for vector");
case MVT::i8:
return MVT::nxv16i8;
case MVT::i16:
return MVT::nxv8i16;
case MVT::i32:
return MVT::nxv4i32;
case MVT::i64:
return MVT::nxv2i64;
case MVT::f16:
return MVT::nxv8f16;
case MVT::f32:
return MVT::nxv4f32;
case MVT::f64:
return MVT::nxv2f64;
case MVT::bf16:
return MVT::nxv8bf16;
}
}
// NOTE: Currently there's only a need to return integer vector types. If this
// changes then just add an extra "type" parameter.
static inline EVT getPackedSVEVectorVT(ElementCount EC) {
switch (EC.getKnownMinValue()) {
default:
llvm_unreachable("unexpected element count for vector");
case 16:
return MVT::nxv16i8;
case 8:
return MVT::nxv8i16;
case 4:
return MVT::nxv4i32;
case 2:
return MVT::nxv2i64;
}
}
static inline EVT getPromotedVTForPredicate(EVT VT) {
assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
"Expected scalable predicate vector type!");
switch (VT.getVectorMinNumElements()) {
default:
llvm_unreachable("unexpected element count for vector");
case 2:
return MVT::nxv2i64;
case 4:
return MVT::nxv4i32;
case 8:
return MVT::nxv8i16;
case 16:
return MVT::nxv16i8;
}
}
/// Returns true if VT's elements occupy the lowest bit positions of its
/// associated register class without any intervening space.
///
/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
/// same register class, but only nxv8f16 can be treated as a packed vector.
static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal vector type!");
return VT.isFixedLengthVector() ||
VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
}
// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
// predicate and end with a passthru value matching the result type.
static bool isMergePassthruOpcode(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
case AArch64ISD::BSWAP_MERGE_PASSTHRU:
case AArch64ISD::REVH_MERGE_PASSTHRU:
case AArch64ISD::REVW_MERGE_PASSTHRU:
case AArch64ISD::REVD_MERGE_PASSTHRU:
case AArch64ISD::CTLZ_MERGE_PASSTHRU:
case AArch64ISD::CTPOP_MERGE_PASSTHRU:
case AArch64ISD::DUP_MERGE_PASSTHRU:
case AArch64ISD::ABS_MERGE_PASSTHRU:
case AArch64ISD::NEG_MERGE_PASSTHRU:
case AArch64ISD::FNEG_MERGE_PASSTHRU:
case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::FCEIL_MERGE_PASSTHRU:
case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
case AArch64ISD::FRINT_MERGE_PASSTHRU:
case AArch64ISD::FROUND_MERGE_PASSTHRU:
case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
case AArch64ISD::FSQRT_MERGE_PASSTHRU:
case AArch64ISD::FRECPX_MERGE_PASSTHRU:
case AArch64ISD::FABS_MERGE_PASSTHRU:
return true;
}
}
// Returns true if inactive lanes are known to be zeroed by construction.
static bool isZeroingInactiveLanes(SDValue Op) {
switch (Op.getOpcode()) {
default:
// We guarantee that i1 splat_vectors zero the other lanes by
// implementing them with ptrue and possibly a punpklo for nxv1i1.
if (ISD::isConstantSplatVectorAllOnes(Op.getNode()))
return true;
return false;
case AArch64ISD::PTRUE:
case AArch64ISD::SETCC_MERGE_ZERO:
return true;
case ISD::INTRINSIC_WO_CHAIN:
switch (Op.getConstantOperandVal(0)) {
default:
return false;
case Intrinsic::aarch64_sve_ptrue:
case Intrinsic::aarch64_sve_pnext:
case Intrinsic::aarch64_sve_cmpeq:
case Intrinsic::aarch64_sve_cmpne:
case Intrinsic::aarch64_sve_cmpge:
case Intrinsic::aarch64_sve_cmpgt:
case Intrinsic::aarch64_sve_cmphs:
case Intrinsic::aarch64_sve_cmphi:
case Intrinsic::aarch64_sve_cmpeq_wide:
case Intrinsic::aarch64_sve_cmpne_wide:
case Intrinsic::aarch64_sve_cmpge_wide:
case Intrinsic::aarch64_sve_cmpgt_wide:
case Intrinsic::aarch64_sve_cmplt_wide:
case Intrinsic::aarch64_sve_cmple_wide:
case Intrinsic::aarch64_sve_cmphs_wide:
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
case Intrinsic::aarch64_sve_cmpls_wide:
case Intrinsic::aarch64_sve_fcmpeq:
case Intrinsic::aarch64_sve_fcmpne:
case Intrinsic::aarch64_sve_fcmpge:
case Intrinsic::aarch64_sve_fcmpgt:
case Intrinsic::aarch64_sve_fcmpuo:
return true;
}
}
}
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons that set GPRs, nor setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
// When comparing vectors the result sets the different elements in the
// vector to all-one or all-zero.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Set up the register classes.
addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
if (Subtarget->hasLS64()) {
addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
setOperationAction(ISD::STORE, MVT::i64x8, Custom);
}
if (Subtarget->hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
}
if (Subtarget->hasNEON()) {
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
// Someone set us up the NEON.
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
if (Subtarget->hasBF16())
addDRTypeForNEON(MVT::v4bf16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
if (Subtarget->hasBF16())
addQRTypeForNEON(MVT::v8bf16);
}
if (Subtarget->hasSVE() || Subtarget->hasSME()) {
// Add legal sve predicate types
addRegisterClass(MVT::nxv1i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
// Add legal sve data types
addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
if (Subtarget->hasBF16()) {
addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
}
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
}
}
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget->getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f80, Expand);
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// Custom lowering hooks are needed for XOR
// to fold it into CSINC/CSINV.
setOperationAction(ISD::XOR, MVT::i32, Custom);
setOperationAction(ISD::XOR, MVT::i64, Custom);
// Virtually no operation on f128 is legal, but LLVM can't expand these
// operations when there's a valid register class, so we need custom lowering
// in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
setOperationAction(ISD::FDIV, MVT::f128, LibCall);
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
// FIXME: f128 FMINIMUM and FMAXIMUM (including STRICT versions) currently
// aren't handled.
// Lowering for many of the conversions is actually specified by the non-f128
// type. The LowerXXX function will be trivial when f128 isn't involved.
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
// Variable arguments.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// Variable-sized objects.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
// BlockAddress
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
// AArch64 lacks both left-rotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
// AArch64 doesn't have i32 MULH{S|U}.
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
// AArch64 doesn't have {U|S}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
setOperationAction(ISD::PARITY, MVT::i64, Custom);
setOperationAction(ISD::PARITY, MVT::i128, Custom);
setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ABS, MVT::i64, Custom);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Custom lower Add/Sub/Mul with overflow.
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::SADDO, MVT::i64, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i64, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::UMULO, MVT::i32, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
setOperationAction(ISD::SADDO_CARRY, MVT::i32, Custom);
setOperationAction(ISD::SADDO_CARRY, MVT::i64, Custom);
setOperationAction(ISD::SSUBO_CARRY, MVT::i32, Custom);
setOperationAction(ISD::SSUBO_CARRY, MVT::i64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
if (Subtarget->hasFullFP16())
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
ISD::FEXP, ISD::FEXP2, ISD::FLOG,
ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::v4f16, Expand);
setOperationAction(Op, MVT::v8f16, Expand);
}
if (!Subtarget->hasFullFP16()) {
for (auto Op :
{ISD::SELECT, ISD::SELECT_CC, ISD::SETCC,
ISD::BR_CC, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FDIV, ISD::FMA,
ISD::FNEG, ISD::FABS, ISD::FCEIL,
ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
ISD::STRICT_FMAXIMUM})
setOperationAction(Op, MVT::f16, Promote);
// Round-to-integer operations need custom lowering for fp16, as Promote
// doesn't work because the result type is integer.
for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
ISD::STRICT_LLRINT})
setOperationAction(Op, MVT::f16, Custom);
// promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
setOperationAction(ISD::FABS, MVT::v8f16, Expand);
setOperationAction(ISD::FADD, MVT::v8f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
setOperationAction(ISD::FMA, MVT::v8f16, Expand);
setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
}
// AArch64 has implementations of a lot of rounding-like FP operations.
for (auto Op :
{ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
ISD::FRINT, ISD::FTRUNC, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND,
ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FNEARBYINT,
ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
ISD::STRICT_FROUND, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_LROUND,
ISD::STRICT_LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT}) {
for (MVT Ty : {MVT::f32, MVT::f64})
setOperationAction(Op, Ty, Legal);
if (Subtarget->hasFullFP16())
setOperationAction(Op, MVT::f16, Legal);
}
// Basic strict FP operations are legal
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT}) {
for (MVT Ty : {MVT::f32, MVT::f64})
setOperationAction(Op, Ty, Legal);
if (Subtarget->hasFullFP16())
setOperationAction(Op, MVT::f16, Legal);
}
// Strict conversion to a larger type is legal
for (auto VT : {MVT::f32, MVT::f64})
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
// Generate outline atomics library calls only if LSE was not specified for
// the subtarget
if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
#define LCALLNAMES(A, B, N) \
setLibcallName(A##N##_RELAX, #B #N "_relax"); \
setLibcallName(A##N##_ACQ, #B #N "_acq"); \
setLibcallName(A##N##_REL, #B #N "_rel"); \
setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B) \
LCALLNAMES(A, B, 1) \
LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B) \
LCALLNAMES(A, B, 1) \
LCALLNAMES(A, B, 2) \
LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
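// As an illustration, LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
// expands to setLibcallName calls registering "__aarch64_swp1_relax",
// "__aarch64_swp1_acq", ... through "__aarch64_swp8_acq_rel" for the
// 1-, 2-, 4- and 8-byte variants.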
LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
#undef LCALLNAMES
#undef LCALLNAME4
#undef LCALLNAME5
}
// 128-bit loads and stores can be done without expanding
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
// Aligned 128-bit loads and stores are single-copy atomic according to the
// v8.4a spec.
if (Subtarget->hasLSE2()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
}
// 256-bit non-temporal stores can be lowered to STNP. Do this as part of the
// custom lowering, as there are no unpaired non-temporal stores and
// legalization will break up 256-bit inputs.
setOperationAction(ISD::STORE, MVT::v32i8, Custom);
setOperationAction(ISD::STORE, MVT::v16i16, Custom);
setOperationAction(ISD::STORE, MVT::v16f16, Custom);
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v8f32, Custom);
setOperationAction(ISD::STORE, MVT::v4f64, Custom);
setOperationAction(ISD::STORE, MVT::v4i64, Custom);
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
if (Subtarget->hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
// Issue __sincos_stret if available.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
} else {
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
if (Subtarget->getTargetTriple().isOSMSVCRT()) {
// MSVCRT doesn't have powi; fall back to pow
setLibcallName(RTLIB::POWI_F32, nullptr);
setLibcallName(RTLIB::POWI_F64, nullptr);
}
// Make floating-point constants legal for the large code model, so they don't
// become loads from the constant pool.
if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
}
// AArch64 does not have floating-point extending loads, i1 sign-extending
// load, floating-point truncating stores, or v2i32->v2i16 truncating store.
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
}
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f80, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedLoadAction(im, MVT::i64, Legal);
setIndexedLoadAction(im, MVT::f64, Legal);
setIndexedLoadAction(im, MVT::f32, Legal);
setIndexedLoadAction(im, MVT::f16, Legal);
setIndexedLoadAction(im, MVT::bf16, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i64, Legal);
setIndexedStoreAction(im, MVT::f64, Legal);
setIndexedStoreAction(im, MVT::f32, Legal);
setIndexedStoreAction(im, MVT::f16, Legal);
setIndexedStoreAction(im, MVT::bf16, Legal);
}
// Trap.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// We combine OR nodes for bitfield operations.
setTargetDAGCombine(ISD::OR);
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV.
setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
ISD::UINT_TO_FP});
setTargetDAGCombine({ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
ISD::FP_TO_UINT_SAT, ISD::FDIV});
// Try to combine setcc with csel.
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND,
ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG,
ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR,
ISD::INSERT_SUBVECTOR, ISD::STORE, ISD::BUILD_VECTOR});
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MSTORE);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine({ISD::SELECT, ISD::VSELECT});
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
ISD::VECREDUCE_ADD, ISD::STEP_VECTOR});
setTargetDAGCombine({ISD::MGATHER, ISD::MSCATTER});
setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::GlobalAddress);
// In case of strict alignment, avoid an excessive number of byte-wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemcpy =
Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
MaxStoresPerMemmoveOptSize = 4;
MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
MaxLoadsPerMemcmp =
Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
setStackPointerRegisterToSaveRestore(AArch64::SP);
setSchedulingPreference(Sched::Hybrid);
EnableExtLdPromotion = true;
// Set required alignment.
setMinFunctionAlignment(Align(4));
// Set preferred alignments.
setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
setMaxBytesForAlignment(STI.getMaxBytesForLoopAlignment());
setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
// Only change the limit for entries in a jump table if it is specified by
// the subtarget and not overridden on the command line.
unsigned MaxJT = STI.getMaximumJumpTableSize();
if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
setMaximumJumpTableSize(MaxJT);
setHasExtractBitsInsn(true);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget->hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
for (auto Op :
{ISD::SELECT, ISD::SELECT_CC, ISD::SETCC,
ISD::BR_CC, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FDIV, ISD::FMA,
ISD::FNEG, ISD::FABS, ISD::FCEIL,
ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
ISD::STRICT_FMAXIMUM})
setOperationAction(Op, MVT::v1f64, Expand);
for (auto Op :
{ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP,
ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL,
ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT,
ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_ROUND})
setOperationAction(Op, MVT::v1i64, Expand);
// AArch64 doesn't have direct vector -> f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
// Similarly, there are no direct i32 -> f64 or i32 -> f16 vector conversion
// instructions. Mark these as Custom so the conversion happens in two steps:
// v4i32 -> v4f32 -> v4f16
for (auto Op : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP})
for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
setOperationAction(Op, VT, Custom);
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
} else {
// When AArch64 doesn't have fullfp16 support, promote the input
// to i32 first.
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
}
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
for (auto VT : {MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
}
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quad-vector types to detect MULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Saturates
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
MVT::v4i32}) {
setOperationAction(ISD::AVGFLOORS, VT, Legal);
setOperationAction(ISD::AVGFLOORU, VT, Legal);
setOperationAction(ISD::AVGCEILS, VT, Legal);
setOperationAction(ISD::AVGCEILU, VT, Legal);
setOperationAction(ISD::ABDS, VT, Legal);
setOperationAction(ISD::ABDU, VT, Legal);
}
// Vector reductions
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
}
}
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
setOperationAction(ISD::MULHS, VT, Legal);
setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
}
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// AArch64 has implementations of a lot of rounding-like FP operations.
for (auto Op :
{ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR,
ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, ISD::STRICT_FRINT,
ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) {
for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
setOperationAction(Op, Ty, Legal);
if (Subtarget->hasFullFP16())
for (MVT Ty : {MVT::v4f16, MVT::v8f16})
setOperationAction(Op, Ty, Legal);
}
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
// ADDP custom lowering
for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
setOperationAction(ISD::ADD, VT, Custom);
// FADDP custom lowering
for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
setOperationAction(ISD::FADD, VT, Custom);
}
if (Subtarget->hasSME()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
}
// FIXME: Move lowering for more nodes here if those are common between
// SVE and SME.
if (Subtarget->hasSVE() || Subtarget->hasSME()) {
for (auto VT :
{MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
}
if (Subtarget->hasSVE()) {
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ABDS, VT, Custom);
setOperationAction(ISD::ABDU, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
// Illegal unpacked integer vector types.
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
// Legalize unpacked bitcasts to REINTERPRET_CAST.
for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
setOperationAction(ISD::BITCAST, VT, Custom);
for (auto VT :
{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
for (auto VT :
{MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
}
}
// NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
// First, mark all scalable-vector extending loads and truncating stores as
// Expand, covering both integer and floating-point scalable vectors.
for (MVT VT : MVT::scalable_vector_valuetypes()) {
for (MVT InnerVT : MVT::scalable_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// Then, selectively enable those which we directly support.
setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i8, Legal);
setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i16, Legal);
setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i32, Legal);
setTruncStoreAction(MVT::nxv4i32, MVT::nxv4i8, Legal);
setTruncStoreAction(MVT::nxv4i32, MVT::nxv4i16, Legal);
setTruncStoreAction(MVT::nxv8i16, MVT::nxv8i8, Legal);
for (auto Op : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i8, Legal);
setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i16, Legal);
setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i32, Legal);
setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i8, Legal);
setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i16, Legal);
setLoadExtAction(Op, MVT::nxv8i16, MVT::nxv8i8, Legal);
}
// SVE supports truncating stores of 64- and 128-bit vectors
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
MVT::nxv4f32, MVT::nxv2f64}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETONE, VT, Expand);
}
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
// NEON doesn't support integer divides, but SVE does
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
}
// NEON doesn't support 64-bit vector integer muls, but SVE does.
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
// 64-bit results can come from an input wider than NEON supports.
for (auto VT : {MVT::v8i8, MVT::v4i16})
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
// 128-bit results imply an input wider than NEON supports.
for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(ISD::TRUNCATE, VT, Custom);
for (auto VT : {MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::FP_ROUND, VT, Custom);
// These operations are not supported on NEON but SVE can do them.
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
// Int operations with no NEON support.
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
}
// FP operations with no NEON support.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
MVT::v1f64, MVT::v2f64})
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
// Use SVE for vectors with more than 2 elements.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
}
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
setOperationAction(ISD::VSCALE, MVT::i32, Custom);
}
if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
// Only required for llvm.aarch64.mops.memset.tag
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
}
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
IsStrictFPEnabled = true;
}
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
assert(VT.isVector() && "VT should be a vector type");
if (VT.isFloatingPoint()) {
MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
}
// Mark vector float intrinsics as expand.
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
}
// But we do support custom lowering for FCOPYSIGN.
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
for (MVT InnerVT : MVT::all_valuetypes())
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
// The CNT instruction only supports byte (B) element sizes, so for wider
// elements CTPOP is custom-lowered to CNT followed by UADDLP to widen.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
for (unsigned Opcode :
{ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
ISD::FP_TO_UINT_SAT, ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
setOperationAction(Opcode, VT, Custom);
if (!VT.isFloatingPoint())
setOperationAction(ISD::ABS, VT, Legal);
// [SU][MIN|MAX] are available for all NEON types apart from i64.
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
// F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP
// NEON types.
if (VT.isFloatingPoint() &&
VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,
ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM,
ISD::STRICT_FMAXNUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA,
ISD::STRICT_FSQRT})
setOperationAction(Opcode, VT, Legal);
// Strict fp extend and trunc are legal
if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 16)
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 64)
setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
// FIXME: We could potentially make use of the vector comparison instructions
// for STRICT_FSETCC and STRICT_FSETCCS, but there are a number of
// complications:
// * FCMPEQ/NE are quiet comparisons, while the rest are signalling
//   comparisons, so we would need to expand when the condition code doesn't
//   match the kind of comparison.
// * Some kinds of comparison require more than one FCMXY instruction so
// would need to be expanded instead.
// * The lowering of the non-strict versions involves target-specific ISD
// nodes so we would likely need to add strict versions of all of them and
// handle them appropriately.
setOperationAction(ISD::STRICT_FSETCC, VT, Expand);
setOperationAction(ISD::STRICT_FSETCCS, VT, Expand);
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
}
}
}
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
EVT OpVT) const {
// Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
if (!Subtarget->hasSVE())
return true;
// We can only support legal predicate result types. We can use the SVE
// whilelo instruction for generating fixed-width predicates too.
if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
ResVT != MVT::nxv16i1 && ResVT != MVT::v2i1 && ResVT != MVT::v4i1 &&
ResVT != MVT::v8i1 && ResVT != MVT::v16i1)
return true;
// The whilelo instruction only works with i32 or i64 scalar inputs.
if (OpVT != MVT::i32 && OpVT != MVT::i64)
return true;
return false;
}
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
if (VT.isFloatingPoint()) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETONE, VT, Expand);
}
// Mark integer truncating stores/extending loads as having custom lowering
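// (For example, VT == v4i32 covers the v4i8 and v4i16 variants.)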
if (VT.isInteger()) {
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
InnerVT = InnerVT.changeVectorElementType(
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
}
}
// Mark floating-point truncating stores/extending loads as having custom
// lowering
if (VT.isFloatingPoint()) {
MVT InnerVT = VT.changeVectorElementType(MVT::f16);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom);
InnerVT = InnerVT.changeVectorElementType(
MVT::getFloatingPointVT(2 * InnerVT.getScalarSizeInBits()));
}
}
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
}
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR64RegClass);
addTypeForNEON(VT);
}
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR128RegClass);
addTypeForNEON(VT);
}
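// Comparisons of scalars produce an i32 result; vector comparisons produce a
// vector of i1 with one lane per input element for scalable types, or an
// integer vector of matching element width for fixed-length types.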
EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
LLVMContext &C, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
if (VT.isScalableVector())
return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
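// Try to rewrite the immediate of a logical operation (AND/OR/XOR) so that it
// becomes encodable as an AArch64 bitmask immediate (or all-zeros/all-ones),
// changing only bits that are not demanded by the instruction's users.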
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
const APInt &Demanded,
TargetLowering::TargetLoweringOpt &TLO,
unsigned NewOpc) {
uint64_t OldImm = Imm, NewImm, Enc;
uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
// Return if the immediate is already all zeros, all ones, a bimm32 or a
// bimm64.
if (Imm == 0 || Imm == Mask ||
AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
return false;
unsigned EltSize = Size;
uint64_t DemandedBits = Demanded.getZExtValue();
// Clear bits that are not demanded.
Imm &= DemandedBits;
while (true) {
// The goal here is to set the non-demanded bits in a way that minimizes
// the number of switching between 0 and 1. In order to achieve this goal,
// we set the non-demanded bits to the value of the preceding demanded bits.
// For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
// non-demanded bit), we copy bit0 (1) to the least significant 'x',
// bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
// The final result is 0b11000011.
uint64_t NonDemandedBits = ~DemandedBits;
uint64_t InvertedImm = ~Imm & DemandedBits;
uint64_t RotatedImm =
((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
NonDemandedBits;
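// RotatedImm marks the lowest bit of each run of non-demanded bits whose
// preceding demanded bit is 0. Adding it to NonDemandedBits carries through
// and clears exactly those runs, while runs preceded by a demanded 1 stay
// set; Ones therefore holds the non-demanded bits that must be set to 1.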
uint64_t Sum = RotatedImm + NonDemandedBits;
bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
uint64_t Ones = (Sum + Carry) & NonDemandedBits;
NewImm = (Imm | Ones) & Mask;
// If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
// or all-ones or all-zeros, in which case we can stop searching. Otherwise,
// we halve the element size and continue the search.
if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
break;
// We cannot shrink the element size any further if it is 2 bits.
if (EltSize == 2)
return false;
EltSize /= 2;
Mask >>= EltSize;
uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
// Return if there is a mismatch in any of the demanded bits of Imm and Hi.
if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
return false;
// Merge the upper and lower halves of Imm and DemandedBits.
Imm |= Hi;
DemandedBits |= DemandedBitsHi;
}
++NumOptimizedImms;
// Replicate the element across the register width.
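// For instance, with Size == 64 and EltSize == 16, the 16-bit pattern 0xA5A5
// becomes 0xA5A5A5A5A5A5A5A5.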
while (EltSize < Size) {
NewImm |= NewImm << EltSize;
EltSize *= 2;
}
(void)OldImm;
assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
"demanded bits should never be altered");
assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
// Create the new constant immediate node.
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue New;
// If the new constant immediate is all-zeros or all-ones, let the target
// independent DAG combine optimize this node.
if (NewImm == 0 || NewImm == OrigMask) {
New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
TLO.DAG.getConstant(NewImm, DL, VT));
// Otherwise, create a machine node so that target independent DAG combine
// doesn't undo this optimization.
} else {
Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
New = SDValue(
TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
}
return TLO.CombineTo(Op, New);
}
bool AArch64TargetLowering::targetShrinkDemandedConstant(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
// Delay this optimization until as late as possible.
if (!TLO.LegalOps)
return false;
if (!EnableOptimizeLogicalImm)
return false;
EVT VT = Op.getValueType();
if (VT.isVector())
return false;
unsigned Size = VT.getSizeInBits();
assert((Size == 32 || Size == 64) &&
"i32 or i64 is expected after legalization.");
// Exit early if we demand all bits.
if (DemandedBits.countPopulation() == Size)
return false;
unsigned NewOpc;
switch (Op.getOpcode()) {
default:
return false;
case ISD::AND:
NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
break;
case ISD::OR:
NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
break;
case ISD::XOR:
NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
break;
}
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!C)
return false;
uint64_t Imm = C->getZExtValue();
return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
}
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them in Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case AArch64ISD::DUP: {
SDValue SrcOp = Op.getOperand(0);
Known = DAG.computeKnownBits(SrcOp, Depth + 1);
if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
"Expected DUP implicit truncation");
Known = Known.trunc(Op.getScalarValueSizeInBits());
}
break;
}
case AArch64ISD::CSEL: {
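// CSEL selects one of its first two operands, so only bits known in common
// between them are known in the result.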
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::commonBits(Known, Known2);
break;
}
case AArch64ISD::BICi: {
// Compute the bit cleared value.
uint64_t Mask =
~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known &= KnownBits::makeConstant(APInt(Known.getBitWidth(), Mask));
break;
}
case AArch64ISD::VLSHR: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::lshr(Known, Known2);
break;
}
case AArch64ISD::VASHR: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::ashr(Known, Known2);
break;
}
case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {
if (!Subtarget->isTargetILP32())
break;
// In ILP32 mode all valid pointers are in the low 4GB of the address space.
Known.Zero = APInt::getHighBitsSet(64, 32);
break;
}
case AArch64ISD::ASSERT_ZEXT_BOOL: {
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known.Zero |= APInt(Known.getBitWidth(), 0xFE);
break;
}
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default: return;
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
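// Exclusive loads zero-extend the loaded value to the full register width,
// so every bit above the memory width is known to be zero.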
unsigned BitWidth = Known.getBitWidth();
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
}
break;
}
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID: {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
// Figure out the datatype of the vector operand. The UMINV instruction
// will zero-extend the result, so we can mark as known zero all the
// bits above the element width. 32-bit or larger elements don't need
// this, as those are legal types and will be handled by isel directly.
MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
unsigned BitWidth = Known.getBitWidth();
if (VT == MVT::v8i8 || VT == MVT::v16i8) {
assert(BitWidth >= 8 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
Known.Zero |= Mask;
} else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
assert(BitWidth >= 16 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
Known.Zero |= Mask;
}
break;
}
}
}
}
}
MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
EVT) const {
return MVT::i64;
}
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
VT == MVT::v2i64;
}
return true;
}
// Same as above but handling LLTs instead.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() ||
Ty.getSizeInBytes() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
Ty == LLT::fixed_vector(2, 64);
}
return true;
}
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return AArch64::createFastISel(funcInfo, libInfo);
}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
#define MAKE_CASE(V) \
case V: \
return #V;
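// For example, MAKE_CASE(AArch64ISD::CALL) expands to:
//   case AArch64ISD::CALL: return "AArch64ISD::CALL";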
switch ((AArch64ISD::NodeType)Opcode) {
case AArch64ISD::FIRST_NUMBER:
break;
MAKE_CASE(AArch64ISD::CALL)
MAKE_CASE(AArch64ISD::ADRP)
MAKE_CASE(AArch64ISD::ADR)
MAKE_CASE(AArch64ISD::ADDlow)
MAKE_CASE(AArch64ISD::LOADgot)
MAKE_CASE(AArch64ISD::RET_FLAG)
MAKE_CASE(AArch64ISD::BRCOND)
MAKE_CASE(AArch64ISD::CSEL)
MAKE_CASE(AArch64ISD::CSINV)
MAKE_CASE(AArch64ISD::CSNEG)
MAKE_CASE(AArch64ISD::CSINC)
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ABDS_PRED)
MAKE_CASE(AArch64ISD::ABDU_PRED)
MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::MULHS_PRED)
MAKE_CASE(AArch64ISD::MULHU_PRED)
MAKE_CASE(AArch64ISD::SDIV_PRED)
MAKE_CASE(AArch64ISD::SHL_PRED)
MAKE_CASE(AArch64ISD::SMAX_PRED)
MAKE_CASE(AArch64ISD::SMIN_PRED)
MAKE_CASE(AArch64ISD::SRA_PRED)
MAKE_CASE(AArch64ISD::SRL_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)
MAKE_CASE(AArch64ISD::UMAX_PRED)
MAKE_CASE(AArch64ISD::UMIN_PRED)
MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1)
MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ADC)
MAKE_CASE(AArch64ISD::SBC)
MAKE_CASE(AArch64ISD::ADDS)
MAKE_CASE(AArch64ISD::SUBS)
MAKE_CASE(AArch64ISD::ADCS)
MAKE_CASE(AArch64ISD::SBCS)
MAKE_CASE(AArch64ISD::ANDS)
MAKE_CASE(AArch64ISD::CCMP)
MAKE_CASE(AArch64ISD::CCMN)
MAKE_CASE(AArch64ISD::FCCMP)
MAKE_CASE(AArch64ISD::FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMPE)
MAKE_CASE(AArch64ISD::DUP)
MAKE_CASE(AArch64ISD::DUPLANE8)
MAKE_CASE(AArch64ISD::DUPLANE16)
MAKE_CASE(AArch64ISD::DUPLANE32)
MAKE_CASE(AArch64ISD::DUPLANE64)
MAKE_CASE(AArch64ISD::DUPLANE128)
MAKE_CASE(AArch64ISD::MOVI)
MAKE_CASE(AArch64ISD::MOVIshift)
MAKE_CASE(AArch64ISD::MOVIedit)
MAKE_CASE(AArch64ISD::MOVImsl)
MAKE_CASE(AArch64ISD::FMOV)
MAKE_CASE(AArch64ISD::MVNIshift)
MAKE_CASE(AArch64ISD::MVNImsl)
MAKE_CASE(AArch64ISD::BICi)
MAKE_CASE(AArch64ISD::ORRi)
MAKE_CASE(AArch64ISD::BSP)
MAKE_CASE(AArch64ISD::EXTR)
MAKE_CASE(AArch64ISD::ZIP1)
MAKE_CASE(AArch64ISD::ZIP2)
MAKE_CASE(AArch64ISD::UZP1)
MAKE_CASE(AArch64ISD::UZP2)
MAKE_CASE(AArch64ISD::TRN1)
MAKE_CASE(AArch64ISD::TRN2)
MAKE_CASE(AArch64ISD::REV16)
MAKE_CASE(AArch64ISD::REV32)
MAKE_CASE(AArch64ISD::REV64)
MAKE_CASE(AArch64ISD::EXT)
MAKE_CASE(AArch64ISD::SPLICE)
MAKE_CASE(AArch64ISD::VSHL)
MAKE_CASE(AArch64ISD::VLSHR)
MAKE_CASE(AArch64ISD::VASHR)
MAKE_CASE(AArch64ISD::VSLI)
MAKE_CASE(AArch64ISD::VSRI)
MAKE_CASE(AArch64ISD::CMEQ)
MAKE_CASE(AArch64ISD::CMGE)
MAKE_CASE(AArch64ISD::CMGT)
MAKE_CASE(AArch64ISD::CMHI)
MAKE_CASE(AArch64ISD::CMHS)
MAKE_CASE(AArch64ISD::FCMEQ)
MAKE_CASE(AArch64ISD::FCMGE)
MAKE_CASE(AArch64ISD::FCMGT)
MAKE_CASE(AArch64ISD::CMEQz)
MAKE_CASE(AArch64ISD::CMGEz)
MAKE_CASE(AArch64ISD::CMGTz)
MAKE_CASE(AArch64ISD::CMLEz)
MAKE_CASE(AArch64ISD::CMLTz)
MAKE_CASE(AArch64ISD::FCMEQz)
MAKE_CASE(AArch64ISD::FCMGEz)
MAKE_CASE(AArch64ISD::FCMGTz)
MAKE_CASE(AArch64ISD::FCMLEz)
MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::SDOT)
MAKE_CASE(AArch64ISD::UDOT)
MAKE_CASE(AArch64ISD::SMINV)
MAKE_CASE(AArch64ISD::UMINV)
MAKE_CASE(AArch64ISD::SMAXV)
MAKE_CASE(AArch64ISD::UMAXV)
MAKE_CASE(AArch64ISD::SADDV_PRED)
MAKE_CASE(AArch64ISD::UADDV_PRED)
MAKE_CASE(AArch64ISD::SMAXV_PRED)
MAKE_CASE(AArch64ISD::UMAXV_PRED)
MAKE_CASE(AArch64ISD::SMINV_PRED)
MAKE_CASE(AArch64ISD::UMINV_PRED)
MAKE_CASE(AArch64ISD::ORV_PRED)
MAKE_CASE(AArch64ISD::EORV_PRED)
MAKE_CASE(AArch64ISD::ANDV_PRED)
MAKE_CASE(AArch64ISD::CLASTA_N)
MAKE_CASE(AArch64ISD::CLASTB_N)
MAKE_CASE(AArch64ISD::LASTA)
MAKE_CASE(AArch64ISD::LASTB)
MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
MAKE_CASE(AArch64ISD::LS64_BUILD)
MAKE_CASE(AArch64ISD::LS64_EXTRACT)
MAKE_CASE(AArch64ISD::TBL)
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
MAKE_CASE(AArch64ISD::FADDV_PRED)
MAKE_CASE(AArch64ISD::FDIV_PRED)
MAKE_CASE(AArch64ISD::FMA_PRED)
MAKE_CASE(AArch64ISD::FMAX_PRED)
MAKE_CASE(AArch64ISD::FMAXV_PRED)
MAKE_CASE(AArch64ISD::FMAXNM_PRED)
MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
MAKE_CASE(AArch64ISD::FMIN_PRED)
MAKE_CASE(AArch64ISD::FMINV_PRED)
MAKE_CASE(AArch64ISD::FMINNM_PRED)
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
MAKE_CASE(AArch64ISD::FMUL_PRED)
MAKE_CASE(AArch64ISD::FSUB_PRED)
MAKE_CASE(AArch64ISD::RDSVL)
MAKE_CASE(AArch64ISD::BIC)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
MAKE_CASE(AArch64ISD::CBNZ)
MAKE_CASE(AArch64ISD::TBZ)
MAKE_CASE(AArch64ISD::TBNZ)
MAKE_CASE(AArch64ISD::TC_RETURN)
MAKE_CASE(AArch64ISD::PREFETCH)
MAKE_CASE(AArch64ISD::SITOF)
MAKE_CASE(AArch64ISD::UITOF)
MAKE_CASE(AArch64ISD::NVCAST)
MAKE_CASE(AArch64ISD::MRS)
MAKE_CASE(AArch64ISD::SQSHL_I)
MAKE_CASE(AArch64ISD::UQSHL_I)
MAKE_CASE(AArch64ISD::SRSHR_I)
MAKE_CASE(AArch64ISD::URSHR_I)
MAKE_CASE(AArch64ISD::SQSHLU_I)
MAKE_CASE(AArch64ISD::WrapperLarge)
MAKE_CASE(AArch64ISD::LD2post)
MAKE_CASE(AArch64ISD::LD3post)
MAKE_CASE(AArch64ISD::LD4post)
MAKE_CASE(AArch64ISD::ST2post)
MAKE_CASE(AArch64ISD::ST3post)
MAKE_CASE(AArch64ISD::ST4post)
MAKE_CASE(AArch64ISD::LD1x2post)
MAKE_CASE(AArch64ISD::LD1x3post)
MAKE_CASE(AArch64ISD::LD1x4post)
MAKE_CASE(AArch64ISD::ST1x2post)
MAKE_CASE(AArch64ISD::ST1x3post)
MAKE_CASE(AArch64ISD::ST1x4post)
MAKE_CASE(AArch64ISD::LD1DUPpost)
MAKE_CASE(AArch64ISD::LD2DUPpost)
MAKE_CASE(AArch64ISD::LD3DUPpost)
MAKE_CASE(AArch64ISD::LD4DUPpost)
MAKE_CASE(AArch64ISD::LD1LANEpost)
MAKE_CASE(AArch64ISD::LD2LANEpost)
MAKE_CASE(AArch64ISD::LD3LANEpost)
MAKE_CASE(AArch64ISD::LD4LANEpost)
MAKE_CASE(AArch64ISD::ST2LANEpost)
MAKE_CASE(AArch64ISD::ST3LANEpost)
MAKE_CASE(AArch64ISD::ST4LANEpost)
MAKE_CASE(AArch64ISD::SMULL)
MAKE_CASE(AArch64ISD::UMULL)
MAKE_CASE(AArch64ISD::FRECPE)
MAKE_CASE(AArch64ISD::FRECPS)
MAKE_CASE(AArch64ISD::FRSQRTE)
MAKE_CASE(AArch64ISD::FRSQRTS)
MAKE_CASE(AArch64ISD::STG)
MAKE_CASE(AArch64ISD::STZG)
MAKE_CASE(AArch64ISD::ST2G)
MAKE_CASE(AArch64ISD::STZ2G)
MAKE_CASE(AArch64ISD::SUNPKHI)
MAKE_CASE(AArch64ISD::SUNPKLO)
MAKE_CASE(AArch64ISD::UUNPKHI)
MAKE_CASE(AArch64ISD::UUNPKLO)
MAKE_CASE(AArch64ISD::INSR)
MAKE_CASE(AArch64ISD::PTEST)
MAKE_CASE(AArch64ISD::PTRUE)
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ST1_PRED)
MAKE_CASE(AArch64ISD::SST1_PRED)
MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
MAKE_CASE(AArch64ISD::LDP)
MAKE_CASE(AArch64ISD::STP)
MAKE_CASE(AArch64ISD::STNP)
MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVH_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVW_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVD_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
MAKE_CASE(AArch64ISD::ADDP)
MAKE_CASE(AArch64ISD::SADDLP)
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
MAKE_CASE(AArch64ISD::MOPS_MEMSET)
MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
MAKE_CASE(AArch64ISD::CALL_BTI)
}
#undef MAKE_CASE
return nullptr;
}
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *MBB) const {
// We materialise the F128CSEL pseudo-instruction as some control flow and a
// phi node:
// OrigBB:
// [... previous instrs leading to comparison ...]
// b.ne TrueBB
// b EndBB
// TrueBB:
// ; Fallthrough
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI.getDebugLoc();
MachineFunction::iterator It = ++MBB->getIterator();
Register DestReg = MI.getOperand(0).getReg();
Register IfTrueReg = MI.getOperand(1).getReg();
Register IfFalseReg = MI.getOperand(2).getReg();
unsigned CondCode = MI.getOperand(3).getImm();
bool NZCVKilled = MI.getOperand(4).isKill();
MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, TrueBB);
MF->insert(It, EndBB);
// Transfer the rest of the current basic block to EndBB.
EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
MBB->end());
EndBB->transferSuccessorsAndUpdatePHIs(MBB);
BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
MBB->addSuccessor(TrueBB);
MBB->addSuccessor(EndBB);
// TrueBB falls through to the end.
TrueBB->addSuccessor(EndBB);
if (!NZCVKilled) {
TrueBB->addLiveIn(AArch64::NZCV);
EndBB->addLiveIn(AArch64::NZCV);
}
BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
.addReg(IfTrueReg)
.addMBB(TrueBB)
.addReg(IfFalseReg)
.addMBB(MBB);
MI.eraseFromParent();
return EndBB;
}
MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
MachineInstr &MI, MachineBasicBlock *BB) const {
assert(!isAsynchronousEHPersonality(classifyEHPersonality(
BB->getParent()->getFunction().getPersonalityFn())) &&
"SEH does not use catchret!");
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
MIB.add(MI.getOperand(1)); // slice index register
MIB.add(MI.getOperand(2)); // slice index offset
MIB.add(MI.getOperand(3)); // pg
MIB.add(MI.getOperand(4)); // base
MIB.add(MI.getOperand(5)); // offset
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA));
MIB.addReg(AArch64::ZA, RegState::Define);
MIB.add(MI.getOperand(0)); // Vector select register
MIB.add(MI.getOperand(1)); // Vector select offset
MIB.add(MI.getOperand(2)); // Base
MIB.add(MI.getOperand(1)); // Offset, same as vector select offset
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitMopa(unsigned Opc, unsigned BaseReg,
MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
MIB.addReg(BaseReg + MI.getOperand(0).getImm());
MIB.add(MI.getOperand(1)); // pn
MIB.add(MI.getOperand(2)); // pm
MIB.add(MI.getOperand(3)); // zn
MIB.add(MI.getOperand(4)); // zm
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
MIB.addReg(BaseReg + MI.getOperand(0).getImm());
MIB.add(MI.getOperand(1)); // Slice index register
MIB.add(MI.getOperand(2)); // Slice index offset
MIB.add(MI.getOperand(3)); // pg
MIB.add(MI.getOperand(4)); // zn
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
MIB.add(MI.getOperand(0)); // Mask
unsigned Mask = MI.getOperand(0).getImm();
for (unsigned I = 0; I < 8; I++) {
if (Mask & (1 << I))
MIB.addDef(AArch64::ZAD0 + I, RegState::ImplicitDefine);
}
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
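// Illustrative example for EmitZero: a ZERO_M_PSEUDO whose mask immediate is
// 0b01000001 has bits 0 and 6 set, so the expanded ZERO_M is marked as
// implicitly defining ZAD0 and ZAD6.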
MachineBasicBlock *
AArch64TargetLowering::EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
MIB.addReg(BaseReg + MI.getOperand(0).getImm());
MIB.add(MI.getOperand(1)); // pn
MIB.add(MI.getOperand(2)); // pm
MIB.add(MI.getOperand(3)); // zn
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
default:
#ifndef NDEBUG
MI.dump();
#endif
llvm_unreachable("Unexpected instruction for custom inserter!");
case AArch64::F128CSEL:
return EmitF128CSEL(MI, BB);
case TargetOpcode::STATEPOINT:
// STATEPOINT is a pseudo instruction which has no implicit defs/uses
// while the BL call instruction (to which the statepoint is eventually
// lowered) has an implicit def of LR. This def is early-clobber as it is
// set at the moment of the call, before any use is read.
// Add this implicit dead def here as a workaround.
MI.addOperand(*MI.getMF(),
MachineOperand::CreateReg(
AArch64::LR, /*isDef*/ true,
/*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
/*isUndef*/ false, /*isEarlyClobber*/ true));
LLVM_FALLTHROUGH;
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
case AArch64::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_B:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_H:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_S:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_D:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
case AArch64::LD1_MXIPXX_V_PSEUDO_B:
return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
case AArch64::LD1_MXIPXX_V_PSEUDO_H:
return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
case AArch64::LD1_MXIPXX_V_PSEUDO_S:
return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
case AArch64::LD1_MXIPXX_V_PSEUDO_D:
return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
case AArch64::LDR_ZA_PSEUDO:
return EmitFill(MI, BB);
case AArch64::BFMOPA_MPPZZ_PSEUDO:
return EmitMopa(AArch64::BFMOPA_MPPZZ, AArch64::ZAS0, MI, BB);
case AArch64::BFMOPS_MPPZZ_PSEUDO:
return EmitMopa(AArch64::BFMOPS_MPPZZ, AArch64::ZAS0, MI, BB);
case AArch64::FMOPAL_MPPZZ_PSEUDO:
return EmitMopa(AArch64::FMOPAL_MPPZZ, AArch64::ZAS0, MI, BB);
case AArch64::FMOPSL_MPPZZ_PSEUDO:
return EmitMopa(AArch64::FMOPSL_MPPZZ, AArch64::ZAS0, MI, BB);
case AArch64::FMOPA_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::FMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::FMOPS_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::FMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::FMOPA_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::FMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::FMOPS_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::FMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::SMOPA_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::SMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::SMOPS_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::SMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::UMOPA_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::UMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::UMOPS_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::UMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::SUMOPA_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::SUMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::SUMOPS_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::SUMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::USMOPA_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::USMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::USMOPS_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::USMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::SMOPA_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::SMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::SMOPS_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::SMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::UMOPA_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::UMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::UMOPS_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::UMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::SUMOPA_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::SUMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::SUMOPS_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::SUMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::USMOPA_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::USMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::USMOPS_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::USMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::INSERT_MXIPZ_H_PSEUDO_B:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_B, AArch64::ZAB0, MI,
BB);
case AArch64::INSERT_MXIPZ_H_PSEUDO_H:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_H, AArch64::ZAH0, MI,
BB);
case AArch64::INSERT_MXIPZ_H_PSEUDO_S:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_S, AArch64::ZAS0, MI,
BB);
case AArch64::INSERT_MXIPZ_H_PSEUDO_D:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_D, AArch64::ZAD0, MI,
BB);
case AArch64::INSERT_MXIPZ_H_PSEUDO_Q:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_Q, AArch64::ZAQ0, MI,
BB);
case AArch64::INSERT_MXIPZ_V_PSEUDO_B:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_B, AArch64::ZAB0, MI,
BB);
case AArch64::INSERT_MXIPZ_V_PSEUDO_H:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_H, AArch64::ZAH0, MI,
BB);
case AArch64::INSERT_MXIPZ_V_PSEUDO_S:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_S, AArch64::ZAS0, MI,
BB);
case AArch64::INSERT_MXIPZ_V_PSEUDO_D:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_D, AArch64::ZAD0, MI,
BB);
case AArch64::INSERT_MXIPZ_V_PSEUDO_Q:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_Q, AArch64::ZAQ0, MI,
BB);
case AArch64::ZERO_M_PSEUDO:
return EmitZero(MI, BB);
case AArch64::ADDHA_MPPZ_PSEUDO_S:
return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_S, AArch64::ZAS0, MI, BB);
case AArch64::ADDVA_MPPZ_PSEUDO_S:
return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_S, AArch64::ZAS0, MI, BB);
case AArch64::ADDHA_MPPZ_PSEUDO_D:
return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_D, AArch64::ZAD0, MI, BB);
case AArch64::ADDVA_MPPZ_PSEUDO_D:
return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_D, AArch64::ZAD0, MI, BB);
}
}
//===----------------------------------------------------------------------===//
// AArch64 Lowering private implementation.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
// Forward declarations of SVE fixed length lowering helpers
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG);
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT);
/// isZerosVector - Check whether SDNode N is a zero-filled vector.
static bool isZerosVector(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (ISD::isConstantSplatVectorAllZeros(N))
return true;
if (N->getOpcode() != AArch64ISD::DUP)
return false;
auto Opnd0 = N->getOperand(0);
auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
return (CINT && CINT->isZero()) || (CFP && CFP->isZero());
}
/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
/// CC.
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unknown condition code!");
case ISD::SETNE:
return AArch64CC::NE;
case ISD::SETEQ:
return AArch64CC::EQ;
case ISD::SETGT:
return AArch64CC::GT;
case ISD::SETGE:
return AArch64CC::GE;
case ISD::SETLT:
return AArch64CC::LT;
case ISD::SETLE:
return AArch64CC::LE;
case ISD::SETUGT:
return AArch64CC::HI;
case ISD::SETUGE:
return AArch64CC::HS;
case ISD::SETULT:
return AArch64CC::LO;
case ISD::SETULE:
return AArch64CC::LS;
}
}
/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static void changeFPCCToAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (CC) {
default:
llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ:
CondCode = AArch64CC::EQ;
break;
case ISD::SETGT:
case ISD::SETOGT:
CondCode = AArch64CC::GT;
break;
case ISD::SETGE:
case ISD::SETOGE:
CondCode = AArch64CC::GE;
break;
case ISD::SETOLT:
CondCode = AArch64CC::MI;
break;
case ISD::SETOLE:
CondCode = AArch64CC::LS;
break;
case ISD::SETONE:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GT;
break;
case ISD::SETO:
CondCode = AArch64CC::VC;
break;
case ISD::SETUO:
CondCode = AArch64CC::VS;
break;
case ISD::SETUEQ:
CondCode = AArch64CC::EQ;
CondCode2 = AArch64CC::VS;
break;
case ISD::SETUGT:
CondCode = AArch64CC::HI;
break;
case ISD::SETUGE:
CondCode = AArch64CC::PL;
break;
case ISD::SETLT:
case ISD::SETULT:
CondCode = AArch64CC::LT;
break;
case ISD::SETLE:
case ISD::SETULE:
CondCode = AArch64CC::LE;
break;
case ISD::SETNE:
case ISD::SETUNE:
CondCode = AArch64CC::NE;
break;
}
}
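// Illustrative example for changeFPCCToAArch64CC: SETONE maps to the pair
// (MI, GT), so an ordered not-equal test needs two conditional branches,
// roughly:
//   fcmp s0, s1
//   b.mi .Ltrue
//   b.gt .Ltrue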
/// Convert a DAG fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (CC) {
default:
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
assert(CondCode2 == AArch64CC::AL);
break;
case ISD::SETONE:
// (a one b)
// == ((a olt b) || (a ogt b))
// == ((a ord b) && (a une b))
CondCode = AArch64CC::VC;
CondCode2 = AArch64CC::NE;
break;
case ISD::SETUEQ:
// (a ueq b)
// == ((a uno b) || (a oeq b))
// == ((a ule b) && (a uge b))
CondCode = AArch64CC::PL;
CondCode2 = AArch64CC::LE;
break;
}
}
/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
/// CC usable with the vector instructions. Fewer operations are available
/// without a real NZCV register, so we have to use less efficient combinations
/// to get the same effect.
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2,
bool &Invert) {
Invert = false;
switch (CC) {
default:
// Mostly the scalar mappings work fine.
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
break;
case ISD::SETUO:
Invert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GE;
break;
case ISD::SETUEQ:
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETUGT:
case ISD::SETUGE:
// All of the compare-mask comparisons are ordered, but we can switch
// between the two by a double inversion. E.g. ULE == !OGT.
Invert = true;
changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
CondCode, CondCode2);
break;
}
}
static bool isLegalArithImmed(uint64_t C) {
// Matches AArch64DAGToDAGISel::SelectArithImmed().
bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
LLVM_DEBUG(dbgs() << "Is imm " << C
<< " legal: " << (IsLegal ? "yes\n" : "no\n"));
return IsLegal;
}
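// Worked examples for isLegalArithImmed (a 12-bit immediate, optionally
// shifted left by 12):
//   0xFFF    -> legal (fits in the low 12 bits)
//   0xFFF000 -> legal (low 12 bits clear, fits after LSL #12)
//   0x1001   -> illegal (needs bits from both halves)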
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on the
// grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
// can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are equal
// then everything is fine; if not, the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
//
// So, finally, the only LLVM-native comparisons that don't mention C or V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
static bool isCMN(SDValue Op, ISD::CondCode CC) {
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE);
}
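// Illustrative example for isCMN: (CMP x, (sub 0, y)) under SETEQ/SETNE may
// later be selected as "cmn x, y". Under, say, SETGE the rewrite would be
// unsound, since the V flag of "x - (-y)" can differ from that of "x + y".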
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
SelectionDAG &DAG, SDValue Chain,
bool IsSignaling) {
EVT VT = LHS.getValueType();
assert(VT != MVT::f128);
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
if (VT == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
{Chain, LHS});
RHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
{LHS.getValue(1), RHS});
Chain = RHS.getValue(1);
VT = MVT::f32;
}
unsigned Opcode =
IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
}
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
if (VT.isFloatingPoint()) {
assert(VT != MVT::f128);
if (VT == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
VT = MVT::f32;
}
return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
}
// The CMP instruction is just an alias for SUBS, and representing it as
// SUBS means that it's possible to get CSE with subtract operations.
// A later phase can perform the optimization of setting the destination
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
if (isCMN(RHS, CC)) {
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
} else if (isCMN(LHS, CC)) {
// As we are looking for EQ/NE compares, the operands can be commuted; can
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
LHS = LHS.getOperand(1);
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
if (LHS.getOpcode() == ISD::AND) {
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
// of the signed comparisons.
const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
DAG.getVTList(VT, MVT_CC),
LHS.getOperand(0),
LHS.getOperand(1));
// Replace all users of (and X, Y) with newly generated (ands X, Y)
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
return ANDSNode.getValue(1);
} else if (LHS.getOpcode() == AArch64ISD::ANDS) {
// Use result of ANDS
return LHS.getValue(1);
}
}
return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
.getValue(1);
}
/// \defgroup AArch64CCMP CMP;CCMP matching
///
/// These functions deal with the formation of CMP;CCMP;... sequences.
/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
/// a comparison. They set the NZCV flags to a predefined value if their
/// predicate is false. This allows expressing arbitrary conjunctions, for
/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
/// expressed as:
/// cmp A
/// ccmp B, inv(CB), CA
/// check for CB flags
///
/// This naturally lets us implement chains of AND operations with SETCC
/// operands. We can even implement some other situations by transforming
/// them:
/// - We can implement (NEG SETCC), i.e. negating a single comparison, by
/// negating the flags used in a CCMP/FCCMP operation.
/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
/// by negating the flags we test for afterwards; i.e.
/// NEG (CMP CCMP CCMP ...) can be implemented.
/// - Note that we can only ever negate all previously processed results.
/// What we cannot implement by flipping the flags to test is a negation
/// of two sub-trees (because the negation affects all sub-trees emitted so
/// far, so the 2nd sub-tree we emit would also affect the first).
/// With those tools we can implement some OR operations:
/// - (OR (SETCC A) (SETCC B)) can be implemented via:
/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
/// - After transforming OR to NEG/AND combinations we may be able to use NEG
/// elimination rules from earlier to implement the whole thing as a
/// CCMP/FCCMP chain.
///
/// As a complete example:
/// or (or (setCA (cmp A)) (setCB (cmp B)))
/// (and (setCC (cmp C)) (setCD (cmp D)))
/// can be reassociated to:
/// or (and (setCC (cmp C)) (setCD (cmp D)))
/// (or (setCA (cmp A)) (setCB (cmp B)))
/// can be transformed to:
/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
/// (and (not (setCA (cmp A))) (not (setCB (cmp B)))))
/// which can be implemented as:
/// cmp C
/// ccmp D, inv(CD), CC
/// ccmp A, CA, inv(CD)
/// ccmp B, CB, inv(CA)
/// check for CB flags
///
/// A counterexample is "or (and A B) (and C D)" which translates to
/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
/// can only implement one of the inner (not) operations, but not both!
/// @{
/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDValue CCOp,
AArch64CC::CondCode Predicate,
AArch64CC::CondCode OutCC,
const SDLoc &DL, SelectionDAG &DAG) {
unsigned Opcode = 0;
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
if (LHS.getValueType().isFloatingPoint()) {
assert(LHS.getValueType() != MVT::f128);
if (LHS.getValueType() == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
}
Opcode = AArch64ISD::FCCMP;
} else if (RHS.getOpcode() == ISD::SUB) {
SDValue SubOp0 = RHS.getOperand(0);
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
// See emitComparison() on why we can only do this for SETEQ and SETNE.
Opcode = AArch64ISD::CCMN;
RHS = RHS.getOperand(1);
}
}
if (Opcode == 0)
Opcode = AArch64ISD::CCMP;
SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
}
/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
/// expressed as a conjunction. See \ref AArch64CCMP.
/// \param CanNegate Set to true if we can negate the whole sub-tree just by
/// changing the conditions on the SETCC tests.
/// (this means we can call emitConjunctionRec() with
/// Negate==true on this sub-tree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
/// cannot do the negation naturally. We are required to
/// emit the subtree first in this case.
/// \param WillNegate Is true if we are called when the result of this
/// subexpression must be negated. This happens when the
/// outer expression is an OR. We can use this fact to know
/// that we have a double negation (or (or ...) ...) that
/// can be implemented for free.
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
bool &MustBeFirst, bool WillNegate,
unsigned Depth = 0) {
if (!Val.hasOneUse())
return false;
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
if (Val->getOperand(0).getValueType() == MVT::f128)
return false;
CanNegate = true;
MustBeFirst = false;
return true;
}
// Protect against exponential runtime and stack overflow.
if (Depth > 6)
return false;
if (Opcode == ISD::AND || Opcode == ISD::OR) {
bool IsOR = Opcode == ISD::OR;
SDValue O0 = Val->getOperand(0);
SDValue O1 = Val->getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
return false;
bool CanNegateR;
bool MustBeFirstR;
if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
return false;
if (MustBeFirstL && MustBeFirstR)
return false;
if (IsOR) {
// For an OR expression we need to be able to naturally negate at least
// one side or we cannot do the transformation at all.
if (!CanNegateL && !CanNegateR)
return false;
// If the result of the OR will be negated and we can naturally negate
// the leaves, then this sub-tree as a whole negates naturally.
CanNegate = WillNegate && CanNegateL && CanNegateR;
// If we cannot naturally negate the whole sub-tree, then this must be
// emitted first.
MustBeFirst = !CanNegate;
} else {
assert(Opcode == ISD::AND && "Must be OR or AND");
// We cannot naturally negate an AND operation.
CanNegate = false;
MustBeFirst = MustBeFirstL || MustBeFirstR;
}
return true;
}
return false;
}
/// Emit a conjunction or disjunction tree with the CMP/FCMP followed by a
/// chain of CCMP/FCCMP ops. See @ref AArch64CCMP.
/// Tries to transform the given i1 producing node @p Val to a series of
/// compare and conditional compare operations. @returns an NZCV flags
/// producing node and sets @p OutCC to the flags that should be tested, or
/// returns SDValue() if the transformation was not possible.
/// \p Negate is true if we want this sub-tree to be negated just by changing
/// SETCC conditions.
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
AArch64CC::CondCode Predicate) {
// We're at a tree leaf, produce a conditional comparison operation.
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
SDValue LHS = Val->getOperand(0);
SDValue RHS = Val->getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
bool isInteger = LHS.getValueType().isInteger();
if (Negate)
CC = getSetCCInverse(CC, LHS.getValueType());
SDLoc DL(Val);
// Determine OutCC and handle FP special case.
if (isInteger) {
OutCC = changeIntCCToAArch64CC(CC);
} else {
assert(LHS.getValueType().isFloatingPoint());
AArch64CC::CondCode ExtraCC;
changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
// Some floating point conditions can't be tested with a single condition
// code. Construct an additional comparison in this case.
if (ExtraCC != AArch64CC::AL) {
SDValue ExtraCmp;
if (!CCOp.getNode())
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
else
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
ExtraCC, DL, DAG);
CCOp = ExtraCmp;
Predicate = ExtraCC;
}
}
// Produce a normal comparison if we are first in the chain
if (!CCOp)
return emitComparison(LHS, RHS, CC, DL, DAG);
// Otherwise produce a ccmp.
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
DAG);
}
assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
bool IsOR = Opcode == ISD::OR;
SDValue LHS = Val->getOperand(0);
bool CanNegateL;
bool MustBeFirstL;
bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
assert(ValidL && "Valid conjunction/disjunction tree");
(void)ValidL;
SDValue RHS = Val->getOperand(1);
bool CanNegateR;
bool MustBeFirstR;
bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
assert(ValidR && "Valid conjunction/disjunction tree");
(void)ValidR;
// Swap sub-tree that must come first to the right side.
if (MustBeFirstL) {
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
std::swap(LHS, RHS);
std::swap(CanNegateL, CanNegateR);
std::swap(MustBeFirstL, MustBeFirstR);
}
bool NegateR;
bool NegateAfterR;
bool NegateL;
bool NegateAfterAll;
if (Opcode == ISD::OR) {
// Swap the sub-tree that we can negate naturally to the left.
if (!CanNegateL) {
assert(CanNegateR && "at least one side must be negatable");
assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
assert(!Negate);
std::swap(LHS, RHS);
NegateR = false;
NegateAfterR = true;
} else {
// Negate the left sub-tree if possible, otherwise negate the result.
NegateR = CanNegateR;
NegateAfterR = !CanNegateR;
}
NegateL = true;
NegateAfterAll = !Negate;
} else {
assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
assert(!Negate && "Valid conjunction/disjunction tree");
NegateL = false;
NegateR = false;
NegateAfterR = false;
NegateAfterAll = false;
}
// Emit sub-trees.
AArch64CC::CondCode RHSCC;
SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
if (NegateAfterR)
RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
if (NegateAfterAll)
OutCC = AArch64CC::getInvertedCondCode(OutCC);
return CmpL;
}
/// Emit an expression as a conjunction (a series of CCMP/FCCMP ops).
/// In some cases this is even possible with OR operations in the expression.
/// See \ref AArch64CCMP.
/// \see emitConjunctionRec().
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC) {
bool DummyCanNegate;
bool DummyMustBeFirst;
if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
return SDValue();
return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
}
/// @}
/// Returns how profitable it is to fold a comparison's operand's shift and/or
/// extension operations.
static unsigned getCmpOperandFoldingProfit(SDValue Op) {
auto isSupportedExtend = [&](SDValue V) {
if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
return true;
if (V.getOpcode() == ISD::AND)
if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
uint64_t Mask = MaskCst->getZExtValue();
return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
}
return false;
};
if (!Op.hasOneUse())
return 0;
if (isSupportedExtend(Op))
return 1;
unsigned Opc = Op.getOpcode();
if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
uint64_t Shift = ShiftCst->getZExtValue();
if (isSupportedExtend(Op.getOperand(0)))
return (Shift <= 4) ? 2 : 1;
EVT VT = Op.getValueType();
if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
return 1;
}
return 0;
}
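// Illustrative profits (assuming single-use nodes): (shl (and x, 0xFF), 2)
// returns 2, since a supported zero-extension plus a shift of at most 4 can
// fold into the compare as an extended-register operand (e.g.
// "cmp w0, w1, uxtb #2"), while a plain (shl x, 3) on an i32 returns 1.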
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
if (!isLegalArithImmed(C)) {
// The constant does not fit; try adjusting it by one.
switch (CC) {
default:
break;
case ISD::SETLT:
case ISD::SETGE:
if ((VT == MVT::i32 && C != 0x80000000 &&
isLegalArithImmed((uint32_t)(C - 1))) ||
(VT == MVT::i64 && C != 0x80000000ULL &&
isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
if ((VT == MVT::i32 && C != 0 &&
isLegalArithImmed((uint32_t)(C - 1))) ||
(VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETLE:
case ISD::SETGT:
if ((VT == MVT::i32 && C != INT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
(VT == MVT::i64 && C != INT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
if ((VT == MVT::i32 && C != UINT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
(VT == MVT::i64 && C != UINT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
}
}
}
// Comparisons are canonicalized so that the RHS operand is simpler than the
// LHS one, the extreme case being when RHS is an immediate. However, AArch64
// can fold some shift+extend operations on the RHS operand, so swap the
// operands if that can be done.
//
// For example:
// lsl w13, w11, #1
// cmp w13, w12
// can be turned into:
// cmp w12, w11, lsl #1
if (!isa<ConstantSDNode>(RHS) ||
!isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
std::swap(LHS, RHS);
CC = ISD::getSetCCSwappedOperands(CC);
}
}
SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
// For an i8 operand the largest immediate, 255, can be encoded directly in
// the compare instruction. For an i16 operand, however, the largest
// immediate, 65535, cannot be encoded.
// Therefore, use a sign extending load and cmn to avoid materializing the
// -1 constant. For example,
// movz w1, #65535
// ldrh w0, [x0, #0]
// cmp w0, w1
// becomes:
// ldrsh w0, [x0, #0]
// cmn w0, #1
// Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
// if and only if (sext LHS) == (sext RHS). The checks are in place to
// ensure both the LHS and RHS are truly zero extended and to make sure the
// transformation is profitable.
if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
LHS.getNode()->hasNUsesOfValue(1, 0)) {
int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
SDValue SExt =
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
DAG.getValueType(MVT::i16));
Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
RHS.getValueType()),
CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
}
if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
if ((CC == ISD::SETNE) ^ RHSC->isZero())
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
}
}
if (!Cmp) {
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
return Cmp;
}
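// Example of the immediate adjustment above: "x < 4097" uses 0x1001, which is
// not a legal arithmetic immediate, so it is rewritten to "x <= 4096"
// (SETLT -> SETLE), where 0x1000 is encodable and a single CMP suffices.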
static std::pair<SDValue, SDValue>
getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
"Unsupported value type");
SDValue Value, Overflow;
SDLoc DL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
unsigned Opc = 0;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown overflow instruction!");
case ISD::SADDO:
Opc = AArch64ISD::ADDS;
CC = AArch64CC::VS;
break;
case ISD::UADDO:
Opc = AArch64ISD::ADDS;
CC = AArch64CC::HS;
break;
case ISD::SSUBO:
Opc = AArch64ISD::SUBS;
CC = AArch64CC::VS;
break;
case ISD::USUBO:
Opc = AArch64ISD::SUBS;
CC = AArch64CC::LO;
break;
// Multiply needs a little bit of extra work.
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
// Extend to 64-bits, then perform a 64-bit multiply.
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
// Check that the result fits into a 32-bit integer.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
if (IsSigned) {
// cmp xreg, wreg, sxtw
SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
} else {
// tst xreg, #0xffffffff00000000
SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
Overflow =
DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
}
break;
}
assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
// For the 64-bit multiply, detect overflow via the high half of the product.
Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
if (IsSigned) {
SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
DAG.getConstant(63, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
.getValue(1);
} else {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs,
DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
}
} // switch (...)
if (Opc) {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
// Emit the AArch64 operation with overflow check.
Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
Overflow = Value.getValue(1);
}
return std::make_pair(Value, Overflow);
}
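// For instance, an i32 ISD::UADDO is emitted as ADDS with CC = HS (carry set
// on unsigned overflow), while an i32 ISD::SMULO multiplies in 64 bits and
// compares the product against its own sign-extension, roughly:
//   smull x8, w0, w1
//   cmp x8, w8, sxtw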
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToScalableOp(Op, DAG);
SDValue Sel = Op.getOperand(0);
SDValue Other = Op.getOperand(1);
SDLoc dl(Sel);
// If the operand is an overflow checking operation, invert the condition
// code and kill the Not operation. I.e., transform:
// (xor (overflow_op_bool, 1))
// -->
// (csel 1, 0, invert(cc), overflow_op_bool)
// ... which later gets transformed to just a cset instruction with an
// inverted condition code, rather than a cset + eor sequence.
if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
return SDValue();
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
AArch64CC::CondCode CC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
// If neither operand is a SELECT_CC, give up.
if (Sel.getOpcode() != ISD::SELECT_CC)
std::swap(Sel, Other);
if (Sel.getOpcode() != ISD::SELECT_CC)
return Op;
// The folding we want to perform is:
// (xor x, (select_cc a, b, cc, 0, -1) )
// -->
// (csel x, (xor x, -1), cc ...)
//
// The latter will get matched to a CSINV instruction.
ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
SDValue LHS = Sel.getOperand(0);
SDValue RHS = Sel.getOperand(1);
SDValue TVal = Sel.getOperand(2);
SDValue FVal = Sel.getOperand(3);
// FIXME: This could be generalized to non-integer comparisons.
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return Op;
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
// The values aren't constants, this isn't the pattern we're looking for.
if (!CFVal || !CTVal)
return Op;
// We can commute the SELECT_CC by inverting the condition. This
// might be needed to make this fit into a CSINV pattern.
if (CTVal->isAllOnes() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
// If the constants line up, perform the transform!
if (CTVal->isZero() && CFVal->isAllOnes()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
FVal = Other;
TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
DAG.getConstant(-1ULL, dl, Other.getValueType()));
return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
CCVal, Cmp);
}
return Op;
}
// If Invert is false, sets 'C' bit of NZCV to 0 if value is 0, else sets 'C'
// bit to 1. If Invert is true, sets 'C' bit of NZCV to 1 if value is 0, else
// sets 'C' bit to 0.
static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) {
SDLoc DL(Value);
EVT VT = Value.getValueType();
SDValue Op0 = Invert ? DAG.getConstant(0, DL, VT) : Value;
SDValue Op1 = Invert ? Value : DAG.getConstant(1, DL, VT);
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::Glue), Op0, Op1);
return Cmp.getValue(1);
}
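// Why this works: for unsigned SUBS, C is set iff no borrow occurs. With
// Invert == false we compute Value - 1, so C is set iff Value >= 1, i.e.
// Value is non-zero; with Invert == true we compute 0 - Value, so C is set
// iff Value == 0.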
// If Invert is false, value is 1 if 'C' bit of NZCV is 1, else 0.
// If Invert is true, value is 0 if 'C' bit of NZCV is 1, else 1.
static SDValue carryFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG,
bool Invert) {
assert(Flag.getResNo() == 1);
SDLoc DL(Flag);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
unsigned Cond = Invert ? AArch64CC::LO : AArch64CC::HS;
SDValue CC = DAG.getConstant(Cond, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
}
// Value is 1 if 'V' bit of NZCV is 1, else 0
static SDValue overflowFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG) {
assert(Flag.getResNo() == 1);
SDLoc DL(Flag);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue CC = DAG.getConstant(AArch64CC::VS, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
}
// This lowering is inefficient, but it will get cleaned up by
// `foldOverflowCheck`.
static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode,
bool IsSigned) {
EVT VT0 = Op.getValue(0).getValueType();
EVT VT1 = Op.getValue(1).getValueType();
if (VT0 != MVT::i32 && VT0 != MVT::i64)
return SDValue();
bool InvertCarry = Opcode == AArch64ISD::SBCS;
SDValue OpLHS = Op.getOperand(0);
SDValue OpRHS = Op.getOperand(1);
SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG, InvertCarry);
SDLoc DL(Op);
SDVTList VTs = DAG.getVTList(VT0, VT1);
SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLHS,
OpRHS, OpCarryIn);
SDValue OutFlag =
IsSigned ? overflowFlagToValue(Sum.getValue(1), VT1, DAG)
: carryFlagToValue(Sum.getValue(1), VT1, DAG, InvertCarry);
return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag);
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
SDLoc dl(Op);
AArch64CC::CondCode CC;
// The actual operation that sets the overflow or carry flag.
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
// We use an inverted condition, because the conditional select is inverted
// too. This will allow it to be selected to a single instruction:
// CSINC Wd, WZR, WZR, invert(cond).
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
CCVal, Overflow);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
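// e.g. an i32 ISD::UADDO is expected to lower to something like:
//   adds w8, w0, w1
//   cset w9, hs  ; CSINC of WZR with the inverted condition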
// Prefetch operands are:
// 1: Address to prefetch
// 2: bool isWrite
// 3: int locality (0 = no locality ... 3 = extreme locality)
// 4: bool isDataCache
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
bool IsStream = !Locality;
// When the locality number is set
if (Locality) {
// The front-end should have filtered out the out-of-range values
assert(Locality <= 3 && "Prefetch locality out-of-range");
// The locality degree is the inverse of the cache level, and the
// encoding starts at 0 for level 1, so flip the number around.
Locality = 3 - Locality;
}
// Build the mask value encoding the expected behavior.
unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
(!IsData << 3) | // IsDataCache bit
(Locality << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
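// Example PrfOp encodings: a data read with locality 3 (keep in L1) yields
// 0b00000, i.e. PLDL1KEEP; a data read with locality 0 sets the stream bit
// instead, yielding 0b00001, i.e. PLDL1STRM.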
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthFPExtendToSVE(Op, DAG);
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
return SDValue();
}
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
if (useSVEForFixedLengthVectorVT(SrcVT))
return LowerFixedLengthFPRoundToSVE(Op, DAG);
if (SrcVT != MVT::f128) {
// Expand cases where the input is a vector bigger than NEON.
if (useSVEForFixedLengthVectorVT(SrcVT))
return SDValue();
// It's legal except when f128 is involved
return Op;
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
bool IsStrict = Op->isStrictFPOpcode();
EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
EVT VT = Op.getValueType();
if (VT.isScalableVector()) {
unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
? AArch64ISD::FCVTZU_MERGE_PASSTHRU
: AArch64ISD::FCVTZS_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthFPToIntToSVE(Op, DAG);
unsigned NumElts = InVT.getVectorNumElements();
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (InVT.getVectorElementType() == MVT::f16 &&
!Subtarget->hasFullFP16()) {
MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
SDLoc dl(Op);
if (IsStrict) {
SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NewVT, MVT::Other},
{Op.getOperand(0), Op.getOperand(1)});
return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
}
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
}
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
SDLoc dl(Op);
if (IsStrict) {
InVT = InVT.changeVectorElementTypeToInteger();
SDValue Cv = DAG.getNode(Op.getOpcode(), dl, {InVT, MVT::Other},
{Op.getOperand(0), Op.getOperand(1)});
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
return DAG.getMergeValues({Trunc, Cv.getValue(1)}, dl);
}
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
}
if (VTSize > InVTSize) {
SDLoc dl(Op);
MVT ExtVT =
MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
if (IsStrict) {
SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {ExtVT, MVT::Other},
{Op.getOperand(0), Op.getOperand(1)});
return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
}
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
// Use a scalar operation for conversions between single-element vectors of
// the same size.
if (NumElts == 1) {
SDLoc dl(Op);
SDValue Extract = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
Op.getOperand(IsStrict ? 1 : 0), DAG.getConstant(0, dl, MVT::i64));
EVT ScalarVT = VT.getScalarType();
if (IsStrict)
return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
{Op.getOperand(0), Extract});
return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
}
// Type changing conversions are illegal.
return Op;
}
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
if (SrcVal.getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
SDLoc dl(Op);
if (IsStrict) {
SDValue Ext =
DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
{Op.getOperand(0), SrcVal});
return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
}
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
}
if (SrcVal.getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
}
return SDValue();
}
SDValue
AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
SelectionDAG &DAG) const {
// AArch64 FP-to-int conversions saturate to the destination element size, so
// we can lower common saturating conversions to simple instructions.
SDValue SrcVal = Op.getOperand(0);
EVT SrcVT = SrcVal.getValueType();
EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
uint64_t SatWidth = SatVT.getScalarSizeInBits();
assert(SatWidth <= DstElementWidth &&
"Saturation width cannot exceed result width");
// TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
// Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
// types, so this is hard to reach.
if (DstVT.isScalableVector())
return SDValue();
EVT SrcElementVT = SrcVT.getVectorElementType();
// In the absence of FP16 support, promote f16 to f32 and saturate the result.
if (SrcElementVT == MVT::f16 &&
(!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
SrcVT = F32VT;
SrcElementVT = MVT::f32;
SrcElementWidth = 32;
} else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
SrcElementVT != MVT::f16)
return SDValue();
SDLoc DL(Op);
// Cases that we can emit directly.
if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
DAG.getValueType(DstVT.getScalarType()));
// Otherwise we emit a cvt that saturates to a higher BW, and saturate the
// result. This is only valid if the legal cvt is larger than the saturate
// width. For double, as we don't have MIN/MAX, it can be simpler to scalarize
// (at least until sqxtn is selected).
if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
return SDValue();
EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
DAG.getValueType(IntVT.getScalarType()));
SDValue Sat;
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
SDValue MinC = DAG.getConstant(
APInt::getSignedMaxValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
SDValue MaxC = DAG.getConstant(
APInt::getSignedMinValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
} else {
SDValue MinC = DAG.getConstant(
APInt::getAllOnesValue(SatWidth).zext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
}
return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
SelectionDAG &DAG) const {
// AArch64 FP-to-int conversions saturate to the destination register size, so
// we can lower common saturating conversions to simple instructions.
SDValue SrcVal = Op.getOperand(0);
EVT SrcVT = SrcVal.getValueType();
if (SrcVT.isVector())
return LowerVectorFP_TO_INT_SAT(Op, DAG);
EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
uint64_t SatWidth = SatVT.getScalarSizeInBits();
uint64_t DstWidth = DstVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
// In the absence of FP16 support, promote f16 to f32 and saturate the result.
if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) {
SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal);
SrcVT = MVT::f32;
} else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16)
return SDValue();
SDLoc DL(Op);
// Cases that we can emit directly.
if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
(SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
DAG.getValueType(DstVT));
// Otherwise we emit a cvt that saturates to a higher BW, and saturate the
// result. This is only valid if the legal cvt is larger than the saturate
// width.
if (DstWidth < SatWidth)
return SDValue();
SDValue NativeCvt =
DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
SDValue Sat;
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
SDValue MinC = DAG.getConstant(
APInt::getSignedMaxValue(SatWidth).sext(DstWidth), DL, DstVT);
SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC);
SDValue MaxC = DAG.getConstant(
APInt::getSignedMinValue(SatWidth).sext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
} else {
SDValue MinC = DAG.getConstant(
APInt::getAllOnesValue(SatWidth).zext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
}
return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
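// In the common case the saturating conversion is a single instruction: e.g.
// an i32-saturating fptosi of an f32 becomes "fcvtzs w0, s0", since the
// AArch64 conversion instructions already saturate to the register width.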
SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
bool IsStrict = Op->isStrictFPOpcode();
EVT VT = Op.getValueType();
SDLoc dl(Op);
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
EVT InVT = In.getValueType();
unsigned Opc = Op.getOpcode();
bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
if (VT.isScalableVector()) {
if (InVT.getVectorElementType() == MVT::i1) {
// We can't directly extend an SVE predicate; extend it first.
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = getPromotedVTForPredicate(InVT);
In = DAG.getNode(CastOpc, dl, CastVT, In);
return DAG.getNode(Opc, dl, VT, In);
}
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthIntToFPToSVE(Op, DAG);
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
if (IsStrict) {
In = DAG.getNode(Opc, dl, {CastVT, MVT::Other},
{Op.getOperand(0), In});
return DAG.getNode(
ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
{In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)});
}
In = DAG.getNode(Opc, dl, CastVT, In);
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
if (VTSize > InVTSize) {
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = VT.changeVectorElementTypeToInteger();
In = DAG.getNode(CastOpc, dl, CastVT, In);
if (IsStrict)
return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op.getOperand(0), In});
return DAG.getNode(Opc, dl, VT, In);
}
// Use a scalar operation for conversions between single-element vectors of
// the same size.
if (VT.getVectorNumElements() == 1) {
SDValue Extract = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
In, DAG.getConstant(0, dl, MVT::i64));
EVT ScalarVT = VT.getScalarType();
if (IsStrict)
return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
{Op.getOperand(0), Extract});
return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
}
return Op;
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
// f16 conversions are promoted to f32 when full fp16 is not supported.
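// For example (illustrative): a sint_to_fp from i32 to f16 without +fullfp16
// is emitted roughly as "scvtf s0, w0" followed by "fcvt h0, s0": the
// conversion is performed at f32 and the result is then rounded to f16.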
if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
SDLoc dl(Op);
if (IsStrict) {
SDValue Val = DAG.getNode(Op.getOpcode(), dl, {MVT::f32, MVT::Other},
{Op.getOperand(0), SrcVal});
return DAG.getNode(
ISD::STRICT_FP_ROUND, dl, {MVT::f16, MVT::Other},
{Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)});
}
return DAG.getNode(
ISD::FP_ROUND, dl, MVT::f16,
DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
DAG.getIntPtrConstant(0, dl));
}
// i128 conversions are libcalls.
if (SrcVal.getValueType() == MVT::i128)
return SDValue();
// Other conversions are legal, unless the result type is the completely
// software-based fp128.
if (Op.getValueType() != MVT::f128)
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
SelectionDAG &DAG) const {
// For iOS, we want to call an alternative entry point: __sincos_stret,
// which returns the values in two S / D registers.
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
: RTLIB::SINCOS_STRET_F32;
const char *LibcallName = getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
StructType *RetTy = StructType::get(ArgTy, ArgTy);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
static MVT getSVEContainerType(EVT ContentTy);
SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT OpVT = Op.getValueType();
EVT ArgVT = Op.getOperand(0).getValueType();
if (useSVEForFixedLengthVectorVT(OpVT))
return LowerFixedLengthBitcastToSVE(Op, DAG);
if (OpVT.isScalableVector()) {
// Bitcasting between unpacked vector types of different element counts is
// not a NOP because the live elements are laid out differently.
// 01234567
// e.g. nxv2i32 = XX??XX??
// nxv4f16 = X?X?X?X?
if (OpVT.getVectorElementCount() != ArgVT.getVectorElementCount())
return SDValue();
if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
"Expected int->fp bitcast!");
SDValue ExtResult =
DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
Op.getOperand(0));
return getSVESafeBitCast(OpVT, ExtResult, DAG);
}
return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
}
if (OpVT != MVT::f16 && OpVT != MVT::bf16)
return SDValue();
// Bitcasts between f16 and bf16 are legal.
if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
return Op;
assert(ArgVT == MVT::i16);
SDLoc DL(Op);
Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
return SDValue(
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
}
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
if (OrigVT.getSizeInBits() >= 64)
return OrigVT;
assert(OrigVT.isSimple() && "Expecting a simple value type");
MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
switch (OrigSimpleTy) {
default: llvm_unreachable("Unexpected Vector Type");
case MVT::v2i8:
case MVT::v2i16:
return MVT::v2i32;
case MVT::v4i8:
return MVT::v4i16;
}
}
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
const EVT &OrigTy,
const EVT &ExtTy,
unsigned ExtOpcode) {
// The vector originally had a size of OrigTy. It was then extended to ExtTy.
// We expect ExtTy to be 128 bits total. If OrigTy is less than 64 bits, we
// need to insert a new extension so that the operand becomes 64 bits.
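// For example (illustrative): a v4i8 operand that was extended straight to a
// 128-bit v4i32 is given a fresh v4i8 -> v4i16 extension here, so that
// SMULL/UMULL can widen the 64-bit v4i16 inputs into the v4i32 result.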
assert(ExtTy.is128BitVector() && "Unexpected extension size");
if (OrigTy.getSizeInBits() >= 64)
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
EVT NewVT = getExtensionTo64Bits(OrigTy);
return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
bool isSigned) {
EVT VT = N->getValueType(0);
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Elt : N->op_values()) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
unsigned EltSize = VT.getScalarSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
if (!isIntN(HalfSize, C->getSExtValue()))
return false;
} else {
if (!isUIntN(HalfSize, C->getZExtValue()))
return false;
}
continue;
}
return false;
}
return true;
}
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
N->getOpcode());
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
SDLoc dl(N);
unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, true);
}
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, false);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
}
return false;
}
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
}
return false;
}
SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
// The rounding mode is in bits 23:22 of the FPCR.
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
// The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
// so that the shift + and get folded into a bitfield extract.
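// Worked example (illustrative): with FPCR[23:22] == 0b11 (round toward
// zero), adding 1 << 22 wraps the two-bit field to 0b00, so the shift and
// mask below yield FLT_ROUNDS value 0, matching the 3->0 entry above.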
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
SDValue FPCR_64 = DAG.getNode(
ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
{Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
Chain = FPCR_64.getValue(1);
SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
return DAG.getMergeValues({AND, Chain}, dl);
}
SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
SDValue RMValue = Op->getOperand(1);
// The rounding mode is in bits 23:22 of the FPCR.
// The llvm.set.rounding argument value to the rounding mode in FPCR mapping
// is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
// (((arg - 1) & 3) << 22).
//
// The argument of llvm.set.rounding must be within the range [0, 3], so
// NearestTiesToAway (4) is not handled here. It is the responsibility of the
// code that generates llvm.set.rounding to ensure this condition.
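//
// Worked example (illustrative): llvm.set.rounding(0), i.e. round toward
// zero, computes ((0 - 1) & 3) == 3, which is FPCR rounding mode 0b11 (RZ).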
// Calculate new value of FPCR[23:22].
RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
DAG.getConstant(1, DL, MVT::i32));
RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
DAG.getConstant(0x3, DL, MVT::i32));
RMValue =
DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
// Get current value of FPCR.
SDValue Ops[] = {
Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
SDValue FPCR =
DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
Chain = FPCR.getValue(1);
FPCR = FPCR.getValue(0);
// Put the new rounding mode into FPCR[23:22].
const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
DAG.getConstant(RMMask, DL, MVT::i64));
FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
SDValue Ops2[] = {
Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
FPCR};
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
}
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// If SVE is available then i64 vector multiplications can also be made legal.
bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
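// For example (illustrative): mul(v2i64 (sext v2i32 a), v2i64 (sext v2i32 b))
// is matched to AArch64ISD::SMULL below, i.e. a single
// "smull v0.2d, v0.2s, v1.2s".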
assert(VT.is128BitVector() && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
bool isMLA = false;
bool isN0SExt = isSignExtended(N0, DAG);
bool isN1SExt = isSignExtended(N1, DAG);
if (isN0SExt && isN1SExt)
NewOpc = AArch64ISD::SMULL;
else {
bool isN0ZExt = isZeroExtended(N0, DAG);
bool isN1ZExt = isZeroExtended(N1, DAG);
if (isN0ZExt && isN1ZExt)
NewOpc = AArch64ISD::UMULL;
else if (isN1SExt || isN1ZExt) {
// Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
// into (s/zext A * s/zext C) + (s/zext B * s/zext C)
if (isN1SExt && isAddSubSExt(N0, DAG)) {
NewOpc = AArch64ISD::SMULL;
isMLA = true;
} else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
NewOpc = AArch64ISD::UMULL;
isMLA = true;
} else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
std::swap(N0, N1);
NewOpc = AArch64ISD::UMULL;
isMLA = true;
}
}
if (!NewOpc) {
if (VT == MVT::v2i64)
// Fall through to expand this. It is not legal.
return SDValue();
else
// Other vector multiplications are legal.
return Op;
}
}
// Legalize to a S/UMULL instruction
SDLoc DL(Op);
SDValue Op0;
SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
if (!isMLA) {
Op0 = skipExtensionForVectorMULL(N0, DAG);
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
}
// Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
// isel lowering to take advantage of the no-stall back-to-back s/umull +
// s/umlal sequence on CPUs with accumulate forwarding, such as Cortex-A53/A57.
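// For example (illustrative): (zext v8i8 A + zext v8i8 B) * (zext v8i8 C)
// becomes add(UMULL(A, C), UMULL(B, C)), which instruction selection can
// match to a back-to-back umull + umlal pair.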
SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
int Pattern) {
if (VT == MVT::nxv1i1 && Pattern == AArch64SVEPredPattern::all)
return DAG.getConstant(1, DL, MVT::nxv1i1);
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
// Returns a safe bitcast between two scalable vector predicates, where
// any newly created lanes from a widening bitcast are defined as zero.
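// For example (illustrative): casting nxv2i1 to nxv16i1 exposes the lanes
// sitting between the original elements, so the reinterpreted value is ANDed
// with a reinterpreted all-active nxv2i1 mask to force those new lanes to
// zero, unless they are already known zero by construction.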
static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT InVT = Op.getValueType();
assert(InVT.getVectorElementType() == MVT::i1 &&
VT.getVectorElementType() == MVT::i1 &&
"Expected a predicate-to-predicate bitcast");
assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
InVT.isScalableVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(InVT) &&
"Only expect to cast between legal scalable predicate types!");
// Return the operand if the cast isn't changing type,
// e.g. <vscale x 16 x i1> -> <vscale x 16 x i1>
if (InVT == VT)
return Op;
SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
// We only have to zero the lanes if new lanes are being defined, e.g. when
// casting from <vscale x 2 x i1> to <vscale x 16 x i1>. If this is not the
// case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then
// we can return here.
if (InVT.bitsGT(VT))
return Reinterpret;
// Check if the other lanes are already known to be zeroed by
// construction.
if (isZeroingInactiveLanes(Op))
return Reinterpret;
// Zero the newly introduced lanes.
SDValue Mask = DAG.getConstant(1, DL, InVT);
Mask = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Mask);
return DAG.getNode(ISD::AND, DL, VT, Reinterpret, Mask);
}
SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(1);
SDLoc DL(Op);
switch (IntNo) {
default:
return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::aarch64_mops_memset_tag: {
auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
SDValue Chain = Node->getChain();
SDValue Dst = Op.getOperand(2);
SDValue Val = Op.getOperand(3);
Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
SDValue Size = Op.getOperand(4);
auto Alignment = Node->getMemOperand()->getAlign();
bool IsVol = Node->isVolatile();
auto DstPtrInfo = Node->getPointerInfo();
const auto &SDI =
static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo());
SDValue MS =
SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val,
Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{});
// MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the
// intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise
// LowerOperationWrapper will complain that the number of results has
// changed.
return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
}
case Intrinsic::aarch64_sme_get_pstatesm: {
SDValue Chain = Op.getOperand(0);
SDValue MRS = DAG.getNode(
AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
Chain, DAG.getConstant(AArch64SysReg::SVCR, DL, MVT::i64));
SDValue Mask = DAG.getConstant(/* PSTATE.SM */ 1, DL, MVT::i64);
SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, MRS, Mask);
return DAG.getMergeValues({And, Chain}, DL);
}
}
}
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::aarch64_neon_abs: {
EVT Ty = Op.getValueType();
if (Ty == MVT::i64) {
SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
Op.getOperand(1));
Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
} else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
} else {
report_fatal_error("Unexpected type for AArch64 NEON intrinic");
}
}
case Intrinsic::aarch64_neon_smax:
return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_umax:
return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_smin:
return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_umin:
return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sunpklo:
return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpkhi:
return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpklo:
return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_clasta_n:
return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_clastb_n:
return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_lasta:
return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_lastb:
return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_rev:
return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_tbl:
return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_trn1:
return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_trn2:
return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_uzp1:
return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_uzp2:
return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_zip1:
return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_zip2:
return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_splice:
return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_ptrue:
return getPTrue(DAG, dl, Op.getValueType(),
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
case Intrinsic::aarch64_sve_clz:
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sme_cntsb:
return DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
DAG.getConstant(1, dl, MVT::i32));
case Intrinsic::aarch64_sme_cntsh: {
SDValue One = DAG.getConstant(1, dl, MVT::i32);
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), One);
return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, One);
}
case Intrinsic::aarch64_sme_cntsw: {
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
DAG.getConstant(1, dl, MVT::i32));
return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
DAG.getConstant(2, dl, MVT::i32));
}
case Intrinsic::aarch64_sme_cntsd: {
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
DAG.getConstant(1, dl, MVT::i32));
return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
DAG.getConstant(3, dl, MVT::i32));
}
case Intrinsic::aarch64_sve_cnt: {
SDValue Data = Op.getOperand(3);
// CTPOP only supports integer operands.
if (Data.getValueType().isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Data, Op.getOperand(1));
}
case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);
case Intrinsic::aarch64_sve_convert_from_svbool:
return getSVEPredicateBitCast(Op.getValueType(), Op.getOperand(1), DAG);
case Intrinsic::aarch64_sve_convert_to_svbool:
return getSVEPredicateBitCast(MVT::nxv16i1, Op.getOperand(1), DAG);
case Intrinsic::aarch64_sve_fneg:
return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintp:
return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintm:
return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinti:
return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintx:
return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinta:
return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintn:
return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintz:
return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_ucvtf:
return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_scvtf:
return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzu:
return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzs:
return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fsqrt:
return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frecpx:
return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frecpe_x:
return DAG.getNode(AArch64ISD::FRECPE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_frecps_x:
return DAG.getNode(AArch64ISD::FRECPS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_frsqrte_x:
return DAG.getNode(AArch64ISD::FRSQRTE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_frsqrts_x:
return DAG.getNode(AArch64ISD::FRSQRTS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_fabs:
return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_abs:
return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_neg:
return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_insr: {
SDValue Scalar = Op.getOperand(2);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
Op.getOperand(1), Scalar);
}
case Intrinsic::aarch64_sve_rbit:
return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_revb:
return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_revh:
return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_revw:
return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_revd:
return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtb:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sxth:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtw:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtb:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxth:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtw:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
const auto *RegInfo = Subtarget->getRegisterInfo();
unsigned Reg = RegInfo->getLocalAddressRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
Op.getSimpleValueType());
}
case Intrinsic::eh_recoverfp: {
// FIXME: This needs to be implemented to correctly handle highly aligned
// stack objects. For now we simply return the incoming FP. Refer to D53541
// for more details.
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;
}
case Intrinsic::aarch64_neon_vsri:
case Intrinsic::aarch64_neon_vsli: {
EVT Ty = Op.getValueType();
if (!Ty.isVector())
report_fatal_error("Unexpected type for aarch64_neon_vsli");
assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
Op.getOperand(3));
}
case Intrinsic::aarch64_neon_srhadd:
case Intrinsic::aarch64_neon_urhadd:
case Intrinsic::aarch64_neon_shadd:
case Intrinsic::aarch64_neon_uhadd: {
bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_shadd);
bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_urhadd);
unsigned Opcode = IsSignedAdd
? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
: (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_sabd:
case Intrinsic::aarch64_neon_uabd: {
unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
: ISD::ABDS;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_saddlp:
case Intrinsic::aarch64_neon_uaddlp: {
unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
? AArch64ISD::UADDLP
: AArch64ISD::SADDLP;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
}
case Intrinsic::aarch64_neon_sdot:
case Intrinsic::aarch64_neon_udot:
case Intrinsic::aarch64_sve_sdot:
case Intrinsic::aarch64_sve_udot: {
unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
IntNo == Intrinsic::aarch64_sve_udot)
? AArch64ISD::UDOT
: AArch64ISD::SDOT;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
case Intrinsic::get_active_lane_mask: {
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
Op.getOperand(1), Op.getOperand(2));
}
}
}
bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
if (VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16) {
EltTy = MVT::i32;
return true;
}
return false;
}
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
EVT DataVT) const {
// SVE only supports implicit extension of 32-bit indices.
if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
return false;
// Indices cannot be smaller than the main data type.
if (IndexVT.getScalarSizeInBits() < DataVT.getScalarSizeInBits())
return false;
// Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit
// element container type, which would violate the previous clause.
return DataVT.isFixedLengthVector() || DataVT.getVectorMinNumElements() > 2;
}
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector() ||
useSVEForFixedLengthVectorVT(
ExtVal.getValueType(),
/*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors());
}
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::GLD1_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::GLD1_UXTW_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::GLD1_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::GLD1_SXTW_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::GLD1_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::GLD1_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
};
auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
return AddrModes.find(Key)->second;
}
unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("unimplemented opcode");
return Opcode;
case AArch64ISD::GLD1_MERGE_ZERO:
return AArch64ISD::GLD1S_MERGE_ZERO;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
}
}
SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
SelectionDAG &DAG) const {
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
SDLoc DL(Op);
SDValue Chain = MGT->getChain();
SDValue PassThru = MGT->getPassThru();
SDValue Mask = MGT->getMask();
SDValue BasePtr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
EVT VT = Op.getValueType();
EVT MemVT = MGT->getMemoryVT();
ISD::LoadExtType ExtType = MGT->getExtensionType();
ISD::MemIndexType IndexType = MGT->getIndexType();
// SVE supports zero (and so undef) passthrough values only; everything else
// must be handled manually by an explicit select on the load's output.
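// For example (illustrative): a gather whose passthrough is a non-zero splat
// is re-emitted below with an undef passthrough, and the final result is
// produced by select(Mask, Load, PassThru).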
if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) {
SDValue Ops[] = {Chain, DAG.getUNDEF(VT), Mask, BasePtr, Index, Scale};
SDValue Load =
DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
MGT->getMemOperand(), IndexType, ExtType);
SDValue Select = DAG.getSelect(DL, VT, Mask, Load, PassThru);
return DAG.getMergeValues({Select, Load.getValue(1)}, DL);
}
bool IsScaled = MGT->isIndexScaled();
bool IsSigned = MGT->isIndexSigned();
// SVE supports an index scaled by sizeof(MemVT.elt) only; everything else
// must be calculated beforehand.
uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
EVT IndexVT = Index.getValueType();
Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
MGT->getMemOperand(), IndexType, ExtType);
}
// Lower fixed length gather to a scalable equivalent.
if (VT.isFixedLengthVector()) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors!");
// NOTE: Handle floating-point as if integer then bitcast the result.
EVT DataVT = VT.changeVectorElementTypeToInteger();
MemVT = MemVT.changeVectorElementTypeToInteger();
// Find the smallest integer fixed length vector we can use for the gather.
EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
if (DataVT.getVectorElementType() == MVT::i64 ||
Index.getValueType().getVectorElementType() == MVT::i64 ||
Mask.getValueType().getVectorElementType() == MVT::i64)
PromotedVT = VT.changeVectorElementType(MVT::i64);
// Promote vector operands except for passthrough, which we know is either
// undef or zero, and thus best constructed directly.
unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
// A promoted result type forces the need for an extending load.
if (PromotedVT != DataVT && ExtType == ISD::NON_EXTLOAD)
ExtType = ISD::EXTLOAD;
EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
// Convert fixed length vector operands to scalable.
MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
Index = convertToScalableVector(DAG, ContainerVT, Index);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT)
: DAG.getConstant(0, DL, ContainerVT);
// Emit equivalent scalable vector gather.
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
SDValue Load =
DAG.getMaskedGather(DAG.getVTList(ContainerVT, MVT::Other), MemVT, DL,
Ops, MGT->getMemOperand(), IndexType, ExtType);
// Extract fixed length data then convert to the required result type.
SDValue Result = convertFromScalableVector(DAG, PromotedVT, Load);
Result = DAG.getNode(ISD::TRUNCATE, DL, DataVT, Result);
if (VT.isFloatingPoint())
Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
}
// Everything else is legal.
return Op;
}
SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
SelectionDAG &DAG) const {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
SDLoc DL(Op);
SDValue Chain = MSC->getChain();
SDValue StoreVal = MSC->getValue();
SDValue Mask = MSC->getMask();
SDValue BasePtr = MSC->getBasePtr();
SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
EVT VT = StoreVal.getValueType();
EVT MemVT = MSC->getMemoryVT();
ISD::MemIndexType IndexType = MSC->getIndexType();
bool Truncating = MSC->isTruncatingStore();
bool IsScaled = MSC->isIndexScaled();
bool IsSigned = MSC->isIndexSigned();
// SVE supports an index scaled by sizeof(MemVT.elt) only; everything else
// must be calculated beforehand.
uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
EVT IndexVT = Index.getValueType();
Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
MSC->getMemOperand(), IndexType, Truncating);
}
// Lower fixed length scatter to a scalable equivalent.
if (VT.isFixedLengthVector()) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors!");
// Once bitcast we treat floating-point scatters as if integer.
if (VT.isFloatingPoint()) {
VT = VT.changeVectorElementTypeToInteger();
MemVT = MemVT.changeVectorElementTypeToInteger();
StoreVal = DAG.getNode(ISD::BITCAST, DL, VT, StoreVal);
}
// Find the smallest integer fixed length vector we can use for the scatter.
EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
if (VT.getVectorElementType() == MVT::i64 ||
Index.getValueType().getVectorElementType() == MVT::i64 ||
Mask.getValueType().getVectorElementType() == MVT::i64)
PromotedVT = VT.changeVectorElementType(MVT::i64);
// Promote vector operands.
unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
StoreVal = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, StoreVal);
// A promoted value type forces the need for a truncating store.
if (PromotedVT != VT)
Truncating = true;
EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
// Convert fixed length vector operands to scalable.
MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
Index = convertToScalableVector(DAG, ContainerVT, Index);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
StoreVal = convertToScalableVector(DAG, ContainerVT, StoreVal);
// Emit equivalent scalable vector scatter.
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
MSC->getMemOperand(), IndexType, Truncating);
}
// Everything else is legal.
return Op;
}
SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a masked load node");
EVT VT = Op->getValueType(0);
if (useSVEForFixedLengthVectorVT(
VT,
/*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
SDValue PassThru = LoadNode->getPassThru();
SDValue Mask = LoadNode->getMask();
if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
return Op;
SDValue Load = DAG.getMaskedLoad(
VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
LoadNode->getExtensionType());
SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
EVT VT, EVT MemVT,
SelectionDAG &DAG) {
assert(VT.isVector() && "VT should be a vector type");
assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
SDValue Value = ST->getValue();
// First extend the promoted v4i16 to v8i16, truncate it to v8i8, and extract
// the word lane that represents the v4i8 subvector. This optimizes the store
// to:
//
// xtn v0.8b, v0.8h
// str s0, [x0]
SDValue Undef = DAG.getUNDEF(MVT::i16);
SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
{Undef, Undef, Undef, Undef});
SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
Value, UndefVec);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Trunc, DAG.getConstant(0, DL, MVT::i64));
return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
ST->getBasePtr(), ST->getMemOperand());
}
// Custom lowering for any store, vector or scalar, normal or truncating.
// Currently we only custom lower truncating stores from v4i16 to v4i8 and
// volatile stores of i128.
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc Dl(Op);
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
assert(StoreNode && "Can only custom lower store nodes");
SDValue Value = StoreNode->getValue();
EVT VT = Value.getValueType();
EVT MemVT = StoreNode->getMemoryVT();
if (VT.isVector()) {
if (useSVEForFixedLengthVectorVT(
VT,
/*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
return LowerFixedLengthVectorStoreToSVE(Op, DAG);
unsigned AS = StoreNode->getAddressSpace();
Align Alignment = StoreNode->getAlign();
if (Alignment < MemVT.getStoreSize() &&
!allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
StoreNode->getMemOperand()->getFlags(),
nullptr)) {
return scalarizeVectorStore(StoreNode, DAG);
}
if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
MemVT == MVT::v4i8) {
return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
}
// 256-bit non-temporal stores can be lowered to STNP. Do this as part of
// the custom lowering, as there are no unpaired non-temporal stores and
// legalization will break up 256-bit inputs.
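// For example (illustrative): a non-temporal store of v8i32 is split into
// two v4i32 halves below and emitted as a single "stnp q0, q1, [x0]".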
ElementCount EC = MemVT.getVectorElementCount();
if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
EC.isKnownEven() &&
((MemVT.getScalarSizeInBits() == 8u ||
MemVT.getScalarSizeInBits() == 16u ||
MemVT.getScalarSizeInBits() == 32u ||
MemVT.getScalarSizeInBits() == 64u))) {
SDValue Lo =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
SDValue Hi =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(),
DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
} else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
return LowerStore128(Op, DAG);
} else if (MemVT == MVT::i64x8) {
SDValue Value = StoreNode->getValue();
assert(Value->getValueType(0) == MVT::i64x8);
SDValue Chain = StoreNode->getChain();
SDValue Base = StoreNode->getBasePtr();
EVT PtrVT = Base.getValueType();
for (unsigned i = 0; i < 8; i++) {
SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
Value, DAG.getConstant(i, Dl, MVT::i32));
SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
DAG.getConstant(i * 8, Dl, PtrVT));
Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
StoreNode->getOriginalAlign());
}
return Chain;
}
return SDValue();
}
/// Lower atomic or volatile 128-bit stores to a single STP instruction.
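/// For example (illustrative): a volatile or monotonic atomic store of an
/// i128 held in x2:x3 to the address in x0 becomes roughly
/// "stp x2, x3, [x0]".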
SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
SelectionDAG &DAG) const {
MemSDNode *StoreNode = cast<MemSDNode>(Op);
assert(StoreNode->getMemoryVT() == MVT::i128);
assert(StoreNode->isVolatile() || StoreNode->isAtomic());
assert(!StoreNode->isAtomic() ||
StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
SDValue Value = StoreNode->getOpcode() == ISD::STORE
? StoreNode->getOperand(1)
: StoreNode->getOperand(2);
SDLoc DL(Op);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
DAG.getConstant(0, DL, MVT::i64));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
DAG.getConstant(1, DL, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STP, DL, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a load node");
if (LoadNode->getMemoryVT() == MVT::i64x8) {
SmallVector<SDValue, 8> Ops;
SDValue Base = LoadNode->getBasePtr();
SDValue Chain = LoadNode->getChain();
EVT PtrVT = Base.getValueType();
for (unsigned i = 0; i < 8; i++) {
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
DAG.getConstant(i * 8, DL, PtrVT));
SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
LoadNode->getPointerInfo(),
LoadNode->getOriginalAlign());
Ops.push_back(Part);
Chain = SDValue(Part.getNode(), 1);
}
SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
return DAG.getMergeValues({Loaded, Chain}, DL);
}
// Custom lowering for extending v4i8 vector loads.
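// A sketch of the emitted sequence, assuming zero extension (illustrative):
// "ldr s0, [x0]" then "ushll v0.8h, v0.8b, #0" (sshll when sign extending),
// after which the low v4i16 half is extracted and, for v4i32 results,
// extended once more.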
EVT VT = Op->getValueType(0);
assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
if (LoadNode->getMemoryVT() != MVT::v4i8)
return SDValue();
unsigned ExtType;
if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
ExtType = ISD::SIGN_EXTEND;
else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
LoadNode->getExtensionType() == ISD::EXTLOAD)
ExtType = ISD::ZERO_EXTEND;
else
return SDValue();
SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
LoadNode->getBasePtr(), MachinePointerInfo());
SDValue Chain = Load.getValue(1);
SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
DAG.getConstant(0, DL, MVT::i64));
if (VT == MVT::v4i32)
Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
return DAG.getMergeValues({Ext, Chain}, DL);
}
// Generate SUBS and CSEL for integer abs.
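// For scalar types this emits, roughly (illustrative): a SUBS of the value
// against zero to set the flags, then a CSEL that keeps the original value
// when the PL (non-negative) condition holds and the negation otherwise.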
SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
SDLoc DL(Op);
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(0));
// Generate SUBS & CSEL.
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
Op.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
Cmp.getValue(1));
}
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Dest = Op.getOperand(2);
AArch64CC::CondCode CC;
if (SDValue Cmp = emitConjunction(DAG, Cond, CC)) {
SDLoc dl(Op);
SDValue CCVal = DAG.getConstant(CC, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Cmp);
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
LLVM_DEBUG(Op.dump());
switch (Op.getOpcode()) {
default:
llvm_unreachable("unimplemented operand");
return SDValue();
case ISD::BITCAST:
return LowerBITCAST(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
return LowerGlobalTLSAddress(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
return LowerSETCC(Op, DAG);
case ISD::BRCOND:
return LowerBRCOND(Op, DAG);
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
case ISD::SELECT:
return LowerSELECT(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
case ISD::BR_JT:
return LowerBR_JT(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:
return LowerBlockAddress(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG);
case ISD::VACOPY:
return LowerVACOPY(Op, DAG);
case ISD::VAARG:
return LowerVAARG(Op, DAG);
case ISD::ADDCARRY:
return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, false /*unsigned*/);
case ISD::SUBCARRY:
return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, false /*unsigned*/);
case ISD::SADDO_CARRY:
return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, true /*signed*/);
case ISD::SSUBO_CARRY:
return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, true /*signed*/);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
return LowerXALUO(Op, DAG);
case ISD::FADD:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
case ISD::FSUB:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
case ISD::FMUL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
case ISD::FNEG:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
case ISD::FCEIL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
case ISD::FFLOOR:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
case ISD::FNEARBYINT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
case ISD::FRINT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
case ISD::FROUND:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
case ISD::FROUNDEVEN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
case ISD::FTRUNC:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
case ISD::FSQRT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
case ISD::FABS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
case ISD::SPONENTRY:
return LowerSPONENTRY(Op, DAG);
case ISD::RETURNADDR:
return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR:
return LowerADDROFRETURNADDR(Op, DAG);
case ISD::CONCAT_VECTORS:
return LowerCONCAT_VECTORS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SPLAT_VECTOR:
return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:
case ISD::UDIV:
return LowerDIV(Op, DAG);
case ISD::SMIN:
case ISD::UMIN:
case ISD::SMAX:
case ISD::UMAX:
return LowerMinMax(Op, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
return LowerVectorSRA_SRL_SHL(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
return LowerShiftParts(Op, DAG);
case ISD::CTPOP:
case ISD::PARITY:
return LowerCTPOP_PARITY(Op, DAG);
case ISD::FCOPYSIGN:
return LowerFCOPYSIGN(Op, DAG);
case ISD::OR:
return LowerVectorOR(Op, DAG);
case ISD::XOR:
return LowerXOR(Op, DAG);
case ISD::PREFETCH:
return LowerPREFETCH(Op, DAG);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return LowerFP_TO_INT_SAT(Op, DAG);
case ISD::FSINCOS:
return LowerFSINCOS(Op, DAG);
case ISD::FLT_ROUNDS_:
return LowerFLT_ROUNDS_(Op, DAG);
case ISD::SET_ROUNDING:
return LowerSET_ROUNDING(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
case ISD::MULHS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED);
case ISD::MULHU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED);
case ISD::INTRINSIC_W_CHAIN:
return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::ATOMIC_STORE:
if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
assert(Subtarget->hasLSE2());
return LowerStore128(Op, DAG);
}
return SDValue();
case ISD::STORE:
return LowerSTORE(Op, DAG);
case ISD::MSTORE:
return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
case ISD::MGATHER:
return LowerMGATHER(Op, DAG);
case ISD::MSCATTER:
return LowerMSCATTER(Op, DAG);
case ISD::VECREDUCE_SEQ_FADD:
return LowerVECREDUCE_SEQ_FADD(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
return LowerVECREDUCE(Op, DAG);
case ISD::ATOMIC_LOAD_SUB:
return LowerATOMIC_LOAD_SUB(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VSCALE:
return LowerVSCALE(Op, DAG);
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
case ISD::SIGN_EXTEND_INREG: {
// Only custom lower when ExtraVT has a legal byte based element type.
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
EVT ExtraEltVT = ExtraVT.getVectorElementType();
if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
(ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
return SDValue();
return LowerToPredicatedOp(Op, DAG,
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
}
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::LOAD:
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
return LowerLOAD(Op, DAG);
case ISD::ADD:
case ISD::AND:
case ISD::SUB:
return LowerToScalableOp(Op, DAG);
case ISD::FMAXIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
case ISD::FMAXNUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
case ISD::FMINIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
case ISD::FMINNUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
case ISD::VSELECT:
return LowerFixedLengthVectorSelectToSVE(Op, DAG);
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::ABDS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
case ISD::ABDU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
case ISD::BITREVERSE:
return LowerBitreverse(Op, DAG);
case ISD::BSWAP:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
case ISD::CTLZ:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
case ISD::CTTZ:
return LowerCTTZ(Op, DAG);
case ISD::VECTOR_SPLICE:
return LowerVECTOR_SPLICE(Op, DAG);
case ISD::STRICT_LROUND:
case ISD::STRICT_LLROUND:
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT: {
assert(Op.getOperand(1).getValueType() == MVT::f16 &&
"Expected custom lowering of rounding operations only for f16");
SDLoc DL(Op);
SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
{Op.getOperand(0), Op.getOperand(1)});
return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
}
}
}
bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
return !Subtarget->useSVEForFixedLengthVectors();
}
bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
EVT VT, bool OverrideNEON) const {
if (!VT.isFixedLengthVector() || !VT.isSimple())
return false;
// Don't use SVE for vectors we cannot scalarize if required.
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
// Fixed-length predicates should be promoted to i8.
// NOTE: This is consistent with how NEON (and thus 64/128-bit vectors) work.
case MVT::i1:
default:
return false;
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f16:
case MVT::f32:
case MVT::f64:
break;
}
// All SVE implementations support NEON sized vectors.
if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
return Subtarget->hasSVE();
// Ensure NEON MVTs only belong to a single register class.
if (VT.getFixedSizeInBits() <= 128)
return false;
// Ensure wider than NEON code generation is enabled.
if (!Subtarget->useSVEForFixedLengthVectors())
return false;
// Don't use SVE for types that don't fit.
if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
return false;
// TODO: Perhaps an artificial restriction, but worth having whilst getting
// the base fixed length SVE support in place.
if (!VT.isPow2VectorType())
return false;
return true;
}
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
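/// Return the intrinsic ID carried by an ISD::INTRINSIC_WO_CHAIN node, or
/// Intrinsic::not_intrinsic for any other node.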
static unsigned getIntrinsicID(const SDNode *N) {
unsigned Opcode = N->getOpcode();
switch (Opcode) {
default:
return Intrinsic::not_intrinsic;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return IID;
return Intrinsic::not_intrinsic;
}
}
}
bool AArch64TargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
SDValue N1) const {
if (!N0.hasOneUse())
return false;
unsigned IID = getIntrinsicID(N1.getNode());
// Avoid reassociating expressions that can be lowered to smlal/umlal.
if (IID == Intrinsic::aarch64_neon_umull ||
N1.getOpcode() == AArch64ISD::UMULL ||
IID == Intrinsic::aarch64_neon_smull ||
N1.getOpcode() == AArch64ISD::SMULL)
return N0.getOpcode() != ISD::ADD;
return true;
}
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
return CC_AArch64_GHC;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
case CallingConv::CXX_FAST_TLS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
if (Subtarget->isTargetWindows() && IsVarArg)
return CC_AArch64_Win64_VarArg;
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
if (!IsVarArg)
return CC_AArch64_DarwinPCS;
return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
: CC_AArch64_DarwinPCS_VarArg;
case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
case CallingConv::CFGuard_Check:
return CC_AArch64_Win64_CFGuard_Check;
case CallingConv::AArch64_VectorCall:
case CallingConv::AArch64_SVE_VectorCall:
return CC_AArch64_AAPCS;
}
}
CCAssignFn *
AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
}
SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
const Function &F = MF.getFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool IsWin64 = Subtarget->isCallingConvWin64(F.getCallingConv());
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs,
DAG.getTargetLoweringInfo(), MF.getDataLayout());
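// If any value is returned in an SVE (scalable vector) register, record that
// so later logic treats this function as using the SVE calling convention.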
if (any_of(Outs, [](ISD::OutputArg &Out){ return Out.VT.isScalableVector(); }))
FuncInfo->setIsSVECC(true);
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
// Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
// we use a special version of AnalyzeFormalArguments to pass in ValVT and
// LocVT.
unsigned NumArgs = Ins.size();
Function::const_arg_iterator CurOrigArg = F.arg_begin();
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
if (Ins[i].isOrigArg()) {
std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
}
bool UseVarArgCC = false;
if (IsWin64)
UseVarArgCC = isVarArg;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
- SmallVector<SDValue, 16> ArgValues;
+
unsigned ExtraArgLocs = 0;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
// non-compliant manner for larger structs.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
int Size = Ins[i].Flags.getByValSize();
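// Round the byval size up to a whole number of 8-byte slots.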
unsigned NumRegs = (Size + 7) / 8;
// FIXME: This works on big-endian for composite byvals, which are the common
// case. It should also work for fundamental types.
unsigned FrameIdx =
MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
continue;
}
if (Ins[i].Flags.isSwiftAsync())
MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
SDValue ArgValue;
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &AArch64::GPR32RegClass;
else if (RegVT == MVT::i64)
RC = &AArch64::GPR64RegClass;
else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
RC = &AArch64::FPR16RegClass;
else if (RegVT == MVT::f32)
RC = &AArch64::FPR32RegClass;
else if (RegVT == MVT::f64 || RegVT.is64BitVector())
RC = &AArch64::FPR64RegClass;
else if (RegVT == MVT::f128 || RegVT.is128BitVector())
RC = &AArch64::FPR128RegClass;
else if (RegVT.isScalableVector() &&
RegVT.getVectorElementType() == MVT::i1) {
FuncInfo->setIsSVECC(true);
RC = &AArch64::PPRRegClass;
} else if (RegVT.isScalableVector()) {
FuncInfo->setIsSVECC(true);
RC = &AArch64::ZPRRegClass;
} else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8, 16 or 32-bit value, it is really passed promoted
// to 64 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
break;
case CCValAssign::AExt:
case CCValAssign::SExt:
case CCValAssign::ZExt:
break;
case CCValAssign::AExtUpper:
ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
DAG.getConstant(32, DL, RegVT));
ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
break;
}
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
? VA.getLocVT().getSizeInBits()
: VA.getValVT().getSizeInBits()) / 8;
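// On big-endian targets, arguments smaller than 8 bytes occupy the
// high-address end of their slot, so bias the load by (8 - size) bytes.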
uint32_t BEAlign = 0;
if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
!Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8 - ArgSize;
int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// For a NON_EXTLOAD, the generic code in getLoad asserts that ValVT == MemVT.
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
MVT MemVT = VA.getValVT();
switch (VA.getLocInfo()) {
default:
break;
case CCValAssign::Trunc:
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
MemVT = VA.getLocVT();
break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
case CCValAssign::ZExt:
ExtType = ISD::ZEXTLOAD;
break;
case CCValAssign::AExt:
ExtType = ISD::EXTLOAD;
break;
}
ArgValue =
DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI), MemVT);
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
unsigned NumParts = 1;
if (Ins[i].Flags.isInConsecutiveRegs()) {
assert(!Ins[i].Flags.isInConsecutiveRegsLast());
while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
}
MVT PartLoad = VA.getValVT();
SDValue Ptr = ArgValue;
// Ensure we generate all loads for each tuple part, whilst updating the
// pointer after each load correctly using vscale.
while (NumParts > 0) {
ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
InVals.push_back(ArgValue);
NumParts--;
if (NumParts > 0) {
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
BytesIncrement, Flags);
ExtraArgLocs++;
i++;
}
}
} else {
if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
ArgValue, DAG.getValueType(MVT::i32));
// i1 arguments are zero-extended to i8 by the caller. Emit a
// hint to reflect this.
if (Ins[i].isOrigArg()) {
Argument *OrigArg = F.getArg(Ins[i].getOrigArgIndex());
if (OrigArg->getType()->isIntegerTy(1)) {
if (!Ins[i].Flags.isZExt()) {
ArgValue = DAG.getNode(AArch64ISD::ASSERT_ZEXT_BOOL, DL,
ArgValue.getValueType(), ArgValue);
}
}
}
InVals.push_back(ArgValue);
}
}
assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
// varargs
if (isVarArg) {
if (!Subtarget->isTargetDarwin() || IsWin64) {
// The AAPCS variadic function ABI is identical to the non-variadic
// one. As a result there may be more arguments in registers and we should
// save them for future reference.
// Win64 variadic functions also pass arguments in registers, but all float
// arguments are passed in integer registers.
saveVarArgRegisters(CCInfo, DAG, DL, Chain);
}
// This will point to the next argument passed via stack.
unsigned StackOffset = CCInfo.getNextStackOffset();
// We currently pass all varargs at 8-byte alignment, or 4 for ILP32
StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
if (MFI.hasMustTailInVarArgFunc()) {
SmallVector<MVT, 2> RegParmTypes;
RegParmTypes.push_back(MVT::i64);
RegParmTypes.push_back(MVT::f128);
// Compute the set of forwarded registers. The rest are scratch.
SmallVectorImpl<ForwardedRegister> &Forwards =
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
CC_AArch64_AAPCS);
// Conservatively forward X8, since it might be used for aggregate return.
if (!CCInfo.isAllocated(AArch64::X8)) {
Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
}
}
}
// On Windows, InReg pointers must be returned, so record the pointer in a
// virtual register at the start of the function so it can be returned in the
// epilogue.
if (IsWin64) {
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
if (Ins[I].Flags.isInReg()) {
assert(!FuncInfo->getSRetReturnReg());
MVT PtrTy = getPointerTy(DAG.getDataLayout());
Register Reg =
MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
break;
}
}
}
unsigned StackArgSize = CCInfo.getNextStackOffset();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
// This is a non-standard ABI so by fiat I say we're allowed to make full
// use of the stack area to be popped, which must be aligned to 16 bytes in
// any case:
StackArgSize = alignTo(StackArgSize, 16);
// If we're expected to restore the stack (e.g. fastcc) then we'll be adding
// a multiple of 16.
FuncInfo->setArgumentStackToRestore(StackArgSize);
// This realignment carries over to the available bytes below. Our own
// callers will guarantee the space is free by giving an aligned value to
// CALLSEQ_START.
}
// Even if we're not expected to free up the space, it's useful to know how
// much is there while considering tail calls (because we can reuse it).
FuncInfo->setBytesInStackArgArea(StackArgSize);
if (Subtarget->hasCustomCallingConv())
Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
return Chain;
}
void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
SelectionDAG &DAG,
const SDLoc &DL,
SDValue &Chain) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
SmallVector<SDValue, 8> MemOps;
static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
if (GPRSaveSize != 0) {
if (IsWin64) {
GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
if (GPRSaveSize & 15)
// The extra size here, if triggered, will always be 8.
MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
} else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
Register VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), DL, Val, FIN,
IsWin64 ? MachinePointerInfo::getFixedStack(
MF, GPRIdx, (i - FirstVariadicGPR) * 8)
: MachinePointerInfo::getStack(MF, i * 8));
MemOps.push_back(Store);
FIN =
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
FuncInfo->setVarArgsGPRSize(GPRSaveSize);
if (Subtarget->hasFPARMv8() && !IsWin64) {
static const MCPhysReg FPRArgRegs[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
if (FPRSaveSize != 0) {
FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
Register VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(MF, i * 16));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getConstant(16, DL, PtrVT));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
FuncInfo->setVarArgsFPRSize(FPRSaveSize);
}
if (!MemOps.empty()) {
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue AArch64TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ const SmallVectorImpl<CCValAssign> &RVLocs, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
- CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
- // Assign locations to each value returned by this call.
- SmallVector<CCValAssign, 16> RVLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC);
-
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
continue;
}
// Avoid copying a physreg twice since RegAllocFast is incompetent and only
// allows one use of a physreg per block.
SDValue Val = CopiedRegs.lookup(VA.getLocReg());
if (!Val) {
Val =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
CopiedRegs[VA.getLocReg()] = Val;
}
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
case CCValAssign::AExtUpper:
Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
DAG.getConstant(32, DL, VA.getLocVT()));
LLVM_FALLTHROUGH;
case CCValAssign::AExt:
LLVM_FALLTHROUGH;
case CCValAssign::ZExt:
Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
break;
}
InVals.push_back(Val);
}
return Chain;
}
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::C:
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::PreserveMost:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
case CallingConv::Fast:
return true;
default:
return false;
}
}
static void analyzeCallOperands(const AArch64TargetLowering &TLI,
const AArch64Subtarget *Subtarget,
const TargetLowering::CallLoweringInfo &CLI,
CCState &CCInfo) {
const SelectionDAG &DAG = CLI.DAG;
CallingConv::ID CalleeCC = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool UseVarArgCC = false;
if (IsVarArg) {
// On Windows, the fixed arguments in a vararg call are passed in GPRs
// too, so use the vararg CC to force them to integer registers.
if (IsCalleeWin64) {
UseVarArgCC = true;
} else {
UseVarArgCC = !Outs[i].IsFixed;
}
} else {
// Get type of the original argument.
EVT ActualVT =
TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty,
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ArgVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ArgVT = MVT::i16;
}
CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CalleeCC, UseVarArgCC);
bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
}
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const CallLoweringInfo &CLI) const {
CallingConv::ID CalleeCC = CLI.CallConv;
if (!mayTailCallThisCC(CalleeCC))
return false;
SDValue Callee = CLI.Callee;
bool IsVarArg = CLI.IsVarArg;
const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
const SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
const SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
const SelectionDAG &DAG = CLI.DAG;
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
// Functions using the C or Fast calling convention that have an SVE signature
// preserve more registers and should assume the SVE_VectorCall CC.
// The check for matching callee-saved regs will determine whether it is
// eligible for TCO.
if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
MF.getInfo<AArch64FunctionInfo>()->isSVECC())
CallerCC = CallingConv::AArch64_SVE_VectorCall;
bool CCMatch = CallerCC == CalleeCC;
// When using the Windows calling convention on a non-windows OS, we want
// to back up and restore X18 in such functions; we can't do a tail call
// from those functions.
if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
CalleeCC != CallingConv::Win64)
return false;
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
for (Function::const_arg_iterator i = CallerF.arg_begin(),
e = CallerF.arg_end();
i != e; ++i) {
if (i->hasByValAttr())
return false;
// On Windows, "inreg" attributes signify non-aggregate indirect returns.
// In this case, it is necessary to save/restore X0 in the callee. Tail
// call opt interferes with this. So we disable tail call opt when the
// caller has an argument with "inreg" attribute.
// FIXME: Check whether the callee also has an "inreg" argument.
if (i->hasInRegAttr())
return false;
}
if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
return CCMatch;
// Externally-defined functions with weak linkage should not be
// tail-called on AArch64 when the OS does not support dynamic
// pre-emption of symbols, as the AAELF spec requires normal calls
// to undefined weak functions to be replaced with a NOP or jump to the
// next instruction. The behaviour of branch instructions in this
// situation (as used for tail calls) is implementation-defined, so we
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
// Now we search for cases where we can use a tail call without changing the
// ABI. Sibcall is used in some places (particularly gcc) to refer to this
// concept.
// I want anyone implementing a new calling convention to think long and hard
// about this assert.
assert((!IsVarArg || CalleeCC == CallingConv::C) &&
"Unexpected variadic calling convention");
LLVMContext &C = *DAG.getContext();
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
CCAssignFnForCall(CalleeCC, IsVarArg),
CCAssignFnForCall(CallerCC, IsVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (Subtarget->hasCustomCallingConv()) {
TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
}
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// Nothing more to check if the callee is taking no arguments
if (Outs.empty())
return true;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);
analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
// When the call is musttail, additional checks have already been done, so we
// can safely skip this check.
// At least two cases here: if caller is fastcc then we can't have any
// memory arguments (we'd be expected to clean up the stack afterwards). If
// caller is C then we could potentially use its argument area.
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
for (const CCValAssign &ArgLoc : ArgLocs)
if (!ArgLoc.isRegLoc())
return false;
}
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
// If any of the arguments is passed indirectly, it must be SVE, so the
// 'getBytesInStackArgArea' is not sufficient to determine whether we need to
// allocate space on the stack. That is why we check this explicitly here:
// if any argument is passed indirectly, the call cannot be a tail call.
if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
assert((A.getLocInfo() != CCValAssign::Indirect ||
A.getValVT().isScalableVector()) &&
"Expected value to be scalable");
return A.getLocInfo() == CCValAssign::Indirect;
}))
return false;
// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
return true;
}
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
MachineFrameInfo &MFI,
int ClobberedFI) const {
SmallVector<SDValue, 8> ArgChains;
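// Compute the inclusive byte range [FirstByte, LastByte] covered by the
// clobbered frame object.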
int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
// Include the original chain at the beginning of the list. When this is
// used by target LowerCall hooks, this helps legalize find the
// CALLSEQ_BEGIN node.
ArgChains.push_back(Chain);
// Add a chain value for each stack-argument load that overlaps the
// clobbered range.
for (SDNode *U : DAG.getEntryNode().getNode()->uses())
if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
int64_t InLastByte = InFirstByte;
InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
(FirstByte <= InFirstByte && InFirstByte <= LastByte))
ArgChains.push_back(SDValue(L, 1));
}
// Build a tokenfactor for all the chains.
return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
}
bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
bool TailCallOpt) const {
return (CallCC == CallingConv::Fast && TailCallOpt) ||
CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
}
// Check if the value is zero-extended from i1 to i8
static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
unsigned SizeInBits = Arg.getValueType().getSizeInBits();
if (SizeInBits < 8)
return false;
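// Bits 1-7 of the low byte (mask 0xFE) must be known zero; higher bits are
// irrelevant because the AAPCS only requires the low 8 bits to contain the
// zero-extended i1. The known-bits query is depth-limited to stay cheap.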
APInt RequiredZero(SizeInBits, 0xFE);
KnownBits Bits = DAG.computeKnownBits(Arg, 4);
bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
return ZExtBool;
}
/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
/// and add input and output parameter nodes.
SDValue
AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID &CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
MachineFunction::CallSiteInfo CSInfo;
bool IsThisReturn = false;
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
bool IsSibCall = false;
bool GuardWithBTI = false;
if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
!Subtarget->noBTIAtReturnTwice()) {
GuardWithBTI = FuncInfo->branchTargetEnforcement();
}
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ if (IsVarArg) {
+ unsigned NumArgs = Outs.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ if (!Outs[i].IsFixed && Outs[i].VT.isScalableVector())
+ report_fatal_error("Passing SVE types to variadic functions is "
+ "currently not supported");
+ }
+ }
+
+ analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
+
+ CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+ RetCCInfo.AnalyzeCallResult(Ins, RetCC);
+
// Check callee args/returns for SVE registers and set calling convention
// accordingly.
if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
- bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
- return Out.VT.isScalableVector();
- });
- bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
- return In.VT.isScalableVector();
- });
-
- if (CalleeInSVE || CalleeOutSVE)
+ auto HasSVERegLoc = [](CCValAssign &Loc) {
+ if (!Loc.isRegLoc())
+ return false;
+ return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
+ AArch64::PPRRegClass.contains(Loc.getLocReg());
+ };
+ if (any_of(RVLocs, HasSVERegLoc) || any_of(ArgLocs, HasSVERegLoc))
CallConv = CallingConv::AArch64_SVE_VectorCall;
}
if (IsTailCall) {
// Check if it's really possible to do a tail call.
IsTailCall = isEligibleForTailCallOptimization(CLI);
// A sibling call is one where we're under the usual C ABI and not planning
// to change that but can still do a tail call:
if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
CallConv != CallingConv::SwiftTail)
IsSibCall = true;
if (IsTailCall)
++NumTailCalls;
}
if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-
- if (IsVarArg) {
- unsigned NumArgs = Outs.size();
-
- for (unsigned i = 0; i != NumArgs; ++i) {
- if (!Outs[i].IsFixed && Outs[i].VT.isScalableVector())
- report_fatal_error("Passing SVE types to variadic functions is "
- "currently not supported");
- }
- }
-
- analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
-
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibCall) {
// Since we're not changing the ABI to make this a tail call, the memory
// operands are already available in the caller's incoming argument space.
NumBytes = 0;
}
// FPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
// by this amount for a tail call. In a sibling call it must be 0 because the
// caller will deallocate the entire stack and the callee still expects its
// arguments to begin at SP+0. Completely unused for non-tail calls.
int FPDiff = 0;
if (IsTailCall && !IsSibCall) {
unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
// Since callee will pop argument stack as a tail call, we must keep the
// popped size 16-byte aligned.
NumBytes = alignTo(NumBytes, 16);
// FPDiff will be negative if this tail call requires more space than we
// would automatically have in our incoming argument space. Positive if we
// can actually shrink the stack.
FPDiff = NumReusableBytes - NumBytes;
// Update the required reserved area if this is the tail call requiring the
// most argument stack space.
if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
FuncInfo->setTailCallReservedStack(-FPDiff);
// The stack pointer must be 16-byte aligned at all times it's used for a
// memory operation, which in practice means at *all* times and in
// particular across call boundaries. Therefore our own arguments started at
// a 16-byte aligned SP and the delta applied for the tail call should
// satisfy the same constraint.
assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
}
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
getPointerTy(DAG.getDataLayout()));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallSet<unsigned, 8> RegsUsed;
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
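// For a musttail call in a variadic function, forward the incoming register
// arguments that were captured in LowerFormalArguments on to the callee.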
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
RegsToPass.emplace_back(F.PReg, Val);
}
}
// Walk the register/memloc assignments, inserting copies/loads.
unsigned ExtraArgLocs = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Promote the value if needed.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to 8-bits by the caller.
//
// Check if we actually have to do this, because the value may
// already be zero-extended.
//
// We cannot just emit a (zext i8 (trunc (assert-zext i8)))
// and rely on DAGCombiner to fold this, because the following
// (anyext i32) is combined with (zext i8) in DAG.getNode:
//
// (ext (zext x)) -> (zext x)
//
// This will give us (zext i32), which we cannot remove, so
// try to check this beforehand.
if (!checkZExtBool(Arg, DAG)) {
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
}
}
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExtUpper:
assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, VA.getLocVT()));
break;
case CCValAssign::BCvt:
Arg = DAG.getBitcast(VA.getLocVT(), Arg);
break;
case CCValAssign::Trunc:
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::FPExt:
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
uint64_t PartSize = StoreSize;
unsigned NumParts = 1;
if (Outs[i].Flags.isInConsecutiveRegs()) {
assert(!Outs[i].Flags.isInConsecutiveRegsLast());
while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
StoreSize *= NumParts;
}
MachineFrameInfo &MFI = MF.getFrameInfo();
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
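// Mark the slot as a scalable-vector stack object so frame lowering
// addresses it with vscale-scaled offsets.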
MFI.setStackID(FI, TargetStackID::ScalableVector);
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
SDValue Ptr = DAG.getFrameIndex(
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
SDValue SpillSlot = Ptr;
// Ensure we generate all stores for each tuple part, whilst updating the
// pointer after each store correctly using vscale.
while (NumParts) {
Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
NumParts--;
if (NumParts > 0) {
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
MPI = MachinePointerInfo(MPI.getAddrSpace());
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
BytesIncrement, Flags);
ExtraArgLocs++;
i++;
}
}
Arg = SpillSlot;
break;
}
if (VA.isRegLoc()) {
if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
Outs[0].VT == MVT::i64) {
assert(VA.getLocVT() == MVT::i64 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
"unexpected use of 'returned'");
IsThisReturn = true;
}
if (RegsUsed.count(VA.getLocReg())) {
// If this register has already been used then we're trying to pack
// parts of an [N x i32] into an X-register. The extension type will
// take care of putting the two halves in the right place but we have to
// combine them.
SDValue &Bits =
llvm::find_if(RegsToPass,
[=](const std::pair<unsigned, SDValue> &Elt) {
return Elt.first == VA.getLocReg();
})
->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
// Call site info is used for function's parameter entry value
// tracking. For now we track only simple cases when parameter
// is transferred through whole register.
llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
return ArgReg.Reg == VA.getLocReg();
});
} else {
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
const TargetOptions &Options = DAG.getTarget().Options;
if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), i);
}
} else {
assert(VA.isMemLoc());
SDValue DstAddr;
MachinePointerInfo DstInfo;
// FIXME: This works on big-endian for composite byvals, which are the
// common case. It should also work for fundamental types.
uint32_t BEAlign = 0;
unsigned OpSize;
if (VA.getLocInfo() == CCValAssign::Indirect ||
VA.getLocInfo() == CCValAssign::Trunc)
OpSize = VA.getLocVT().getFixedSizeInBits();
else
OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
: VA.getValVT().getSizeInBits();
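// Convert the size from bits to bytes, rounding up.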
OpSize = (OpSize + 7) / 8;
if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
!Flags.isInConsecutiveRegs()) {
if (OpSize < 8)
BEAlign = 8 - OpSize;
}
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
if (IsTailCall) {
Offset = Offset + FPDiff;
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
// clobbered.
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
} else {
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode,
Outs[i].Flags.getNonZeroByValAlign(),
/*isVol = */ false, /*AlwaysInline = */ false,
/*isTailCall = */ false, DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
// Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
// promoted to a legal register type i32, we should truncate Arg back to
// i1/i8/i16.
if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
VA.getValVT() == MVT::i16)
Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
MemOpChains.push_back(Store);
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (auto &RegToPass : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
RegToPass.second, InFlag);
InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
auto GV = G->getGlobal();
unsigned OpFlags =
Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
if (OpFlags & AArch64II::MO_GOT) {
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const GlobalValue *GV = G->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
}
} else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Subtarget->isTargetMachO()) {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
}
// We don't usually want to end the call-sequence here because we would tidy
// the frame up *after* the call, however in the ABI-changing tail-call case
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
}
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
// into the call.
for (auto &RegToPass : RegsToPass)
Ops.push_back(DAG.getRegister(RegToPass.first,
RegToPass.second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
Mask = TRI->getCallPreservedMask(MF, CallConv);
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(MF, &Mask);
if (TRI->isAnyArgRegReserved(MF))
TRI->emitReservedArgRegCallError(MF);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// If we're doing a tail call, use a TC_RETURN here rather than an
// actual call instruction.
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
unsigned CallOpc = AArch64ISD::CALL;
// Calls with operand bundle "clang.arc.attachedcall" are special. They should
// be expanded to the call, directly followed by a special marker sequence and
// a call to an ObjC library function. Use CALL_RVMARKER to do that.
if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
assert(!IsTailCall &&
"tail calls cannot be marked with clang.arc.attachedcall");
CallOpc = AArch64ISD::CALL_RVMARKER;
// Add a target global address for the retainRV/claimRV runtime function
// just before the call target.
Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
Ops.insert(Ops.begin() + 1, GA);
} else if (GuardWithBTI)
CallOpc = AArch64ISD::CALL_BTI;
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
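// Callee-pop conventions (see DoesCalleeRestoreStack) pop a 16-byte-aligned
// amount; record it so CALLSEQ_END matches what the callee will actually pop.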
uint64_t CalleePopBytes =
DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
DAG.getIntPtrConstant(CalleePopBytes, DL, true),
InFlag, DL);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, RVLocs, DL, DAG,
InVals, IsThisReturn,
IsThisReturn ? OutVals[0] : SDValue());
}
bool AArch64TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
}
SDValue
AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
auto &MF = DAG.getMachineFunction();
auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC);
// Copy the result values into the output registers.
SDValue Flag;
SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
SmallSet<unsigned, 4> RegsUsed;
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to i8 by the producer of the
// value. This is strictly redundant on Darwin (which uses "zeroext
// i1"), but will be optimised out before ISel.
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
}
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
case CCValAssign::ZExt:
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::AExtUpper:
assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, VA.getLocVT()));
break;
}
if (RegsUsed.count(VA.getLocReg())) {
SDValue &Bits =
llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
return Elt.first == VA.getLocReg();
})->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
} else {
RetVals.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
}
}
SmallVector<SDValue, 4> RetOps(1, Chain);
for (auto &RetVal : RetVals) {
Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
// The Windows AArch64 ABI requires that when returning a struct by value we
// copy the sret argument into X0 for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into X0.
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
getPointerTy(MF.getDataLayout()));
unsigned RetValReg = AArch64::X0;
Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
if (I) {
for (; *I; ++I) {
if (AArch64::GPR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else if (AArch64::FPR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
}
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
}
SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
}
// (loadGOT sym)
template <class NodeTy>
SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes instead of using a wrapper node.
return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
}
// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
template <class NodeTy>
SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
const unsigned char MO_NC = AArch64II::MO_NC;
return DAG.getNode(
AArch64ISD::WrapperLarge, DL, Ty,
getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
}
// (addlow (adrp %hi(sym)) %lo(sym))
template <class NodeTy>
SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
SDValue Lo = getTargetNode(N, Ty, DAG,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
}
// (adr sym)
template <class NodeTy>
SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
}
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
if (OpFlags != AArch64II::MO_NO_FLAG)
assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
"unexpected offset in global node");
// This also catches the large code model case for Darwin, and tiny code
// model with got relocations.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
return getGOT(GN, DAG, OpFlags);
}
SDValue Result;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
Result = getAddrLarge(GN, DAG, OpFlags);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
Result = getAddrTiny(GN, DAG, OpFlags);
} else {
Result = getAddr(GN, DAG, OpFlags);
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(GN);
if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
/// Convert a TLS address reference into the correct sequence of loads
/// and calls to compute the variable's address (for Darwin, currently) and
/// return an SDValue containing the final node.
/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
/// + "extern __thread" declaration.
/// + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i64] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first xword, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "x0".
///
/// Since this descriptor may be in a different unit, in general even the
/// descriptor must be accessed via an indirect load. The "ideal" code sequence
/// is:
/// adrp x0, _var@TLVPPAGE
/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
/// ; the function pointer
/// blr x1 ; Uses descriptor address in x0
/// ; Address of _var is now in x0.
///
/// If the address of _var's descriptor *is* known to the linker, then it can
/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
/// a slight efficiency gain.
SDValue
AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() &&
"This function expects a Darwin target");
SDLoc DL(Op);
MVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue TLVPAddr =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
// The first entry in the descriptor is a function pointer that we must call
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
PtrMemVT, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
Align(PtrMemVT.getSizeInBits() / 8),
MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
Chain = FuncTLVGet.getValue(1);
// Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true);
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getTLSCallPreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
// returns the address of the variable in this thread.
Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
Chain =
DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
DAG.getRegisterMask(Mask), Chain.getValue(1));
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
}
/// Convert a thread-local variable reference into a sequence of instructions to
/// compute the variable's address for the local exec TLS model of ELF targets.
/// The sequence depends on the maximum TLS area size.
SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
SDValue ThreadBase,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue TPOff, Addr;
switch (DAG.getTarget().Options.TLSSize) {
default:
llvm_unreachable("Unexpected TLS size");
case 12: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_lo12:a
SDValue Var = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
Var,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 24: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_hi12:a
// add x0, x0, :tprel_lo12_nc:a
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 32: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g1:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
case 48: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g2:a
// movk x0, #:tprel_g1_nc:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
SDValue MiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(32, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
}
}
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, whose first entry
/// is a function pointer that carries out the resolution.
///
/// The sequence is:
/// adrp x0, :tlsdesc:var
/// ldr x1, [x0, #:tlsdesc_lo12:var]
/// add x0, x0, #:tlsdesc_lo12:var
/// .tlsdesccall var
/// blr x1
/// (TPIDR_EL0 offset now in x0)
///
/// The above sequence must be produced unscheduled, to enable the linker to
/// optimize/relax this sequence.
/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
/// above sequence, and expanded really late in the compilation flow, to ensure
/// the sequence is produced as per above.
SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain =
DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
if (Model == TLSModel::LocalDynamic)
Model = TLSModel::GeneralDynamic;
}
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Model != TLSModel::LocalExec)
report_fatal_error("ELF TLS only supported in small memory model or "
"in local exec TLS model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
// maximum TLS size is 4GiB.
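// For example (illustrative): with the small code model, -mtls-size=12
// selects the single-ADD local-exec sequence in LowerELFTLSLocalExec above,
// while -mtls-size=48 selects the three-instruction MOVZ/MOVK form.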
// FIXME: add tiny and large code model support for TLS access models other
// than local exec. We currently generate the same code as small for tiny,
// which may be larger than needed.
SDValue TPOff;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
if (Model == TLSModel::LocalExec) {
return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
} else if (Model == TLSModel::LocalDynamic) {
// Local-dynamic accesses proceed in two phases. A general-dynamic TLS
// descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
// the beginning of the module's TLS region, followed by a DTPREL offset
// calculation.
// These accesses will need deduplicating if there's more than one.
AArch64FunctionInfo *MFI =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
AArch64II::MO_TLS);
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
} else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
SDValue SymAddr =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
SDValue
AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
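// The overall lowering built below is roughly (illustrative; exact
// relocation spellings are approximate):
//   ldr  x8, [x18, #0x58]          // TEB->ThreadLocalStoragePointer
//   adrp x9, _tls_index
//   ldr  w9, [x9, :lo12:_tls_index]
//   ldr  x8, [x8, x9, lsl #3]      // this module's TLS block
//   add  x8, x8, :secrel_hi12:var
//   add  x0, x8, :secrel_lo12:var  // address of var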
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
// Load the ThreadLocalStoragePointer from the TEB
// A pointer to the TLS array is located at offset 0x58 from the TEB.
SDValue TLSArray =
DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
Chain = TLSArray.getValue(1);
// Load the TLS index from the C runtime;
// This does the same as getAddr(), but without having a GlobalAddressSDNode.
// This also does the same as LOADgot, but using a generic i32 load,
// while LOADgot only loads i64.
SDValue TLSIndexHi =
DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
"_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
SDValue TLSIndex =
DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
Chain = TLSIndex.getValue(1);
// The pointer to the thread's TLS data area is at the TLS Index scaled by 8
// offset into the TLSArray.
TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
DAG.getConstant(3, DL, PtrVT));
SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
MachinePointerInfo());
Chain = TLS.getValue(1);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue TGAHi = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue TGALo = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
// Add the offset from the start of the .tls section (section base).
SDValue Addr =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
return Addr;
}
SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(GA, DAG);
if (Subtarget->isTargetDarwin())
return LowerDarwinGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetELF())
return LowerELFGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetWindows())
return LowerWindowsGlobalTLSAddress(Op, DAG);
llvm_unreachable("Unexpected platform trying to use TLS");
}
// Looks through \param Val to determine the bit that can be used to
// check the sign of the value. It returns the unextended value and
// the sign bit position.
std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
return {Val.getOperand(0),
cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
1};
if (Val.getOpcode() == ISD::SIGN_EXTEND)
return {Val.getOperand(0),
Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
return {Val, Val.getValueSizeInBits() - 1};
}
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
MachineFunction &MF = DAG.getMachineFunction();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
bool ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
// Handle f128 first, since lowering it will result in comparing the return
// value of a libcall against zero, which is just what the rest of LowerBR_CC
// is expecting to deal with.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
// instruction.
if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
return SDValue();
// The actual operation with overflow check.
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
if (CC == ISD::SETNE)
OFCC = getInvertedCondCode(OFCC);
SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Overflow);
}
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
// If the RHS of the comparison is zero, we can potentially fold this
// to a specialized branch.
const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
if (CC == ISD::SETEQ) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
// out of bounds, a late MI-layer pass rewrites branches.
// 403.gcc is an example that hits this case.
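// For example (illustrative): "if ((x & 4) == 0) goto L" can lower to
// "tbz x0, #2, L" instead of an and/cmp/b.eq sequence.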
if (LHS.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
Dest);
}
return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
} else if (CC == ISD::SETNE) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
// out of bounds, a late MI-layer pass rewrites branches.
// 403.gcc is an example that hits this case.
if (LHS.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
Dest);
}
return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
} else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
uint64_t SignBitPos;
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
uint64_t SignBitPos;
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Cmp);
}
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue BR1 =
DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
if (CC2 != AArch64CC::AL) {
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
Cmp);
}
return BR1;
}
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SelectionDAG &DAG) const {
if (!Subtarget->hasNEON())
return SDValue();
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
SDLoc DL(Op);
SDValue In1 = Op.getOperand(0);
SDValue In2 = Op.getOperand(1);
EVT SrcVT = In2.getValueType();
if (SrcVT.bitsLT(VT))
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT.bitsGT(VT))
In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
if (VT.isScalableVector())
IntVT =
getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
if (VT != In2.getValueType())
return SDValue();
auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) {
if (VT.isScalableVector())
return getSVESafeBitCast(VT, Op, DAG);
return DAG.getBitcast(VT, Op);
};
SDValue VecVal1, VecVal2;
EVT VecVT;
auto SetVecVal = [&](int Idx = -1) {
if (!VT.isVector()) {
VecVal1 =
DAG.getTargetInsertSubreg(Idx, DL, VecVT, DAG.getUNDEF(VecVT), In1);
VecVal2 =
DAG.getTargetInsertSubreg(Idx, DL, VecVT, DAG.getUNDEF(VecVT), In2);
} else {
VecVal1 = BitCast(VecVT, In1, DAG);
VecVal2 = BitCast(VecVT, In2, DAG);
}
};
if (VT.isVector()) {
VecVT = IntVT;
SetVecVal();
} else if (VT == MVT::f64) {
VecVT = MVT::v2i64;
SetVecVal(AArch64::dsub);
} else if (VT == MVT::f32) {
VecVT = MVT::v4i32;
SetVecVal(AArch64::ssub);
} else if (VT == MVT::f16) {
VecVT = MVT::v8i16;
SetVecVal(AArch64::hsub);
} else {
llvm_unreachable("Invalid type for copysign!");
}
unsigned BitWidth = In1.getScalarValueSizeInBits();
SDValue SignMaskV = DAG.getConstant(~APInt::getSignMask(BitWidth), DL, VecVT);
// We want to materialize a mask with every bit but the high bit set, but the
// AdvSIMD immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize all bits set and then negate that.
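// Concretely (illustrative): bitcasting the all-ones vector to v2f64 and
// applying FNEG flips only each lane's sign bit, leaving 0x7fffffffffffffff,
// i.e. every bit but the high bit set.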
if (VT == MVT::f64 || VT == MVT::v2f64) {
SignMaskV = DAG.getConstant(APInt::getAllOnes(BitWidth), DL, VecVT);
SignMaskV = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, SignMaskV);
SignMaskV = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, SignMaskV);
SignMaskV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, SignMaskV);
}
SDValue BSP =
DAG.getNode(AArch64ISD::BSP, DL, VecVT, SignMaskV, VecVal1, VecVal2);
if (VT == MVT::f16)
return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, BSP);
if (VT == MVT::f32)
return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, BSP);
if (VT == MVT::f64)
return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, BSP);
return BitCast(VT, BSP, DAG);
}
SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
SelectionDAG &DAG) const {
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat))
return SDValue();
if (!Subtarget->hasNEON())
return SDValue();
bool IsParity = Op.getOpcode() == ISD::PARITY;
// While there is no integer popcount instruction, it can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from
// the AdvSIMD registers are cheap.
// FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
// CNT V0.8B, V0.8B // 8xbyte pop-counts
// ADDV B0, V0.8B // sum 8xbyte pop-counts
// UMOV X0, V0.B[0] // copy byte result back to integer reg
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (VT == MVT::i32 || VT == MVT::i64) {
if (VT == MVT::i32)
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
if (IsParity)
UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
DAG.getConstant(1, DL, MVT::i32));
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
return UaddLV;
} else if (VT == MVT::i128) {
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
if (IsParity)
UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
DAG.getConstant(1, DL, MVT::i32));
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
assert(!IsParity && "ISD::PARITY of vector types not supported");
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
"Unexpected type for custom ctpop lowering");
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
Val = DAG.getBitcast(VT8Bit, Val);
Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
// Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
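// For example (illustrative), for VT == v4i32: the v16i8 CTPOP result is
// widened v16i8 -> v8i16 -> v4i32 by two uaddlp steps.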
unsigned EltSize = 8;
unsigned NumElts = VT.is64BitVector() ? 8 : 16;
while (EltSize != VT.getScalarSizeInBits()) {
EltSize *= 2;
NumElts /= 2;
MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
Val = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
}
return Val;
}
SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(
VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()));
SDLoc DL(Op);
SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}
SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
ISD::CondCode CC;
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
case ISD::SMAX:
CC = ISD::SETGT;
break;
case ISD::SMIN:
CC = ISD::SETLT;
break;
case ISD::UMAX:
CC = ISD::SETUGT;
break;
case ISD::UMIN:
CC = ISD::SETULT;
break;
}
if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(
VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED);
case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED);
case ISD::UMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED);
case ISD::UMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED);
}
}
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(
VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
SDLoc DL(Op);
SDValue REVB;
MVT VST;
switch (VT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("Invalid type for bitreverse!");
case MVT::v2i32: {
VST = MVT::v8i8;
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
break;
}
case MVT::v4i32: {
VST = MVT::v16i8;
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
break;
}
case MVT::v1i64: {
VST = MVT::v8i8;
REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
break;
}
case MVT::v2i64: {
VST = MVT::v16i8;
REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
break;
}
}
return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
DAG.getNode(ISD::BITREVERSE, DL, VST, REVB));
}
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVSETCC(Op, DAG);
bool IsStrict = Op->isStrictFPOpcode();
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Chain;
if (IsStrict)
Chain = Op.getOperand(0);
SDValue LHS = Op.getOperand(OpNo + 0);
SDValue RHS = Op.getOperand(OpNo + 1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
SDLoc dl(Op);
// We chose ZeroOrOneBooleanContents, so use zero and one.
EVT VT = Op.getValueType();
SDValue TVal = DAG.getConstant(1, dl, VT);
SDValue FVal = DAG.getConstant(0, dl, VT);
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets picked up by the next if statement.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
IsSignaling);
// If softenSetCCOperands returned a scalar, use it.
if (!RHS.getNode()) {
assert(LHS.getValueType() == Op.getValueType() &&
"Unexpected setcc expansion!");
return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
}
}
if (LHS.getValueType().isInteger()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(
LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
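// For example (illustrative): "x == 0" becomes "cmp x0, #0; cset w0, eq",
// where CSET is an alias of CSINC with wzr as both sources.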
SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
// If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
// and do the comparison.
SDValue Cmp;
if (IsStrict)
Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
else
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
SDValue Res;
if (CC2 == AArch64CC::AL) {
changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
} else {
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
// totally clean. Some of them require two CSELs to implement. In this case,
// we emit the first CSEL and then emit a second CSEL using the output
// of the first as the RHS. We're effectively OR'ing the two CC's together.
// FIXME: It would be nice if we could match the two CSELs to two CSINCs.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
}
SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
SDValue RHS, SDValue TVal,
SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const {
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
// Also handle f16, for which we need to do an f32 comparison.
if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
}
// Next, handle integers.
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
// Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
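// For example (illustrative): "x > -1 ? 1 : -1" for i32 x becomes
//   asr w8, w0, #31
//   orr w0, w8, #1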
if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
CTVal->isOne() && CFVal->isAllOnes() &&
LHS.getValueType() == TVal.getValueType()) {
EVT VT = LHS.getValueType();
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, LHS,
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
}
unsigned Opcode = AArch64ISD::CSEL;
// If both the TVal and the FVal are constants, see if we can swap them in
// order to form a CSINV or CSINC out of them.
if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (TVal.getOpcode() == ISD::XOR) {
// If TVal is a NOT we want to swap TVal and FVal so that we can match
// with a CSINV rather than a CSEL.
if (isAllOnesConstant(TVal.getOperand(1))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (TVal.getOpcode() == ISD::SUB) {
// If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
// that we can match with a CSNEG rather than a CSEL.
if (isNullConstant(TVal.getOperand(0))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (CTVal && CFVal) {
const int64_t TrueVal = CTVal->getSExtValue();
const int64_t FalseVal = CFVal->getSExtValue();
bool Swap = false;
// If both TVal and FVal are constants, see if FVal is the
// inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
// instead of a CSEL in that case.
if (TrueVal == ~FalseVal) {
Opcode = AArch64ISD::CSINV;
} else if (FalseVal > std::numeric_limits<int64_t>::min() &&
TrueVal == -FalseVal) {
Opcode = AArch64ISD::CSNEG;
} else if (TVal.getValueType() == MVT::i32) {
// If our operands are only 32-bit wide, make sure we use 32-bit
// arithmetic for the check whether we can use CSINC. This ensures that
// the addition in the check will wrap around properly in case there is
// an overflow (which would not be the case if we do the check with
// 64-bit arithmetic).
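// For example (illustrative): TVal == INT32_MIN and FVal == INT32_MAX
// satisfy TrueVal32 == FalseVal32 + 1 only under 32-bit wrap-around; the
// 64-bit check on the sign-extended values would miss it.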
const uint32_t TrueVal32 = CTVal->getZExtValue();
const uint32_t FalseVal32 = CFVal->getZExtValue();
if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
Opcode = AArch64ISD::CSINC;
if (TrueVal32 > FalseVal32) {
Swap = true;
}
}
// 64-bit check whether we can use CSINC.
} else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
Opcode = AArch64ISD::CSINC;
if (TrueVal > FalseVal) {
Swap = true;
}
}
// Swap TVal and FVal if necessary.
if (Swap) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
if (Opcode != AArch64ISD::CSEL) {
// Drop FVal since we can get its value by simply inverting/negating
// TVal.
FVal = TVal;
}
}
// Avoid materializing a constant when possible by reusing a known value in
// a register. However, don't perform this optimization if the known value
// is one, zero or negative one in the case of a CSEL. We can always
// materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
// FVal, respectively.
ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
!RHSVal->isZero() && !RHSVal->isAllOnes()) {
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
// Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
// "a != C ? x : a" to avoid materializing C.
if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
TVal = LHS;
else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
FVal = LHS;
} else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
// Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
// avoid materializing C.
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
Opcode = AArch64ISD::CSINV;
TVal = LHS;
FVal = DAG.getConstant(0, dl, FVal.getValueType());
}
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
EVT VT = TVal.getValueType();
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two CSELs to implement.
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
if (DAG.getTarget().Options.UnsafeFPMath) {
// Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
// "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
if (RHSVal && RHSVal->isZero()) {
ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
TVal = LHS;
else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
CFVal && CFVal->isZero() &&
FVal.getValueType() == LHS.getValueType())
FVal = LHS;
}
}
// Emit first, and possibly only, CSEL.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
// If we need a second CSEL, emit it, using the output of the first as the
// RHS. We're effectively OR'ing the two CC's together.
if (CC2 != AArch64CC::AL) {
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
// Otherwise, return the output of the first CSEL.
return CS1;
}
SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
SelectionDAG &DAG) const {
EVT Ty = Op.getValueType();
auto Idx = Op.getConstantOperandAPInt(2);
int64_t IdxVal = Idx.getSExtValue();
assert(Ty.isScalableVector() &&
"Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
// We can use the splice instruction for certain index values where we are
// able to efficiently generate the correct predicate. The index will be
// inverted and used directly as the input to the ptrue instruction, i.e.
// -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the
// splice predicate. However, we can only do this if we can guarantee that
// there are enough elements in the vector, hence we check that the absolute
// index is no larger than the minimum number of elements.
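// For example (illustrative): splice(A, B, -2) on nxv2i64 becomes
//   ptrue  p0.d, vl2
//   rev    p0.d, p0.d
//   splice z0.d, p0, z0.d, z1.d
// taking the last two elements of A followed by leading elements of B.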
Optional<unsigned> PredPattern;
if (Ty.isScalableVector() && IdxVal < 0 &&
(PredPattern = getSVEPredPatternFromNumElements(std::abs(IdxVal))) !=
None) {
SDLoc DL(Op);
// Create a predicate where all but the last -IdxVal elements are false.
EVT PredVT = Ty.changeVectorElementType(MVT::i1);
SDValue Pred = getPTrue(DAG, DL, PredVT, *PredPattern);
Pred = DAG.getNode(ISD::VECTOR_REVERSE, DL, PredVT, Pred);
// Now splice the two inputs together using the predicate.
return DAG.getNode(AArch64ISD::SPLICE, DL, Ty, Pred, Op.getOperand(0),
Op.getOperand(1));
}
// This will select to an EXT instruction, which has a maximum immediate
// value of 255 (a byte offset), hence offsets of up to 2048 bits are the
// most we can lower this way.
if (IdxVal >= 0 &&
IdxVal < int64_t(2048 / Ty.getVectorElementType().getSizeInBits()))
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue TVal = Op.getOperand(2);
SDValue FVal = Op.getOperand(3);
SDLoc DL(Op);
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SelectionDAG &DAG) const {
SDValue CCVal = Op->getOperand(0);
SDValue TVal = Op->getOperand(1);
SDValue FVal = Op->getOperand(2);
SDLoc DL(Op);
EVT Ty = Op.getValueType();
if (Ty.isScalableVector()) {
SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
if (useSVEForFixedLengthVectorVT(Ty)) {
// FIXME: Ideally this would be the same as above using i1 types, however
// for the moment we can't deal with fixed i1 vector types properly, so
// instead extend the predicate to a result type sized integer vector.
MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
// instruction.
if (ISD::isOverflowIntrOpRes(CCVal)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
return SDValue();
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
// Lower it the same way as we would lower a SELECT_CC node.
ISD::CondCode CC;
SDValue LHS, RHS;
if (CCVal.getOpcode() == ISD::SETCC) {
LHS = CCVal.getOperand(0);
RHS = CCVal.getOperand(1);
CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
} else {
LHS = CCVal;
RHS = DAG.getConstant(0, DL, CCVal.getValueType());
CC = ISD::SETNE;
}
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries are PC-relative offsets, so no additional tweaking
// is necessary here. Just get the address of the jump table.
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(JT, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(JT, DAG);
}
return getAddr(JT, DAG);
}
SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries are PC-relative offsets, so no additional tweaking
// is necessary here. Just get the address of the jump table.
SDLoc DL(Op);
SDValue JT = Op.getOperand(1);
SDValue Entry = Op.getOperand(2);
int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
SDValue(Dest, 0));
}
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
// Use the GOT for the large code model on iOS.
if (Subtarget->isTargetMachO()) {
return getGOT(CP, DAG);
}
return getAddrLarge(CP, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(CP, DAG);
} else {
return getAddr(CP, DAG);
}
}
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(BA, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(BA, DAG);
}
return getAddr(BA, DAG);
}
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
? FuncInfo->getVarArgsGPRIndex()
: FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SelectionDAG &DAG) const {
// The layout of the va_list struct is specified in the AArch64 Procedure Call
// Standard, section B.3.
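// For reference (illustrative), the struct being initialized is:
//   struct va_list {
//     void *__stack;   // offset 0
//     void *__gr_top;  // offset 8  (4 on ILP32)
//     void *__vr_top;  // offset 16 (8 on ILP32)
//     int __gr_offs;   // offset 24 (12 on ILP32)
//     int __vr_offs;   // offset 28 (16 on ILP32)
//   };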
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue VAList = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
unsigned Offset = 0;
SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
MachinePointerInfo(SV), Align(PtrSize)));
// void *__gr_top at offset 8 (4 on ILP32)
Offset += PtrSize;
int GPRSize = FuncInfo->getVarArgsGPRSize();
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
DAG.getConstant(GPRSize, DL, PtrVT));
GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, Offset),
Align(PtrSize)));
}
// void *__vr_top at offset 16 (8 on ILP32)
Offset += PtrSize;
int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
DAG.getConstant(FPRSize, DL, PtrVT));
VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, Offset),
Align(PtrSize)));
}
// int __gr_offs at offset 24 (12 on ILP32)
Offset += PtrSize;
SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
MemOps.push_back(
DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
// int __vr_offs at offset 28 (16 on ILP32)
Offset += 4;
SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
MemOps.push_back(
DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
return LowerWin64_VASTART(Op, DAG);
else if (Subtarget->isTargetDarwin())
return LowerDarwin_VASTART(Op, DAG);
else
return LowerAAPCS_VASTART(Op, DAG);
}
SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
SelectionDAG &DAG) const {
// AAPCS has three pointers and two ints (= 32 bytes); Darwin has a single
// pointer.
SDLoc DL(Op);
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
unsigned VaListSize =
(Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
? PtrSize
: Subtarget->isTargetILP32() ? 20 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(VaListSize, DL, MVT::i32),
Align(PtrSize), false, false, false,
MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() &&
"automatic va_arg instruction only works on Darwin");
const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
MaybeAlign Align(Op.getConstantOperandVal(3));
unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrVT = getPointerTy(DAG.getDataLayout());
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
SDValue VAList =
DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
Chain = VAList.getValue(1);
VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
if (VT.isScalableVector())
report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
if (Align && *Align > MinSlotSize) {
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Align->value() - 1, DL, PtrVT));
VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
// Scalar integer and FP values smaller than 64 bits are implicitly extended
// up to 64 bits. At the very least, we have to increase the striding of the
// vaargs list to match this, and for FP values we need to introduce
// FP_ROUND nodes as well.
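// For example (illustrative): va_arg(ap, float) loads the 8-byte slot as an
// f64 and emits an FP_ROUND to f32, while va_arg(ap, int) still advances the
// list by the full slot size.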
if (VT.isInteger() && !VT.isVector())
ArgSize = std::max(ArgSize, MinSlotSize);
bool NeedFPTrunc = false;
if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
ArgSize = 8;
NeedFPTrunc = true;
}
// Increment the pointer, VAList, to the next vaarg
SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(ArgSize, DL, PtrVT));
VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
// Store the incremented VAList to the legalized pointer
SDValue APStore =
DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
// Load the actual argument out of the pointer VAList
if (NeedFPTrunc) {
// Load the value as an f64.
SDValue WideFP =
DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
// Round the value down to an f32.
SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
DAG.getIntPtrConstant(1, DL));
SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
// Merge the rounded value with the chain output of the load.
return DAG.getMergeValues(Ops, DL);
}
return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
}
SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
while (Depth--)
FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
if (Subtarget->isTargetILP32())
FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
DAG.getValueType(VT));
return FrameAddr;
}
SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
EVT VT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
int FI = MFI.CreateFixedObject(4, 0, false);
return DAG.getFrameIndex(FI, VT);
}
#define GET_REGISTER_MATCHER
#include "AArch64GenAsmMatcher.inc"
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register AArch64TargetLowering::
getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
Register Reg = MatchRegisterName(RegName);
if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
if (!Subtarget->isXRegisterReserved(DwarfRegNum))
Reg = 0;
}
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
+ StringRef(RegName) + "\"."));
}
SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
}
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue ReturnAddress;
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
ReturnAddress = DAG.getLoad(
VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
} else {
// Return LR, which contains the return address. Mark it an implicit
// live-in.
Register Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
// The XPACLRI instruction assembles to a hint-space instruction before
// Armv8.3-A, so it can safely be used on any pre-Armv8.3-A architecture.
// On Armv8.3-A and onwards XPACI is available, so use that instead.
SDNode *St;
if (Subtarget->hasPAuth()) {
St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
} else {
// XPACLRI operates on LR therefore we must move the operand accordingly.
SDValue Chain =
DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
}
return SDValue(St, 0);
}
/// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which return two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
SelectionDAG &DAG) const {
SDValue Lo, Hi;
expandShiftParts(Op.getNode(), Lo, Hi, DAG);
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}
bool AArch64TargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// Offsets are folded in the DAG combine rather than here so that we can
// intelligently choose an offset based on the uses.
return false;
}
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool OptForSize) const {
bool IsLegal = false;
// We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases,
// and for the 16-bit case when the target has full fp16 support.
// FIXME: We should be able to handle f128 as well with a clever lowering.
const APInt ImmInt = Imm.bitcastToAPInt();
if (VT == MVT::f64)
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f16 && Subtarget->hasFullFP16())
IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
// TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
// generate that fmov.
// If we cannot materialize the value in an fmov immediate field, check
// whether it can be encoded as the immediate operand of a logical instruction.
// The immediate value will be created with either MOVZ, MOVN, or ORR.
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
// The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
// however the mov+fmov sequence is always better because of the reduced
// cache pressure. The timings are still the same if you consider
// movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
// movw+movk is fused). So we limit the expansion to at most 2 instructions
// (more when the target can fuse literals).
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
Insn);
unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
IsLegal = Insn.size() <= Limit;
}
LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
<< " imm value: "; Imm.dump(););
return IsLegal;
}
//===----------------------------------------------------------------------===//
// AArch64 Optimization Hooks
//===----------------------------------------------------------------------===//
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
SDValue Operand, SelectionDAG &DAG,
int &ExtraSteps) {
EVT VT = Operand.getValueType();
if ((ST->hasNEON() &&
(VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
VT == MVT::v4f32)) ||
(ST->hasSVE() &&
(VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
// For the reciprocal estimates, convergence is quadratic, so the number
// of digits is doubled after each iteration. In ARMv8, the accuracy of
// the initial estimate is 2^-8. Thus the number of extra steps to refine
// the result for float (23 mantissa bits) is 2 and for double (52
// mantissa bits) is 3.
ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
SDValue
AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
SDValue
AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
SelectionDAG &DAG) const {
return Op;
}
SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps,
bool &UseOneConst,
bool Reciprocal) const {
if (Enabled == ReciprocalEstimate::Enabled ||
(Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
DAG, ExtraSteps)) {
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
SDNodeFlags Flags;
Flags.setAllowReassociation(true);
// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
for (int i = ExtraSteps; i > 0; --i) {
SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
Flags);
Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
if (!Reciprocal)
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
ExtraSteps = 0;
return Estimate;
}
return SDValue();
}
SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const {
if (Enabled == ReciprocalEstimate::Enabled)
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
DAG, ExtraSteps)) {
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
SDNodeFlags Flags;
Flags.setAllowReassociation(true);
// Newton reciprocal iteration: E * (2 - X * E)
// AArch64 reciprocal iteration instruction: (2 - M * N)
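// E.g. refining 1/4 from the estimate e0 = 0.2: e1 = 0.2 * (2 - 4 * 0.2) =
// 0.24, e2 = 0.24 * (2 - 4 * 0.24) = 0.2496; the relative error squares on
// each step (0.2 -> 0.04 -> 0.0016).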
for (int i = ExtraSteps; i > 0; --i) {
SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
Estimate, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
ExtraSteps = 0;
return Estimate;
}
return SDValue();
}
//===----------------------------------------------------------------------===//
// AArch64 Inline Assembly Support
//===----------------------------------------------------------------------===//
// Table of Constraints
// TODO: This is the current set of constraints supported by ARM for the
// compiler; not all of them may make sense.
//
// r - A general register
// w - An FP/SIMD register of some size in the range v0-v31
// x - An FP/SIMD register of some size in the range v0-v15
// I - Constant that can be used with an ADD instruction
// J - Constant that can be used with a SUB instruction
// K - Constant that can be used with a 32-bit logical instruction
// L - Constant that can be used with a 64-bit logical instruction
// M - Constant that can be used as a 32-bit MOV immediate
// N - Constant that can be used as a 64-bit MOV immediate
// Q - A memory reference with base register and no offset
// S - A symbolic address
// Y - Floating point constant zero
// Z - Integer constant zero
//
// Note that general register operands will be output using their 64-bit x
// register name, whatever the size of the variable, unless the asm operand
// is prefixed by the %w modifier. Floating-point and SIMD register operands
// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
// %q modifier.
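// A hypothetical use of these constraints from C (illustrative only):
//   int r;
//   asm("add %w0, %w1, %2" : "=r"(r) : "r"(x), "I"(42));
// Without the %w modifier the "r" operands would print as x-registers; "I"
// accepts 42 because it fits the 12-bit ADD immediate range.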
const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// At this point, we have to lower this constraint to something else, so we
// lower it to an "r" or "w". However, by doing this we will force the result
// to be in register, while the X constraint is much more permissive.
//
// Although we are correct (we are free to emit anything, without
// constraints), we might break use cases that would expect us to be more
// efficient and emit something else.
if (!Subtarget->hasFPARMv8())
return "r";
if (ConstraintVT.isFloatingPoint())
return "w";
if (ConstraintVT.isVector() &&
(ConstraintVT.getSizeInBits() == 64 ||
ConstraintVT.getSizeInBits() == 128))
return "w";
return "r";
}
enum PredicateConstraint {
Upl,
Upa,
Invalid
};
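// A hypothetical SVE inline-asm use (illustrative), with pg an svbool_t:
//   asm("ptrue %0.b" : "=Upa"(pg));
// "Upa" allows any of p0-p15 to be allocated, while "Upl" restricts the
// choice to the low predicate registers p0-p7.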
static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
PredicateConstraint P = PredicateConstraint::Invalid;
if (Constraint == "Upa")
P = PredicateConstraint::Upa;
if (Constraint == "Upl")
P = PredicateConstraint::Upl;
return P;
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
break;
case 'x':
case 'w':
case 'y':
return C_RegisterClass;
// An address with a single base register. Due to the way we
// currently handle addresses it is the same as 'r'.
case 'Q':
return C_Memory;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'Y':
case 'Z':
return C_Immediate;
case 'z':
case 'S': // A symbolic address
return C_Other;
}
} else if (parsePredicateConstraint(Constraint) !=
PredicateConstraint::Invalid)
return C_RegisterClass;
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
AArch64TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'x':
case 'w':
case 'y':
if (type->isFloatingPointTy() || type->isVectorTy())
weight = CW_Register;
break;
case 'z':
weight = CW_Constant;
break;
case 'U':
if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
weight = CW_Register;
break;
}
return weight;
}
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
if (VT.isScalableVector())
return std::make_pair(0U, nullptr);
if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
if (VT.getFixedSizeInBits() == 64)
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
case 'w': {
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector()) {
if (VT.getVectorElementType() != MVT::i1)
return std::make_pair(0U, &AArch64::ZPRRegClass);
return std::make_pair(0U, nullptr);
}
uint64_t VTSize = VT.getFixedSizeInBits();
if (VTSize == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
if (VTSize == 32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
if (VTSize == 64)
return std::make_pair(0U, &AArch64::FPR64RegClass);
if (VTSize == 128)
return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
}
// The instructions that this constraint is designed for can
// only take 128-bit registers so just use that regclass.
case 'x':
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector())
return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::FPR128_loRegClass);
break;
case 'y':
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector())
return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
break;
}
} else {
PredicateConstraint PC = parsePredicateConstraint(Constraint);
if (PC != PredicateConstraint::Invalid) {
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
return std::make_pair(0U, nullptr);
bool restricted = (PC == PredicateConstraint::Upl);
return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
: std::make_pair(0U, &AArch64::PPRRegClass);
}
}
if (StringRef("{cc}").equals_insensitive(Constraint))
return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass *> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
unsigned Size = Constraint.size();
if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
int RegNo;
bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
if (!Failed && RegNo >= 0 && RegNo <= 31) {
// v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
// By default we'll emit v0-v31 for this unless there's a modifier where
// we'll emit the correct register as well.
if (VT != MVT::Other && VT.getSizeInBits() == 64) {
Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
Res.second = &AArch64::FPR64RegClass;
} else {
Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
Res.second = &AArch64::FPR128RegClass;
}
}
}
}
if (Res.second && !Subtarget->hasFPARMv8() &&
!AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
!AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
return std::make_pair(0U, nullptr);
return Res;
}
EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
llvm::Type *Ty,
bool AllowUnknown) const {
if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
return EVT(MVT::i64x8);
return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void AArch64TargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Currently only support length 1 constraints.
if (Constraint.length() != 1)
return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default:
break;
// This set of constraints deals with valid constants for various instructions.
// Validate and return a target constant for them if we can.
case 'z': {
// 'z' maps to xzr or wzr so it needs an input of 0.
if (!isNullConstant(Op))
return;
if (Op.getValueType() == MVT::i64)
Result = DAG.getRegister(AArch64::XZR, MVT::i64);
else
Result = DAG.getRegister(AArch64::WZR, MVT::i32);
break;
}
case 'S': {
// An absolute symbolic address or label reference.
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
GA->getValueType(0));
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(Op)) {
Result =
DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
} else
return;
break;
}
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return;
// Grab the value and do some validation.
uint64_t CVal = C->getZExtValue();
switch (ConstraintLetter) {
// The I constraint applies only to simple ADD or SUB immediate operands:
// i.e. 0 to 4095 with optional shift by 12
// The J constraint applies only to ADD or SUB immediates that would be
// valid when negated, i.e. if [an add pattern] were to be output as a SUB
// instruction [or vice versa], in other words -1 to -4095 with optional
// left shift by 12.
case 'I':
if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
break;
return;
case 'J': {
uint64_t NVal = -C->getSExtValue();
if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
CVal = C->getSExtValue();
break;
}
return;
}
// The K and L constraints apply *only* to logical immediates, including
// what used to be the MOVI alias for ORR (though the MOVI alias has now
// been removed and MOV should be used). So these constraints have to
// distinguish between bit patterns that are valid 32-bit or 64-bit
// "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
// not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
// versa.
case 'K':
if (AArch64_AM::isLogicalImmediate(CVal, 32))
break;
return;
case 'L':
if (AArch64_AM::isLogicalImmediate(CVal, 64))
break;
return;
// The M and N constraints are a superset of K and L respectively, for use
// with the MOV (immediate) alias. As well as the logical immediates they
// also match 32 or 64-bit immediates that can be loaded either using a
// *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
// (M) or 64-bit 0x1234000000000000 (N) etc.
// As a note, some of this code is liberally stolen from the asm parser.
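// For example: 0x12340000 satisfies M as a single MOVZ with shift 16, and
// 0xffffedca satisfies M via MOVN because ~0xffffedca == 0x00001235 fits in
// 16 bits; 0x0001000100010001 fails M (not a 32-bit value) but satisfies N
// as a 64-bit logical immediate.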
case 'M': {
if (!isUInt<32>(CVal))
return;
if (AArch64_AM::isLogicalImmediate(CVal, 32))
break;
if ((CVal & 0xFFFF) == CVal)
break;
if ((CVal & 0xFFFF0000ULL) == CVal)
break;
uint64_t NCVal = ~(uint32_t)CVal;
if ((NCVal & 0xFFFFULL) == NCVal)
break;
if ((NCVal & 0xFFFF0000ULL) == NCVal)
break;
return;
}
case 'N': {
if (AArch64_AM::isLogicalImmediate(CVal, 64))
break;
if ((CVal & 0xFFFFULL) == CVal)
break;
if ((CVal & 0xFFFF0000ULL) == CVal)
break;
if ((CVal & 0xFFFF00000000ULL) == CVal)
break;
if ((CVal & 0xFFFF000000000000ULL) == CVal)
break;
uint64_t NCVal = ~CVal;
if ((NCVal & 0xFFFFULL) == NCVal)
break;
if ((NCVal & 0xFFFF0000ULL) == NCVal)
break;
if ((NCVal & 0xFFFF00000000ULL) == NCVal)
break;
if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
break;
return;
}
default:
return;
}
// All assembler immediates are 64-bit integers.
Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
break;
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
//===----------------------------------------------------------------------===//
// AArch64 Advanced SIMD Support
//===----------------------------------------------------------------------===//
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
EVT VT = V64Reg.getValueType();
unsigned NarrowSize = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType().getSimpleVT();
MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
SDLoc DL(V64Reg);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
V64Reg, DAG.getConstant(0, DL, MVT::i64));
}
/// getExtFactor - Determine the adjustment factor for the position when
/// generating an "extract from vector registers" instruction.
static unsigned getExtFactor(SDValue &V) {
EVT EltType = V.getValueType().getVectorElementType();
return EltType.getSizeInBits() / 8;
}
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
EVT VT = V128Reg.getValueType();
unsigned WideSize = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType().getSimpleVT();
MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
SDLoc DL(V128Reg);
return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
}
// Gather data to see if the operation can be modelled as a
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
SDLoc dl(Op);
EVT VT = Op.getValueType();
assert(!VT.isScalableVector() &&
"Scalable vectors cannot be used with ISD::BUILD_VECTOR");
unsigned NumElts = VT.getVectorNumElements();
struct ShuffleSourceInfo {
SDValue Vec;
unsigned MinElt;
unsigned MaxElt;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
// ShuffleVec will be some sliding window into the original Vec.
SDValue ShuffleVec;
// Code should guarantee that element i in Vec starts at element "WindowBase
// + i * WindowScale in ShuffleVec".
int WindowBase;
int WindowScale;
ShuffleSourceInfo(SDValue Vec)
: Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
// node.
SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(V.getOperand(1)) ||
V.getOperand(0).getValueType().isScalableVector()) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: "
"a shuffle can only come from building a vector from "
"various elements of other fixed-width vectors, provided "
"their indices are constant\n");
return SDValue();
}
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
auto Source = find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
// Update the minimum and maximum lane number seen.
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
Source->MinElt = std::min(Source->MinElt, EltNo);
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
// If we have 3 or 4 sources, try to generate a TBL, which will at least be
// better than moving to/from gpr registers for larger vectors.
if ((Sources.size() == 3 || Sources.size() == 4) && NumElts > 4) {
// Construct a mask for the tbl. We may need to adjust the index for types
// larger than i8.
SmallVector<unsigned, 16> Mask;
unsigned OutputFactor = VT.getScalarSizeInBits() / 8;
for (unsigned I = 0; I < NumElts; ++I) {
SDValue V = Op.getOperand(I);
if (V.isUndef()) {
for (unsigned OF = 0; OF < OutputFactor; OF++)
Mask.push_back(-1);
continue;
}
// Set the Mask lanes adjusted for the size of the input and output
// lanes. The Mask is always i8, so it will set OutputFactor lanes per
// output element, adjusted in their positions per input and output types.
unsigned Lane = V.getConstantOperandVal(1);
for (unsigned S = 0; S < Sources.size(); S++) {
if (V.getOperand(0) == Sources[S].Vec) {
unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
unsigned InputBase = 16 * S + Lane * InputSize / 8;
for (unsigned OF = 0; OF < OutputFactor; OF++)
Mask.push_back(InputBase + OF);
break;
}
}
}
// Construct the tbl3/tbl4 out of an intrinsic, the sources converted to
// v16i8, and the TBLMask
SmallVector<SDValue, 16> TBLOperands;
TBLOperands.push_back(DAG.getConstant(Sources.size() == 3
? Intrinsic::aarch64_neon_tbl3
: Intrinsic::aarch64_neon_tbl4,
dl, MVT::i32));
for (unsigned i = 0; i < Sources.size(); i++) {
SDValue Src = Sources[i].Vec;
EVT SrcVT = Src.getValueType();
Src = DAG.getBitcast(SrcVT.is64BitVector() ? MVT::v8i8 : MVT::v16i8, Src);
assert((SrcVT.is64BitVector() || SrcVT.is128BitVector()) &&
"Expected a legally typed vector");
if (SrcVT.is64BitVector())
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, Src,
DAG.getUNDEF(MVT::v8i8));
TBLOperands.push_back(Src);
}
SmallVector<SDValue, 16> TBLMask;
for (unsigned i = 0; i < Mask.size(); i++)
TBLMask.push_back(DAG.getConstant(Mask[i], dl, MVT::i32));
assert((Mask.size() == 8 || Mask.size() == 16) &&
"Expected a v8i8 or v16i8 Mask");
TBLOperands.push_back(
DAG.getBuildVector(Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
SDValue Shuffle =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
return DAG.getBitcast(VT, Shuffle);
}
if (Sources.size() > 2) {
LLVM_DEBUG(dbgs() << "Reshuffle failed: currently only do something "
<< "sensible when at most two source vectors are "
<< "involved\n");
return SDValue();
}
// Find out the smallest element size among result and two sources, and use
// it as element size to build the shuffle_vector.
EVT SmallestEltTy = VT.getVectorElementType();
for (auto &Source : Sources) {
EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
if (SrcEltTy.bitsLT(SmallestEltTy)) {
SmallestEltTy = SrcEltTy;
}
}
unsigned ResMultiplier =
VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
uint64_t VTSize = VT.getFixedSizeInBits();
NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
// If the source vector is too wide or too narrow, we may nevertheless be able
// to construct a compatible shuffle either by concatenating it with UNDEF or
// extracting a suitable range of elements.
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
TypeSize SrcVTSize = SrcVT.getSizeInBits();
if (SrcVTSize == TypeSize::Fixed(VTSize))
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
if (SrcVTSize.getFixedValue() < VTSize) {
assert(2 * SrcVTSize == VTSize);
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
Src.ShuffleVec =
DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
}
if (SrcVTSize.getFixedValue() != 2 * VTSize) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: result vector too small to extract\n");
return SDValue();
}
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
return SDValue();
}
if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i64));
} else {
// An actual VEXT is needed
SDValue VEXTSrc1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i64));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
if (!SrcVT.is64BitVector()) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
"for SVE vectors.");
return SDValue();
}
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
VEXTSrc2,
DAG.getConstant(Imm, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
// Another possible incompatibility occurs from the vector element types. We
// can fix this by bitcasting the source vectors to the same type we intend
// for the shuffle.
for (auto &Src : Sources) {
EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
Src.WindowScale =
SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
// Final check before we try to actually produce a shuffle.
LLVM_DEBUG(for (auto Src
: Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT););
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
if (Entry.isUndef())
continue;
auto Src = find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
// starting at the appropriate offset.
int *LaneMask = &Mask[i * ResMultiplier];
int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
ExtractBase += NumElts * (Src - Sources.begin());
for (int j = 0; j < LanesDefined; ++j)
LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
return SDValue();
}
SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
dbgs() << "Reshuffle, creating node: "; V.dump(););
return V;
}
// Check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are the same.
static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
// Assume that the first shuffle index is not UNDEF. Fail if it is.
if (M[0] < 0)
return false;
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
// element. The other shuffle indices must be the successive elements after
// the first one.
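// E.g. for v8i8 the mask <3, 4, 5, 6, 7, 0, 1, 2> is accepted with Imm = 3;
// undef entries may appear anywhere in the sequence.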
unsigned ExpectedElt = Imm;
for (unsigned i = 1; i < NumElts; ++i) {
// Increment the expected index. If it wraps around, just follow it
// back to index zero and keep going.
++ExpectedElt;
if (ExpectedElt == NumElts)
ExpectedElt = 0;
if (M[i] < 0)
continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
return true;
}
// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
// v4i32s. This is really a truncate, which we can construct out of (legal)
// concats and truncate nodes.
static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG) {
if (V.getValueType() != MVT::v16i8)
return SDValue();
assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
for (unsigned X = 0; X < 4; X++) {
// Check the first item in each group is an extract from lane 0 of a v4i32
// or v4i16.
SDValue BaseExt = V.getOperand(X * 4);
if (BaseExt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
(BaseExt.getOperand(0).getValueType() != MVT::v4i16 &&
BaseExt.getOperand(0).getValueType() != MVT::v4i32) ||
!isa<ConstantSDNode>(BaseExt.getOperand(1)) ||
BaseExt.getConstantOperandVal(1) != 0)
return SDValue();
SDValue Base = BaseExt.getOperand(0);
// And check the other items are extracts from the same vector.
for (unsigned Y = 1; Y < 4; Y++) {
SDValue Ext = V.getOperand(X * 4 + Y);
if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Ext.getOperand(0) != Base ||
!isa<ConstantSDNode>(Ext.getOperand(1)) ||
Ext.getConstantOperandVal(1) != Y)
return SDValue();
}
}
// Turn the buildvector into a series of truncates and concats, which will
// become uzp1's. Any v4i32s we found get truncated to v4i16, which are
// concatenated in pairs to produce two v8i16s. These are both truncated to
// v8i8 and concatenated together.
SDLoc DL(V);
SDValue Trunc[4] = {
V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
for (int I = 0; I < 4; I++)
if (Trunc[I].getValueType() == MVT::v4i32)
Trunc[I] = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i16, Trunc[I]);
SDValue Concat0 =
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[0], Trunc[1]);
SDValue Concat1 =
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[2], Trunc[3]);
SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat0);
SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat1);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Trunc0, Trunc1);
}
/// Check if a vector shuffle corresponds to a DUP instruction with a larger
/// element width than the vector lane type. If that is the case, the function
/// returns true and writes the value of the DUP instruction lane operand into
/// DupLaneOp.
static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
unsigned &DupLaneOp) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for wide DUP are: 16, 32, 64");
if (BlockSize <= VT.getScalarSizeInBits())
return false;
if (BlockSize % VT.getScalarSizeInBits() != 0)
return false;
if (VT.getSizeInBits() % BlockSize != 0)
return false;
size_t SingleVecNumElements = VT.getVectorNumElements();
size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
size_t NumBlocks = VT.getSizeInBits() / BlockSize;
// We are looking for masks like
// [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
// might be replaced by 'undefined'. BlockIndices will eventually contain
// lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
// for the above examples)
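// E.g. for v8i16 with BlockSize == 32, the mask <2, 3, 2, 3, 2, 3, 2, 3>
// duplicates the 32-bit block spanning lanes {2,3}, giving DupLaneOp == 1.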
SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
for (size_t I = 0; I < NumEltsPerBlock; I++) {
int Elt = M[BlockIndex * NumEltsPerBlock + I];
if (Elt < 0)
continue;
// For now we don't support shuffles that use the second operand
if ((unsigned)Elt >= SingleVecNumElements)
return false;
if (BlockElts[I] < 0)
BlockElts[I] = Elt;
else if (BlockElts[I] != Elt)
return false;
}
// We found a candidate block (possibly with some undefs). It must be a
// sequence of consecutive integers starting with a value divisible by
// NumEltsPerBlock, with some values possibly replaced by undefs.
// Find first non-undef element
auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
assert(FirstRealEltIter != BlockElts.end() &&
"Shuffle with all-undefs must have been caught by previous cases, "
"e.g. isSplat()");
if (FirstRealEltIter == BlockElts.end()) {
DupLaneOp = 0;
return true;
}
// Index of FirstRealElt in BlockElts
size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
if ((unsigned)*FirstRealEltIter < FirstRealIndex)
return false;
// BlockElts[0] must have the following value if it isn't undef:
size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
// Check the first element
if (Elt0 % NumEltsPerBlock != 0)
return false;
// Check that the sequence indeed consists of consecutive integers (modulo
// undefs)
for (size_t I = 0; I < NumEltsPerBlock; I++)
if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
return false;
DupLaneOp = Elt0 / NumEltsPerBlock;
return true;
}
// Check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are different.
static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
unsigned &Imm) {
// Look for the first non-undef element.
const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
// Benefit from APInt to handle overflow when calculating the expected element.
unsigned NumElts = VT.getVectorNumElements();
unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
// The following shuffle indices must be the successive elements after the
// first real element.
const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
[&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
if (FirstWrongElt != M.end())
return false;
// The index of an EXT is the first element if it is not UNDEF.
// Watch out for the beginning UNDEFs. The EXT index should be the expected
// value of the first element. E.g.
// <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
// <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
// ExpectedElt is the last mask index plus 1.
Imm = ExpectedElt.getZExtValue();
// There are two different cases that require reversing the input vectors.
// For example, for vector <4 x i32> we have the following cases,
// Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
// Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
// For both cases, we finally use mask <5, 6, 7, 0>, which requires
// to reverse two input vectors.
if (Imm < NumElts)
ReverseEXT = true;
else
Imm -= NumElts;
return true;
}
/// isREVMask - Check if a vector shuffle corresponds to a REV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
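/// E.g. for v8i8 with BlockSize == 32 the expected mask is
/// <3, 2, 1, 0, 7, 6, 5, 4>.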
static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for REV are: 16, 32, 64");
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
return false;
}
return true;
}
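// E.g. for v4i32 a ZIP1 mask is <0, 4, 1, 5> (WhichResult == 0) and a ZIP2
// mask is <2, 6, 3, 7> (WhichResult == 1).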
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
return false;
Idx += 1;
}
return true;
}
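// E.g. for v4i32 a UZP1 mask is <0, 2, 4, 6> (WhichResult == 0) and a UZP2
// mask is <1, 3, 5, 7> (WhichResult == 1).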
static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != 2 * i + WhichResult)
return false;
}
return true;
}
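// E.g. for v4i32 a TRN1 mask is <0, 4, 2, 6> (WhichResult == 0) and a TRN2
// mask is <1, 5, 3, 7> (WhichResult == 1).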
static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
return false;
}
return true;
}
/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
return false;
Idx += 1;
}
return true;
}
/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned Half = VT.getVectorNumElements() / 2;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
int MIdx = M[i + j * Half];
if (MIdx >= 0 && (unsigned)MIdx != Idx)
return false;
Idx += 2;
}
}
return true;
}
/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
return false;
}
return true;
}
static bool isINSMask(ArrayRef<int> M, int NumInputElements,
bool &DstIsLeft, int &Anomaly) {
if (M.size() != static_cast<size_t>(NumInputElements))
return false;
int NumLHSMatch = 0, NumRHSMatch = 0;
int LastLHSMismatch = -1, LastRHSMismatch = -1;
for (int i = 0; i < NumInputElements; ++i) {
if (M[i] == -1) {
++NumLHSMatch;
++NumRHSMatch;
continue;
}
if (M[i] == i)
++NumLHSMatch;
else
LastLHSMismatch = i;
if (M[i] == i + NumInputElements)
++NumRHSMatch;
else
LastRHSMismatch = i;
}
if (NumLHSMatch == NumInputElements - 1) {
DstIsLeft = true;
Anomaly = LastLHSMismatch;
return true;
} else if (NumRHSMatch == NumInputElements - 1) {
DstIsLeft = false;
Anomaly = LastRHSMismatch;
return true;
}
return false;
}
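// E.g. for v4i32 with SplitLHS, <0, 1, 4, 5> concatenates the low halves of
// the two inputs.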
static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
if (VT.getSizeInBits() != 128)
return false;
unsigned NumElts = VT.getVectorNumElements();
for (int I = 0, E = NumElts / 2; I != E; I++) {
if (Mask[I] != I)
return false;
}
int Offset = NumElts / 2;
for (int I = NumElts / 2, E = NumElts; I != E; I++) {
if (Mask[I] != I + SplitLHS * Offset)
return false;
}
return true;
}
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue V0 = Op.getOperand(0);
SDValue V1 = Op.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
VT.getVectorElementType() != V1.getValueType().getVectorElementType())
return SDValue();
bool SplitV0 = V0.getValueSizeInBits() == 128;
if (!isConcatMask(Mask, VT, SplitV0))
return SDValue();
EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
if (SplitV0) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
DAG.getConstant(0, DL, MVT::i64));
}
if (V1.getValueSizeInBits() == 128) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
DAG.getConstant(0, DL, MVT::i64));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle. ID is the perfect-shuffle
/// ID, V1 and V2 are the original shuffle inputs, PFEntry is the perfect-shuffle
/// table entry, and LHS/RHS are the immediate inputs for this stage of the
/// shuffle.
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
SDValue V2, unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
enum {
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
OP_VREV,
OP_VDUP0,
OP_VDUP1,
OP_VDUP2,
OP_VDUP3,
OP_VEXT1,
OP_VEXT2,
OP_VEXT3,
OP_VUZPL, // VUZP, left result
OP_VUZPR, // VUZP, right result
OP_VZIPL, // VZIP, left result
OP_VZIPR, // VZIP, right result
OP_VTRNL, // VTRN, left result
OP_VTRNR, // VTRN, right result
OP_MOVLANE // Move lane. RHSID is the lane to move into
};
if (OpNum == OP_COPY) {
if (LHSID == (1 * 9 + 2) * 9 + 3)
return LHS;
assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
return RHS;
}
if (OpNum == OP_MOVLANE) {
// Decompose a PerfectShuffle ID to get the Mask for lane Elt
auto getPFIDLane = [](unsigned ID, int Elt) -> int {
assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
Elt = 3 - Elt;
while (Elt > 0) {
ID /= 9;
Elt--;
}
return (ID % 9 == 8) ? -1 : ID % 9;
};
// For OP_MOVLANE shuffles, the RHSID represents the lane to move into. The
// lane to move from is taken from the PFID and always refers to one of the
// original vectors (V1 or V2).
SDValue OpLHS = GeneratePerfectShuffle(
LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
EVT VT = OpLHS.getValueType();
assert(RHSID < 8 && "Expected a lane index for RHSID!");
unsigned ExtLane = 0;
SDValue Input;
// OP_MOVLANE is either a D mov (if bit 0x4 is set) or an S mov. D movs
// convert into a higher type.
if (RHSID & 0x4) {
int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
if (MaskElt == -1)
MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
Input = MaskElt < 2 ? V1 : V2;
if (VT.getScalarSizeInBits() == 16) {
Input = DAG.getBitcast(MVT::v2f32, Input);
OpLHS = DAG.getBitcast(MVT::v2f32, OpLHS);
} else {
assert(VT.getScalarSizeInBits() == 32 &&
"Expected 16 or 32 bit shuffle elements");
Input = DAG.getBitcast(MVT::v2f64, Input);
OpLHS = DAG.getBitcast(MVT::v2f64, OpLHS);
}
} else {
int MaskElt = getPFIDLane(ID, RHSID);
assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
Input = MaskElt < 4 ? V1 : V2;
// Be careful about creating illegal types. Use f16 instead of i16.
if (VT == MVT::v4i16) {
Input = DAG.getBitcast(MVT::v4f16, Input);
OpLHS = DAG.getBitcast(MVT::v4f16, OpLHS);
}
}
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
Input.getValueType().getVectorElementType(),
Input, DAG.getVectorIdxConstant(ExtLane, dl));
SDValue Ins =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Input.getValueType(), OpLHS,
Ext, DAG.getVectorIdxConstant(RHSID & 0x3, dl));
return DAG.getBitcast(VT, Ins);
}
SDValue OpLHS, OpRHS;
OpLHS = GeneratePerfectShuffle(LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS,
RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS,
RHS, DAG, dl);
EVT VT = OpLHS.getValueType();
switch (OpNum) {
default:
llvm_unreachable("Unknown shuffle opcode!");
case OP_VREV:
// VREV divides the vector in half and swaps within the half.
if (VT.getVectorElementType() == MVT::i32 ||
VT.getVectorElementType() == MVT::f32)
return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
// vrev <4 x i16> -> REV32
if (VT.getVectorElementType() == MVT::i16 ||
VT.getVectorElementType() == MVT::f16 ||
VT.getVectorElementType() == MVT::bf16)
return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
// vrev <4 x i8> -> REV16
assert(VT.getVectorElementType() == MVT::i8);
return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
case OP_VDUP0:
case OP_VDUP1:
case OP_VDUP2:
case OP_VDUP3: {
EVT EltTy = VT.getVectorElementType();
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
else if (EltTy == MVT::i64 || EltTy == MVT::f64)
Opcode = AArch64ISD::DUPLANE64;
else
llvm_unreachable("Invalid vector element type?");
if (VT.getSizeInBits() == 64)
OpLHS = WidenVector(OpLHS, DAG);
SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
}
case OP_VEXT1:
case OP_VEXT2:
case OP_VEXT3: {
unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
DAG.getConstant(Imm, dl, MVT::i32));
}
case OP_VUZPL:
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VUZPR:
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VZIPL:
return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VZIPR:
return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VTRNL:
return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VTRNR:
return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
}
}
static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
SelectionDAG &DAG) {
// Check to see if we can use the TBL instruction.
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
EVT EltVT = Op.getValueType().getVectorElementType();
unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
bool Swap = false;
if (V1.isUndef() || isZerosVector(V1.getNode())) {
std::swap(V1, V2);
Swap = true;
}
// If the V2 source is undef or zero then we can use a tbl1, as tbl1 will fill
// out of range values with 0s. We do need to make sure that any out-of-range
// values are really out-of-range for a v16i8 vector.
bool IsUndefOrZero = V2.isUndef() || isZerosVector(V2.getNode());
MVT IndexVT = MVT::v8i8;
unsigned IndexLen = 8;
if (Op.getValueSizeInBits() == 128) {
IndexVT = MVT::v16i8;
IndexLen = 16;
}
SmallVector<SDValue, 8> TBLMask;
for (int Val : ShuffleMask) {
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
if (Swap)
Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
if (IsUndefOrZero && Offset >= IndexLen)
Offset = 255;
TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
}
}
SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
SDValue Shuffle;
if (IsUndefOrZero) {
if (IndexLen == 8)
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
if (IndexLen == 8) {
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
// FIXME: We cannot, for the moment, emit a TBL2 instruction because we
// cannot currently represent the register constraints on the input
// table registers.
// Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
// DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
// IndexLen));
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
V2Cst, DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
}
}
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
}
static unsigned getDUPLANEOp(EVT EltType) {
if (EltType == MVT::i8)
return AArch64ISD::DUPLANE8;
if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
return AArch64ISD::DUPLANE16;
if (EltType == MVT::i32 || EltType == MVT::f32)
return AArch64ISD::DUPLANE32;
if (EltType == MVT::i64 || EltType == MVT::f64)
return AArch64ISD::DUPLANE64;
llvm_unreachable("Invalid vector element type?");
}
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
unsigned Opcode, SelectionDAG &DAG) {
// Try to eliminate a bitcasted extract subvector before a DUPLANE.
auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
// Match: dup (bitcast (extract_subv X, C)), LaneC
if (BitCast.getOpcode() != ISD::BITCAST ||
BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
// The extract index must align in the destination type. That may not
// happen if the bitcast is from narrow to wide type.
SDValue Extract = BitCast.getOperand(0);
unsigned ExtIdx = Extract.getConstantOperandVal(1);
unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
if (ExtIdxInBits % CastedEltBitWidth != 0)
return false;
// Can't handle cases where vector size is not 128-bit
if (!Extract.getOperand(0).getValueType().is128BitVector())
return false;
// Update the lane value by offsetting with the scaled extract index.
LaneC += ExtIdxInBits / CastedEltBitWidth;
// Determine the casted vector type of the wide vector input.
// dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
// Examples:
// dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
// dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
unsigned SrcVecNumElts =
Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
SrcVecNumElts);
return true;
};
MVT CastVT;
if (getScaledOffsetDup(V, Lane, CastVT)) {
V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
} else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
V.getOperand(0).getValueType().is128BitVector()) {
// The lane is incremented by the index of the extract.
// Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
Lane += V.getConstantOperandVal(1);
V = V.getOperand(0);
} else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
// The lane is decremented if we are splatting from the 2nd operand.
// Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
Lane -= Idx * VT.getVectorNumElements() / 2;
V = WidenVector(V.getOperand(Idx), DAG);
} else if (VT.getSizeInBits() == 64) {
// Widen the operand to 128-bit register with undef.
V = WidenVector(V, DAG);
}
return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
}
// Return true if we can get a new shuffle mask by checking whether every pair
// of adjacent mask values in the parameter mask array is consecutive and
// starts at an even number.
static bool isWideTypeMask(ArrayRef<int> M, EVT VT,
SmallVectorImpl<int> &NewMask) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
NewMask.clear();
for (unsigned i = 0; i < NumElts; i += 2) {
int M0 = M[i];
int M1 = M[i + 1];
// If both elements are undef, new mask is undef too.
if (M0 == -1 && M1 == -1) {
NewMask.push_back(-1);
continue;
}
if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
NewMask.push_back(M1 / 2);
continue;
}
if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
NewMask.push_back(M0 / 2);
continue;
}
NewMask.clear();
return false;
}
assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
return true;
}
// Try to widen the element type to get a new mask value for a better
// permutation sequence, so that we can use NEON shuffle instructions, such as
// ZIP1/2, UZP1/2, TRN1/2, REV, INS, etc.
// For example:
// shufflevector <4 x i32> %a, <4 x i32> %b,
// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// Finally, we can get:
// mov v0.d[0], v1.d[1]
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ScalarVT = VT.getVectorElementType();
unsigned ElementSize = ScalarVT.getFixedSizeInBits();
SDValue V0 = Op.getOperand(0);
SDValue V1 = Op.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
// If we are combining adjacent elements, e.g. two i16s into one i32 or two
// i32s into one i64, we need to make sure the wider element type is legal.
// Thus ElementSize must be no larger than 32 bits, and the i1 type must also
// be excluded.
if (ElementSize > 32 || ElementSize == 1)
return SDValue();
SmallVector<int, 8> NewMask;
if (isWideTypeMask(Mask, VT, NewMask)) {
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(ElementSize * 2)
: MVT::getIntegerVT(ElementSize * 2);
MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
V0 = DAG.getBitcast(NewVT, V0);
V1 = DAG.getBitcast(NewVT, V1);
return DAG.getBitcast(VT,
DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
}
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
// during code selection. This is more efficient and avoids the possibility
// of inconsistencies between legalization and selection.
ArrayRef<int> ShuffleMask = SVN->getMask();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
assert(ShuffleMask.size() == VT.getVectorNumElements() &&
"Unexpected VECTOR_SHUFFLE mask size!");
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
// If this is an undef splat, generate it via "just" vdup, if possible.
if (Lane == -1)
Lane = 0;
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
V1.getOperand(0));
// Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
// constant. If so, we can just reference the lane's definition directly.
if (V1.getOpcode() == ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(V1.getOperand(Lane)))
return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
// Otherwise, duplicate from the lane of the input vector.
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
return constructDup(V1, Lane, dl, VT, Opcode, DAG);
}
// Check if the mask matches a DUP for a wider element
for (unsigned LaneSize : {64U, 32U, 16U}) {
unsigned Lane = 0;
if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
: LaneSize == 32 ? AArch64ISD::DUPLANE32
: AArch64ISD::DUPLANE16;
// Cast V1 to an integer vector with required lane size
MVT NewEltTy = MVT::getIntegerVT(LaneSize);
unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
V1 = DAG.getBitcast(NewVecTy, V1);
// Construct the DUP instruction
V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
// Cast back to the original type
return DAG.getBitcast(VT, V1);
}
}
if (isREVMask(ShuffleMask, VT, 64))
return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 32))
return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 16))
return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
(VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
ShuffleVectorInst::isReverseMask(ShuffleMask)) {
SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
DAG.getConstant(8, dl, MVT::i32));
}
bool ReverseEXT = false;
unsigned Imm;
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
if (ReverseEXT)
std::swap(V1, V2);
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
DAG.getConstant(Imm, dl, MVT::i32));
} else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isUZPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isTRNMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
return Concat;
bool DstIsLeft;
int Anomaly;
int NumInputElements = V1.getValueType().getVectorNumElements();
if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
SDValue DstVec = DstIsLeft ? V1 : V2;
SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
SDValue SrcVec = V1;
int SrcLane = ShuffleMask[Anomaly];
if (SrcLane >= NumInputElements) {
SrcVec = V2;
SrcLane -= VT.getVectorNumElements();
}
SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
EVT ScalarVT = VT.getVectorElementType();
if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
ScalarVT = MVT::i32;
return DAG.getNode(
ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
DstLaneV);
}
if (SDValue NewSD = tryWidenMaskForShuffle(Op, DAG))
return NewSD;
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = ShuffleMask[i];
}
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
PFIndexes[2] * 9 + PFIndexes[3];
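// Worked example (illustrative): the v4i16 mask <0,4,1,5> gives
// PFIndexes = {0,4,1,5} and PFTableIndex = 0*729 + 4*81 + 1*9 + 5 = 338.
// Undef lanes contribute the out-of-range digit 8 in this base-9 encoding.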
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
return GeneratePerfectShuffle(PFTableIndex, V1, V2, PFEntry, V1, V2, DAG,
dl);
}
return GenerateTBL(Op, ShuffleMask, DAG);
}
SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (useSVEForFixedLengthVectorVT(VT))
return LowerToScalableOp(Op, DAG);
assert(VT.isScalableVector() && VT.getVectorElementType() == MVT::i1 &&
"Unexpected vector type!");
// We can handle the constant cases during isel.
if (isa<ConstantSDNode>(Op.getOperand(0)))
return Op;
// There isn't a natural way to handle the general i1 case, so we use some
// trickery with whilelo.
SDLoc DL(Op);
SDValue SplatVal = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, MVT::i64);
SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, SplatVal,
DAG.getValueType(MVT::i1));
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
if (VT == MVT::nxv1i1)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::nxv1i1,
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::nxv2i1, ID,
Zero, SplatVal),
Zero);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ID, Zero, SplatVal);
}
SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (!isTypeLegal(VT) || !VT.isScalableVector())
return SDValue();
// Current lowering only supports the SVE-ACLE types.
if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
return SDValue();
// The DUPQ operation is independent of element type, so normalise to i64s.
SDValue Idx128 = Op.getOperand(2);
// DUPQ can be used when idx is in range.
auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
if (CIdx && (CIdx->getZExtValue() <= 3)) {
SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI);
}
SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
// The ACLE says this must produce the same result as:
// svtbl(data, svadd_x(svptrue_b64(),
// svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
// index * 2))
SDValue One = DAG.getConstant(1, DL, MVT::i64);
SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
// Create the vector 0,1,0,1,...
SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
// Create the vector idx64,idx64+1,idx64,idx64+1,...
SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
// Create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
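// Illustrative example: for index 1 the mask computed above is <2,3,2,3,...>,
// so the TBL picks 64-bit lanes 2 and 3 (the second 128-bit quadword) and
// replicates them across the whole vector.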
return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
}
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
for (unsigned i = 0; i < NumSplats; ++i) {
CnstBits <<= SplatBitSize;
UndefBits <<= SplatBitSize;
CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
}
return true;
}
return false;
}
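// Illustrative example: for a v4i32 vector whose every lane is 0x01020304,
// isConstantSplat reports SplatBitSize == 32 and the loop above replicates
// the pattern four times, leaving the 128-bit constant
// 0x01020304'01020304'01020304'01020304 in CnstBits.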
// Try 64-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 32-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
Shift = 0;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
Shift = 8;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
Shift = 16;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
Shift = 24;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov;
if (LHS)
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 16-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
Shift = 0;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
Shift = 8;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov;
if (LHS)
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 32-bit splatted SIMD immediate with shifted ones.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
SelectionDAG &DAG, const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
Shift = 264;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
Shift = 272;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 8-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try FP splatted SIMD immediate.
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
bool isWide = (VT.getSizeInBits() == 128);
MVT MovTy;
bool isAdvSIMDModImm = false;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
}
else if (isWide &&
(isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
MovTy = MVT::v2f64;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
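// Illustrative example: a v4f32 splat of 1.0f has a bit pattern accepted by
// isAdvSIMDModImmType11, so it lowers to a single FMOV with an 8-bit encoded
// immediate instead of a constant-pool load.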
// Specialized code to quickly find if PotentialBVec is a BuildVector that
// consists of only the same constant int value, returned in reference arg
// ConstVal
static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
uint64_t &ConstVal) {
BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
if (!Bvec)
return false;
ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
if (!FirstElt)
return false;
EVT VT = Bvec->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 1; i < NumElts; ++i)
if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
return false;
ConstVal = FirstElt->getZExtValue();
return true;
}
// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
// BUILD_VECTOR with constant element C1, C2 is a constant, and:
// - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
// - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
// The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (!VT.isVector())
return SDValue();
SDLoc DL(N);
SDValue And;
SDValue Shift;
SDValue FirstOp = N->getOperand(0);
unsigned FirstOpc = FirstOp.getOpcode();
SDValue SecondOp = N->getOperand(1);
unsigned SecondOpc = SecondOp.getOpcode();
// Is one of the operands an AND or a BICi? The AND may have been optimised to
// a BICi in order to use an immediate instead of a register.
// Is the other operand a shl or lshr? This will have been turned into:
// AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
(SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
And = FirstOp;
Shift = SecondOp;
} else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
(FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
And = SecondOp;
Shift = FirstOp;
} else
return SDValue();
bool IsAnd = And.getOpcode() == ISD::AND;
bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
// Is the shift amount constant?
ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
if (!C2node)
return SDValue();
uint64_t C1;
if (IsAnd) {
// Is the and mask vector all constant?
if (!isAllConstantBuildVector(And.getOperand(1), C1))
return SDValue();
} else {
// Reconstruct the corresponding AND immediate from the two BICi immediates.
ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
assert(C1nodeImm && C1nodeShift);
C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
}
// Is C1 == ~(Ones(ElemSizeInBits) << C2) or
// C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
// how much one can shift elements of a particular size?
uint64_t C2 = C2node->getZExtValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
if (C2 > ElemSizeInBits)
return SDValue();
APInt C1AsAPInt(ElemSizeInBits, C1);
APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
: APInt::getLowBitsSet(ElemSizeInBits, C2);
if (C1AsAPInt != RequiredC1)
return SDValue();
SDValue X = And.getOperand(0);
SDValue Y = Shift.getOperand(0);
unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
LLVM_DEBUG(N->dump(&DAG));
LLVM_DEBUG(dbgs() << "into: \n");
LLVM_DEBUG(ResultSLI->dump(&DAG));
++NumShiftInserts;
return ResultSLI;
}
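// Worked example (illustrative): for v4i32 with C2 == 8, the SLI case
// requires C1 == ~(0xFFFFFFFF << 8) == 0x000000FF. The AND keeps the low 8
// bits of X while (Y << 8) supplies the rest, which is exactly what
// "sli v0.4s, v1.4s, #8" computes in one instruction.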
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToScalableOp(Op, DAG);
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
return Res;
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
if (!BVN) {
// OR commutes, so try swapping the operands.
LHS = Op.getOperand(1);
BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
}
if (!BVN)
return Op;
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
DefBits, &LHS)))
return NewOp;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
UndefBits, &LHS)))
return NewOp;
}
// We can always fall back to a non-immediate OR.
return Op;
}
// Normalize the operands of BUILD_VECTOR. The value of constant operands will
// be truncated to fit element width.
static SDValue NormalizeBuildVector(SDValue Op,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT EltTy = VT.getVectorElementType();
if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
return Op;
SmallVector<SDValue, 16> Ops;
for (SDValue Lane : Op->ops()) {
// For integer vectors, type legalization would have promoted the
// operands already. Otherwise, if Op is a floating-point splat
// (with operands cast to integers), then the only possibilities
// are constants and UNDEFs.
if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
} else if (Lane.getNode()->isUndef()) {
Lane = DAG.getUNDEF(MVT::i32);
} else {
assert(Lane.getValueType() == MVT::i32 &&
"Unexpected BUILD_VECTOR operand type");
}
Ops.push_back(Lane);
}
return DAG.getBuildVector(VT, dl, Ops);
}
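// Illustrative example: in a v8i16 BUILD_VECTOR, a constant operand holding
// 0x12345 is truncated above to the 16-bit element width, producing the i32
// constant 0x2345 for that lane.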
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
DefBits = UndefBits;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
DefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
}
return SDValue();
}
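// Illustrative example: a v4i32 splat of 0xFFFF12FF matches none of the MOVI
// forms, but its complement 0x0000ED00 is 0xED shifted left by 8, so the
// "DefBits = ~DefBits" retry above lowers it to "mvni v0.4s, #0xed, lsl #8".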
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (useSVEForFixedLengthVectorVT(VT)) {
if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) {
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT);
SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second);
SDValue Seq = DAG.getNode(ISD::ADD, DL, ContainerVT, Start, Steps);
return convertFromScalableVector(DAG, Op.getValueType(), Seq);
}
// Revert to common legalisation for all other variants.
return SDValue();
}
// Try to build a simple constant vector.
Op = NormalizeBuildVector(Op, DAG);
if (VT.isInteger()) {
// Certain vector constants, used to express things like logical NOT and
// arithmetic NEG, are passed through unmodified. This allows special
// patterns for these operations to match, which will lower these constants
// to whatever is proven necessary.
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (BVN->isConstant())
if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
unsigned BitSize = VT.getVectorElementType().getSizeInBits();
APInt Val(BitSize,
Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
if (Val.isZero() || Val.isAllOnes())
return Op;
}
}
if (SDValue V = ConstantBuildVector(Op, DAG))
return V;
// Scan through the operands to find some interesting properties we can
// exploit:
// 1) If only one value is used, we can use a DUP, or
// 2) if only the low element is not undef, we can just insert that, or
// 3) if only one constant value is used (w/ some non-constant lanes),
// we can splat the constant value into the whole vector then fill
// in the non-constant lanes.
// 4) FIXME: If different constant values are used, but we can intelligently
// select the values we'll be overwriting for the non-constant
// lanes such that we can directly materialize the vector
// some other way (MOVI, e.g.), we can be sneaky.
// 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
SDLoc dl(Op);
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
bool usesOnlyOneConstantValue = true;
bool isConstant = true;
bool AllLanesExtractElt = true;
unsigned NumConstantLanes = 0;
unsigned NumDifferentLanes = 0;
unsigned NumUndefLanes = 0;
SDValue Value;
SDValue ConstantValue;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
AllLanesExtractElt = false;
if (V.isUndef()) {
++NumUndefLanes;
continue;
}
if (i > 0)
isOnlyLowElement = false;
if (!isIntOrFPConstant(V))
isConstant = false;
if (isIntOrFPConstant(V)) {
++NumConstantLanes;
if (!ConstantValue.getNode())
ConstantValue = V;
else if (ConstantValue != V)
usesOnlyOneConstantValue = false;
}
if (!Value.getNode())
Value = V;
else if (V != Value) {
usesOnlyOneValue = false;
++NumDifferentLanes;
}
}
if (!Value.getNode()) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
return DAG.getUNDEF(VT);
}
// Convert BUILD_VECTOR where all elements but the lowest are undef into
// SCALAR_TO_VECTOR, except for when we have a single-element constant vector
// as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
"SCALAR_TO_VECTOR node\n");
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
}
if (AllLanesExtractElt) {
SDNode *Vector = nullptr;
bool Even = false;
bool Odd = false;
// Check whether the extract elements match the Even pattern <0,2,4,...> or
// the Odd pattern <1,3,5,...>.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
const SDNode *N = V.getNode();
if (!isa<ConstantSDNode>(N->getOperand(1)))
break;
SDValue N0 = N->getOperand(0);
// All elements are extracted from the same vector.
if (!Vector) {
Vector = N0.getNode();
// Check that the type of EXTRACT_VECTOR_ELT matches the type of
// BUILD_VECTOR.
if (VT.getVectorElementType() !=
N0.getValueType().getVectorElementType())
break;
} else if (Vector != N0.getNode()) {
Odd = false;
Even = false;
break;
}
// Extracted values are either at Even indices <0,2,4,...> or at Odd
// indices <1,3,5,...>.
uint64_t Val = N->getConstantOperandVal(1);
if (Val == 2 * i) {
Even = true;
continue;
}
if (Val - 1 == 2 * i) {
Odd = true;
continue;
}
// Something does not match: abort.
Odd = false;
Even = false;
break;
}
if (Even || Odd) {
SDValue LHS =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
DAG.getConstant(0, dl, MVT::i64));
SDValue RHS =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
DAG.getConstant(NumElts, dl, MVT::i64));
if (Even && !Odd)
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
RHS);
if (Odd && !Even)
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
RHS);
}
}
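// Illustrative example: a v4i16 BUILD_VECTOR whose lanes are extracts at
// indices <0,2,4,6> from a single v8i16 source matches the Even pattern and
// becomes one UZP1 of the source's two halves.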
// Use DUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
if (usesOnlyOneValue) {
if (!isConstant) {
if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Value.getValueType() != VT) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
}
// This is actually a DUPLANExx operation, which keeps everything in vector form.
SDValue Lane = Value.getOperand(1);
Value = Value.getOperand(0);
if (Value.getValueSizeInBits() == 64) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
"widening it\n");
Value = WidenVector(Value, DAG);
}
unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
return DAG.getNode(Opcode, dl, VT, Value, Lane);
}
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
EVT EltTy = VT.getVectorElementType();
assert((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
EltTy == MVT::f64) && "Unsupported floating-point vector type");
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
"BITCASTS, and try again\n");
MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
Val.dump(););
Val = LowerBUILD_VECTOR(Val, DAG);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
}
// If we need to insert a small number of different non-constant elements and
// the vector width is sufficiently large, prefer using DUP with the common
// value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
// skip the constant lane handling below.
bool PreferDUPAndInsert =
!isConstant && NumDifferentLanes >= 1 &&
NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
NumDifferentLanes >= NumConstantLanes;
// If only one constant value was used, and it appeared in more than one lane,
// start by splatting that value, then replace the non-constant lanes. This
// is better than the default, which will perform a separate initialization
// for each lane.
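// Illustrative example: v4i32 <7, 7, 7, x> with a non-constant x splats the
// constant 7 first (a single MOVI) and then emits one INSERT_VECTOR_ELT for
// lane 3, rather than initialising all four lanes individually.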
if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
// Firstly, try to materialize the splat constant.
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
Val = ConstantBuildVector(Vec, DAG);
if (!Val) {
// Otherwise, materialize the constant and splat it.
Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
}
// Now insert the non-constant lanes.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
if (!isIntOrFPConstant(V))
// Note that type legalization likely mucked about with the VT of the
// source operand, so we may have to convert it here before inserting.
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
}
return Val;
}
// This will generate a load from the constant pool.
if (isConstant) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
"expansion\n");
return SDValue();
}
// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
// v4i32s. This is really a truncate, which we can construct out of (legal)
// concats and truncate nodes.
if (SDValue M = ReconstructTruncateFromBuildVector(Op, DAG))
return M;
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
if (SDValue shuffle = ReconstructShuffle(Op, DAG))
return shuffle;
}
if (PreferDUPAndInsert) {
// First, build a constant vector with the common element.
SmallVector<SDValue, 8> Ops(NumElts, Value);
SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
// Next, insert the elements that do not match the common value.
for (unsigned I = 0; I < NumElts; ++I)
if (Op.getOperand(I) != Value)
NewVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
return NewVector;
}
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
// scalar_to_vector for the elements followed by a shuffle (provided the
// shuffle is valid for the target) and materialization element by element
// on the stack followed by a load for everything else.
if (!isConstant && !usesOnlyOneValue) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
"of INSERT_VECTOR_ELT\n");
SDValue Vec = DAG.getUNDEF(VT);
SDValue Op0 = Op.getOperand(0);
unsigned i = 0;
// Use SCALAR_TO_VECTOR for lane zero to
// a) Avoid a RMW dependency on the full vector register, and
// b) Allow the register coalescer to fold away the copy if the
// value is already in an S or D register, and we're forced to emit an
// INSERT_SUBREG that we can't fold anywhere.
//
// We also allow types like i8 and i16 which are illegal scalar but legal
// vector element types. After type-legalization the inserted value is
// extended (i32) and it is safe to cast them to the vector type by ignoring
// the upper bits of the lowest lane (e.g. v8i8, v4i16).
if (!Op0.isUndef()) {
LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
++i;
}
LLVM_DEBUG(if (i < NumElts) dbgs()
<< "Creating nodes for the other vector elements:\n";);
for (; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
}
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
"better alternative\n");
return SDValue();
}
SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
assert(Op.getValueType().isScalableVector() &&
isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
if (isTypeLegal(Op.getOperand(0).getValueType())) {
unsigned NumOperands = Op->getNumOperands();
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
if (NumOperands == 2)
return Op;
// Concat each pair of subvectors and pack into the lower half of the array.
SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
while (ConcatOps.size() > 1) {
for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
SDValue V1 = ConcatOps[I];
SDValue V2 = ConcatOps[I + 1];
EVT SubVT = V1.getValueType();
EVT PairVT = SubVT.getDoubleNumVectorElementsVT(*DAG.getContext());
ConcatOps[I / 2] =
DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), PairVT, V1, V2);
}
ConcatOps.resize(ConcatOps.size() / 2);
}
return ConcatOps[0];
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthInsertVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
EVT VectorVT = getPromotedVTForPredicate(VT);
SDLoc DL(Op);
SDValue ExtendedVector =
DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
SDValue ExtendedValue =
DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
VectorVT.getScalarType().getSizeInBits() < 32
? MVT::i32
: VectorVT.getScalarType());
ExtendedVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
ExtendedValue, Op.getOperand(2));
return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
}
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform insertion by expanding the value
// to a V128 type and perform the insertion on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();
SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
Op.getOperand(1), Op.getOperand(2));
// Re-narrow the resultant vector.
return NarrowVector(Node, DAG);
}
SDValue
AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
// We can't directly extract from an SVE predicate; extend it first.
// (This isn't the only possible lowering, but it's straightforward.)
EVT VectorVT = getPromotedVTForPredicate(VT);
SDLoc DL(Op);
SDValue Extend =
DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
Extend, Op.getOperand(1));
return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
}
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthExtractVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform extraction by expanding the value
// to a V128 type and perform the extraction on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();
EVT ExtrTy = WideTy.getVectorElementType();
if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
ExtrTy = MVT::i32;
// For extractions, we just return the result directly.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isFixedLengthVector() &&
"Only cases that extract a fixed length vector are supported!");
EVT InVT = Op.getOperand(0).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Size = Op.getValueSizeInBits();
// If we don't have legal types yet, do nothing.
if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
return SDValue();
if (InVT.isScalableVector()) {
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG))
return Op;
return SDValue();
}
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
if (Idx == 0 && InVT.getSizeInBits() <= 128)
return Op;
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
InVT.getSizeInBits() == 128)
return Op;
if (useSVEForFixedLengthVectorVT(InVT)) {
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue NewInVec =
convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ContainerVT, NewInVec,
NewInVec, DAG.getConstant(Idx, DL, MVT::i64));
return convertFromScalableVector(DAG, Op.getValueType(), Splice);
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isScalableVector() &&
"Only expect to lower inserts into scalable vectors!");
EVT InVT = Op.getOperand(1).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
SDValue Vec0 = Op.getOperand(0);
SDValue Vec1 = Op.getOperand(1);
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (InVT.isScalableVector()) {
if (!isTypeLegal(VT))
return SDValue();
// Break down insert_subvector into simpler parts.
if (VT.getVectorElementType() == MVT::i1) {
unsigned NumElts = VT.getVectorMinNumElements();
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
DAG.getVectorIdxConstant(0, DL));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
DAG.getVectorIdxConstant(NumElts / 2, DL));
if (Idx < (NumElts / 2)) {
SDValue NewLo = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Lo, Vec1,
DAG.getVectorIdxConstant(Idx, DL));
return DAG.getNode(AArch64ISD::UZP1, DL, VT, NewLo, Hi);
} else {
SDValue NewHi =
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Hi, Vec1,
DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL));
return DAG.getNode(AArch64ISD::UZP1, DL, VT, Lo, NewHi);
}
}
// Ensure the subvector is half the size of the main vector.
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
// Here narrow and wide refer to the vector element types. After "casting"
// both vectors must have the same bit length, so because the subvector has
// fewer elements, those elements need to be bigger.
EVT NarrowVT = getPackedSVEVectorVT(VT.getVectorElementCount());
EVT WideVT = getPackedSVEVectorVT(InVT.getVectorElementCount());
// NOP cast operands to the largest legal vector of the same element count.
if (VT.isFloatingPoint()) {
Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
Vec1 = getSVESafeBitCast(WideVT, Vec1, DAG);
} else {
// Legal integer vectors are already their largest so Vec0 is fine as is.
Vec1 = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
}
// To replace the top/bottom half of vector V with vector SubV we widen the
// preserved half of V, concatenate this to SubV (the order depending on the
// half being replaced) and then narrow the result.
SDValue Narrow;
if (Idx == 0) {
SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, Vec1, HiVec0);
} else {
assert(Idx == InVT.getVectorMinNumElements() &&
"Invalid subvector index!");
SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, LoVec0, Vec1);
}
return getSVESafeBitCast(VT, Narrow, DAG);
}
if (Idx == 0 && isPackedVectorType(VT, DAG)) {
// This will be matched by custom code during ISelDAGToDAG.
if (Vec0.isUndef())
return Op;
Optional<unsigned> PredPattern =
getSVEPredPatternFromNumElements(InVT.getVectorNumElements());
auto PredTy = VT.changeVectorElementType(MVT::i1);
SDValue PTrue = getPTrue(DAG, DL, PredTy, *PredPattern);
SDValue ScalableVec1 = convertToScalableVector(DAG, VT, Vec1);
return DAG.getNode(ISD::VSELECT, DL, VT, PTrue, ScalableVec1, Vec0);
}
return SDValue();
}
static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {
if (Op.getOpcode() != AArch64ISD::DUP &&
Op.getOpcode() != ISD::SPLAT_VECTOR &&
Op.getOpcode() != ISD::BUILD_VECTOR)
return false;
if (Op.getOpcode() == ISD::BUILD_VECTOR &&
!isAllConstantBuildVector(Op, SplatVal))
return false;
if (Op.getOpcode() != ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(Op->getOperand(0)))
return false;
SplatVal = Op->getConstantOperandVal(0);
if (Op.getValueType().getVectorElementType() != MVT::i64)
SplatVal = (int32_t)SplatVal;
Negated = false;
if (isPowerOf2_64(SplatVal))
return true;
Negated = true;
if (isPowerOf2_64(-SplatVal)) {
SplatVal = -SplatVal;
return true;
}
return false;
}
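// Illustrative example: a splat of -8 sets Negated = true and SplatVal = 8,
// so a signed divide by the splat can still use the shift-based lowering in
// LowerDIV, followed by a negation of the result.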
SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
assert(VT.isScalableVector() && "Expected a scalable vector.");
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
bool Negated;
uint64_t SplatVal;
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
SDValue Pg = getPredicateForScalableVector(DAG, dl, VT);
SDValue Res =
DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32));
if (Negated)
Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
return Res;
}
if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode);
// SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
// operations, and truncate the result.
EVT WidenedVT;
if (VT == MVT::nxv16i8)
WidenedVT = MVT::nxv8i16;
else if (VT == MVT::nxv8i16)
WidenedVT = MVT::nxv4i32;
else
llvm_unreachable("Unexpected Custom DIV operation");
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
}
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
if (useSVEForFixedLengthVectorVT(VT))
return false;
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned Cost = getPerfectShuffleCost(M);
if (Cost <= 1)
return true;
}
bool DummyBool;
int DummyInt;
unsigned DummyUnsigned;
return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
// isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
isZIPMask(M, VT, DummyUnsigned) ||
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
isConcatMask(M, VT, VT.getSizeInBits() == 128));
}
bool AArch64TargetLowering::isVectorClearMaskLegal(ArrayRef<int> M,
EVT VT) const {
// Just delegate to the generic legality, clear masks aren't special.
return isShuffleMaskLegal(M, VT);
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
// Ignore bit_converts.
while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
HasAnyUndefs, ElementBits) ||
SplatBitSize > ElementBits)
return false;
Cnt = SplatBits.getSExtValue();
return true;
}
/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation. That value must be in the range:
/// 0 <= Value < ElementBits for a left shift; or
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
}
/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
/// 1 <= Value <= ElementBits for a right shift; or
/// 1 <= Value <= ElementBits/2 for a narrowing right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}
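// For instance, for v4i32 a right-shift amount splat of 32 is accepted
// (1 <= 32 <= ElementBits) while 0 or 33 is rejected; a narrowing right
// shift only accepts amounts in the range 1..16.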
SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.getScalarType() == MVT::i1) {
// Lower i1 truncate to `(x & 1) != 0`.
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
SDValue Zero = DAG.getConstant(0, dl, OpVT);
SDValue One = DAG.getConstant(1, dl, OpVT);
SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
}
if (!VT.isVector() || VT.isScalableVector())
return SDValue();
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
int64_t Cnt;
if (!Op.getOperand(1).getValueType().isVector())
return Op;
unsigned EltSize = VT.getScalarSizeInBits();
switch (Op.getOpcode()) {
case ISD::SHL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
MVT::i32),
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
: AArch64ISD::SRL_PRED;
return LowerToPredicatedOp(Op, DAG, Opc);
}
// Right shift immediate
if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
}
// Right shift register. Note, there is not a shift right register
// instruction, but the shift left register instruction takes a signed
// value, where negative numbers specify a right shift.
unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
: Intrinsic::aarch64_neon_ushl;
// Negate the shift amount.
SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(1));
SDValue NegShiftLeft =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
NegShift);
return NegShiftLeft;
}
llvm_unreachable("unexpected shift opcode");
}
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode CC, bool NoNans, EVT VT,
const SDLoc &dl, SelectionDAG &DAG) {
EVT SrcVT = LHS.getValueType();
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
"function only supposed to emit natural comparisons");
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
bool IsZero = IsCnst && (CnstBits == 0);
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
SDValue Fcmeq;
if (IsZero)
Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
else
Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Fcmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (!NoNans)
return SDValue();
// If we ignore NaNs then we can use the LS implementation.
LLVM_FALLTHROUGH;
case AArch64CC::LS:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (!NoNans)
return SDValue();
// If we ignore NaNs then we can use the MI implementation.
LLVM_FALLTHROUGH;
case AArch64CC::MI:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
}
}
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
SDValue Cmeq;
if (IsZero)
Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
else
Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Cmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
case AArch64CC::LS:
return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
case AArch64CC::LO:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
case AArch64CC::HI:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
case AArch64CC::HS:
return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
}
}
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
SDLoc dl(Op);
if (LHS.getValueType().getVectorElementType().isInteger()) {
assert(LHS.getValueType() == RHS.getValueType());
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
SDValue Cmp =
EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
// Make v4f16 (only) fcmp operations utilise vector instructions;
// v8f16 support will be a little more complicated.
if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
if (LHS.getValueType().getVectorNumElements() == 4) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
DAG.ReplaceAllUsesWith(Op, NewSetcc);
CmpVT = MVT::v4i32;
} else
return SDValue();
}
assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
LHS.getValueType().getVectorElementType() != MVT::f128);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
AArch64CC::CondCode CC1, CC2;
bool ShouldInvert;
changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
SDValue Cmp =
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
return SDValue();
if (CC2 != AArch64CC::AL) {
SDValue Cmp2 =
EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
if (!Cmp2.getNode())
return SDValue();
Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
}
Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
if (ShouldInvert)
Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
return Cmp;
}
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
SelectionDAG &DAG) {
SDValue VecOp = ScalarOp.getOperand(0);
auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
DAG.getConstant(0, DL, MVT::i64));
}
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
// Try to lower fixed length reductions to SVE.
EVT SrcVT = Src.getValueType();
bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
Op.getOpcode() == ISD::VECREDUCE_XOR ||
Op.getOpcode() == ISD::VECREDUCE_FADD ||
(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);
if (SrcVT.isScalableVector() ||
useSVEForFixedLengthVectorVT(
SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
if (SrcVT.getVectorElementType() == MVT::i1)
return LowerPredReductionToSVE(Op, DAG);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
case ISD::VECREDUCE_AND:
return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
case ISD::VECREDUCE_OR:
return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
case ISD::VECREDUCE_SMAX:
return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_SMIN:
return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
case ISD::VECREDUCE_UMAX:
return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_UMIN:
return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
case ISD::VECREDUCE_XOR:
return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
case ISD::VECREDUCE_FADD:
return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
case ISD::VECREDUCE_FMAX:
return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
case ISD::VECREDUCE_FMIN:
return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
default:
llvm_unreachable("Unhandled fixed length reduction");
}
}
// Lower NEON reductions.
SDLoc dl(Op);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
case ISD::VECREDUCE_SMAX:
return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
case ISD::VECREDUCE_SMIN:
return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
case ISD::VECREDUCE_UMAX:
return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
case ISD::VECREDUCE_UMIN:
return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
case ISD::VECREDUCE_FMAX: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
Src);
}
case ISD::VECREDUCE_FMIN: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
Src);
}
default:
llvm_unreachable("Unhandled reduction");
}
}
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-add instruction, but not a load-sub.
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
Op.getOperand(0), Op.getOperand(1), RHS,
AN->getMemOperand());
}
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-clear instruction, but not a load-and.
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
Op.getOperand(0), Op.getOperand(1), RHS,
AN->getMemOperand());
}
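// Illustrative example: "atomicrmw and ptr, 0x0F" becomes an LDCLR whose
// operand is the complement ~0x0F; clearing every bit outside the mask is
// equivalent to ANDing with the mask.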
SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
Chain =
DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
DAG.getRegisterMask(Mask), Chain.getValue(1));
// To match the actual intent better, we should read the output from X15 here
// again (instead of potentially spilling it to the stack), but rereading Size
// from X15 here doesn't work at -O0, since it thinks that X15 is undefined
// here.
Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
return Chain;
}
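// Illustrative example: a 64-byte allocation passes X15 = 4 to __chkstk (the
// SRL by 4 above divides the byte size by 16), and the SHL by 4 afterwards
// restores the byte count for the stack adjustment.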
SDValue
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() &&
"Only Windows alloca probing supported");
SDLoc dl(Op);
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
MaybeAlign Align =
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
EVT VT = Node->getValueType(0);
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
"no-stack-arg-probe")) {
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
}
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
}
SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT != MVT::i64 && "Expected illegal VSCALE node");
SDLoc DL(Op);
APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sext(64)), DL,
VT);
}
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
template <unsigned NumVecs>
static bool
setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
Info.opc = ISD::INTRINSIC_VOID;
// Retrieve EC from first vector argument.
const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
ElementCount EC = VT.getVectorElementCount();
#ifndef NDEBUG
// Check the assumption that all input vectors are the same type.
for (unsigned I = 0; I < NumVecs; ++I)
assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
"Invalid type.");
#endif
// memVT is `NumVecs * VT`.
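// E.g. an st2 of two nxv4i32 values yields memVT == nxv8i32.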
Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
EC * NumVecs);
Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
Info.flags = MachineMemOperand::MOStore;
return true;
}
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_sve_st2:
return setInfoSVEStN<2>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st3:
return setInfoSVEStN<3>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st4:
return setInfoSVEStN<4>(*this, DL, Info, I);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
return true;
}
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
for (const Value *Arg : I.args()) {
Type *ArgTy = Arg->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
return true;
}
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
Type *ValTy = I.getParamElementType(0);
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(ValTy);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = DL.getABITypeAlign(ValTy);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
Type *ValTy = I.getParamElementType(1);
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(ValTy);
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(ValTy);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_ldaxp:
case Intrinsic::aarch64_ldxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_stlxp:
case Intrinsic::aarch64_stxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_sve_ldnt1: {
Type *ElTy = cast<VectorType>(I.getType())->getElementType();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(ElTy);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_sve_stnt1: {
Type *ElTy =
cast<VectorType>(I.getArgOperand(0)->getType())->getElementType();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getOperand(0)->getType());
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = DL.getABITypeAlign(ElTy);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_mops_memset_tag: {
Value *Dst = I.getArgOperand(0);
Value *Val = I.getArgOperand(1);
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(Val->getType());
Info.ptrVal = Dst;
Info.offset = 0;
Info.align = I.getParamAlign(0).valueOrOne();
Info.flags = MachineMemOperand::MOStore;
// The size of the memory being operated on is unknown at this point
Info.size = MemoryLocation::UnknownSize;
return true;
}
default:
break;
}
return false;
}
bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
// TODO: This may be worth removing. Check regression tests for diffs.
if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
return false;
// If we're reducing the load width in order to avoid having to use an extra
// instruction to do extension then it's probably a good idea.
if (ExtTy != ISD::NON_EXTLOAD)
return true;
// Don't reduce load width if it would prevent us from combining a shift into
// the offset.
MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
assert(Mem);
const SDValue &Base = Mem->getBasePtr();
if (Base.getOpcode() == ISD::ADD &&
Base.getOperand(1).getOpcode() == ISD::SHL &&
Base.getOperand(1).hasOneUse() &&
Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
// It's unknown whether a scalable vector has a power-of-2 bitwidth.
if (Mem->getMemoryVT().isScalableVector())
return false;
// The shift can be combined if it matches the size of the value being
// loaded (and so reducing the width would make it not match).
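// E.g. for an i64 load, (add x, (shl y, 3)) folds into the scaled addressing
// mode "ldr xN, [x, y, lsl #3]"; narrowing the load would break that match.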
uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
if (ShiftAmount == Log2_32(LoadBytes))
return false;
}
// We have no reason to disallow reducing the load width, so allow it.
return true;
}
// Truncations from 64-bit GPRs to 32-bit GPRs are free.
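// E.g. an i64 -> i32 truncate just reads the W view of the X register, so no
// instruction is needed.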
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
return NumBits1 > NumBits2;
}
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
uint64_t NumBits1 = VT1.getFixedSizeInBits();
uint64_t NumBits2 = VT2.getFixedSizeInBits();
return NumBits1 > NumBits2;
}
/// Check if it is profitable to hoist an instruction in then/else into if.
/// Not profitable if I and its user can form an FMA instruction,
/// because we prefer FMSUB/FMADD.
bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
if (I->getOpcode() != Instruction::FMul)
return true;
if (!I->hasOneUse())
return true;
Instruction *User = I->user_back();
if (!(User->getOpcode() == Instruction::FSub ||
User->getOpcode() == Instruction::FAdd))
return true;
const TargetOptions &Options = getTargetMachine().Options;
const Function *F = I->getFunction();
const DataLayout &DL = F->getParent()->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath));
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
// 64-bit GPR.
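// E.g. "add w0, w1, w2" also clears bits [63:32] of x0, so a following
// zero-extend to i64 costs nothing.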
bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
EVT VT1 = Val.getValueType();
if (isZExtFree(VT1, VT2)) {
return true;
}
if (Val.getOpcode() != ISD::LOAD)
return false;
// 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
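// E.g. "ldrb w0, [x1]" leaves bits [63:8] of x0 zero, making the zext a no-op.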
return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
VT1.getSizeInBits() <= 32);
}
bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
if (isa<FPExtInst>(Ext))
return false;
// Vector types are not free.
if (Ext->getType()->isVectorTy())
return false;
for (const Use &U : Ext->uses()) {
// The extension is free if we can fold it with a left shift in an
// addressing mode or an arithmetic operation: add, sub, and cmp.
// Is there a shift?
const Instruction *Instr = cast<Instruction>(U.getUser());
// Is this a constant shift?
switch (Instr->getOpcode()) {
case Instruction::Shl:
if (!isa<ConstantInt>(Instr->getOperand(1)))
return false;
break;
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
auto &DL = Ext->getModule()->getDataLayout();
std::advance(GTI, U.getOperandNo()-1);
Type *IdxTy = GTI.getIndexedType();
// This extension will end up with a shift because of the scaling factor.
// 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
// Get the shift amount based on the scaling factor:
// log2(sizeof(IdxTy)) - log2(8).
uint64_t ShiftAmt =
countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
// Is the constant foldable in the shift of the addressing mode?
// I.e., shift amount is between 1 and 4 inclusive.
if (ShiftAmt == 0 || ShiftAmt > 4)
return false;
break;
}
case Instruction::Trunc:
// Check if this is a noop.
// trunc(sext ty1 to ty2) to ty1.
if (Instr->getType() == Ext->getOperand(0)->getType())
continue;
LLVM_FALLTHROUGH;
default:
return false;
}
// At this point we can use the bfm family, so this extension is free
// for that use.
}
return true;
}
/// Check if both Op1 and Op2 are shufflevector extracts of either the lower
/// or upper half of the vector elements.
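/// E.g. both operands being half-width extracts at the same offset, such as
/// mask <8, 9, 10, 11, 12, 13, 14, 15> applied to two v16i8 sources.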
static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
auto areTypesHalved = [](Value *FullV, Value *HalfV) {
auto *FullTy = FullV->getType();
auto *HalfTy = HalfV->getType();
return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
};
auto extractHalf = [](Value *FullV, Value *HalfV) {
auto *FullVT = cast<FixedVectorType>(FullV->getType());
auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
};
ArrayRef<int> M1, M2;
Value *S1Op1, *S2Op1;
if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
!match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
return false;
// Check that the operands are half as wide as the result and we extract
// half of the elements of the input vectors.
if (!areTypesHalved(S1Op1, Op1) || !areTypesHalved(S2Op1, Op2) ||
!extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
return false;
// Check the mask extracts either the lower or upper half of vector
// elements.
int M1Start = -1;
int M2Start = -1;
int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
!ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
return false;
return true;
}
/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
/// of the vector elements.
static bool areExtractExts(Value *Ext1, Value *Ext2) {
auto areExtDoubled = [](Instruction *Ext) {
return Ext->getType()->getScalarSizeInBits() ==
2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
};
if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
!match(Ext2, m_ZExtOrSExt(m_Value())) ||
!areExtDoubled(cast<Instruction>(Ext1)) ||
!areExtDoubled(cast<Instruction>(Ext2)))
return false;
return true;
}
/// Check if Op could be used with vmull_high_p64 intrinsic.
static bool isOperandOfVmullHighP64(Value *Op) {
Value *VectorOperand = nullptr;
ConstantInt *ElementIndex = nullptr;
return match(Op, m_ExtractElt(m_Value(VectorOperand),
m_ConstantInt(ElementIndex))) &&
ElementIndex->getValue() == 1 &&
isa<FixedVectorType>(VectorOperand->getType()) &&
cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
}
/// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
}
static bool isSplatShuffle(Value *V) {
if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
return is_splat(Shuf->getShuffleMask());
return false;
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// shufflevector extracts and/or sext/zext can be folded into (u,s)subl(2).
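/// E.g. sinking two sext(extract_hi(...)) operands next to an add allows it
/// to select to a single saddl2 instruction.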
bool AArch64TargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
if (areExtractShuffleVectors(II->getOperand(0), II->getOperand(1))) {
Ops.push_back(&II->getOperandUse(0));
Ops.push_back(&II->getOperandUse(1));
return true;
}
LLVM_FALLTHROUGH;
case Intrinsic::fma:
if (isa<VectorType>(I->getType()) &&
cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
!Subtarget->hasFullFP16())
return false;
LLVM_FALLTHROUGH;
case Intrinsic::aarch64_neon_sqdmull:
case Intrinsic::aarch64_neon_sqdmulh:
case Intrinsic::aarch64_neon_sqrdmulh:
// Sink splats for index lane variants
if (isSplatShuffle(II->getOperand(0)))
Ops.push_back(&II->getOperandUse(0));
if (isSplatShuffle(II->getOperand(1)))
Ops.push_back(&II->getOperandUse(1));
return !Ops.empty();
case Intrinsic::aarch64_sve_ptest_first:
case Intrinsic::aarch64_sve_ptest_last:
if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
Ops.push_back(&II->getOperandUse(0));
return !Ops.empty();
case Intrinsic::aarch64_sme_write_horiz:
case Intrinsic::aarch64_sme_write_vert:
case Intrinsic::aarch64_sme_writeq_horiz:
case Intrinsic::aarch64_sme_writeq_vert: {
auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
if (!Idx || Idx->getOpcode() != Instruction::Add)
return false;
Ops.push_back(&II->getOperandUse(1));
return true;
}
case Intrinsic::aarch64_sme_read_horiz:
case Intrinsic::aarch64_sme_read_vert:
case Intrinsic::aarch64_sme_readq_horiz:
case Intrinsic::aarch64_sme_readq_vert:
case Intrinsic::aarch64_sme_ld1b_vert:
case Intrinsic::aarch64_sme_ld1h_vert:
case Intrinsic::aarch64_sme_ld1w_vert:
case Intrinsic::aarch64_sme_ld1d_vert:
case Intrinsic::aarch64_sme_ld1q_vert:
case Intrinsic::aarch64_sme_st1b_vert:
case Intrinsic::aarch64_sme_st1h_vert:
case Intrinsic::aarch64_sme_st1w_vert:
case Intrinsic::aarch64_sme_st1d_vert:
case Intrinsic::aarch64_sme_st1q_vert:
case Intrinsic::aarch64_sme_ld1b_horiz:
case Intrinsic::aarch64_sme_ld1h_horiz:
case Intrinsic::aarch64_sme_ld1w_horiz:
case Intrinsic::aarch64_sme_ld1d_horiz:
case Intrinsic::aarch64_sme_ld1q_horiz:
case Intrinsic::aarch64_sme_st1b_horiz:
case Intrinsic::aarch64_sme_st1h_horiz:
case Intrinsic::aarch64_sme_st1w_horiz:
case Intrinsic::aarch64_sme_st1d_horiz:
case Intrinsic::aarch64_sme_st1q_horiz: {
auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
if (!Idx || Idx->getOpcode() != Instruction::Add)
return false;
Ops.push_back(&II->getOperandUse(3));
return true;
}
case Intrinsic::aarch64_neon_pmull:
if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
return false;
Ops.push_back(&II->getOperandUse(0));
Ops.push_back(&II->getOperandUse(1));
return true;
case Intrinsic::aarch64_neon_pmull64:
if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
II->getArgOperand(1)))
return false;
Ops.push_back(&II->getArgOperandUse(0));
Ops.push_back(&II->getArgOperandUse(1));
return true;
default:
return false;
}
}
if (!I->getType()->isVectorTy())
return false;
switch (I->getOpcode()) {
case Instruction::Sub:
case Instruction::Add: {
if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
return false;
// If the exts' operands extract either the lower or upper elements, we
// can sink them too.
auto Ext1 = cast<Instruction>(I->getOperand(0));
auto Ext2 = cast<Instruction>(I->getOperand(1));
if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
Ops.push_back(&Ext1->getOperandUse(0));
Ops.push_back(&Ext2->getOperandUse(0));
}
Ops.push_back(&I->getOperandUse(0));
Ops.push_back(&I->getOperandUse(1));
return true;
}
case Instruction::Mul: {
bool IsProfitable = false;
for (auto &Op : I->operands()) {
// Make sure we are not already sinking this operand
if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
if (!Shuffle || !Shuffle->isZeroEltSplat())
continue;
Value *ShuffleOperand = Shuffle->getOperand(0);
InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
if (!Insert)
continue;
Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
if (!OperandInstr)
continue;
ConstantInt *ElementConstant =
dyn_cast<ConstantInt>(Insert->getOperand(2));
// Check that the insertelement is inserting into element 0
if (!ElementConstant || ElementConstant->getZExtValue() != 0)
continue;
unsigned Opcode = OperandInstr->getOpcode();
if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
continue;
Ops.push_back(&Shuffle->getOperandUse(0));
Ops.push_back(&Op);
IsProfitable = true;
}
return IsProfitable;
}
default:
return false;
}
return false;
}
bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
Align &RequiredAlignment) const {
if (!LoadedType.isSimple() ||
(!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
return false;
// Cyclone supports unaligned accesses.
RequiredAlignment = Align(1);
unsigned NumBits = LoadedType.getSizeInBits();
return NumBits == 32 || NumBits == 64;
}
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
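/// E.g. a 256-bit fixed vector accessed via NEON (128-bit registers) counts
/// as two interleaved accesses.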
unsigned AArch64TargetLowering::getNumInterleavedAccesses(
VectorType *VecTy, const DataLayout &DL, bool UseScalable) const {
unsigned VecSize = UseScalable ? Subtarget->getMinSVEVectorSizeInBits() : 128;
return std::max<unsigned>(1, (DL.getTypeSizeInBits(VecTy) + 127) / VecSize);
}
MachineMemOperand::Flags
AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
return MOStridedAccess;
return MachineMemOperand::MONone;
}
bool AArch64TargetLowering::isLegalInterleavedAccessType(
VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
UseScalable = false;
// Ensure the number of vector elements is greater than 1.
if (NumElements < 2)
return false;
// Ensure the element type is legal.
if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
return false;
if (Subtarget->useSVEForFixedLengthVectors() &&
(VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
(VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
isPowerOf2_32(NumElements) && VecSize > 128))) {
UseScalable = true;
return true;
}
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
return VecSize == 64 || VecSize % 128 == 0;
}
static ScalableVectorType *getSVEContainerIRType(FixedVectorType *VTy) {
if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 2);
if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 4);
if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 8);
if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 8);
if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 2);
if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 4);
if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 8);
if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 16);
llvm_unreachable("Cannot handle input vector type");
}
/// Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
///
/// Into:
/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool AArch64TargetLowering::lowerInterleavedLoad(
LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices, unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
assert(!Shuffles.empty() && "Empty shufflevector input");
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");
const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VTy = Shuffles[0]->getType();
// Skip if we do not have NEON, and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector sizes are divisible by 128.
bool UseScalable;
if (!Subtarget->hasNEON() ||
!isLegalInterleavedAccessType(VTy, DL, UseScalable))
return false;
unsigned NumLoads = getNumInterleavedAccesses(VTy, DL, UseScalable);
auto *FVTy = cast<FixedVectorType>(VTy);
// A pointer vector cannot be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
Type *EltTy = FVTy->getElementType();
if (EltTy->isPointerTy())
FVTy =
FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
FVTy = FixedVectorType::get(FVTy->getElementType(),
FVTy->getNumElements() / NumLoads);
auto *LDVTy =
UseScalable ? cast<VectorType>(getSVEContainerIRType(FVTy)) : FVTy;
IRBuilder<> Builder(LI);
// The base address of the load.
Value *BaseAddr = LI->getPointerOperand();
if (NumLoads > 1) {
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
Type *PtrTy =
UseScalable
? LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace())
: LDVTy->getPointerTo(LI->getPointerAddressSpace());
Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
LDVTy->getElementCount());
static const Intrinsic::ID SVELoadIntrs[3] = {
Intrinsic::aarch64_sve_ld2_sret, Intrinsic::aarch64_sve_ld3_sret,
Intrinsic::aarch64_sve_ld4_sret};
static const Intrinsic::ID NEONLoadIntrs[3] = {Intrinsic::aarch64_neon_ld2,
Intrinsic::aarch64_neon_ld3,
Intrinsic::aarch64_neon_ld4};
Function *LdNFunc;
if (UseScalable)
LdNFunc = Intrinsic::getDeclaration(LI->getModule(),
SVELoadIntrs[Factor - 2], {LDVTy});
else
LdNFunc = Intrinsic::getDeclaration(
LI->getModule(), NEONLoadIntrs[Factor - 2], {LDVTy, PtrTy});
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
// replace.
DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
Value *PTrue = nullptr;
if (UseScalable) {
Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(FVTy->getNumElements());
if (Subtarget->getMinSVEVectorSizeInBits() ==
Subtarget->getMaxSVEVectorSizeInBits() &&
Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
PgPattern = AArch64SVEPredPattern::all;
auto *PTruePat =
ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
{PTruePat});
}
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
FVTy->getNumElements() * Factor);
CallInst *LdN;
if (UseScalable)
LdN = Builder.CreateCall(
LdNFunc, {PTrue, Builder.CreateBitCast(BaseAddr, PtrTy)}, "ldN");
else
LdN = Builder.CreateCall(LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy),
"ldN");
// Extract and store the sub-vectors returned by the load intrinsic.
for (unsigned i = 0; i < Shuffles.size(); i++) {
ShuffleVectorInst *SVI = Shuffles[i];
unsigned Index = Indices[i];
Value *SubVec = Builder.CreateExtractValue(LdN, Index);
if (UseScalable)
SubVec = Builder.CreateExtractVector(
FVTy, SubVec,
ConstantInt::get(Type::getInt64Ty(VTy->getContext()), 0));
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
FVTy->getNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
// Replace uses of the shufflevector instructions with the sub-vectors
// returned by the load intrinsic. If a shufflevector instruction is
// associated with more than one sub-vector, those sub-vectors will be
// concatenated into a single wide vector.
for (ShuffleVectorInst *SVI : Shuffles) {
auto &SubVec = SubVecs[SVI];
auto *WideVec =
SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
SVI->replaceAllUsesWith(WideVec);
}
return true;
}
/// Lower an interleaved store into a stN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// st3 instruction in CodeGen.
///
/// Example for a more general valid mask (Factor 3). Lower:
/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
unsigned LaneLen = VecTy->getNumElements() / Factor;
Type *EltTy = VecTy->getElementType();
auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
bool UseScalable;
// Skip if we do not have NEON, and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector sizes are divisible by 128.
if (!Subtarget->hasNEON() ||
!isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
return false;
unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
unsigned NumOpElts =
cast<FixedVectorType>(Op0->getType())->getNumElements();
// Convert to the corresponding integer vector.
auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
SubVecTy = FixedVectorType::get(IntTy, LaneLen);
}
// If we're going to generate more than one store, reset the lane length
// and sub-vector type to something legal.
LaneLen /= NumStores;
SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
auto *STVTy = UseScalable ? cast<VectorType>(getSVEContainerIRType(SubVecTy))
: SubVecTy;
// The base address of the store.
Value *BaseAddr = SI->getPointerOperand();
if (NumStores > 1) {
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
}
auto Mask = SVI->getShuffleMask();
Type *PtrTy =
UseScalable
? STVTy->getElementType()->getPointerTo(SI->getPointerAddressSpace())
: STVTy->getPointerTo(SI->getPointerAddressSpace());
Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
STVTy->getElementCount());
static const Intrinsic::ID SVEStoreIntrs[3] = {Intrinsic::aarch64_sve_st2,
Intrinsic::aarch64_sve_st3,
Intrinsic::aarch64_sve_st4};
static const Intrinsic::ID NEONStoreIntrs[3] = {Intrinsic::aarch64_neon_st2,
Intrinsic::aarch64_neon_st3,
Intrinsic::aarch64_neon_st4};
Function *StNFunc;
if (UseScalable)
StNFunc = Intrinsic::getDeclaration(SI->getModule(),
SVEStoreIntrs[Factor - 2], {STVTy});
else
StNFunc = Intrinsic::getDeclaration(
SI->getModule(), NEONStoreIntrs[Factor - 2], {STVTy, PtrTy});
Value *PTrue = nullptr;
if (UseScalable) {
Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
if (Subtarget->getMinSVEVectorSizeInBits() ==
Subtarget->getMaxSVEVectorSizeInBits() &&
Subtarget->getMinSVEVectorSizeInBits() ==
DL.getTypeSizeInBits(SubVecTy))
PgPattern = AArch64SVEPredPattern::all;
auto *PTruePat =
ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
{PTruePat});
}
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
SmallVector<Value *, 5> Ops;
// Split the shufflevector operands into sub vectors for the new stN call.
for (unsigned i = 0; i < Factor; i++) {
Value *Shuffle;
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
Shuffle = Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
unsigned IdxJ = StoreCount * LaneLen * Factor + j;
if (Mask[IdxJ * Factor + IdxI] >= 0) {
StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
break;
}
}
// Note: Filling undef gaps with random elements is ok, since
// those elements were being written anyway (with undefs).
// In the case of all undefs we default to using elements from 0.
// Note: StartMask cannot be negative; it's checked in
// isReInterleaveMask.
Shuffle = Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(StartMask, LaneLen, 0));
}
if (UseScalable)
Shuffle = Builder.CreateInsertVector(
STVTy, UndefValue::get(STVTy), Shuffle,
ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0));
Ops.push_back(Shuffle);
}
if (UseScalable)
Ops.push_back(PTrue);
// If we're generating more than one store, we compute the base address of
// subsequent stores as an offset from the previous.
if (StoreCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
BaseAddr, LaneLen * Factor);
Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
Builder.CreateCall(StNFunc, Ops);
}
return true;
}
// Lower an SVE structured load intrinsic returning a tuple type to target
// specific intrinsic taking the same input but returning a multi-result value
// of the split tuple type.
//
// E.g. Lowering an LD3:
//
// call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
// <vscale x 4 x i1> %pred,
// <vscale x 4 x i32>* %addr)
//
// Output DAG:
//
// t0: ch = EntryToken
// t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
// t4: i64,ch = CopyFromReg t0, Register:i64 %1
// t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
// t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
//
// This is called pre-legalization to avoid widening/splitting issues with
// non-power-of-2 tuple types used for LD3, such as nxv12i32.
SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG,
const SDLoc &DL) const {
assert(VT.isScalableVector() && "Can only lower scalable vectors");
unsigned N, Opcode;
static const std::pair<unsigned, std::pair<unsigned, unsigned>>
IntrinsicMap[] = {
{Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
{Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
{Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
std::tie(N, Opcode) = llvm::find_if(IntrinsicMap, [&](auto P) {
return P.first == Intrinsic;
})->second;
assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
"invalid tuple vector type!");
EVT SplitVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
VT.getVectorElementCount().divideCoefficientBy(N));
assert(isTypeLegal(SplitVT));
SmallVector<EVT, 5> VTs(N, SplitVT);
VTs.push_back(MVT::Other); // Chain
SDVTList NodeTys = DAG.getVTList(VTs);
SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
SmallVector<SDValue, 4> PseudoLoadOps;
for (unsigned I = 0; I < N; ++I)
PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
}
EVT AArch64TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
// taken one instruction to materialize the v2i64 zero and one store (with
// restrictive addressing mode). Just do i64 stores.
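// E.g. a 16-byte zero memset is better served by "stp xzr, xzr, [x0]" than by
// materializing a v2i64 zero and doing a vector store (a sketch of the
// trade-off).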
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v16i8, Align(16)))
return MVT::v16i8;
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return MVT::f128;
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return MVT::i64;
if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return MVT::i32;
return MVT::Other;
}
LLT AArch64TargetLowering::getOptimalMemOpLLT(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
// taken one instruction to materialize the v2i64 zero and one store (with
// restrictive addressing mode). Just do i64 stores.
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v2i64, Align(16)))
return LLT::fixed_vector(2, 64);
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return LLT::scalar(128);
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return LLT::scalar(64);
if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return LLT::scalar(32);
return LLT();
}
// 12-bit optionally shifted immediates are legal for adds.
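// E.g. 0xfff and 0xfff000 are legal add immediates, while 0x1001 is not and
// must be materialized separately.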
bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
if (Immed == std::numeric_limits<int64_t>::min()) {
LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
<< ": avoid UB for INT64_MIN\n");
return false;
}
// Same encoding for add/sub, just flip the sign.
Immed = std::abs(Immed);
bool IsLegal = ((Immed >> 12) == 0 ||
((Immed & 0xfff) == 0 && Immed >> 24 == 0));
LLVM_DEBUG(dbgs() << "Is " << Immed
<< " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
return IsLegal;
}
// Return false to prevent folding
// (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
// if the folding leads to worse code.
bool AArch64TargetLowering::isMulAddWithConstProfitable(
SDValue AddNode, SDValue ConstNode) const {
// Let the DAGCombiner decide for vector types and large types.
const EVT VT = AddNode.getValueType();
if (VT.isVector() || VT.getScalarSizeInBits() > 64)
return true;
// It is worse if c1 is a legal add immediate while c1*c2 is not, since c1*c2
// then has to be materialized with at least two instructions.
const ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
const ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
const int64_t C1 = C1Node->getSExtValue();
const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
if (!isLegalAddImmediate(C1) || isLegalAddImmediate(C1C2.getSExtValue()))
return true;
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(C1C2.getZExtValue(), VT.getSizeInBits(), Insn);
if (Insn.size() > 1)
return false;
// Default to true and let the DAGCombiner decide.
return true;
}
// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
// immediates is the same as for an add or a sub.
bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
return isLegalAddImmediate(Immed);
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS, Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset
// reg + SIZE_IN_BYTES * 12-bit unsigned offset
// reg1 + reg2
// reg + SIZE_IN_BYTES * reg
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// No reg+reg+imm addressing.
if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
return false;
// FIXME: Update this method to support scalable addressing modes.
if (isa<ScalableVectorType>(Ty)) {
uint64_t VecElemNumBytes =
DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
return AM.HasBaseReg && !AM.BaseOffs &&
(AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
}
// check reg + imm case:
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;
if (Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (!AM.Scale) {
int64_t Offset = AM.BaseOffs;
// 9-bit signed offset
if (isInt<9>(Offset))
return true;
// 12-bit unsigned offset
unsigned shift = Log2_64(NumBytes);
if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
// Must be a multiple of NumBytes (NumBytes is a power of 2)
(Offset >> shift) << shift == Offset)
return true;
return false;
}
// Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
}
bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
// Consider splitting large offset of struct or array.
return true;
}
InstructionCost AArch64TargetLowering::getScalingFactorCost(
const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
// Scaling factors are not free at all.
// Operands | Rt Latency
// -------------------------------------------
// Rt, [Xn, Xm] | 4
// -------------------------------------------
// Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
// Rt, [Xn, Wm, <extend> #imm] |
if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1 if
// it is not equal to 0 or 1.
return AM.Scale != 0 && AM.Scale != 1;
return -1;
}
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
const MachineFunction &MF, EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f16:
return Subtarget->hasFullFP16();
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}
return false;
}
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
Type *Ty) const {
switch (Ty->getScalarType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
return true;
default:
return false;
}
}
bool AArch64TargetLowering::generateFMAsInMachineCombiner(
EVT VT, CodeGenOpt::Level OptLevel) const {
return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector() &&
!useSVEForFixedLengthVectorVT(VT);
}
const MCPhysReg *
AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
// site. Hence we include LR in the scratch registers, which are in turn added
// as implicit-defs for stackmaps and patchpoints.
static const MCPhysReg ScratchRegs[] = {
AArch64::X16, AArch64::X17, AArch64::LR, 0
};
return ScratchRegs;
}
bool
AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const {
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) &&
"Expected shift op");
SDValue ShiftLHS = N->getOperand(0);
EVT VT = N->getValueType(0);
// If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not combine
// it with shift 'N' to let it be lowered to UBFX.
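// E.g. in ((x >> 4) & 0xff) << 2, keeping the AND intact lets the inner
// expression select to a single UBFX.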
if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
isa<ConstantSDNode>(ShiftLHS.getOperand(1))) {
uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1);
if (isMask_64(TruncMask) &&
ShiftLHS.getOperand(0).getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(ShiftLHS.getOperand(0).getOperand(1)))
return false;
}
return true;
}
bool AArch64TargetLowering::isDesirableToCommuteXorWithShift(
const SDNode *N) const {
assert(N->getOpcode() == ISD::XOR &&
(N->getOperand(0).getOpcode() == ISD::SHL ||
N->getOperand(0).getOpcode() == ISD::SRL) &&
"Expected XOR(SHIFT) pattern");
// Only commute if the entire NOT mask is a hidden shifted mask.
auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
if (XorC && ShiftC) {
unsigned MaskIdx, MaskLen;
if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
unsigned ShiftAmt = ShiftC->getZExtValue();
unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
if (N->getOperand(0).getOpcode() == ISD::SHL)
return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
}
}
return false;
}
bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
assert(((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) ||
(N->getOpcode() == ISD::SRL &&
N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask");
// Don't allow multiuse shift folding with the same shift amount.
if (!N->getOperand(0)->hasOneUse())
return false;
// Only fold srl(shl(x,c1),c2) iff C1 >= C2 to prevent loss of UBFX patterns.
EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
}
return true;
}
bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
if (BitSize == 0)
return false;
int64_t Val = Imm.getSExtValue();
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
return true;
if ((int64_t)Val < 0)
Val = ~Val;
if (BitSize == 32)
Val &= (1LL << 32) - 1;
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free, so return true when at most two MOVKs are needed.
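// E.g. 0x123456789ABC has its top set bit below bit 48 (Shift == 2), so
// MOVZ + two MOVKs beat a constant-pool load; wider constants do not.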
return Shift < 3;
}
bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
return (Index == 0 || Index == ResVT.getVectorMinNumElements());
}
/// Turn vector tests of the signbit in the form of:
/// xor (sra X, elt_size(X)-1), -1
/// into:
/// cmge X, X, #0
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (!Subtarget->hasNEON() || !VT.isVector())
return SDValue();
// There must be a shift right algebraic before the xor, and the xor must be a
// 'not' operation.
SDValue Shift = N->getOperand(0);
SDValue Ones = N->getOperand(1);
if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
!ISD::isBuildVectorAllOnes(Ones.getNode()))
return SDValue();
// The shift should be smearing the sign bit across each vector element.
auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
return SDValue();
return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
}
// Given a vecreduce_add node, detect the below pattern and convert it to the
// node sequence with UABDL, [S|U]ABD and UADDLP.
//
// i32 vecreduce_add(
// v16i32 abs(
// v16i32 sub(
// v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
// =================>
// i32 vecreduce_add(
// v4i32 UADDLP(
// v8i16 add(
// v8i16 zext(
// v8i8 [S|U]ABD low8:v16i8 a, low8:v16i8 b
// v8i16 zext(
// v8i8 [S|U]ABD high8:v16i8 a, high8:v16i8 b
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
SelectionDAG &DAG) {
// Assumed i32 vecreduce_add
if (N->getValueType(0) != MVT::i32)
return SDValue();
SDValue VecReduceOp0 = N->getOperand(0);
unsigned Opcode = VecReduceOp0.getOpcode();
// Assumed v16i32 abs
if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
return SDValue();
SDValue ABS = VecReduceOp0;
// Assumed v16i32 sub
if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
return SDValue();
SDValue SUB = ABS->getOperand(0);
unsigned Opcode0 = SUB->getOperand(0).getOpcode();
unsigned Opcode1 = SUB->getOperand(1).getOpcode();
// Assumed v16i32 type
if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
return SDValue();
// Assumed zext or sext
bool IsZExt = false;
if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
IsZExt = true;
} else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
IsZExt = false;
} else
return SDValue();
SDValue EXT0 = SUB->getOperand(0);
SDValue EXT1 = SUB->getOperand(1);
// Assumed zext's operand has v16i8 type
if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
return SDValue();
// Pattern is detected. Let's convert it to a sequence of nodes.
SDLoc DL(N);
// First, create the node pattern of UABD/SABD.
SDValue UABDHigh8Op0 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
DAG.getConstant(8, DL, MVT::i64));
SDValue UABDHigh8Op1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
DAG.getConstant(8, DL, MVT::i64));
SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
UABDHigh8Op0, UABDHigh8Op1);
SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);
// Second, create the node pattern of UABAL.
SDValue UABDLo8Op0 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
DAG.getConstant(0, DL, MVT::i64));
SDValue UABDLo8Op1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
DAG.getConstant(0, DL, MVT::i64));
SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
UABDLo8Op0, UABDLo8Op1);
SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);
// Third, create the node of UADDLP.
SDValue UADDLP = DAG.getNode(AArch64ISD::UADDLP, DL, MVT::v4i32, UABAL);
// Fourth, create the node of VECREDUCE_ADD.
return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
}
// Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
// vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
// vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *ST) {
if (!ST->hasDotProd())
return performVecReduceAddCombineWithUADDLP(N, DAG);
SDValue Op0 = N->getOperand(0);
if (N->getValueType(0) != MVT::i32 ||
Op0.getValueType().getVectorElementType() != MVT::i32)
return SDValue();
unsigned ExtOpcode = Op0.getOpcode();
SDValue A = Op0;
SDValue B;
if (ExtOpcode == ISD::MUL) {
A = Op0.getOperand(0);
B = Op0.getOperand(1);
if (A.getOpcode() != B.getOpcode() ||
A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
return SDValue();
ExtOpcode = A.getOpcode();
}
if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
return SDValue();
EVT Op0VT = A.getOperand(0).getValueType();
if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
return SDValue();
SDLoc DL(Op0);
// For non-mla reductions B can be set to 1. For MLA we take the operand of
// the extend B.
if (!B)
B = DAG.getConstant(1, DL, Op0VT);
else
B = B.getOperand(0);
SDValue Zeros =
DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
auto DotOpcode =
(ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
A.getOperand(0), B);
return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
}
// Given an (integer) vecreduce, we know the order of the inputs does not
// matter. We can convert UADDV(add(zext(extract_lo(x)), zext(extract_hi(x))))
// into UADDV(UADDLP(x)). This can also happen through an extra add, where we
// transform UADDV(add(y, add(zext(extract_lo(x)), zext(extract_hi(x))))).
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
auto DetectAddExtract = [&](SDValue A) {
// Look for add(zext(extract_lo(x)), zext(extract_hi(x))), returning
// UADDLP(x) if found.
if (A.getOpcode() != ISD::ADD)
return SDValue();
EVT VT = A.getValueType();
SDValue Op0 = A.getOperand(0);
SDValue Op1 = A.getOperand(1);
if (Op0.getOpcode() != Op1.getOpcode() ||
(Op0.getOpcode() != ISD::ZERO_EXTEND &&
Op0.getOpcode() != ISD::SIGN_EXTEND))
return SDValue();
SDValue Ext0 = Op0.getOperand(0);
SDValue Ext1 = Op1.getOperand(0);
if (Ext0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Ext1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Ext0.getOperand(0) != Ext1.getOperand(0))
return SDValue();
// Check that the source type is twice as wide as the add type, and that the
// extracts are from the upper/lower halves of the same source.
if (Ext0.getOperand(0).getValueType().getVectorNumElements() !=
VT.getVectorNumElements() * 2)
return SDValue();
if ((Ext0.getConstantOperandVal(1) != 0 &&
Ext1.getConstantOperandVal(1) != VT.getVectorNumElements()) &&
(Ext1.getConstantOperandVal(1) != 0 &&
Ext0.getConstantOperandVal(1) != VT.getVectorNumElements()))
return SDValue();
unsigned Opcode = Op0.getOpcode() == ISD::ZERO_EXTEND ? AArch64ISD::UADDLP
: AArch64ISD::SADDLP;
return DAG.getNode(Opcode, SDLoc(A), VT, Ext0.getOperand(0));
};
SDValue A = N->getOperand(0);
if (SDValue R = DetectAddExtract(A))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
if (A.getOpcode() == ISD::ADD) {
if (SDValue R = DetectAddExtract(A.getOperand(0)))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
A.getOperand(1)));
if (SDValue R = DetectAddExtract(A.getOperand(1)))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
A.getOperand(0)));
}
return SDValue();
}
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
}
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
EVT VT = N->getValueType(0);
// For scalable vectors, or when SVE is used for fixed-length vectors, mark
// the operation as cheap so we can handle it much later. This allows us to
// handle larger-than-legal types.
if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
return SDValue(N, 0);
// fold (sdiv X, pow2)
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
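// A sketch of the DAG built below for a positive divisor, e.g. (sdiv x, 8):
//   Cmp  = flags of (x < 0)
//   Add  = (add x, 7)
//   CSel = (csel Add, x, lt, Cmp)   ; bias only negative dividends
//   Res  = (sra CSel, 3)
// For a negated power of two the result is additionally subtracted from 0.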
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countTrailingZeros();
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
Created.push_back(Cmp.getNode());
Created.push_back(Add.getNode());
Created.push_back(CSel.getNode());
// Divide by pow2.
SDValue SRA =
DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Divisor.isNonNegative())
return SRA;
Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
SDValue
AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N, 0); // Lower SREM as SREM
EVT VT = N->getValueType(0);
// For scalable vectors, or when SVE is used for fixed-length vectors, mark
// the operation as cheap so we can handle it much later. This allows us to
// handle larger-than-legal types.
if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
return SDValue(N, 0);
// fold (srem X, pow2)
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
unsigned Lg2 = Divisor.countTrailingZeros();
if (Lg2 == 0)
return SDValue();
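// A sketch of the DAG built below, e.g. for (srem x, 4) (Lg2 == 2):
//   Negs   = (subs 0, x)              ; negation that also sets flags
//   AndPos = (and x, 3)
//   AndNeg = (and Negs, 3)
//   Res    = (csneg AndPos, AndNeg, mi)
// The Lg2 == 1 case instead uses a single AND plus a CSNEG on (x >= 0).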
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue CCVal, CSNeg;
if (Lg2 == 1) {
SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETGE, CCVal, DAG, DL);
SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne);
CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp);
Created.push_back(Cmp.getNode());
Created.push_back(And.getNode());
} else {
SDValue CCVal = DAG.getConstant(AArch64CC::MI, DL, MVT_CC);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
SDValue Negs = DAG.getNode(AArch64ISD::SUBS, DL, VTs, Zero, N0);
SDValue AndPos = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne);
SDValue AndNeg = DAG.getNode(ISD::AND, DL, VT, Negs, Pow2MinusOne);
CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal,
Negs.getValue(1));
Created.push_back(Negs.getNode());
Created.push_back(AndPos.getNode());
Created.push_back(AndNeg.getNode());
}
return CSNeg;
}
static bool IsSVECntIntrinsic(SDValue S) {
switch(getIntrinsicID(S.getNode())) {
default:
break;
case Intrinsic::aarch64_sve_cntb:
case Intrinsic::aarch64_sve_cnth:
case Intrinsic::aarch64_sve_cntw:
case Intrinsic::aarch64_sve_cntd:
return true;
}
return false;
}
/// Calculates what the pre-extend type is, based on the extension
/// operation node provided by \p Extend.
///
/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
/// pre-extend type is pulled directly from the operand, while other extend
/// operations need a bit more inspection to get this information.
///
/// \param Extend The SDNode from the DAG that represents the extend operation
///
/// \returns The type representing the \p Extend source type, or \p MVT::Other
/// if no valid type can be determined
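///
/// For example, (and x, 0xffff) implies an i16 pre-extend type, while
/// (sign_extend_inreg x, vt) reports vt directly.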
static EVT calculatePreExtendType(SDValue Extend) {
switch (Extend.getOpcode()) {
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return Extend.getOperand(0).getValueType();
case ISD::AssertSext:
case ISD::AssertZext:
case ISD::SIGN_EXTEND_INREG: {
VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
if (!TypeNode)
return MVT::Other;
return TypeNode->getVT();
}
case ISD::AND: {
ConstantSDNode *Constant =
dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
if (!Constant)
return MVT::Other;
uint32_t Mask = Constant->getZExtValue();
if (Mask == UCHAR_MAX)
return MVT::i8;
else if (Mask == USHRT_MAX)
return MVT::i16;
else if (Mask == UINT_MAX)
return MVT::i32;
return MVT::Other;
}
default:
return MVT::Other;
}
}
/// Combines a buildvector(sext/zext) or shuffle(sext/zext, undef) node pattern
/// into sext/zext(buildvector) or sext/zext(shuffle) making use of the vector
/// SExt/ZExt rather than the scalar SExt/ZExt
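///
/// For example (a sketch in DAG terms):
///   (v8i16 build_vector (zext i8 a), (zext i8 b), ...)
/// becomes
///   (v8i16 zero_extend (v8i8 build_vector a, b, ...))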
static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG) {
EVT VT = BV.getValueType();
if (BV.getOpcode() != ISD::BUILD_VECTOR &&
BV.getOpcode() != ISD::VECTOR_SHUFFLE)
return SDValue();
// Use the first item in the buildvector/shuffle to get the size of the
// extend, and make sure it looks valid.
SDValue Extend = BV->getOperand(0);
unsigned ExtendOpcode = Extend.getOpcode();
bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
ExtendOpcode == ISD::AssertSext;
if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
return SDValue();
// Shuffle inputs are vectors; limit to SIGN_EXTEND and ZERO_EXTEND to ensure
// calculatePreExtendType will work without issue.
if (BV.getOpcode() == ISD::VECTOR_SHUFFLE &&
ExtendOpcode != ISD::SIGN_EXTEND && ExtendOpcode != ISD::ZERO_EXTEND)
return SDValue();
// Restrict valid pre-extend data type
EVT PreExtendType = calculatePreExtendType(Extend);
if (PreExtendType == MVT::Other ||
PreExtendType.getScalarSizeInBits() != VT.getScalarSizeInBits() / 2)
return SDValue();
// Make sure all other operands are equally extended
for (SDValue Op : drop_begin(BV->ops())) {
if (Op.isUndef())
continue;
unsigned Opc = Op.getOpcode();
bool OpcIsSExt = Opc == ISD::SIGN_EXTEND || Opc == ISD::SIGN_EXTEND_INREG ||
Opc == ISD::AssertSext;
if (OpcIsSExt != IsSExt || calculatePreExtendType(Op) != PreExtendType)
return SDValue();
}
SDValue NBV;
SDLoc DL(BV);
if (BV.getOpcode() == ISD::BUILD_VECTOR) {
EVT PreExtendVT = VT.changeVectorElementType(PreExtendType);
EVT PreExtendLegalType =
PreExtendType.getScalarSizeInBits() < 32 ? MVT::i32 : PreExtendType;
SmallVector<SDValue, 8> NewOps;
for (SDValue Op : BV->ops())
NewOps.push_back(Op.isUndef() ? DAG.getUNDEF(PreExtendLegalType)
: DAG.getAnyExtOrTrunc(Op.getOperand(0), DL,
PreExtendLegalType));
NBV = DAG.getNode(ISD::BUILD_VECTOR, DL, PreExtendVT, NewOps);
} else { // BV.getOpcode() == ISD::VECTOR_SHUFFLE
EVT PreExtendVT = VT.changeVectorElementType(PreExtendType.getScalarType());
NBV = DAG.getVectorShuffle(PreExtendVT, DL, BV.getOperand(0).getOperand(0),
BV.getOperand(1).isUndef()
? DAG.getUNDEF(PreExtendVT)
: BV.getOperand(1).getOperand(0),
cast<ShuffleVectorSDNode>(BV)->getMask());
}
return DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, NBV);
}
/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
// If the value type isn't a vector, none of the operands are going to be dups
EVT VT = Mul->getValueType(0);
if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
return SDValue();
SDValue Op0 = performBuildShuffleExtendCombine(Mul->getOperand(0), DAG);
SDValue Op1 = performBuildShuffleExtendCombine(Mul->getOperand(1), DAG);
// If neither operand has been changed, don't make any further changes
if (!Op0 && !Op1)
return SDValue();
SDLoc DL(Mul);
return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
Op1 ? Op1 : Mul->getOperand(1));
}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
return Ext;
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y,
// and in MachineCombiner pass, add+mul will be combined into madd.
// Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue MulOper;
unsigned AddSubOpc;
auto IsAddSubWith1 = [&](SDValue V) -> bool {
AddSubOpc = V->getOpcode();
if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
SDValue Opnd = V->getOperand(1);
MulOper = V->getOperand(0);
if (AddSubOpc == ISD::SUB)
std::swap(Opnd, MulOper);
if (auto C = dyn_cast<ConstantSDNode>(Opnd))
return C->isOne();
}
return false;
};
if (IsAddSubWith1(N0)) {
SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
}
if (IsAddSubWith1(N1)) {
SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
}
// The below optimizations require a constant RHS.
if (!isa<ConstantSDNode>(N1))
return SDValue();
ConstantSDNode *C = cast<ConstantSDNode>(N1);
const APInt &ConstValue = C->getAPIntValue();
// Allow the scaling to be folded into the `cnt` instruction by preventing
// the scaling from being obscured here. This makes it easier to pattern match.
if (IsSVECntIntrinsic(N0) ||
(N0->getOpcode() == ISD::TRUNCATE &&
(IsSVECntIntrinsic(N0->getOperand(0)))))
if (ConstValue.sge(1) && ConstValue.sle(16))
return SDValue();
// Multiplication by a power of two plus/minus one can be done more
// cheaply as a shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
// gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
// 64-bit is 5 cycles, so this is always a win.
// More aggressively, some multiplications N0 * C can be lowered to
// shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
// e.g. 6=3*2=(2+1)*2.
// TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
// which equals (1+2)*16-(1+2).
// TrailingZeroes is used to test if the mul can be lowered to
// shift+add+shift.
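// For example, C = 6 gives TrailingZeroes = 1 and ShiftedConstValue = 3,
// so (mul x, 6) becomes (shl (add (shl x, 1), x), 1).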
unsigned TrailingZeroes = ConstValue.countTrailingZeros();
if (TrailingZeroes) {
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into smul or umul.
if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
isZeroExtended(N0.getNode(), DAG)))
return SDValue();
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into madd or msub.
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
N->use_begin()->getOpcode() == ISD::SUB))
return SDValue();
}
// Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
// and shift+add+shift.
APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
unsigned ShiftAmt;
// Is the shifted value the LHS operand of the add/sub?
bool ShiftValUseIsN0 = true;
// Do we need to negate the result?
bool NegateResult = false;
if (ConstValue.isNonNegative()) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
// (mul x, 2^N - 1) => (sub (shl x, N), x)
// (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
APInt SCVMinus1 = ShiftedConstValue - 1;
APInt CVPlus1 = ConstValue + 1;
if (SCVMinus1.isPowerOf2()) {
ShiftAmt = SCVMinus1.logBase2();
AddSubOpc = ISD::ADD;
} else if (CVPlus1.isPowerOf2()) {
ShiftAmt = CVPlus1.logBase2();
AddSubOpc = ISD::SUB;
} else
return SDValue();
} else {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
APInt CVNegPlus1 = -ConstValue + 1;
APInt CVNegMinus1 = -ConstValue - 1;
if (CVNegPlus1.isPowerOf2()) {
ShiftAmt = CVNegPlus1.logBase2();
AddSubOpc = ISD::SUB;
ShiftValUseIsN0 = false;
} else if (CVNegMinus1.isPowerOf2()) {
ShiftAmt = CVNegMinus1.logBase2();
AddSubOpc = ISD::ADD;
NegateResult = true;
} else
return SDValue();
}
SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(ShiftAmt, DL, MVT::i64));
SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
assert(!(NegateResult && TrailingZeroes) &&
"NegateResult and TrailingZeroes cannot both be true for now.");
// Negate the result.
if (NegateResult)
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
// Shift the result.
if (TrailingZeroes)
return DAG.getNode(ISD::SHL, DL, VT, Res,
DAG.getConstant(TrailingZeroes, DL, MVT::i64));
return Res;
}
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
SelectionDAG &DAG) {
// Take advantage of vector comparisons producing 0 or -1 in each lane to
// optimize away operation when it's from a constant.
//
// The general transformation is:
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
// AND(VECTOR_CMP(x,y), constant2)
// constant2 = UNARYOP(constant)
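// For example, with UNARYOP == sint_to_fp and constant == <1, 1, 1, 1>,
// each lane of the AND is 0 or 1, so we can instead AND the mask with the
// bitcast of <1.0, 1.0, 1.0, 1.0> (UNARYOP maps the zero lanes to 0.0).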
// Early exit if this isn't a vector operation, the operand of the
// unary operation isn't a bitwise AND, or if the sizes of the operations
// aren't the same.
EVT VT = N->getValueType(0);
if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
return SDValue();
// Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
if (BuildVectorSDNode *BV =
dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
// Bail out if the vector isn't a constant.
if (!BV->isConstant())
return SDValue();
// Everything checks out. Build up the new and improved node.
SDLoc DL(N);
EVT IntVT = BV->getValueType(0);
// Create a new constant of the appropriate type for the transformed
// DAG.
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
N->getOperand(0)->getOperand(0), MaskConst);
SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
return Res;
}
return SDValue();
}
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
// a constant. Vectors only.
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
EVT VT = N->getValueType(0);
if (VT != MVT::f32 && VT != MVT::f64)
return SDValue();
// Only optimize when the source and destination types have the same width.
if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
return SDValue();
// If the result of an integer load is only used by an integer-to-float
// conversion, use an FP load and an AdvSIMD scalar {S|U}CVTF instead.
// This eliminates an "integer-to-vector-move" UOP and improves throughput.
SDValue N0 = N->getOperand(0);
if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
LN0->getPointerInfo(), LN0->getAlign(),
LN0->getMemOperand()->getFlags());
// Make sure successors of the original load stay after it by updating them
// to use the new Chain.
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
unsigned Opcode =
(N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
return DAG.getNode(Opcode, SDLoc(N), VT, Load);
}
return SDValue();
}
/// Fold a floating-point multiply by power of two into floating-point to
/// fixed-point conversion.
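///
/// E.g. (v4i32 fp_to_sint (fmul x, splat(8.0))) becomes the fixed-point
/// conversion intrinsic vcvtfp2fxs(x, #3), i.e. an FCVTZS with 3 fractional
/// bits.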
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
if (!N->getValueType(0).isSimple())
return SDValue();
SDValue Op = N->getOperand(0);
if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL)
return SDValue();
if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector())
return SDValue();
SDValue ConstVec = Op->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
if (FloatBits != 32 && FloatBits != 64 &&
(FloatBits != 16 || !Subtarget->hasFullFP16()))
return SDValue();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
return SDValue();
// Avoid conversions where iN is larger than the float (e.g., float -> i64).
if (IntBits > FloatBits)
return SDValue();
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t Bits = IntBits == 64 ? 64 : 32;
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
if (C == -1 || C == 0 || C > Bits)
return SDValue();
EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
if (!DAG.getTargetLoweringInfo().isTypeLegal(ResTy))
return SDValue();
if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
N->getOpcode() == ISD::FP_TO_UINT_SAT) {
EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (SatVT.getScalarSizeInBits() != IntBits || IntBits != FloatBits)
return SDValue();
}
SDLoc DL(N);
bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
N->getOpcode() == ISD::FP_TO_SINT_SAT);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
: Intrinsic::aarch64_neon_vcvtfp2fxu;
SDValue FixConv =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
// We can handle smaller integers by generating an extra trunc.
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
return FixConv;
}
/// Fold a floating-point divide by power of two into fixed-point to
/// floating-point conversion.
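///
/// E.g. (v4f32 fdiv (sint_to_fp x), splat(8.0)) becomes the fixed-point
/// conversion intrinsic vcvtfxs2fp(x, #3), i.e. an SCVTF with 3 fractional
/// bits.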
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue Op = N->getOperand(0);
unsigned Opc = Op->getOpcode();
if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
!Op.getOperand(0).getValueType().isSimple() ||
(Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
return SDValue();
SDValue ConstVec = N->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
int32_t IntBits = IntTy.getSizeInBits();
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
return SDValue();
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
int32_t FloatBits = FloatTy.getSizeInBits();
if (FloatBits != 32 && FloatBits != 64)
return SDValue();
// Avoid conversions where iN is larger than the float (e.g., i64 -> float).
if (IntBits > FloatBits)
return SDValue();
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
if (C == -1 || C == 0 || C > FloatBits)
return SDValue();
MVT ResTy;
unsigned NumLanes = Op.getValueType().getVectorNumElements();
switch (NumLanes) {
default:
return SDValue();
case 2:
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
break;
case 4:
ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
break;
}
if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
return SDValue();
SDLoc DL(N);
SDValue ConvInput = Op.getOperand(0);
bool IsSigned = Opc == ISD::SINT_TO_FP;
if (IntBits < FloatBits)
ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
ResTy, ConvInput);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
: Intrinsic::aarch64_neon_vcvtfxu2fp;
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
DAG.getConstant(C, DL, MVT::i32));
}
/// An EXTR instruction is made up of two shifts, ORed together. This helper
/// searches for and classifies those shifts.
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
bool &FromHi) {
if (N.getOpcode() == ISD::SHL)
FromHi = false;
else if (N.getOpcode() == ISD::SRL)
FromHi = true;
else
return false;
if (!isa<ConstantSDNode>(N.getOperand(1)))
return false;
ShiftAmount = N->getConstantOperandVal(1);
Src = N->getOperand(0);
return true;
}
/// EXTR instruction extracts a contiguous chunk of bits from two existing
/// registers viewed as a high/low pair. This function looks for the pattern:
/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
/// with an EXTR. Can't quite be done in TableGen because the two immediates
/// aren't independent.
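///
/// For instance, on i32 (or (shl a, 8), (srl b, 24)) is combined into
/// (EXTR a, b, 24).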
static SDValue tryCombineToEXTR(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
assert(N->getOpcode() == ISD::OR && "Unexpected root");
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
SDValue LHS;
uint32_t ShiftLHS = 0;
bool LHSFromHi = false;
if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
return SDValue();
SDValue RHS;
uint32_t ShiftRHS = 0;
bool RHSFromHi = false;
if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
return SDValue();
// If they're both trying to come from the high part of the register, they're
// not really an EXTR.
if (LHSFromHi == RHSFromHi)
return SDValue();
if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
return SDValue();
if (LHSFromHi) {
std::swap(LHS, RHS);
std::swap(ShiftLHS, ShiftRHS);
}
return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
DAG.getConstant(ShiftRHS, DL, MVT::i64));
}
static SDValue tryCombineToBSL(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
if (!VT.isVector())
return SDValue();
// The combining code currently only works for NEON vectors. In particular,
// it does not work for SVE when dealing with vectors wider than 128 bits.
if (!VT.is64BitVector() && !VT.is128BitVector())
return SDValue();
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() != ISD::AND)
return SDValue();
// InstCombine does (not (neg a)) => (add a -1).
// Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
// Loop over all combinations of AND operands.
for (int i = 1; i >= 0; --i) {
for (int j = 1; j >= 0; --j) {
SDValue O0 = N0->getOperand(i);
SDValue O1 = N1->getOperand(j);
SDValue Sub, Add, SubSibling, AddSibling;
// Find a SUB and an ADD operand, one from each AND.
if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
Sub = O0;
Add = O1;
SubSibling = N0->getOperand(1 - i);
AddSibling = N1->getOperand(1 - j);
} else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
Add = O0;
Sub = O1;
AddSibling = N0->getOperand(1 - i);
SubSibling = N1->getOperand(1 - j);
} else
continue;
if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
continue;
// The all-ones constant is always the right-hand operand of the Add.
if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
continue;
if (Sub.getOperand(1) != Add.getOperand(0))
continue;
return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
}
}
// (or (and a b) (and (not a) c)) => (bsl a b c)
// We only have to look for constant vectors here since the general, variable
// case can be handled in TableGen.
unsigned Bits = VT.getScalarSizeInBits();
uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
for (int i = 1; i >= 0; --i)
for (int j = 1; j >= 0; --j) {
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
if (!BVN0 || !BVN1)
continue;
bool FoundMatch = true;
for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
if (!CN0 || !CN1 ||
CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
FoundMatch = false;
break;
}
}
if (FoundMatch)
return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
N0->getOperand(1 - i), N1->getOperand(1 - j));
}
return SDValue();
}
// Given a tree of and/or(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
// convert to csel(ccmp(.., cc0)), depending on cc1:
// (AND (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
// =>
// (CSET cc1 (CCMP x1 y1 !cc1 cc0 cmp0))
//
// (OR (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
// =>
// (CSET cc1 (CCMP x1 y1 cc1 !cc0 cmp0))
static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue CSel0 = N->getOperand(0);
SDValue CSel1 = N->getOperand(1);
if (CSel0.getOpcode() != AArch64ISD::CSEL ||
CSel1.getOpcode() != AArch64ISD::CSEL)
return SDValue();
if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
return SDValue();
if (!isNullConstant(CSel0.getOperand(0)) ||
!isOneConstant(CSel0.getOperand(1)) ||
!isNullConstant(CSel1.getOperand(0)) ||
!isOneConstant(CSel1.getOperand(1)))
return SDValue();
SDValue Cmp0 = CSel0.getOperand(3);
SDValue Cmp1 = CSel1.getOperand(3);
AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
return SDValue();
if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
Cmp0.getOpcode() == AArch64ISD::SUBS) {
std::swap(Cmp0, Cmp1);
std::swap(CC0, CC1);
}
if (Cmp1.getOpcode() != AArch64ISD::SUBS)
return SDValue();
SDLoc DL(N);
SDValue CCmp;
if (N->getOpcode() == ISD::AND) {
AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
} else {
SDLoc DL(N);
AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1);
SDValue Condition = DAG.getConstant(CC0, DL, MVT_CC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
}
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
CCmp);
}
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
if (SDValue Res = tryCombineToEXTR(N, DCI))
return Res;
if (SDValue Res = tryCombineToBSL(N, DCI))
return Res;
return SDValue();
}
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
if (!MemVT.getVectorElementType().isSimple())
return false;
uint64_t MaskForTy = 0ull;
switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
case MVT::i8:
MaskForTy = 0xffull;
break;
case MVT::i16:
MaskForTy = 0xffffull;
break;
case MVT::i32:
MaskForTy = 0xffffffffull;
break;
default:
return false;
break;
}
if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
return false;
}
static SDValue performSVEAndCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
// Zero/any extend of an unsigned unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
SDValue UnpkOp = Src->getOperand(0);
SDValue Dup = N->getOperand(1);
if (Dup.getOpcode() != ISD::SPLAT_VECTOR)
return SDValue();
SDLoc DL(N);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
if (!C)
return SDValue();
uint64_t ExtVal = C->getZExtValue();
// If the mask is fully covered by the unpack, we don't need to push
// a new AND onto the operand
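// E.g. (and (uunpklo (nxv16i8 x)), splat(0xff)) already has its lanes
// zero-extended from i8, so the AND is a no-op and we return the unpack.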
EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
(ExtVal == 0xFFFF && EltTy == MVT::i16) ||
(ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
return Src;
// Truncate to prevent a DUP with an over-wide constant
APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
// Otherwise, make sure we propagate the AND to the operand
// of the unpack
Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
SDValue And = DAG.getNode(ISD::AND, DL,
UnpkOp->getValueType(0), UnpkOp, Dup);
return DAG.getNode(Opc, DL, N->getValueType(0), And);
}
if (!EnableCombineMGatherIntrinsics)
return SDValue();
SDValue Mask = N->getOperand(1);
if (!Src.hasOneUse())
return SDValue();
EVT MemVT;
// SVE load instructions perform an implicit zero-extend, which makes them
// perfect candidates for combining.
switch (Opc) {
case AArch64ISD::LD1_MERGE_ZERO:
case AArch64ISD::LDNF1_MERGE_ZERO:
case AArch64ISD::LDFF1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
break;
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
case AArch64ISD::GLDFF1_MERGE_ZERO:
case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
case AArch64ISD::GLDNT1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
break;
default:
return SDValue();
}
if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
return Src;
return SDValue();
}
static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
if (VT.isScalableVector())
return performSVEAndCombine(N, DCI);
// The combining code below works only for NEON vectors. In particular, it
// does not work for SVE when dealing with vectors wider than 128 bits.
if (!VT.is64BitVector() && !VT.is128BitVector())
return SDValue();
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
if (!BVN)
return SDValue();
// AND does not accept an immediate, so check if we can use a BIC immediate
// instruction instead. We do this here instead of using a (and x, (mvni imm))
// pattern in isel, because some immediates may be lowered to the preferred
// (and x, (movi imm)) form, even though an mvni representation also exists.
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)))
return NewOp;
UndefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)))
return NewOp;
}
return SDValue();
}
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
switch (Opcode) {
case ISD::STRICT_FADD:
case ISD::FADD:
return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
case ISD::ADD:
return VT == MVT::i64;
default:
return false;
}
}
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
AArch64CC::CondCode Cond);
static bool isPredicateCCSettingOp(SDValue N) {
if ((N.getOpcode() == ISD::SETCC) ||
(N.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
(N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
// get_active_lane_mask is lowered to a whilelo instruction.
N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
return true;
return false;
}
// Materialize : i1 = extract_vector_elt t37, Constant:i64<0>
// ... into: "ptrue p, all" + PTEST
static SDValue
performFirstTrueTestVectorCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
// Make sure PTEST sees only legal types (run after legalisation).
if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
return SDValue();
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1 ||
!isNullConstant(N->getOperand(1)))
return SDValue();
// Restrict the DAG combine to cases where we're extracting from a
// flag-setting operation.
if (!isPredicateCCSettingOp(N0))
return SDValue();
// Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0
SelectionDAG &DAG = DCI.DAG;
SDValue Pg = getPTrue(DAG, SDLoc(N), VT, AArch64SVEPredPattern::all);
return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::FIRST_ACTIVE);
}
// Materialize : Idx = (add (mul vscale, NumEls), -1)
// i1 = extract_vector_elt t37, Constant:i64<Idx>
// ... into: "ptrue p, all" + PTEST
static SDValue
performLastTrueTestVectorCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
// Make sure PTEST sees only legal types (run after legalisation).
if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
return SDValue();
SDValue N0 = N->getOperand(0);
EVT OpVT = N0.getValueType();
if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
return SDValue();
// Idx == (add (mul vscale, NumEls), -1)
SDValue Idx = N->getOperand(1);
if (Idx.getOpcode() != ISD::ADD || !isAllOnesConstant(Idx.getOperand(1)))
return SDValue();
SDValue VS = Idx.getOperand(0);
if (VS.getOpcode() != ISD::VSCALE)
return SDValue();
unsigned NumEls = OpVT.getVectorElementCount().getKnownMinValue();
if (VS.getConstantOperandVal(0) != NumEls)
return SDValue();
// Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0
SelectionDAG &DAG = DCI.DAG;
SDValue Pg = getPTrue(DAG, SDLoc(N), OpVT, AArch64SVEPredPattern::all);
return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE);
}
static SDValue
performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
if (SDValue Res = performFirstTrueTestVectorCombine(N, DCI, Subtarget))
return Res;
if (SDValue Res = performLastTrueTestVectorCombine(N, DCI, Subtarget))
return Res;
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
bool IsStrict = N0->isStrictFPOpcode();
// extract(dup x) -> x
if (N0.getOpcode() == AArch64ISD::DUP)
return DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
// Rewrite for pairwise fadd pattern
// (f32 (extract_vector_elt
// (fadd (vXf32 Other)
// (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
// ->
// (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
// (extract_vector_elt (vXf32 Other) 1))
// For strict_fadd we need to make sure the old strict_fadd can be deleted, so
// we can only do this when it's used only by the extract_vector_elt.
if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) &&
(!IsStrict || N0.hasOneUse())) {
SDLoc DL(N0);
SDValue N00 = N0->getOperand(IsStrict ? 1 : 0);
SDValue N01 = N0->getOperand(IsStrict ? 2 : 1);
ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
SDValue Other = N00;
// And handle the commutative case.
if (!Shuffle) {
Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
Other = N01;
}
if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
Other == Shuffle->getOperand(0)) {
SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
DAG.getConstant(0, DL, MVT::i64));
SDValue Extract2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
DAG.getConstant(1, DL, MVT::i64));
if (!IsStrict)
return DAG.getNode(N0->getOpcode(), DL, VT, Extract1, Extract2);
// For strict_fadd we need uses of the final extract_vector to be replaced
// with the strict_fadd, but we also need uses of the chain output of the
// original strict_fadd to use the chain output of the new strict_fadd as
// otherwise it may not be deleted.
SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
{VT, MVT::Other},
{N0->getOperand(0), Extract1, Extract2});
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Ret);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Ret.getValue(1));
return SDValue(N, 0);
}
}
return SDValue();
}
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
if (VT.isScalableVector())
return SDValue();
// Optimize concat_vectors of truncated vectors, where the intermediate
// type is illegal, to avoid said illegality, e.g.,
// (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
// (v2i16 (truncate (v2i64)))))
// ->
// (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
// (v4i32 (bitcast (v2i64))),
// <0, 2, 4, 6>)))
// This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
// on both input and result type, so we might generate worse code.
// On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
N1Opc == ISD::TRUNCATE) {
SDValue N00 = N0->getOperand(0);
SDValue N10 = N1->getOperand(0);
EVT N00VT = N00.getValueType();
if (N00VT == N10.getValueType() &&
(N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
for (size_t i = 0; i < Mask.size(); ++i)
Mask[i] = i * 2;
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getVectorShuffle(
MidVT, dl,
DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
}
}
if (N->getOperand(0).getValueType() == MVT::v4i8) {
// If we have a concat of v4i8 loads, convert them to a buildvector of f32
// loads to prevent having to go through the v4i8 load legalization that
// needs to extend each element into a larger type.
if (N->getNumOperands() % 2 == 0 && all_of(N->op_values(), [](SDValue V) {
if (V.getValueType() != MVT::v4i8)
return false;
if (V.isUndef())
return true;
LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
LD->getExtensionType() == ISD::NON_EXTLOAD;
})) {
EVT NVT =
EVT::getVectorVT(*DAG.getContext(), MVT::f32, N->getNumOperands());
SmallVector<SDValue> Ops;
for (unsigned i = 0; i < N->getNumOperands(); i++) {
SDValue V = N->getOperand(i);
if (V.isUndef())
Ops.push_back(DAG.getUNDEF(MVT::f32));
else {
LoadSDNode *LD = cast<LoadSDNode>(V);
SDValue NewLoad =
DAG.getLoad(MVT::f32, dl, LD->getChain(), LD->getBasePtr(),
LD->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
Ops.push_back(NewLoad);
}
}
return DAG.getBitcast(N->getValueType(0),
DAG.getBuildVector(NVT, dl, Ops));
}
}
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Optimise concat_vectors of two [us]avgceils or [us]avgfloors that use
// extracted subvectors from the same original vectors. Combine these into a
// single avg that operates on the two original vectors.
// avgceil is the target-independent name for rhadd; avgfloor is a hadd.
// Example:
// (concat_vectors (v8i8 (avgceils (extract_subvector (v16i8 OpA, <0>),
// extract_subvector (v16i8 OpB, <0>))),
// (v8i8 (avgceils (extract_subvector (v16i8 OpA, <8>),
// extract_subvector (v16i8 OpB, <8>)))))
// ->
// (v16i8(avgceils(v16i8 OpA, v16i8 OpB)))
if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
(N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS ||
N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS)) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
EVT N00VT = N00.getValueType();
EVT N10VT = N10.getValueType();
if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
SDValue N00Source = N00->getOperand(0);
SDValue N01Source = N01->getOperand(0);
SDValue N10Source = N10->getOperand(0);
SDValue N11Source = N11->getOperand(0);
if (N00Source == N10Source && N01Source == N11Source &&
N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
assert(N0.getValueType() == N1.getValueType());
uint64_t N00Index = N00.getConstantOperandVal(1);
uint64_t N01Index = N01.getConstantOperandVal(1);
uint64_t N10Index = N10.getConstantOperandVal(1);
uint64_t N11Index = N11.getConstantOperandVal(1);
if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
N10Index == N00VT.getVectorNumElements())
return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
}
}
}
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getScalarSizeInBits() == 64);
return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, dl, MVT::i64));
}
// Canonicalise concat_vectors so that the right-hand vector has as few
// bit-casts as possible before its real operation. The primary matching
// destination for these operations will be the narrowing "2" instructions,
// which depend on the operation being performed on this right-hand vector.
// For example,
// (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
return SDValue();
SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
return SDValue();
LLVM_DEBUG(
dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
RHS));
}
static SDValue
performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
EVT VT = N->getValueType(0);
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
return SDValue();
SDValue V = N->getOperand(0);
// NOTE: This combine exists in DAGCombiner, but that version's legality check
// blocks this combine because the non-const case requires custom lowering.
//
// ty1 extract_vector(ty2 splat(const)) -> ty1 splat(const)
if (V.getOpcode() == ISD::SPLAT_VECTOR)
if (isa<ConstantSDNode>(V.getOperand(0)))
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0));
return SDValue();
}
static SDValue
performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
uint64_t IdxVal = N->getConstantOperandVal(2);
EVT VecVT = Vec.getValueType();
EVT SubVT = SubVec.getValueType();
// Only do this for legal fixed vector types.
if (!VecVT.isFixedLengthVector() ||
!DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
return SDValue();
// Ignore widening patterns.
if (IdxVal == 0 && Vec.isUndef())
return SDValue();
// Subvector must be half the width and an "aligned" insertion.
unsigned NumSubElts = SubVT.getVectorNumElements();
if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
(IdxVal != 0 && IdxVal != NumSubElts))
return SDValue();
// Fold insert_subvector -> concat_vectors
// insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
// insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
SDValue Lo, Hi;
if (IdxVal == 0) {
Lo = SubVec;
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
DAG.getVectorIdxConstant(NumSubElts, DL));
} else {
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
DAG.getVectorIdxConstant(0, DL));
Hi = SubVec;
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
}
static SDValue tryCombineFixedPointConvert(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Wait until after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Transform a scalar conversion of a value from a lane extract into a
// lane extract of a vector conversion. E.g., from foo1 to foo2:
// double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
// double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
//
// The second form interacts better with instruction selection and the
// register allocator to avoid cross-class register copies that aren't
// coalescable due to a lane reference.
// Check the operand and see if it originates from a lane extract.
SDValue Op1 = N->getOperand(1);
if (Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
// Yep, no additional predication needed. Perform the transform.
SDValue IID = N->getOperand(0);
SDValue Shift = N->getOperand(2);
SDValue Vec = Op1.getOperand(0);
SDValue Lane = Op1.getOperand(1);
EVT ResTy = N->getValueType(0);
EVT VecResTy;
SDLoc DL(N);
// The vector width should be 128 bits by the time we get here, even
// if it started as 64 bits (the extract_vector handling will have
// done so). Bail if it is not.
if (Vec.getValueSizeInBits() != 128)
return SDValue();
if (Vec.getValueType() == MVT::v4i32)
VecResTy = MVT::v4f32;
else if (Vec.getValueType() == MVT::v2i64)
VecResTy = MVT::v2f64;
else
return SDValue();
SDValue Convert =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
}
// AArch64 high-vector "long" operations are formed by performing the non-high
// version on an extract_subvector of each operand which gets the high half:
//
// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
//
// However, there are cases which don't have an extract_high explicitly, but
// have another operation that can be made compatible with one for free. For
// example:
//
// (dupv64 scalar) --> (extract_high (dup128 scalar))
//
// This routine does the actual conversion of such DUPs, once outer routines
// have determined that everything else is in order.
// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
// similarly here.
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
MVT VT = N.getSimpleValueType();
if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N.getConstantOperandVal(1) == 0)
N = N.getOperand(0);
switch (N.getOpcode()) {
case AArch64ISD::DUP:
case AArch64ISD::DUPLANE8:
case AArch64ISD::DUPLANE16:
case AArch64ISD::DUPLANE32:
case AArch64ISD::DUPLANE64:
case AArch64ISD::MOVI:
case AArch64ISD::MOVIshift:
case AArch64ISD::MOVIedit:
case AArch64ISD::MOVImsl:
case AArch64ISD::MVNIshift:
case AArch64ISD::MVNImsl:
break;
default:
// FMOV could be supported, but isn't very useful, as it would only occur
// if you passed a bitcast floating-point immediate to an eligible long
// integer op (addl, smull, ...).
return SDValue();
}
if (!VT.is64BitVector())
return SDValue();
SDLoc DL(N);
unsigned NumElems = VT.getVectorNumElements();
if (N.getValueType().is64BitVector()) {
MVT ElementTy = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops());
}
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N,
DAG.getConstant(NumElems, DL, MVT::i64));
}
static bool isEssentiallyExtractHighSubvector(SDValue N) {
if (N.getOpcode() == ISD::BITCAST)
N = N.getOperand(0);
if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
if (N.getOperand(0).getValueType().isScalableVector())
return false;
return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
N.getOperand(0).getValueType().getVectorNumElements() / 2;
}
/// Helper structure to keep track of ISD::SET_CC operands.
struct GenericSetCCInfo {
const SDValue *Opnd0;
const SDValue *Opnd1;
ISD::CondCode CC;
};
/// Helper structure to keep track of a SET_CC lowered into AArch64 code.
struct AArch64SetCCInfo {
const SDValue *Cmp;
AArch64CC::CondCode CC;
};
/// Helper structure to keep track of SetCC information.
union SetCCInfo {
GenericSetCCInfo Generic;
AArch64SetCCInfo AArch64;
};
/// Helper structure to be able to read SetCC information. If the IsAArch64
/// field is set to true, Info is an AArch64SetCCInfo; otherwise Info is a
/// GenericSetCCInfo.
struct SetCCInfoAndKind {
SetCCInfo Info;
bool IsAArch64;
};
/// Check whether or not \p Op is a SET_CC operation, either a generic or
/// an AArch64 lowered one.
/// \p SetCCInfo is filled accordingly.
/// \post SetCCInfo is meaningful only when this function returns true.
/// \return True when Op is a kind of SET_CC operation.
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
// If this is a setcc, this is straightforward.
if (Op.getOpcode() == ISD::SETCC) {
SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SetCCInfo.IsAArch64 = false;
return true;
}
// Otherwise, check if this is a matching csel instruction.
// In other words:
// - csel 1, 0, cc
// - csel 0, 1, !cc
if (Op.getOpcode() != AArch64ISD::CSEL)
return false;
// Set the information about the operands.
// TODO: we want the operands of the Cmp not the csel
SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
SetCCInfo.IsAArch64 = true;
SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
// Check that the operands match the constraints:
// (1) Both operands must be constants.
// (2) One must be 1 and the other must be 0.
ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
// Check (1).
if (!TValue || !FValue)
return false;
// Check (2).
if (!TValue->isOne()) {
// Update the comparison when we are interested in !cc.
std::swap(TValue, FValue);
SetCCInfo.Info.AArch64.CC =
AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
}
return TValue->isOne() && FValue->isZero();
}
// Returns true if Op is setcc or zext of setcc.
static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
if (isSetCC(Op, Info))
return true;
return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
isSetCC(Op->getOperand(0), Info));
}
// The folding we want to perform is:
// (add x, [zext] (setcc cc ...) )
// -->
// (csel x, (add x, 1), !cc ...)
//
// The latter will get matched to a CSINC instruction.
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
SDValue LHS = Op->getOperand(0);
SDValue RHS = Op->getOperand(1);
SetCCInfoAndKind InfoAndKind;
// If both operands are a SET_CC, then we don't want to perform this
// folding and create another csel as this results in more instructions
// (and higher register usage).
if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
isSetCCOrZExtSetCC(RHS, InfoAndKind))
return SDValue();
// If neither operand is a SET_CC, give up.
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
std::swap(LHS, RHS);
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
return SDValue();
}
// FIXME: This could be generalized to work for FP comparisons.
EVT CmpVT = InfoAndKind.IsAArch64
? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
: InfoAndKind.Info.Generic.Opnd0->getValueType();
if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
return SDValue();
SDValue CCVal;
SDValue Cmp;
SDLoc dl(Op);
if (InfoAndKind.IsAArch64) {
CCVal = DAG.getConstant(
AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
MVT::i32);
Cmp = *InfoAndKind.Info.AArch64.Cmp;
} else
Cmp = getAArch64Cmp(
*InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
dl);
EVT VT = Op->getValueType(0);
LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
static SDValue performAddUADDVCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Only handle scalar-integer-typed adds here.
if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
return SDValue();
auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
return SDValue();
SDValue Op1 = LHS->getOperand(0);
SDValue Op2 = RHS->getOperand(0);
EVT OpVT1 = Op1.getValueType();
EVT OpVT2 = Op2.getValueType();
if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
Op2.getOpcode() != AArch64ISD::UADDV ||
OpVT1.getVectorElementType() != VT)
return SDValue();
SDValue Val1 = Op1.getOperand(0);
SDValue Val2 = Op2.getOperand(0);
EVT ValVT = Val1->getValueType(0);
SDLoc DL(N);
SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
DAG.getConstant(0, DL, MVT::i64));
}
/// Perform the scalar expression combine in the form of:
/// CSEL(c, 1, cc) + b => CSINC(b+c, b, cc)
/// CSNEG(c, -1, cc) + b => CSINC(b+c, b, cc)
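///
/// For example, (add (csel 5, 1, eq, flags), b) becomes
/// (csinc (add b, 5), b, eq, flags).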
static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Handle commutativity.
if (LHS.getOpcode() != AArch64ISD::CSEL &&
LHS.getOpcode() != AArch64ISD::CSNEG) {
std::swap(LHS, RHS);
if (LHS.getOpcode() != AArch64ISD::CSEL &&
LHS.getOpcode() != AArch64ISD::CSNEG) {
return SDValue();
}
}
if (!LHS.hasOneUse())
return SDValue();
AArch64CC::CondCode AArch64CC =
static_cast<AArch64CC::CondCode>(LHS.getConstantOperandVal(2));
// The CSEL should include a constant one operand, and the CSNEG should
// include a one or negative-one operand.
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(LHS.getOperand(0));
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
if (!CTVal || !CFVal)
return SDValue();
if (!(LHS.getOpcode() == AArch64ISD::CSEL &&
(CTVal->isOne() || CFVal->isOne())) &&
!(LHS.getOpcode() == AArch64ISD::CSNEG &&
(CTVal->isOne() || CFVal->isAllOnes())))
return SDValue();
// Switch CSEL(1, c, cc) to CSEL(c, 1, !cc)
if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
!CFVal->isOne()) {
std::swap(CTVal, CFVal);
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
SDLoc DL(N);
// Switch CSNEG(1, c, cc) to CSNEG(-c, -1, !cc)
if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
!CFVal->isAllOnes()) {
APInt C = -1 * CFVal->getAPIntValue();
CTVal = cast<ConstantSDNode>(DAG.getConstant(C, DL, VT));
CFVal = cast<ConstantSDNode>(DAG.getAllOnesConstant(DL, VT));
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
// It might be neutral for larger constants, as the immediate needs to be
// materialized in a register.
APInt ADDC = CTVal->getAPIntValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isLegalAddImmediate(ADDC.getSExtValue()))
return SDValue();
assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
(LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
"Unexpected constant value");
SDValue NewNode = DAG.getNode(ISD::ADD, DL, VT, RHS, SDValue(CTVal, 0));
SDValue CCVal = DAG.getConstant(AArch64CC, DL, MVT::i32);
SDValue Cmp = LHS.getOperand(3);
return DAG.getNode(AArch64ISD::CSINC, DL, VT, NewNode, RHS, CCVal, Cmp);
}
// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
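// UDOT/SDOT take an accumulator as their first operand, so when the existing
// accumulator is known to be all zeros, the external add can take its place.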
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (N->getOpcode() != ISD::ADD)
return SDValue();
SDValue Dot = N->getOperand(0);
SDValue A = N->getOperand(1);
// Handle commutativity.
auto isZeroDot = [](SDValue Dot) {
return (Dot.getOpcode() == AArch64ISD::UDOT ||
Dot.getOpcode() == AArch64ISD::SDOT) &&
isZerosVector(Dot.getOperand(0).getNode());
};
if (!isZeroDot(Dot))
std::swap(Dot, A);
if (!isZeroDot(Dot))
return SDValue();
return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1),
Dot.getOperand(2));
}
static bool isNegatedInteger(SDValue Op) {
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
}
static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Zero = DAG.getConstant(0, DL, VT);
return DAG.getNode(ISD::SUB, DL, VT, Zero, Op);
}
// Try to fold
//
// (neg (csel X, Y)) -> (csel (neg X), (neg Y))
//
// The folding helps csel to be matched with csneg without generating a
// redundant neg instruction, which includes the negation of the csel
// expansion of the abs node lowered by lowerABS.
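//
// For example (a sketch): given Y = (sub 0, Z),
//   (neg (csel X, Y, cc, flags)) -> (csel (neg X), Z, cc, flags)
// once the (neg (neg Z)) pair cancels, which isel can then match as a
// single csneg.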
static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) {
if (!isNegatedInteger(SDValue(N, 0)))
return SDValue();
SDValue CSel = N->getOperand(1);
if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse())
return SDValue();
SDValue N0 = CSel.getOperand(0);
SDValue N1 = CSel.getOperand(1);
// If neither of them is a negation, the fold is not worthwhile, as it
// would introduce two additional negations while removing only one.
if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
return SDValue();
SDValue N0N = getNegatedInteger(N0, DAG);
SDValue N1N = getNegatedInteger(N1, DAG);
SDLoc DL(N);
EVT VT = CSel.getValueType();
return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2),
CSel.getOperand(3));
}
// The basic add/sub long vector instructions have variants with "2" on the end
// which act on the high-half of their inputs. They are normally matched by
// patterns like:
//
// (add (zeroext (extract_high LHS)),
// (zeroext (extract_high RHS)))
// -> uaddl2 vD, vN, vM
//
// However, if one of the extracts is something like a duplicate, this
// instruction can still be used profitably. This function puts the DAG into a
// more appropriate form for those patterns to trigger.
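//
// For example (a sketch):
//   (add (zeroext (extract_high V)), (zeroext (dup s)))
// is rewritten so the dup becomes the high half of a 128-bit dup:
//   (add (zeroext (extract_high V)), (zeroext (extract_high (dup128 s))))
// which then matches the uaddl2 pattern above.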
static SDValue performAddSubLongCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
MVT VT = N->getSimpleValueType(0);
if (!VT.is128BitVector()) {
if (N->getOpcode() == ISD::ADD)
return performSetccAddFolding(N, DAG);
return SDValue();
}
// Make sure both branches are extended in the same way.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
LHS.getOpcode() != ISD::SIGN_EXTEND) ||
LHS.getOpcode() != RHS.getOpcode())
return SDValue();
unsigned ExtType = LHS.getOpcode();
// It's only worth doing if at least one of the inputs is already an
// extract, but we don't know which it'll be, so we have to try both.
if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
if (!RHS.getNode())
return SDValue();
RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
} else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
if (!LHS.getNode())
return SDValue();
LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
}
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}
static bool isCMP(SDValue Op) {
return Op.getOpcode() == AArch64ISD::SUBS &&
!Op.getNode()->hasAnyUseOfValue(0);
}
// (CSEL 1 0 CC Cond) => CC
// (CSEL 0 1 CC Cond) => !CC
static Optional<AArch64CC::CondCode> getCSETCondCode(SDValue Op) {
if (Op.getOpcode() != AArch64ISD::CSEL)
return None;
auto CC = static_cast<AArch64CC::CondCode>(Op.getConstantOperandVal(2));
if (CC == AArch64CC::AL || CC == AArch64CC::NV)
return None;
SDValue OpLHS = Op.getOperand(0);
SDValue OpRHS = Op.getOperand(1);
if (isOneConstant(OpLHS) && isNullConstant(OpRHS))
return CC;
if (isNullConstant(OpLHS) && isOneConstant(OpRHS))
return getInvertedCondCode(CC);
return None;
}
// (ADC{S} l r (CMP (CSET HS carry) 1)) => (ADC{S} l r carry)
// (SBC{S} l r (CMP 0 (CSET LO carry))) => (SBC{S} l r carry)
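// The CMP merely re-materializes the carry that the inner CSET captured, so
// the ADC/SBC can consume the CSET's original flags operand directly.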
static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd) {
SDValue CmpOp = Op->getOperand(2);
if (!isCMP(CmpOp))
return SDValue();
if (IsAdd) {
if (!isOneConstant(CmpOp.getOperand(1)))
return SDValue();
} else {
if (!isNullConstant(CmpOp.getOperand(0)))
return SDValue();
}
SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1);
auto CC = getCSETCondCode(CsetOp);
if (CC != (IsAdd ? AArch64CC::HS : AArch64CC::LO))
return SDValue();
return DAG.getNode(Op->getOpcode(), SDLoc(Op), Op->getVTList(),
Op->getOperand(0), Op->getOperand(1),
CsetOp.getOperand(3));
}
// (ADC x 0 cond) => (CINC x HS cond)
static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Cond = N->getOperand(2);
if (!isNullConstant(RHS))
return SDValue();
EVT VT = N->getValueType(0);
SDLoc DL(N);
// (CINC x cc cond) <=> (CSINC x x !cc cond)
SDValue CC = DAG.getConstant(AArch64CC::LO, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSINC, DL, VT, LHS, LHS, CC, Cond);
}
// Transform vector add(zext i8 to i32, zext i8 to i32)
// into sext(add(zext(i8 to i16), zext(i8 to i16)) to i32)
// This allows extra uses of saddl/uaddl at the lower vector widths, and
// fewer extends.
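// For example (illustrative): in v16i32 add(zext v16i8 a, zext v16i8 b),
// each extended lane is at most 255, so each i16 sum is at most 510 and
// non-negative; sign-extending the narrower add back to i32 is lossless.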
static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (!VT.isFixedLengthVector() || VT.getSizeInBits() <= 128 ||
(N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) ||
(N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) ||
N->getOperand(0).getOperand(0).getValueType() !=
N->getOperand(1).getOperand(0).getValueType())
return SDValue();
SDValue N0 = N->getOperand(0).getOperand(0);
SDValue N1 = N->getOperand(1).getOperand(0);
EVT InVT = N0.getValueType();
EVT S1 = InVT.getScalarType();
EVT S2 = VT.getScalarType();
if ((S2 == MVT::i32 && S1 == MVT::i8) ||
(S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
SDLoc DL(N);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
S2.getHalfSizedIntegerVT(*DAG.getContext()),
VT.getVectorElementCount());
SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0);
SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1);
SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1);
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewOp);
}
return SDValue();
}
static SDValue performBuildVectorCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
// A build vector of two extracted elements is equivalent to an
// extract subvector where the inner vector is any-extended to the
// extract_vector_elt VT.
// (build_vector (extract_elt_iXX_to_i32 vec Idx+0)
// (extract_elt_iXX_to_i32 vec Idx+1))
// => (extract_subvector (anyext_iXX_to_i32 vec) Idx)
// For now, only consider the v2i32 case, which arises as a result of
// legalization.
if (N->getValueType(0) != MVT::v2i32)
return SDValue();
SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
// Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT.
if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
// Constant index.
isa<ConstantSDNode>(Elt0->getOperand(1)) &&
isa<ConstantSDNode>(Elt1->getOperand(1)) &&
// Both EXTRACT_VECTOR_ELT from same vector...
Elt0->getOperand(0) == Elt1->getOperand(0) &&
// ... and contiguous. First element's index +1 == second element's index.
Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1)) {
SDValue VecToExtend = Elt0->getOperand(0);
EVT ExtVT = VecToExtend.getValueType().changeVectorElementType(MVT::i32);
if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT))
return SDValue();
SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Ext,
SubvectorIdx);
}
return SDValue();
}
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Try to change sum of two reductions.
if (SDValue Val = performAddUADDVCombine(N, DAG))
return Val;
if (SDValue Val = performAddDotCombine(N, DAG))
return Val;
if (SDValue Val = performAddCSelIntoCSinc(N, DAG))
return Val;
if (SDValue Val = performNegCSelCombine(N, DAG))
return Val;
if (SDValue Val = performVectorAddSubExtCombine(N, DAG))
return Val;
return performAddSubLongCombine(N, DCI, DAG);
}
// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.
//
// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
// (aarch64_neon_umull (extract_high (v2i64 vec)))
// (extract_high (v2i64 (dup128 scalar)))))
//
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
// Either node could be a DUP, but it's not worth doing both of them (you'd
// just as well use the non-high version) so look for a corresponding extract
// operation on the other "wing".
if (isEssentiallyExtractHighSubvector(LHS)) {
RHS = tryExtendDUPToExtractHigh(RHS, DAG);
if (!RHS.getNode())
return SDValue();
} else if (isEssentiallyExtractHighSubvector(RHS)) {
LHS = tryExtendDUPToExtractHigh(LHS, DAG);
if (!LHS.getNode())
return SDValue();
}
if (IID == Intrinsic::not_intrinsic)
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
N->getOperand(0), LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
MVT ElemTy = N->getSimpleValueType(0).getScalarType();
unsigned ElemBits = ElemTy.getSizeInBits();
int64_t ShiftAmount;
if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, ElemBits) ||
SplatBitSize != ElemBits)
return SDValue();
ShiftAmount = SplatValue.getSExtValue();
} else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
ShiftAmount = CVN->getSExtValue();
} else
return SDValue();
unsigned Opcode;
bool IsRightShift;
switch (IID) {
default:
llvm_unreachable("Unknown shift intrinsic");
case Intrinsic::aarch64_neon_sqshl:
Opcode = AArch64ISD::SQSHL_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_uqshl:
Opcode = AArch64ISD::UQSHL_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_srshl:
Opcode = AArch64ISD::SRSHR_I;
IsRightShift = true;
break;
case Intrinsic::aarch64_neon_urshl:
Opcode = AArch64ISD::URSHR_I;
IsRightShift = true;
break;
case Intrinsic::aarch64_neon_sqshlu:
Opcode = AArch64ISD::SQSHLU_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
// For positive shift amounts we can use SHL, as ushl/sshl perform a regular
// left shift for positive shift amounts. Below, we only replace the current
// node with VSHL if this condition is met.
Opcode = AArch64ISD::VSHL;
IsRightShift = false;
break;
}
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
SDLoc dl(N);
return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
DAG.getConstant(-ShiftAmount, dl, MVT::i32));
} else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
SDLoc dl(N);
return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
DAG.getConstant(ShiftAmount, dl, MVT::i32));
}
return SDValue();
}
// The CRC32[BH] instructions ignore the high bits of their data operand. Since
// the intrinsics must be legal and take an i32, this means there's almost
// certainly going to be a zext in the DAG which we can eliminate.
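// For example: (crc32b x, (and y, 0xff)) -> (crc32b x, y), because only the
// low byte of the data operand is observed either way.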
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
SDValue AndN = N->getOperand(2);
if (AndN.getOpcode() != ISD::AND)
return SDValue();
ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
if (!CMask || CMask->getZExtValue() != Mask)
return SDValue();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
SelectionDAG &DAG) {
SDLoc dl(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
DAG.getNode(Opc, dl,
N->getOperand(1).getSimpleValueType(),
N->getOperand(1)),
DAG.getConstant(0, dl, MVT::i64));
}
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op1 = N->getOperand(1);
SDValue Op2 = N->getOperand(2);
EVT ScalarTy = Op2.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
ScalarTy = MVT::i32;
// Lower index_vector(base, step) to mul(step, step_vector(1)) + splat(base).
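// For example (illustrative): index_vector(3, 2) over nxv4i32 yields
// {0,1,2,...} * splat(2) + splat(3) = {3,5,7,...}.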
SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
}
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
SDValue Scalar = N->getOperand(3);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
SDValue Passthru = N->getOperand(1);
SDValue Pred = N->getOperand(2);
return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
Pred, Scalar, Passthru);
}
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
LLVMContext &Ctx = *DAG.getContext();
EVT VT = N->getValueType(0);
assert(VT.isScalableVector() && "Expected a scalable vector.");
// Current lowering only supports the SVE-ACLE types.
if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
return SDValue();
unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
EVT ByteVT =
EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
// Convert everything to the domain of EXT (i.e. bytes).
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
DAG.getConstant(ElemSize, dl, MVT::i32));
SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
}
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize())
return SDValue();
SDValue Comparator = N->getOperand(3);
if (Comparator.getOpcode() == AArch64ISD::DUP ||
Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
unsigned IID = getIntrinsicID(N);
EVT VT = N->getValueType(0);
EVT CmpVT = N->getOperand(2).getValueType();
SDValue Pred = N->getOperand(1);
SDValue Imm;
SDLoc DL(N);
switch (IID) {
default:
llvm_unreachable("Called with wrong intrinsic!");
break;
// Signed comparisons
case Intrinsic::aarch64_sve_cmpeq_wide:
case Intrinsic::aarch64_sve_cmpne_wide:
case Intrinsic::aarch64_sve_cmpge_wide:
case Intrinsic::aarch64_sve_cmpgt_wide:
case Intrinsic::aarch64_sve_cmplt_wide:
case Intrinsic::aarch64_sve_cmple_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
int64_t ImmVal = CN->getSExtValue();
if (ImmVal >= -16 && ImmVal <= 15)
Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
else
return SDValue();
}
break;
}
// Unsigned comparisons
case Intrinsic::aarch64_sve_cmphs_wide:
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
case Intrinsic::aarch64_sve_cmpls_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
uint64_t ImmVal = CN->getZExtValue();
if (ImmVal <= 127)
Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
else
return SDValue();
}
break;
}
}
if (!Imm)
return SDValue();
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
N->getOperand(2), Splat, DAG.getCondCode(CC));
}
return SDValue();
}
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
AArch64CC::CondCode Cond) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(Op);
assert(Op.getValueType().isScalableVector() &&
TLI.isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
assert(Op.getValueType() == Pg.getValueType() &&
"Expected same type for PTEST operands");
// Ensure target-specific opcodes use a legal type.
EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue TVal = DAG.getConstant(1, DL, OutVT);
SDValue FVal = DAG.getConstant(0, DL, OutVT);
// Ensure operands have type nxv16i1.
if (Op.getValueType() != MVT::nxv16i1) {
if ((Cond == AArch64CC::ANY_ACTIVE || Cond == AArch64CC::NONE_ACTIVE) &&
isZeroingInactiveLanes(Op))
Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Pg);
else
Pg = getSVEPredicateBitCast(MVT::nxv16i1, Pg, DAG);
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Op);
}
// Set condition code (CC) flags.
SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
// Convert CC to integer based on requested condition.
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
return DAG.getZExtOrTrunc(Res, DL, VT);
}
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue VecToReduce = N->getOperand(2);
// NOTE: The integer reduction's result type is not always linked to the
// operand's element type, so we construct it from the intrinsic's result
// type.
EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue VecToReduce = N->getOperand(2);
EVT ReduceVT = VecToReduce.getValueType();
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue InitVal = N->getOperand(2);
SDValue VecToReduce = N->getOperand(3);
EVT ReduceVT = VecToReduce.getValueType();
// Ordered reductions use the first lane of the result vector as the
// reduction's initial value.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
DAG.getUNDEF(ReduceVT), InitVal, Zero);
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static bool isAllInactivePredicate(SDValue N) {
// Look through cast.
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
N = N.getOperand(0);
return ISD::isConstantSplatVectorAllZeros(N.getNode());
}
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
unsigned NumElts = N.getValueType().getVectorMinNumElements();
// Look through cast.
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
N = N.getOperand(0);
// When reinterpreting from a type with fewer elements the "new" elements
// are not active, so bail if they're likely to be used.
if (N.getValueType().getVectorMinNumElements() < NumElts)
return false;
}
if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
return true;
// "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
// or smaller than the implicit element type represented by N.
// NOTE: A larger element count implies a smaller element type.
if (N.getOpcode() == AArch64ISD::PTRUE &&
N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
return N.getValueType().getVectorMinNumElements() >= NumElts;
// If we're compiling for a specific vector-length, we can check if the
// pattern's VL equals that of the scalable vector at runtime.
if (N.getOpcode() == AArch64ISD::PTRUE) {
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
if (MaxSVESize && MinSVESize == MaxSVESize) {
unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
unsigned PatNumElts =
getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
return PatNumElts == (NumElts * VScale);
}
}
return false;
}
// If a merged operation has no inactive lanes we can relax it to a predicated
// or unpredicated operation, which potentially allows better isel (perhaps
// using immediate forms) or relaxing register reuse requirements.
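//
// For example (illustrative): sve.add(pg, a, b) with an all-active pg becomes
// a plain ISD::ADD, while sve.mul(pg, a, b) becomes MUL_PRED; with no
// inactive lanes, no result needs to be merged from the first operand.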
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
SelectionDAG &DAG, bool UnpredOp = false,
bool SwapOperands = false) {
assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
SDValue Pg = N->getOperand(1);
SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
// Recognize the ISD ways of specifying an all-active predicate.
if (isAllActivePredicate(DAG, Pg)) {
if (UnpredOp)
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
}
// FUTURE: SplatVector(true)
return SDValue();
}
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
unsigned IID = getIntrinsicID(N);
switch (IID) {
default:
break;
case Intrinsic::get_active_lane_mask: {
SDValue Res = SDValue();
EVT VT = N->getValueType(0);
if (VT.isFixedLengthVector()) {
// We can use the SVE whilelo instruction to lower this intrinsic by
// creating the appropriate sequence of scalable vector operations and
// then extracting a fixed-width subvector from the scalable vector.
SDLoc DL(N);
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
EVT WhileVT = EVT::getVectorVT(
*DAG.getContext(), MVT::i1,
ElementCount::getScalable(VT.getVectorNumElements()));
// Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
EVT PromVT = getPromotedVTForPredicate(WhileVT);
// Get the fixed-width equivalent of PromVT for extraction.
EVT ExtVT =
EVT::getVectorVT(*DAG.getContext(), PromVT.getVectorElementType(),
VT.getVectorElementCount());
Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
N->getOperand(1), N->getOperand(2));
Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res,
DAG.getConstant(0, DL, MVT::i64));
Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}
return Res;
}
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
case Intrinsic::aarch64_neon_saddv:
return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
case Intrinsic::aarch64_neon_uaddv:
return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
case Intrinsic::aarch64_neon_sminv:
return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
case Intrinsic::aarch64_neon_uminv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
case Intrinsic::aarch64_neon_smaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmaxnm:
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fminnm:
return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_smull:
return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_umull:
return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
return tryCombineLongOpWithDup(IID, N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
case Intrinsic::aarch64_neon_srshl:
case Intrinsic::aarch64_neon_urshl:
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
return tryCombineShiftImm(IID, N, DAG);
case Intrinsic::aarch64_crc32b:
case Intrinsic::aarch64_crc32cb:
return tryCombineCRC32(0xff, N, DAG);
case Intrinsic::aarch64_crc32h:
case Intrinsic::aarch64_crc32ch:
return tryCombineCRC32(0xffff, N, DAG);
case Intrinsic::aarch64_sve_saddv:
// There is no i64 version of SADDV because the sign is irrelevant.
if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
else
return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
case Intrinsic::aarch64_sve_uaddv:
return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
case Intrinsic::aarch64_sve_smaxv:
return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_umaxv:
return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_sminv:
return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
case Intrinsic::aarch64_sve_uminv:
return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
case Intrinsic::aarch64_sve_orv:
return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
case Intrinsic::aarch64_sve_eorv:
return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
case Intrinsic::aarch64_sve_andv:
return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
case Intrinsic::aarch64_sve_index:
return LowerSVEIntrinsicIndex(N, DAG);
case Intrinsic::aarch64_sve_dup:
return LowerSVEIntrinsicDUP(N, DAG);
case Intrinsic::aarch64_sve_dup_x:
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
N->getOperand(1));
case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);
case Intrinsic::aarch64_sve_mul:
return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
case Intrinsic::aarch64_sve_smulh:
return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
case Intrinsic::aarch64_sve_umulh:
return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
case Intrinsic::aarch64_sve_smin:
return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
case Intrinsic::aarch64_sve_umin:
return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
case Intrinsic::aarch64_sve_smax:
return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
case Intrinsic::aarch64_sve_umax:
return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
case Intrinsic::aarch64_sve_lsl:
return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
case Intrinsic::aarch64_sve_lsr:
return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
case Intrinsic::aarch64_sve_asr:
return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
case Intrinsic::aarch64_sve_fadd:
return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
case Intrinsic::aarch64_sve_fsub:
return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
case Intrinsic::aarch64_sve_fmul:
return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
case Intrinsic::aarch64_sve_add:
return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
case Intrinsic::aarch64_sve_sub:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
case Intrinsic::aarch64_sve_subr:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
case Intrinsic::aarch64_sve_and:
return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
case Intrinsic::aarch64_sve_bic:
return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
case Intrinsic::aarch64_sve_eor:
return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
case Intrinsic::aarch64_sve_orr:
return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
case Intrinsic::aarch64_sve_sabd:
return convertMergedOpToPredOp(N, ISD::ABDS, DAG, true);
case Intrinsic::aarch64_sve_uabd:
return convertMergedOpToPredOp(N, ISD::ABDU, DAG, true);
case Intrinsic::aarch64_sve_sqadd:
return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
case Intrinsic::aarch64_sve_sqsub:
return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
case Intrinsic::aarch64_sve_uqadd:
return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
case Intrinsic::aarch64_sve_uqsub:
return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
case Intrinsic::aarch64_sve_sqadd_x:
return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_sqsub_x:
return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqadd_x:
return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqsub_x:
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_asrd:
return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
break;
case Intrinsic::aarch64_sve_cmphi:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
break;
case Intrinsic::aarch64_sve_fcmpge:
case Intrinsic::aarch64_sve_cmpge:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGE));
break;
case Intrinsic::aarch64_sve_fcmpgt:
case Intrinsic::aarch64_sve_cmpgt:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGT));
break;
case Intrinsic::aarch64_sve_fcmpeq:
case Intrinsic::aarch64_sve_cmpeq:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
break;
case Intrinsic::aarch64_sve_fcmpne:
case Intrinsic::aarch64_sve_cmpne:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETNE));
break;
case Intrinsic::aarch64_sve_fcmpuo:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUO));
break;
case Intrinsic::aarch64_sve_fadda:
return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
case Intrinsic::aarch64_sve_faddv:
return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxnmv:
return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxv:
return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_fminnmv:
return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fminv:
return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
case Intrinsic::aarch64_sve_sel:
return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmpeq_wide:
return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
case Intrinsic::aarch64_sve_cmpne_wide:
return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpge_wide:
return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpgt_wide:
return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplt_wide:
return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
case Intrinsic::aarch64_sve_cmple_wide:
return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphs_wide:
return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphi_wide:
return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplo_wide:
return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
case Intrinsic::aarch64_sve_cmpls_wide:
return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);
case Intrinsic::aarch64_sve_ptest_first:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::FIRST_ACTIVE);
case Intrinsic::aarch64_sve_ptest_last:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::LAST_ACTIVE);
}
return SDValue();
}
static bool isCheapToExtend(const SDValue &N) {
unsigned OC = N->getOpcode();
return OC == ISD::LOAD || OC == ISD::MLOAD ||
ISD::isConstantSplatVectorAllZeros(N.getNode());
}
static SDValue
performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// If we have (sext (setcc A B)) and A and B are cheap to extend,
// we can move the sext into the arguments and have the same result. For
// example, if A and B are both loads, we can make those extending loads and
// avoid an extra instruction. This pattern appears often in VLS code
// generation where the inputs to the setcc have a different size from the
// instruction that wants to use the result of the setcc.
assert(N->getOpcode() == ISD::SIGN_EXTEND &&
N->getOperand(0)->getOpcode() == ISD::SETCC);
const SDValue SetCC = N->getOperand(0);
const SDValue CCOp0 = SetCC.getOperand(0);
const SDValue CCOp1 = SetCC.getOperand(1);
if (!CCOp0->getValueType(0).isInteger() ||
!CCOp1->getValueType(0).isInteger())
return SDValue();
ISD::CondCode Code =
cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
ISD::NodeType ExtType =
isSignedIntSetCC(Code) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
if (isCheapToExtend(SetCC.getOperand(0)) &&
isCheapToExtend(SetCC.getOperand(1))) {
const SDValue Ext1 =
DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
const SDValue Ext2 =
DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
return DAG.getSetCC(
SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
}
return SDValue();
}
static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
// we can convert that DUP into another extract_high (of a bigger DUP), which
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
(N->getOperand(0).getOpcode() == ISD::ABDU ||
N->getOperand(0).getOpcode() == ISD::ABDS)) {
SDNode *ABDNode = N->getOperand(0).getNode();
SDValue NewABD =
tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
if (!NewABD.getNode())
return SDValue();
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
if (N->getValueType(0).isFixedLengthVector() &&
N->getOpcode() == ISD::SIGN_EXTEND &&
N->getOperand(0)->getOpcode() == ISD::SETCC)
return performSignExtendSetCCCombine(N, DCI, DAG);
return SDValue();
}
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
SDValue SplatVal, unsigned NumVecElts) {
assert(!St.isTruncatingStore() && "cannot split truncating vector store");
Align OrigAlignment = St.getAlign();
unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
// Create scalar stores. This is at least as good as the code sequence for a
// split unaligned store which is a dup.s, ext.b, and two stores.
// Most of the time the three stores should be replaced by store pair
// instructions (stp).
SDLoc DL(&St);
SDValue BasePtr = St.getBasePtr();
uint64_t BaseOffset = 0;
const MachinePointerInfo &PtrInfo = St.getPointerInfo();
SDValue NewST1 =
DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
OrigAlignment, St.getMemOperand()->getFlags());
// As this is in ISel, we will not merge this add, which may degrade results.
if (BasePtr->getOpcode() == ISD::ADD &&
isa<ConstantSDNode>(BasePtr->getOperand(1))) {
BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
BasePtr = BasePtr->getOperand(0);
}
unsigned Offset = EltOffset;
while (--NumVecElts) {
Align Alignment = commonAlignment(OrigAlignment, Offset);
SDValue OffsetPtr =
DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
PtrInfo.getWithOffset(Offset), Alignment,
St.getMemOperand()->getFlags());
Offset += EltOffset;
}
return NewST1;
}
// Returns an SVE type that ContentTy can be trivially sign or zero extended
// into.
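//
// For example: an nxv2i16 value holds one 16-bit element per 64-bit lane, so
// its natural container is nxv2i64.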
static MVT getSVEContainerType(EVT ContentTy) {
assert(ContentTy.isSimple() && "No SVE containers for extended types");
switch (ContentTy.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("No known SVE container for this MVT type");
case MVT::nxv2i8:
case MVT::nxv2i16:
case MVT::nxv2i32:
case MVT::nxv2i64:
case MVT::nxv2f32:
case MVT::nxv2f64:
return MVT::nxv2i64;
case MVT::nxv4i8:
case MVT::nxv4i16:
case MVT::nxv4i32:
case MVT::nxv4f32:
return MVT::nxv4i32;
case MVT::nxv8i8:
case MVT::nxv8i16:
case MVT::nxv8f16:
case MVT::nxv8bf16:
return MVT::nxv8i16;
case MVT::nxv16i8:
return MVT::nxv16i8;
}
}
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
EVT ContainerVT = VT;
if (ContainerVT.isInteger())
ContainerVT = getSVEContainerType(ContainerVT);
SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
SDValue Ops[] = { N->getOperand(0), // Chain
N->getOperand(2), // Pg
N->getOperand(3), // Base
DAG.getValueType(VT) };
SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (ContainerVT.isInteger() && (VT != ContainerVT))
Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
return DAG.getMergeValues({ Load, LoadChain }, DL);
}
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT PtrTy = N->getOperand(3).getValueType();
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
auto *MINode = cast<MemIntrinsicSDNode>(N);
SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
MINode->getOperand(3), DAG.getUNDEF(PtrTy),
MINode->getOperand(2), PassThru,
MINode->getMemoryVT(), MINode->getMemOperand(),
ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
if (VT.isFloatingPoint()) {
SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
return DAG.getMergeValues(Ops, DL);
}
return L;
}
template <unsigned Opcode>
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
"Unsupported opcode.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (VT.isFloatingPoint())
Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Data = N->getOperand(2);
EVT DataVT = Data.getValueType();
EVT HwSrcVt = getSVEContainerType(DataVT);
SDValue InputVT = DAG.getValueType(DataVT);
if (DataVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
SDValue SrcNew;
if (Data.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
SDValue Ops[] = { N->getOperand(0), // Chain
SrcNew,
N->getOperand(4), // Base
N->getOperand(3), // Pg
InputVT
};
return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
}
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Data = N->getOperand(2);
EVT DataVT = Data.getValueType();
EVT PtrTy = N->getOperand(4).getValueType();
if (DataVT.isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
auto *MINode = cast<MemIntrinsicSDNode>(N);
return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
DAG.getUNDEF(PtrTy), MINode->getOperand(3),
MINode->getMemoryVT(), MINode->getMemOperand(),
ISD::UNINDEXED, false, false);
}
/// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
/// load store optimizer pass will merge them to store pair stores. This should
/// be better than a movi to create the vector zero followed by a vector store
/// if the zero constant is not re-used, since one instruction and one register
/// live range will be removed.
///
/// For example, the final generated code should be:
///
/// stp xzr, xzr, [x0]
///
/// instead of:
///
/// movi v0.2d, #0
/// str q0, [x0]
///
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Avoid scalarizing zero splat stores for scalable vectors.
if (VT.isScalableVector())
return SDValue();
// It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
// 2, 3 or 4 i32 elements.
int NumVecElts = VT.getVectorNumElements();
if (!(((NumVecElts == 2 || NumVecElts == 3) &&
VT.getVectorElementType().getSizeInBits() == 64) ||
((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
VT.getVectorElementType().getSizeInBits() == 32)))
return SDValue();
if (StVal.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
// If the zero constant has more than one use then the vector store could be
// better, since the constant mov will be amortized and stp q instructions
// should be formed.
if (!StVal.hasOneUse())
return SDValue();
// If the store is truncating then it's going down to i16 or smaller, which
// means it can be implemented in a single store anyway.
if (St.isTruncatingStore())
return SDValue();
// If the immediate offset of the address operand is too large for the stp
// instruction, then bail out.
if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
if (Offset < -512 || Offset > 504)
return SDValue();
}
for (int I = 0; I < NumVecElts; ++I) {
SDValue EltVal = StVal.getOperand(I);
if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
return SDValue();
}
// Use a CopyFromReg WZR/XZR here to prevent
// DAGCombiner::MergeConsecutiveStores from undoing this transformation.
SDLoc DL(&St);
unsigned ZeroReg;
EVT ZeroVT;
if (VT.getVectorElementType().getSizeInBits() == 32) {
ZeroReg = AArch64::WZR;
ZeroVT = MVT::i32;
} else {
ZeroReg = AArch64::XZR;
ZeroVT = MVT::i64;
}
SDValue SplatVal =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
/// Replace a splat of a scalar to a vector store by scalar stores of the scalar
/// value. The load store optimizer pass will merge them to store pair stores.
/// This has better performance than a splat of the scalar followed by a split
/// vector store. Even if the stores are not merged it is four stores vs a dup,
/// followed by an ext.b and two stores.
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Don't replace floating point stores; they may not be transformed to
// stp because of the store pair suppress pass.
if (VT.isFloatingPoint())
return SDValue();
// We can express a splat as store pair(s) for 2 or 4 elements.
unsigned NumVecElts = VT.getVectorNumElements();
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
// If the store is truncating then it's going down to i16 or smaller, which
// means it can be implemented in a single store anyway.
if (St.isTruncatingStore())
return SDValue();
// Check that this is a splat.
// Make sure that each of the relevant vector element locations are inserted
// to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
SDValue SplatVal;
for (unsigned I = 0; I < NumVecElts; ++I) {
// Check for insert vector elements.
if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
// Check that same value is inserted at each vector element.
if (I == 0)
SplatVal = StVal.getOperand(1);
else if (StVal.getOperand(1) != SplatVal)
return SDValue();
// Check insert element index.
ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
if (!CIndex)
return SDValue();
uint64_t IndexVal = CIndex->getZExtValue();
if (IndexVal >= NumVecElts)
return SDValue();
IndexNotInserted.reset(IndexVal);
StVal = StVal.getOperand(0);
}
// Check that all vector element locations were inserted to.
if (IndexNotInserted.any())
return SDValue();
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
StoreSDNode *S = cast<StoreSDNode>(N);
if (S->isVolatile() || S->isIndexed())
return SDValue();
SDValue StVal = S->getValue();
EVT VT = StVal.getValueType();
if (!VT.isFixedLengthVector())
return SDValue();
// If we get a splat of zeros, convert this vector store to a store of
// scalars. They will be merged into store pairs of xzr, thereby removing one
// instruction and one register.
if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
return ReplacedZeroSplat;
// FIXME: The logic for deciding if an unaligned store should be split should
// be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
// a call to that function here.
if (!Subtarget->isMisaligned128StoreSlow())
return SDValue();
// Don't split at -Oz.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
// Don't split v2i64 vectors. Memcpy lowering produces those and splitting
// those up regresses performance on micro-benchmarks and olden/bh.
if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
return SDValue();
// Split unaligned 16B stores. They are terrible for performance.
// Don't split stores with alignment of 1 or 2. Code that uses clang vector
// extensions can use this to mark that it does not want splitting to happen
// (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
// eliminating alignment hazards is only 1 in 8 for alignment of 2.
if (VT.getSizeInBits() != 128 || S->getAlign() >= Align(16) ||
S->getAlign() <= Align(2))
return SDValue();
// If we get a splat of a scalar, convert this vector store to a store of
// scalars. They will be merged into store pairs thereby removing two
// instructions.
if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
return ReplacedSplat;
SDLoc DL(S);
// Split VT into two.
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
unsigned NumElts = HalfVT.getVectorNumElements();
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(0, DL, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(NumElts, DL, MVT::i64));
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
S->getAlign(), S->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(8, DL, MVT::i64));
return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
S->getPointerInfo(), S->getAlign(),
S->getMemOperand()->getFlags());
}
static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
// splice(pg, op1, undef) -> op1
if (N->getOperand(2).isUndef())
return N->getOperand(1);
return SDValue();
}
static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
N->getOpcode() == AArch64ISD::UUNPKLO) &&
"Unexpected Opcode!");
// uunpklo/hi undef -> undef
if (N->getOperand(0).isUndef())
return DAG.getUNDEF(N->getValueType(0));
// If this is a masked load followed by an UUNPKLO, fold this into a masked
// extending load. We can do this even if this is already a masked
// {z,}extload.
if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
N->getOpcode() == AArch64ISD::UUNPKLO) {
MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N->getOperand(0));
SDValue Mask = MLD->getMask();
SDLoc DL(N);
if (MLD->isUnindexed() && MLD->getExtensionType() != ISD::SEXTLOAD &&
SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
(MLD->getPassThru()->isUndef() ||
isZerosVector(MLD->getPassThru().getNode()))) {
unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
unsigned PgPattern = Mask->getConstantOperandVal(0);
EVT VT = N->getValueType(0);
// Ensure we can double the size of the predicate pattern.
unsigned NumElts = getNumElementsFromSVEPredPattern(PgPattern);
if (NumElts &&
NumElts * VT.getVectorElementType().getSizeInBits() <= MinSVESize) {
Mask =
getPTrue(DAG, DL, VT.changeVectorElementType(MVT::i1), PgPattern);
SDValue PassThru = DAG.getConstant(0, DL, VT);
SDValue NewLoad = DAG.getMaskedLoad(
VT, DL, MLD->getChain(), MLD->getBasePtr(), MLD->getOffset(), Mask,
PassThru, MLD->getMemoryVT(), MLD->getMemOperand(),
MLD->getAddressingMode(), ISD::ZEXTLOAD);
DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), NewLoad.getValue(1));
return NewLoad;
}
}
}
return SDValue();
}
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
EVT ResVT = N->getValueType(0);
// uzp1(x, undef) -> concat(truncate(x), undef)
if (Op1.getOpcode() == ISD::UNDEF) {
EVT BCVT = MVT::Other, HalfVT = MVT::Other;
switch (ResVT.getSimpleVT().SimpleTy) {
default:
break;
case MVT::v16i8:
BCVT = MVT::v8i16;
HalfVT = MVT::v8i8;
break;
case MVT::v8i16:
BCVT = MVT::v4i32;
HalfVT = MVT::v4i16;
break;
case MVT::v4i32:
BCVT = MVT::v2i64;
HalfVT = MVT::v2i32;
break;
}
if (BCVT != MVT::Other) {
SDValue BC = DAG.getBitcast(BCVT, Op0);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, BC);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Trunc,
DAG.getUNDEF(HalfVT));
}
}
// uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
SDValue X = Op0.getOperand(0).getOperand(0);
return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
}
}
// uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
SDValue Z = Op1.getOperand(0).getOperand(1);
return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
}
}
return SDValue();
}
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
(Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
"Invalid opcode.");
const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Pg = N->getOperand(1);
SDValue Base = N->getOperand(2);
SDValue Offset = N->getOperand(3);
SDValue Ty = N->getOperand(4);
EVT ResVT = N->getValueType(0);
const auto OffsetOpc = Offset.getOpcode();
const bool OffsetIsZExt =
OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
const bool OffsetIsSExt =
OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
// Fold sign/zero extensions of vector offsets into GLD1 nodes where possible.
if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
SDValue ExtPg = Offset.getOperand(0);
VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
// If the predicate for the sign- or zero-extended offset is the
// same as the predicate used for this load and the sign-/zero-extension
// was from 32 bits...
if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
SDValue UnextendedOffset = Offset.getOperand(1);
unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
if (Signed)
NewOpc = getSignExtendedGatherOpcode(NewOpc);
return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
{Chain, Pg, Base, UnextendedOffset, Ty});
}
}
return SDValue();
}
/// Optimize a vector shift instruction and its operand if shifted out
/// bits are not used.
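///
/// For example (illustrative): in (VLSHR (add x, y), 8) on v8i16, the low 8
/// bits of the add are shifted out, so SimplifyDemandedBits can simplify the
/// operands knowing only the high bits are demanded.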
static SDValue performVectorShiftCombine(SDNode *N,
const AArch64TargetLowering &TLI,
TargetLowering::DAGCombinerInfo &DCI) {
assert(N->getOpcode() == AArch64ISD::VASHR ||
N->getOpcode() == AArch64ISD::VLSHR);
SDValue Op = N->getOperand(0);
unsigned OpScalarSize = Op.getScalarValueSizeInBits();
unsigned ShiftImm = N->getConstantOperandVal(1);
assert(OpScalarSize > ShiftImm && "Invalid shift imm");
APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
APInt DemandedMask = ~ShiftedOutBits;
if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
return SDValue(N, 0);
return SDValue();
}
static SDValue performSunpkloCombine(SDNode *N, SelectionDAG &DAG) {
// sunpklo(sext(pred)) -> sext(extract_low_half(pred))
// This transform works in partnership with performSetCCPunpkCombine to
// remove unnecessary transfer of predicates into standard registers and back
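// Illustrative example (editorial note, assumed types):
// (nxv2i64 sunpklo (nxv4i32 sign_extend (nxv4i1 p)))
// -> (nxv2i64 sign_extend (nxv2i1 extract_subvector (p, 0)))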
if (N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND &&
N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
MVT::i1) {
SDValue CC = N->getOperand(0)->getOperand(0);
auto VT = CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
SDValue Unpk = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, CC,
DAG.getVectorIdxConstant(0, SDLoc(N)));
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Unpk);
}
return SDValue();
}
/// Target-specific DAG combine function for post-increment LD1 (lane) and
/// post-increment LD1R.
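/// For example (editorial illustration): a (v4i32 insert_vector_elt v,
/// (load x), lane) whose address x is also incremented by the element size
/// (add x, #4) can become a single LD1LANEpost node that loads, inserts and
/// writes back x + 4.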
static SDValue performPostLD1Combine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
bool IsLaneOp) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() && !VT.is64BitVector())
return SDValue();
unsigned LoadIdx = IsLaneOp ? 1 : 0;
SDNode *LD = N->getOperand(LoadIdx).getNode();
// If it is not a LOAD node, we cannot do this combine.
if (LD->getOpcode() != ISD::LOAD)
return SDValue();
// The vector lane must be a constant in the LD1LANE opcode.
SDValue Lane;
if (IsLaneOp) {
Lane = N->getOperand(2);
auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
}
LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
EVT MemVT = LoadSDN->getMemoryVT();
// Check if memory operand is the same type as the vector element.
if (MemVT != VT.getVectorElementType())
return SDValue();
// Check if there are other uses. If so, do not combine as it will introduce
// an extra load.
for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
++UI) {
if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
continue;
if (*UI != N)
return SDValue();
}
SDValue Addr = LD->getOperand(1);
SDValue Vector = N->getOperand(0);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD
|| UI.getUse().getResNo() != Addr.getResNo())
continue;
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
unsigned NumBytes = VT.getScalarSizeInBits() / 8;
if (IncVal != NumBytes)
continue;
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
// To avoid constructing a cycle, make sure that neither the load nor the add
// is a predecessor of the other or of the Vector.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(Addr.getNode());
Worklist.push_back(User);
Worklist.push_back(LD);
Worklist.push_back(Vector.getNode());
if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
SmallVector<SDValue, 8> Ops;
Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
Ops.push_back(Vector); // The vector to be inserted
Ops.push_back(Lane); // The lane to be inserted in the vector
}
Ops.push_back(Addr);
Ops.push_back(Inc);
EVT Tys[3] = { VT, MVT::i64, MVT::Other };
SDVTList SDTys = DAG.getVTList(Tys);
unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
MemVT,
LoadSDN->getMemOperand());
// Update the uses.
SDValue NewResults[] = {
SDValue(LD, 0), // The result of load
SDValue(UpdN.getNode(), 2) // Chain
};
DCI.CombineTo(LD, NewResults);
DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
break;
}
return SDValue();
}
/// Simplify ``Addr`` given that the top byte of it is ignored by HW during
/// address translation.
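/// For example (editorial illustration): with TBI only address bits [55:0]
/// take part in translation, so a tag-clearing mask such as
/// (and addr, 0x00ffffffffffffff) feeding a load/store can be removed.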
static bool performTBISimplification(SDValue Addr,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
APInt DemandedMask = APInt::getLowBitsSet(64, 56);
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
return true;
}
return false;
}
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
"Expected STORE dag node in input!");
if (auto Store = dyn_cast<StoreSDNode>(N)) {
if (!Store->isTruncatingStore() || Store->isIndexed())
return SDValue();
SDValue Ext = Store->getValue();
auto ExtOpCode = Ext.getOpcode();
if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
ExtOpCode != ISD::ANY_EXTEND)
return SDValue();
SDValue Orig = Ext->getOperand(0);
if (Store->getMemoryVT() != Orig.getValueType())
return SDValue();
return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
Store->getBasePtr(), Store->getMemOperand());
}
return SDValue();
}
static SDValue performSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
// If this is an FP_ROUND followed by a store, fold this into a truncating
// store. We can do this even if this is already a truncstore.
// We purposefully don't care about legality of the nodes here as we know
// they can be split down into something legal.
if (DCI.isBeforeLegalizeOps() && Value.getOpcode() == ISD::FP_ROUND &&
Value.getNode()->hasOneUse() && ST->isUnindexed() &&
Subtarget->useSVEForFixedLengthVectors() &&
Value.getValueType().isFixedLengthVector() &&
Value.getValueType().getFixedSizeInBits() >=
Subtarget->getMinSVEVectorSizeInBits())
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
ST->getMemoryVT(), ST->getMemOperand());
if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
if (Subtarget->supportsAddressTopByteIgnored() &&
performTBISimplification(N->getOperand(2), DCI, DAG))
return SDValue(N, 0);
if (SDValue Store = foldTruncStoreOfExt(DAG, N))
return Store;
return SDValue();
}
static SDValue performMSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Value = MST->getValue();
SDValue Mask = MST->getMask();
SDLoc DL(N);
// If this is a UZP1 followed by a masked store, fold this into a masked
// truncating store. We can do this even if this is already a masked
// truncstore.
if (Value.getOpcode() == AArch64ISD::UZP1 && Value->hasOneUse() &&
MST->isUnindexed() && Mask->getOpcode() == AArch64ISD::PTRUE &&
Value.getValueType().isInteger()) {
Value = Value.getOperand(0);
if (Value.getOpcode() == ISD::BITCAST) {
EVT HalfVT =
Value.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
EVT InVT = Value.getOperand(0).getValueType();
if (HalfVT.widenIntegerVectorElementType(*DAG.getContext()) == InVT) {
unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
unsigned PgPattern = Mask->getConstantOperandVal(0);
// Ensure we can double the size of the predicate pattern
unsigned NumElts = getNumElementsFromSVEPredPattern(PgPattern);
if (NumElts && NumElts * InVT.getVectorElementType().getSizeInBits() <=
MinSVESize) {
Mask = getPTrue(DAG, DL, InVT.changeVectorElementType(MVT::i1),
PgPattern);
return DAG.getMaskedStore(MST->getChain(), DL, Value.getOperand(0),
MST->getBasePtr(), MST->getOffset(), Mask,
MST->getMemoryVT(), MST->getMemOperand(),
MST->getAddressingMode(),
/*IsTruncating=*/true);
}
}
}
}
return SDValue();
}
/// \return true if part of the index was folded into the Base.
static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale,
SDLoc DL, SelectionDAG &DAG) {
// This function assumes a vector of i64 indices.
EVT IndexVT = Index.getValueType();
if (!IndexVT.isVector() || IndexVT.getVectorElementType() != MVT::i64)
return false;
// Simplify:
// BasePtr = Ptr
// Index = X + splat(Offset)
// ->
// BasePtr = Ptr + Offset * scale.
// Index = X
if (Index.getOpcode() == ISD::ADD) {
if (auto Offset = DAG.getSplatValue(Index.getOperand(1))) {
Offset = DAG.getNode(ISD::MUL, DL, MVT::i64, Offset, Scale);
BasePtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, Offset);
Index = Index.getOperand(0);
return true;
}
}
// Simplify:
// BasePtr = Ptr
// Index = (X + splat(Offset)) << splat(Shift)
// ->
// BasePtr = Ptr + (Offset << Shift) * Scale
// Index = X << splat(Shift)
if (Index.getOpcode() == ISD::SHL &&
Index.getOperand(0).getOpcode() == ISD::ADD) {
SDValue Add = Index.getOperand(0);
SDValue ShiftOp = Index.getOperand(1);
SDValue OffsetOp = Add.getOperand(1);
if (auto Shift = DAG.getSplatValue(ShiftOp))
if (auto Offset = DAG.getSplatValue(OffsetOp)) {
Offset = DAG.getNode(ISD::SHL, DL, MVT::i64, Offset, Shift);
Offset = DAG.getNode(ISD::MUL, DL, MVT::i64, Offset, Scale);
BasePtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, Offset);
Index = DAG.getNode(ISD::SHL, DL, Index.getValueType(),
Add.getOperand(0), ShiftOp);
return true;
}
}
return false;
}
// Analyse the specified address, returning true if a more optimal addressing
// mode is available. When returning true, all parameters are updated to
// reflect their recommended values.
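// Illustrative example (editorial note): an nxv4i64 index of
// (step_vector 4) would be split during legalisation; provided the
// worst-case offset of the last element fits in i32, it can instead be
// narrowed to an nxv4i32 step_vector, which legalises more cheaply.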
static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N,
SDValue &BasePtr, SDValue &Index,
SelectionDAG &DAG) {
// Try to iteratively fold parts of the index into the base pointer to
// simplify the index as much as possible.
bool Changed = false;
while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG))
Changed = true;
// Only consider element types that are pointer sized as smaller types can
// be easily promoted.
EVT IndexVT = Index.getValueType();
if (IndexVT.getVectorElementType() != MVT::i64 || IndexVT == MVT::nxv2i64)
return Changed;
// Match:
// Index = step(const)
int64_t Stride = 0;
if (Index.getOpcode() == ISD::STEP_VECTOR)
Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue();
// Match:
// Index = step(const) << shift(const)
else if (Index.getOpcode() == ISD::SHL &&
Index.getOperand(0).getOpcode() == ISD::STEP_VECTOR) {
SDValue RHS = Index.getOperand(1);
if (auto *Shift =
dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(RHS))) {
int64_t Step = (int64_t)Index.getOperand(0).getConstantOperandVal(0);
Stride = Step << Shift->getZExtValue();
}
}
// Return early if no supported pattern was found.
if (Stride == 0)
return Changed;
if (Stride < std::numeric_limits<int32_t>::min() ||
Stride > std::numeric_limits<int32_t>::max())
return Changed;
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
unsigned MaxVScale =
Subtarget.getMaxSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
int64_t LastElementOffset =
IndexVT.getVectorMinNumElements() * Stride * MaxVScale;
if (LastElementOffset < std::numeric_limits<int32_t>::min() ||
LastElementOffset > std::numeric_limits<int32_t>::max())
return Changed;
EVT NewIndexVT = IndexVT.changeVectorElementType(MVT::i32);
// Stride is not multiplied by 'Scale' here because that scaling is applied
// implicitly by the gather/scatter addressing mode.
Index = DAG.getNode(ISD::STEP_VECTOR, SDLoc(N), NewIndexVT,
DAG.getTargetConstant(Stride, SDLoc(N), MVT::i32));
return true;
}
static SDValue performMaskedGatherScatterCombine(
SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) {
MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N);
assert(MGS && "Can only combine gather load or scatter store nodes");
if (!DCI.isBeforeLegalize())
return SDValue();
SDLoc DL(MGS);
SDValue Chain = MGS->getChain();
SDValue Scale = MGS->getScale();
SDValue Index = MGS->getIndex();
SDValue Mask = MGS->getMask();
SDValue BasePtr = MGS->getBasePtr();
ISD::MemIndexType IndexType = MGS->getIndexType();
if (!findMoreOptimalIndexType(MGS, BasePtr, Index, DAG))
return SDValue();
// A more optimal index type was found above; rebuild the gather/scatter
// with the updated BasePtr and Index, which are more legalisation friendly.
if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
SDValue PassThru = MGT->getPassThru();
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(
DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
}
auto *MSC = cast<MaskedScatterSDNode>(MGS);
SDValue Data = MSC->getValue();
SDValue Ops[] = {Chain, Data, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL,
Ops, MSC->getMemOperand(), IndexType,
MSC->isTruncatingStore());
}
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
static SDValue performNEONPostLDSTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
unsigned AddrOpIdx = N->getNumOperands() - 1;
SDValue Addr = N->getOperand(AddrOpIdx);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD ||
UI.getUse().getResNo() != Addr.getResNo())
continue;
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(Addr.getNode());
Worklist.push_back(N);
Worklist.push_back(User);
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
bool IsStore = false;
bool IsLaneOp = false;
bool IsDupOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
NumVecs = 2; IsStore = true; break;
case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
NumVecs = 3; IsStore = true; break;
case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
NumVecs = 4; IsStore = true; break;
case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
NumVecs = 2; IsStore = true; break;
case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
NumVecs = 3; IsStore = true; break;
case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
NumVecs = 4; IsStore = true; break;
case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
NumVecs = 2; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
NumVecs = 3; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
NumVecs = 4; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
NumVecs = 2; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
NumVecs = 3; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
NumVecs = 4; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
NumVecs = 2; IsStore = true; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
NumVecs = 3; IsStore = true; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
NumVecs = 4; IsStore = true; IsLaneOp = true; break;
}
EVT VecTy;
if (IsStore)
VecTy = N->getOperand(2).getValueType();
else
VecTy = N->getValueType(0);
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (IsLaneOp || IsDupOp)
NumBytes /= VecTy.getVectorNumElements();
if (IncVal != NumBytes)
continue;
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // Incoming chain
// Load-lane and store operations have a vector list as input.
if (IsLaneOp || IsStore)
for (unsigned i = 2; i < AddrOpIdx; ++i)
Ops.push_back(N->getOperand(i));
Ops.push_back(Addr); // Base register
Ops.push_back(Inc);
// Return Types.
EVT Tys[6];
unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = VecTy;
Tys[n++] = MVT::i64; // Type of write back register
Tys[n] = MVT::Other; // Type of the chain
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
MemInt->getMemoryVT(),
MemInt->getMemOperand());
// Update the uses.
std::vector<SDValue> NewResults;
for (unsigned i = 0; i < NumResultVecs; ++i) {
NewResults.push_back(SDValue(UpdN.getNode(), i));
}
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
break;
}
return SDValue();
}
// Checks to see if the value is of the prescribed width and returns information
// about its extension mode.
static
bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
ExtType = ISD::NON_EXTLOAD;
switch(V.getNode()->getOpcode()) {
default:
return false;
case ISD::LOAD: {
LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
|| (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
ExtType = LoadNode->getExtensionType();
return true;
}
return false;
}
case ISD::AssertSext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
if ((TypeNode->getVT() == MVT::i8 && width == 8)
|| (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::SEXTLOAD;
return true;
}
return false;
}
case ISD::AssertZext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
if ((TypeNode->getVT() == MVT::i8 && width == 8)
|| (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::ZEXTLOAD;
return true;
}
return false;
}
case ISD::Constant:
case ISD::TargetConstant: {
return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
1LL << (width - 1);
}
}
return true;
}
// This function does a whole lot of voodoo to determine if the tests are
// equivalent without and with a mask. Essentially what happens is that given a
// DAG resembling:
//
// +-------------+ +-------------+ +-------------+ +-------------+
// | Input | | AddConstant | | CompConstant| | CC |
// +-------------+ +-------------+ +-------------+ +-------------+
// | | | |
// V V | +----------+
// +-------------+ +----+ | |
// | ADD | |0xff| | |
// +-------------+ +----+ | |
// | | | |
// V V | |
// +-------------+ | |
// | AND | | |
// +-------------+ | |
// | | |
// +-----+ | |
// | | |
// V V V
// +-------------+
// | CMP |
// +-------------+
//
// The AND node may be safely removed for some combinations of inputs. In
// particular we need to take into account the extension type of the Input,
// the exact values of AddConstant, CompConstant, and CC, along with the nominal
// width of the input (this can work for any width of input; the above graph
// is specific to 8 bits).
//
// The specific equations were worked out by generating output tables for each
// AArch64CC value in terms of the AddConstant (w1) and CompConstant (w2). The
// problem was simplified by working with 4 bit inputs, which means we only
// needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
// extension (8,15), 8 patterns unique to sign extension (-8,-1), and 8
// patterns present in both extensions (0,7). For every distinct pair of
// AddConstant and CompConstant bit patterns we can consider the masked and
// unmasked versions to be equivalent if the result of this function is true
// for all 16 distinct bit patterns of the current extension type of Input (w0).
//
// sub w8, w0, w1
// and w10, w8, #0x0f
// cmp w8, w2
// cset w9, AArch64CC
// cmp w10, w2
// cset w11, AArch64CC
// cmp w9, w11
// cset w0, eq
// ret
//
// Since the above function shows when the outputs are equivalent it defines
// when it is safe to remove the AND. Unfortunately it only runs on AArch64
// and would be expensive to run during compiles. The equations below were
// written in a test harness that confirmed they gave outputs equivalent to
// the above function for all inputs, so they can be used to determine if the
// removal is legal instead.
//
// isEquivalentMaskless() is the test for whether the AND can be removed,
// factored out of the DAG recognition because the DAG can take several forms.
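// Illustrative worked instance (editorial note): for width == 8 (so
// MaxUInt == 256), ExtType == ZEXTLOAD, CC == EQ, AddConstant == 0 and
// CompConstant == 17, the zero-extended input is already in [0, 255], so
// ((x + 0) & 0xff) == 17 holds exactly when (x + 0) == 17 and the AND is
// removable; this matches the EQ/NE clause below
// (AddConstant >= 0 && CompConstant >= 0 && CompConstant >= AddConstant).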
static bool isEquivalentMaskless(unsigned CC, unsigned width,
ISD::LoadExtType ExtType, int AddConstant,
int CompConstant) {
// By being careful about our equations and only writing them in terms of
// symbolic values and well known constants (0, 1, -1, MaxUInt) we can
// make them generally applicable to all bit widths.
int MaxUInt = (1 << width);
// For the purposes of these comparisons sign extending the type is
// equivalent to zero extending the add and displacing it by half the integer
// width. Provided we are careful and make sure our equations are valid over
// the whole range we can just adjust the input and avoid writing equations
// for sign extended inputs.
if (ExtType == ISD::SEXTLOAD)
AddConstant -= (1 << (width-1));
switch(CC) {
case AArch64CC::LE:
case AArch64CC::GT:
if ((AddConstant == 0) ||
(CompConstant == MaxUInt - 1 && AddConstant < 0) ||
(AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
return true;
break;
case AArch64CC::LT:
case AArch64CC::GE:
if ((AddConstant == 0) ||
(AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::HI:
case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant >= -1 &&
CompConstant < AddConstant + MaxUInt))
return true;
break;
case AArch64CC::PL:
case AArch64CC::MI:
if ((AddConstant == 0) ||
(AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::LO:
case AArch64CC::HS:
if ((AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant >= 0 &&
CompConstant <= AddConstant + MaxUInt))
return true;
break;
case AArch64CC::EQ:
case AArch64CC::NE:
if ((AddConstant > 0 && CompConstant < 0) ||
(AddConstant < 0 && CompConstant >= 0 &&
CompConstant < AddConstant + MaxUInt) ||
(AddConstant >= 0 && CompConstant >= 0 &&
CompConstant >= AddConstant) ||
(AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
return true;
break;
case AArch64CC::VS:
case AArch64CC::VC:
case AArch64CC::AL:
case AArch64CC::NV:
return true;
case AArch64CC::Invalid:
break;
}
return false;
}
static
SDValue performCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG, unsigned CCIndex,
unsigned CmpIndex) {
unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
unsigned CondOpcode = SubsNode->getOpcode();
if (CondOpcode != AArch64ISD::SUBS)
return SDValue();
// There is a SUBS feeding this condition. Is it fed by a mask we can
// use?
SDNode *AndNode = SubsNode->getOperand(0).getNode();
unsigned MaskBits = 0;
if (AndNode->getOpcode() != ISD::AND)
return SDValue();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
uint32_t CNV = CN->getZExtValue();
if (CNV == 255)
MaskBits = 8;
else if (CNV == 65535)
MaskBits = 16;
}
if (!MaskBits)
return SDValue();
SDValue AddValue = AndNode->getOperand(0);
if (AddValue.getOpcode() != ISD::ADD)
return SDValue();
// The basic dag structure is correct, grab the inputs and validate them.
SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
SDValue SubsInputValue = SubsNode->getOperand(1);
// The mask is present and the provenance of all the values is a smaller type,
// so let's see if the mask is superfluous.
if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
!isa<ConstantSDNode>(SubsInputValue.getNode()))
return SDValue();
ISD::LoadExtType ExtType;
if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue1, MaskBits, ExtType) )
return SDValue();
if (!isEquivalentMaskless(CC, MaskBits, ExtType,
cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
return SDValue();
// The AND is not necessary, remove it.
SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
SubsNode->getValueType(1));
SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
return SDValue(N, 0);
}
// Optimize compare with zero and branch.
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
return SDValue();
if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
N = NV.getNode();
SDValue Chain = N->getOperand(0);
SDValue Dest = N->getOperand(1);
SDValue CCVal = N->getOperand(2);
SDValue Cmp = N->getOperand(3);
assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
return SDValue();
unsigned CmpOpc = Cmp.getOpcode();
if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
return SDValue();
// Only attempt folding if there is only one use of the flag and no use of the
// value.
if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
return SDValue();
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
assert(LHS.getValueType() == RHS.getValueType() &&
"Expected the value type to be the same for both operands!");
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return SDValue();
if (isNullConstant(LHS))
std::swap(LHS, RHS);
if (!isNullConstant(RHS))
return SDValue();
if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
LHS.getOpcode() == ISD::SRL)
return SDValue();
// Fold the compare into the branch instruction.
SDValue BR;
if (CC == AArch64CC::EQ)
BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
else
BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, BR, false);
return SDValue();
}
static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG) {
unsigned CC = N->getConstantOperandVal(2);
SDValue SUBS = N->getOperand(3);
SDValue Zero, CTTZ;
if (CC == AArch64CC::EQ && SUBS.getOpcode() == AArch64ISD::SUBS) {
Zero = N->getOperand(0);
CTTZ = N->getOperand(1);
} else if (CC == AArch64CC::NE && SUBS.getOpcode() == AArch64ISD::SUBS) {
Zero = N->getOperand(1);
CTTZ = N->getOperand(0);
} else
return SDValue();
if ((CTTZ.getOpcode() != ISD::CTTZ && CTTZ.getOpcode() != ISD::TRUNCATE) ||
(CTTZ.getOpcode() == ISD::TRUNCATE &&
CTTZ.getOperand(0).getOpcode() != ISD::CTTZ))
return SDValue();
assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) &&
"Illegal type in CTTZ folding");
if (!isNullConstant(Zero) || !isNullConstant(SUBS.getOperand(1)))
return SDValue();
SDValue X = CTTZ.getOpcode() == ISD::TRUNCATE
? CTTZ.getOperand(0).getOperand(0)
: CTTZ.getOperand(0);
if (X != SUBS.getOperand(0))
return SDValue();
unsigned BitWidth = CTTZ.getOpcode() == ISD::TRUNCATE
? CTTZ.getOperand(0).getValueSizeInBits()
: CTTZ.getValueSizeInBits();
SDValue BitWidthMinusOne =
DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
return DAG.getNode(ISD::AND, SDLoc(N), CTTZ.getValueType(), CTTZ,
BitWidthMinusOne);
}
// Optimize CSEL instructions
static SDValue performCSELCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// CSEL x, x, cc -> x
if (N->getOperand(0) == N->getOperand(1))
return N->getOperand(0);
// CSEL 0, cttz(X), eq(X, 0) -> AND(cttz(X), BitWidth - 1)
// CSEL cttz(X), 0, ne(X, 0) -> AND(cttz(X), BitWidth - 1)
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
return Folded;
return performCONDCombine(N, DCI, DAG, 2, 3);
}
// Try to re-use an already extended operand of a vector SetCC feeding an
// extended select. Doing so avoids requiring another full extension of the
// SET_CC result when lowering the select.
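// Illustrative example (editorial note): if (v4i32 sign_extend x) already
// exists in the DAG, a (setcc (v4i16 x), splat(C), setgt) feeding v4i32
// VSELECTs can be rebuilt as a compare of the extended values, so the i1
// mask no longer needs a separate extension when lowering the selects.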
static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
EVT Op0MVT = Op->getOperand(0).getValueType();
if (!Op0MVT.isVector() || Op->use_empty())
return SDValue();
// Make sure that all uses of Op are VSELECTs with matching result types,
// where the result type has a larger element type than the SetCC operand.
SDNode *FirstUse = *Op->use_begin();
if (FirstUse->getOpcode() != ISD::VSELECT)
return SDValue();
EVT UseMVT = FirstUse->getValueType(0);
if (UseMVT.getScalarSizeInBits() <= Op0MVT.getScalarSizeInBits())
return SDValue();
if (any_of(Op->uses(), [&UseMVT](const SDNode *N) {
return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
}))
return SDValue();
APInt V;
if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
return SDValue();
SDLoc DL(Op);
SDValue Op0ExtV;
SDValue Op1ExtV;
ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
// Check if the first operand of the SET_CC is already extended. If it is,
// split the SET_CC and re-use the extended version of the operand.
SDNode *Op0SExt = DAG.getNodeIfExists(ISD::SIGN_EXTEND, DAG.getVTList(UseMVT),
Op->getOperand(0));
SDNode *Op0ZExt = DAG.getNodeIfExists(ISD::ZERO_EXTEND, DAG.getVTList(UseMVT),
Op->getOperand(0));
if (Op0SExt && (isSignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
Op0ExtV = SDValue(Op0SExt, 0);
Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1));
} else if (Op0ZExt && (isUnsignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
Op0ExtV = SDValue(Op0ZExt, 0);
Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
} else
return SDValue();
return DAG.getNode(ISD::SETCC, DL, UseMVT.changeVectorElementType(MVT::i1),
Op0ExtV, Op1ExtV, Op->getOperand(2));
}
static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (SDValue V = tryToWidenSetCCOperands(N, DAG))
return V;
// setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
if (Cond == ISD::SETNE && isOneConstant(RHS) &&
LHS->getOpcode() == AArch64ISD::CSEL &&
isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
LHS->hasOneUse()) {
// Invert CSEL's condition.
auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
auto NewCond = getInvertedCondCode(OldCond);
// csel 0, 1, !cond, X
SDValue CSEL =
DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
LHS.getOperand(3));
return DAG.getZExtOrTrunc(CSEL, DL, VT);
}
// setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
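// (The shifted-out low bits cannot affect a comparison with zero, so testing
// (and x, (-1 << imm)) against zero is equivalent; editorial note.)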
if (Cond == ISD::SETNE && isNullConstant(RHS) &&
LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
LHS->hasOneUse()) {
EVT TstVT = LHS->getValueType(0);
if (TstVT.isScalarInteger() && TstVT.getFixedSizeInBits() <= 64) {
// This pattern is optimised better in emitComparison.
uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1);
SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0),
DAG.getConstant(TstImm, DL, TstVT));
return DAG.getNode(ISD::SETCC, DL, VT, TST, RHS, N->getOperand(2));
}
}
// setcc (iN (bitcast (vNi1 X))), 0, (eq|ne)
// ==> setcc (iN (zext (i1 (vecreduce_or (vNi1 X))))), 0, (eq|ne)
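// (The bitcast packs the i1 lanes into the bits of the integer, so the
// integer is non-zero exactly when some lane is set, i.e. a VECREDUCE_OR of
// the lanes; editorial note.)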
if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
LHS->getOpcode() == ISD::BITCAST) {
EVT ToVT = LHS->getValueType(0);
EVT FromVT = LHS->getOperand(0).getValueType();
if (FromVT.isFixedLengthVector() &&
FromVT.getVectorElementType() == MVT::i1) {
LHS = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, LHS->getOperand(0));
LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ToVT, LHS);
return DAG.getSetCC(DL, VT, LHS, RHS, Cond);
}
}
return SDValue();
}
// Replace a flag-setting operator (eg ANDS) with the generic version
// (eg AND) if the flag is unused.
static SDValue performFlagSettingCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
unsigned GenericOpcode) {
SDLoc DL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
// If the flag result isn't used, convert back to a generic opcode.
if (!N->hasAnyUseOfValue(1)) {
SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
DL);
}
// Combine identical generic nodes into this node, re-using the result.
if (SDNode *Generic = DCI.DAG.getNodeIfExists(
GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}))
DCI.CombineTo(Generic, SDValue(N, 0));
return SDValue();
}
static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) {
// setcc_merge_zero pred
// (sign_extend (extract_subvector (setcc_merge_zero ... pred ...))), 0, ne
// => extract_subvector (inner setcc_merge_zero)
SDValue Pred = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
if (Cond != ISD::SETNE || !isZerosVector(RHS.getNode()) ||
LHS->getOpcode() != ISD::SIGN_EXTEND)
return SDValue();
SDValue Extract = LHS->getOperand(0);
if (Extract->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Extract->getValueType(0) != N->getValueType(0) ||
Extract->getConstantOperandVal(1) != 0)
return SDValue();
SDValue InnerSetCC = Extract->getOperand(0);
if (InnerSetCC->getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
return SDValue();
// By this point we've effectively got
// zero_inactive_lanes_and_trunc_i1(sext_i1(A)). If we can prove A's inactive
// lanes are already zero then the trunc(sext()) sequence is redundant and we
// can operate on A directly.
SDValue InnerPred = InnerSetCC.getOperand(0);
if (Pred.getOpcode() == AArch64ISD::PTRUE &&
InnerPred.getOpcode() == AArch64ISD::PTRUE &&
Pred.getConstantOperandVal(0) == InnerPred.getConstantOperandVal(0) &&
Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
return Extract;
return SDValue();
}
static SDValue
performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
"Unexpected opcode!");
SelectionDAG &DAG = DCI.DAG;
SDValue Pred = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
if (SDValue V = performSetCCPunpkCombine(N, DAG))
return V;
if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
LHS->getOpcode() == ISD::SIGN_EXTEND &&
LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) {
// setcc_merge_zero(
// pred, extend(setcc_merge_zero(pred, ...)), != splat(0))
// => setcc_merge_zero(pred, ...)
if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
LHS->getOperand(0)->getOperand(0) == Pred)
return LHS->getOperand(0);
// setcc_merge_zero(
// all_active, extend(nxvNi1 ...), != splat(0))
// -> nxvNi1 ...
if (isAllActivePredicate(DAG, Pred))
return LHS->getOperand(0);
// setcc_merge_zero(
// pred, extend(nxvNi1 ...), != splat(0))
// -> nxvNi1 and(pred, ...)
if (DCI.isAfterLegalizeDAG())
// Do this after legalization to allow more folds on setcc_merge_zero
// to be recognized.
return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0),
LHS->getOperand(0), Pred);
}
return SDValue();
}
// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
// as well as whether the test should be inverted. This code is required to
// catch these cases (as opposed to standard dag combines) because
// AArch64ISD::TBZ is matched during legalization.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
SelectionDAG &DAG) {
if (!Op->hasOneUse())
return Op;
// We don't handle undef/constant-fold cases below, as they should have
// already been taken care of (e.g. and of 0, test of undefined shifted bits,
// etc.)
// (tbz (trunc x), b) -> (tbz x, b)
// This case is just here to enable more of the below cases to be caught.
if (Op->getOpcode() == ISD::TRUNCATE &&
Bit < Op->getValueType(0).getSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
// (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
if (Op->getOpcode() == ISD::ANY_EXTEND &&
Bit < Op->getOperand(0).getValueSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
if (Op->getNumOperands() != 2)
return Op;
auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!C)
return Op;
switch (Op->getOpcode()) {
default:
return Op;
// (tbz (and x, m), b) -> (tbz x, b)
case ISD::AND:
if ((C->getZExtValue() >> Bit) & 1)
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
return Op;
// (tbz (shl x, c), b) -> (tbz x, b-c)
case ISD::SHL:
if (C->getZExtValue() <= Bit &&
(Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit - C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
case ISD::SRA:
Bit = Bit + C->getZExtValue();
if (Bit >= Op->getValueType(0).getSizeInBits())
Bit = Op->getValueType(0).getSizeInBits() - 1;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
// (tbz (srl x, c), b) -> (tbz x, b+c)
case ISD::SRL:
if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit + C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (xor x, -1), b) -> (tbnz x, b)
case ISD::XOR:
if ((C->getZExtValue() >> Bit) & 1)
Invert = !Invert;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
}
// Optimize test single bit zero/non-zero and branch.
static SDValue performTBZCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
bool Invert = false;
SDValue TestSrc = N->getOperand(1);
SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
if (TestSrc == NewTestSrc)
return SDValue();
unsigned NewOpc = N->getOpcode();
if (Invert) {
if (NewOpc == AArch64ISD::TBZ)
NewOpc = AArch64ISD::TBNZ;
else {
assert(NewOpc == AArch64ISD::TBNZ);
NewOpc = AArch64ISD::TBZ;
}
}
SDLoc DL(N);
return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
}
// Swap vselect operands where doing so may allow a predicated operation to
// implement the `sel`.
//
// (vselect (setcc ( condcode) (_) (_)) (a) (op (a) (b)))
// => (vselect (setcc (!condcode) (_) (_)) (op (a) (b)) (a))
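// Illustrative example (editorial note): for a, b : nxv4f32,
// (vselect m, a, (fadd a, b)) -> (vselect !m, (fadd a, b), a), after which
// the select can be folded into a predicated FADD whose inactive lanes
// keep the value of a.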
static SDValue trySwapVSelectOperands(SDNode *N, SelectionDAG &DAG) {
auto SelectA = N->getOperand(1);
auto SelectB = N->getOperand(2);
auto NTy = N->getValueType(0);
if (!NTy.isScalableVector())
return SDValue();
SDValue SetCC = N->getOperand(0);
if (SetCC.getOpcode() != ISD::SETCC || !SetCC.hasOneUse())
return SDValue();
switch (SelectB.getOpcode()) {
default:
return SDValue();
case ISD::FMUL:
case ISD::FSUB:
case ISD::FADD:
break;
}
if (SelectA != SelectB.getOperand(0))
return SDValue();
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
ISD::CondCode InverseCC =
ISD::getSetCCInverse(CC, SetCC.getOperand(0).getValueType());
auto InverseSetCC =
DAG.getSetCC(SDLoc(SetCC), SetCC.getValueType(), SetCC.getOperand(0),
SetCC.getOperand(1), InverseCC);
return DAG.getNode(ISD::VSELECT, SDLoc(N), NTy,
{InverseSetCC, SelectB, SelectA});
}
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
// such VSELECT.
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
if (auto SwapResult = trySwapVSelectOperands(N, DAG))
return SwapResult;
SDValue N0 = N->getOperand(0);
EVT CCVT = N0.getValueType();
if (isAllActivePredicate(DAG, N0))
return N->getOperand(1);
if (isAllInactivePredicate(N0))
return N->getOperand(2);
// Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
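// For example (editorial illustration), with v4i32 lanes
// (x > -1) ? 1 : -1 becomes ((x >> 31) | 1): the arithmetic shift yields 0
// or -1 per lane, and the OR with splat(1) turns that into 1 or -1.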
SDValue SetCC = N->getOperand(0);
if (SetCC.getOpcode() == ISD::SETCC &&
SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
SDValue CmpLHS = SetCC.getOperand(0);
EVT VT = CmpLHS.getValueType();
SDNode *CmpRHS = SetCC.getOperand(1).getNode();
SDNode *SplatLHS = N->getOperand(1).getNode();
SDNode *SplatRHS = N->getOperand(2).getNode();
APInt SplatLHSVal;
if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
VT.isSimple() &&
is_contained(
makeArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v2i64}),
VT.getSimpleVT().SimpleTy) &&
ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
SplatLHSVal.isOne() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(
NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
VT.getScalarType()));
SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
return Or;
}
}
if (N0.getOpcode() != ISD::SETCC ||
CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
CCVT.getVectorElementType() != MVT::i1)
return SDValue();
EVT ResVT = N->getValueType(0);
EVT CmpVT = N0.getOperand(0).getValueType();
// Only combine when the result type is of the same size as the compared
// operands.
if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
return SDValue();
SDValue IfTrue = N->getOperand(1);
SDValue IfFalse = N->getOperand(2);
SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
IfTrue, IfFalse);
}
/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
/// the compare-mask instructions rather than going via NZCV, even if LHS and
/// RHS are really scalar. This replaces any scalar setcc in the above pattern
/// with a vector one followed by a DUP shuffle on the result.
static SDValue performSelectCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
if (N0.getOpcode() != ISD::SETCC)
return SDValue();
if (ResVT.isScalableVector())
return SDValue();
// Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
// scalar SetCCResultType. We also don't expect vectors, because we assume
// that selects fed by vector SETCCs are canonicalized to VSELECT.
assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
"Scalar-SETCC feeding SELECT has unexpected result type!");
// If NumMaskElts == 0, the comparison is larger than the select result. The
// largest real NEON comparison is 64 bits per lane, which means the result is
// at most 32 bits and an illegal vector. Just bail out for now.
EVT SrcVT = N0.getOperand(0).getValueType();
// Don't try to do this optimization when the setcc itself has i1 operands.
// There are no legal vectors of i1, so this would be pointless.
if (SrcVT == MVT::i1)
return SDValue();
int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
if (!ResVT.isVector() || NumMaskElts == 0)
return SDValue();
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
// Also bail out if the vector CCVT isn't the same size as ResVT.
// This can happen if the SETCC operand size doesn't divide the ResVT size
// (e.g., f64 vs v3f32).
if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
return SDValue();
// Make sure we didn't create illegal types, if we're not supposed to.
assert(DCI.isBeforeLegalize() ||
DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
SDValue LHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
SDValue RHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
// Now duplicate the comparison mask we want across all other lanes.
SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
Mask = DAG.getNode(ISD::BITCAST, DL,
ResVT.changeVectorElementTypeToInteger(), Mask);
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
static SDValue performDUPCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
// If "v2i32 DUP(x)" and "v4i32 DUP(x)" both exist, use an extract from the
// 128-bit vector version.
if (VT.is64BitVector() && DCI.isAfterLegalizeDAG()) {
EVT LVT = VT.getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
if (SDNode *LN = DCI.DAG.getNodeIfExists(
N->getOpcode(), DCI.DAG.getVTList(LVT), {N->getOperand(0)})) {
SDLoc DL(N);
return DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SDValue(LN, 0),
DCI.DAG.getConstant(0, DL, MVT::i64));
}
}
return performPostLD1Combine(N, DCI, false);
}
/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N) {
if (N->getValueType(0) == N->getOperand(0).getValueType())
return N->getOperand(0);
return SDValue();
}
// If all users of the globaladdr are of the form (globaladdr + constant), find
// the smallest constant, fold it into the globaladdr's offset and rewrite the
// globaladdr as (globaladdr + constant) - constant.
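// Illustrative example (editorial note): if the uses are (globaladdr + 8)
// and (globaladdr + 12), the node is rewritten as ((globaladdr + 8) - 8);
// the uses then fold to (globaladdr + 8) and ((globaladdr + 8) + 4), both
// anchored on the folded address.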
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget,
const TargetMachine &TM) {
auto *GN = cast<GlobalAddressSDNode>(N);
if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
AArch64II::MO_NO_FLAG)
return SDValue();
uint64_t MinOffset = -1ull;
for (SDNode *N : GN->uses()) {
if (N->getOpcode() != ISD::ADD)
return SDValue();
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
if (!C)
C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
MinOffset = std::min(MinOffset, C->getZExtValue());
}
uint64_t Offset = MinOffset + GN->getOffset();
// Require that the new offset is larger than the existing one. Otherwise, we
// can end up oscillating between two possible DAGs, for example,
// (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
if (Offset <= uint64_t(GN->getOffset()))
return SDValue();
// Check whether folding this offset is legal. It must not go out of bounds of
// the referenced object to avoid violating the code model, and must be
// smaller than 2^20 because this is the largest offset expressible in all
// object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
// stores an immediate signed 21 bit offset.)
//
// This check also prevents us from folding negative offsets, which will end
// up being treated in the same way as large positive ones. They could also
// cause code model violations, and aren't really common enough to matter.
if (Offset >= (1 << 20))
return SDValue();
const GlobalValue *GV = GN->getGlobal();
Type *T = GV->getValueType();
if (!T->isSized() ||
Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
return SDValue();
SDLoc DL(GN);
SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
DAG.getConstant(MinOffset, DL, MVT::i64));
}
// Turns the vector of indices into a vector of byte offsets by scaling Offset
// by (BitWidth / 8).
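// (For example, with 32-bit elements each index is shifted left by 2, i.e.
// multiplied by 4, to produce a byte offset; editorial illustration.)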
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
SDLoc DL, unsigned BitWidth) {
assert(Offset.getValueType().isScalableVector() &&
"This method is only for scalable vectors of offsets");
SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
}
/// Check if the value of \p OffsetInBytes can be used as an immediate for
/// the gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
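/// For example (editorial illustration), with 4-byte elements the valid
/// immediates are 0, 4, 8, ..., 124.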
inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
unsigned ScalarSizeInBytes) {
// The immediate is not a multiple of the scalar size.
if (OffsetInBytes % ScalarSizeInBytes)
return false;
// The immediate is out of range.
if (OffsetInBytes / ScalarSizeInBytes > 31)
return false;
return true;
}
/// Check if the value of \p Offset represents a valid immediate for the SVE
/// gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
unsigned ScalarSizeInBytes) {
ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
return OffsetConst && isValidImmForSVEVecImmAddrMode(
OffsetConst->getZExtValue(), ScalarSizeInBytes);
}
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const SDValue Src = N->getOperand(2);
const EVT SrcVT = Src->getValueType(0);
assert(SrcVT.isScalableVector() &&
"Scatter stores are only possible for SVE vectors");
SDLoc DL(N);
MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
// Make sure that source data will fit into an SVE register
if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// For FPs, ACLE only supports _packed_ single and double precision types.
if (SrcElVT.isFloatingPoint())
if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
SDValue Base = N->getOperand(4);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal scatters because there's no instruction that takes
// indices.
if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
Offset =
getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
Opcode = AArch64ISD::SSTNT1_PRED;
}
// In the case of non-temporal scatter stores there's only one SVE instruction
// per data-size: "scalar + vector", i.e.
// * stnt1{b|h|w|d} { z0.s }, p0, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
std::swap(Base, Offset);
// SST1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the stored items. For
// immediates outside that range and non-immediate scalar offsets use SST1 or
// SST1_UXTW instead.
if (Opcode == AArch64ISD::SST1_IMM_PRED) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
SrcVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = AArch64ISD::SST1_UXTW_PRED;
else
Opcode = AArch64ISD::SST1_PRED;
std::swap(Base, Offset);
}
}
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some scatter store variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign- (sxtw) or zero-extended (uxtw) to
// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
if (!TLI.isTypeLegal(Offset.getValueType()))
return SDValue();
// Source value type that is representable in hardware
EVT HwSrcVt = getSVEContainerType(SrcVT);
// Keep the original type of the input data to store - this is needed to be
// able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
// FP values we want the integer equivalent, so just use HwSrcVt.
SDValue InputVT = DAG.getValueType(SrcVT);
if (SrcVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue SrcNew;
if (Src.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
SDValue Ops[] = {N->getOperand(0), // Chain
SrcNew,
N->getOperand(3), // Pg
Base,
Offset,
InputVT};
return DAG.getNode(Opcode, DL, VTs, Ops);
}
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const EVT RetVT = N->getValueType(0);
assert(RetVT.isScalableVector() &&
"Gather loads are only possible for SVE vectors");
SDLoc DL(N);
// Make sure that the loaded data will fit into an SVE register
if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
SDValue Base = N->getOperand(3);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(4);
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal gathers because there's no instruction that takes
// indices.
if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
RetVT.getScalarSizeInBits());
Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
}
// In the case of non-temporal gather loads there's only one SVE instruction
// per data-size: "scalar + vector", i.e.
// * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
Offset.getValueType().isVector())
std::swap(Base, Offset);
// GLD{FF}1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the loaded items. For
// immediates outside that range and non-immediate scalar offsets use
// GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
RetVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
? AArch64ISD::GLD1_UXTW_MERGE_ZERO
: AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
else
Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
? AArch64ISD::GLD1_MERGE_ZERO
: AArch64ISD::GLDFF1_MERGE_ZERO;
std::swap(Base, Offset);
}
}
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some gather load variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign-extended (sxtw) or zero-extended (uxtw)
// to nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
// Return value type that is representable in hardware
EVT HwRetVt = getSVEContainerType(RetVT);
// Keep the original output value type around - this is needed to be able to
// select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
// values we want the integer equivalent, so just use HwRetVt.
SDValue OutVT = DAG.getValueType(RetVT);
if (RetVT.isFloatingPoint())
OutVT = DAG.getValueType(HwRetVt);
SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
SDValue Ops[] = {N->getOperand(0), // Chain
N->getOperand(2), // Pg
Base, Offset, OutVT};
SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (RetVT.isInteger() && (RetVT != HwRetVt))
Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
// If the original return value was FP, bitcast accordingly. Doing it here
// means that we can avoid adding TableGen patterns for FPs.
if (RetVT.isFloatingPoint())
Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue
performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
// Sign extend of an unsigned unpack -> signed unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
: AArch64ISD::SUNPKLO;
// Push the sign extend to the operand of the unpack
// This is necessary where, for example, the operand of the unpack
// is another unpack:
// 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
// ->
// 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8))
// ->
// 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
SDValue ExtOp = Src->getOperand(0);
auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT EltTy = VT.getVectorElementType();
(void)EltTy;
assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
"Sign extending from an invalid type");
EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
ExtOp, DAG.getValueType(ExtVT));
return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
}
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (!EnableCombineMGatherIntrinsics)
return SDValue();
// SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
// for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
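// For example (illustrative): (sign_extend_inreg (GLD1_MERGE_ZERO ...), nxv4i8)
// where the gather loaded i8 elements becomes a single GLD1S_MERGE_ZERO node
// that sign-extends as part of the load, provided the load has no other users.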
unsigned NewOpc;
unsigned MemVTOpNum = 4;
switch (Opc) {
case AArch64ISD::LD1_MERGE_ZERO:
NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::LDNF1_MERGE_ZERO:
NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::LDFF1_MERGE_ZERO:
NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::GLD1_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
break;
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
break;
case AArch64ISD::GLDNT1_MERGE_ZERO:
NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
break;
default:
return SDValue();
}
EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
return SDValue();
EVT DstVT = N->getValueType(0);
SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
SmallVector<SDValue, 5> Ops;
for (unsigned I = 0; I < Src->getNumOperands(); ++I)
Ops.push_back(Src->getOperand(I));
SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
DCI.CombineTo(N, ExtLoad);
DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
// Return N so it doesn't get rechecked
return SDValue(N, 0);
}
/// Legalize the gather prefetch (scalar + vector addressing mode) when the
/// offset vector is an unpacked 32-bit scalable vector (nxv2i32). Offsets of
/// any other type do not need legalization.
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
const unsigned OffsetPos = 4;
SDValue Offset = N->getOperand(OffsetPos);
// Not an unpacked vector, bail out.
if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
return SDValue();
// Extend the unpacked offset vector to 64-bit lanes.
SDLoc DL(N);
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
// Replace the offset operand with the 64-bit one.
Ops[OffsetPos] = Offset;
return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
/// Combines a node carrying the intrinsic
/// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
/// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
/// SVE gather prefetch instruction with vector-plus-immediate addressing mode.
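/// For example (illustrative): a prfh with a scalar offset of 33 is not a
/// multiple of 2, so the base and offset are swapped and the node is remapped
/// to the uxtw-index form.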
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
unsigned ScalarSizeInBytes) {
const unsigned ImmPos = 4, OffsetPos = 3;
// No need to combine the node if the immediate is valid...
if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
return SDValue();
// ...otherwise swap the offset base with the offset...
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
std::swap(Ops[ImmPos], Ops[OffsetPos]);
// ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
// `aarch64_sve_prfb_gather_uxtw_index`.
SDLoc DL(N);
Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
MVT::i64);
return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
// Return true if the vector operation can guarantee that only the first lane
// of its result contains data, with all bits in other lanes set to zero.
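// The SVE reduction nodes below, for instance, produce their scalar result in
// lane 0 and leave the remaining lanes of the result vector zeroed.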
static bool isLanes1toNKnownZero(SDValue Op) {
switch (Op.getOpcode()) {
default:
return false;
case AArch64ISD::ANDV_PRED:
case AArch64ISD::EORV_PRED:
case AArch64ISD::FADDA_PRED:
case AArch64ISD::FADDV_PRED:
case AArch64ISD::FMAXNMV_PRED:
case AArch64ISD::FMAXV_PRED:
case AArch64ISD::FMINNMV_PRED:
case AArch64ISD::FMINV_PRED:
case AArch64ISD::ORV_PRED:
case AArch64ISD::SADDV_PRED:
case AArch64ISD::SMAXV_PRED:
case AArch64ISD::SMINV_PRED:
case AArch64ISD::UADDV_PRED:
case AArch64ISD::UMAXV_PRED:
case AArch64ISD::UMINV_PRED:
return true;
}
}
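// Looks through (insert_vector_elt (zero vector), (extract_vector_elt X, 0), 0)
// and returns X directly when lanes 1-N of X are already known to be zero,
// e.g. when X is one of the reductions recognised above.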
static SDValue removeRedundantInsertVectorElt(SDNode *N) {
assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
SDValue InsertVec = N->getOperand(0);
SDValue InsertElt = N->getOperand(1);
SDValue InsertIdx = N->getOperand(2);
// We only care about inserts into the first element...
if (!isNullConstant(InsertIdx))
return SDValue();
// ...of a zero'd vector...
if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
return SDValue();
// ...where the inserted data was previously extracted...
if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue ExtractVec = InsertElt.getOperand(0);
SDValue ExtractIdx = InsertElt.getOperand(1);
// ...from the first element of a vector.
if (!isNullConstant(ExtractIdx))
return SDValue();
// If we get here we are effectively trying to zero lanes 1-N of a vector.
// Ensure there's no type conversion going on.
if (N->getValueType(0) != ExtractVec.getValueType())
return SDValue();
if (!isLanes1toNKnownZero(ExtractVec))
return SDValue();
// The explicit zeroing is redundant.
return ExtractVec;
}
static SDValue
performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
if (SDValue Res = removeRedundantInsertVectorElt(N))
return Res;
return performPostLD1Combine(N, DCI, true);
}
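// Lower a floating-point VECTOR_SPLICE by bitcasting to integers, performing
// the splice on the packed integer container type, and bitcasting back, so
// that only integer splice patterns are needed.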
static SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
EVT Ty = N->getValueType(0);
if (Ty.isInteger())
return SDValue();
EVT IntTy = Ty.changeVectorElementTypeToInteger();
EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
IntTy.getVectorElementType().getScalarSizeInBits())
return SDValue();
SDLoc DL(N);
SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
DL, ExtIntTy);
SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
DL, ExtIntTy);
SDValue Idx = N->getOperand(2);
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
return DAG.getBitcast(Ty, Trunc);
}
static SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// If this fp_extend is only used by an fp_round, don't fold it here; let the
// fp_round(fp_extend) pair be folded instead.
if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND)
return SDValue();
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
// We purposefully don't care about legality of the nodes here as we know
// they can be split down into something legal.
if (DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(N0.getNode()) &&
N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
VT.isFixedLengthVector() &&
VT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(), LN0->getBasePtr(),
N0.getValueType(), LN0->getMemOperand());
DCI.CombineTo(N, ExtLoad);
DCI.CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(),
ExtLoad, DAG.getIntPtrConstant(1, SDLoc(N0))),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
return SDValue();
}
static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget,
bool fixedSVEVectorVT) {
EVT VT = N->getValueType(0);
// Don't expand for SVE2
if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
return SDValue();
// Don't expand for NEON
if (VT.isFixedLengthVector() && !fixedSVEVectorVT)
return SDValue();
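// Expand the bit-select as (Mask & In1) | (~Mask & In2).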
SDLoc DL(N);
SDValue Mask = N->getOperand(0);
SDValue In1 = N->getOperand(1);
SDValue In2 = N->getOperand(2);
SDValue InvMask = DAG.getNOT(DL, Mask, VT);
SDValue Sel = DAG.getNode(ISD::AND, DL, VT, Mask, In1);
SDValue SelInv = DAG.getNode(ISD::AND, DL, VT, InvMask, In2);
return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv);
}
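// Fold duplane128(insert_subvector(undef, bitcast(X), Idx), Idx) so that the
// insert and the dup are performed in X's own element type, leaving a single
// bitcast of the final result.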
static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue Insert = N->getOperand(0);
if (Insert.getOpcode() != ISD::INSERT_SUBVECTOR)
return SDValue();
if (!Insert.getOperand(0).isUndef())
return SDValue();
uint64_t IdxInsert = Insert.getConstantOperandVal(2);
uint64_t IdxDupLane = N->getConstantOperandVal(1);
if (IdxInsert != IdxDupLane)
return SDValue();
SDValue Bitcast = Insert.getOperand(1);
if (Bitcast.getOpcode() != ISD::BITCAST)
return SDValue();
SDValue Subvec = Bitcast.getOperand(0);
EVT SubvecVT = Subvec.getValueType();
if (!SubvecVT.is128BitVector())
return SDValue();
EVT NewSubvecVT =
getPackedSVEVectorVT(Subvec.getValueType().getVectorElementType());
SDLoc DL(N);
SDValue NewInsert =
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewSubvecVT,
DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
SDValue NewDuplane128 = DAG.getNode(AArch64ISD::DUPLANE128, DL, NewSubvecVT,
NewInsert, N->getOperand(1));
return DAG.getNode(ISD::BITCAST, DL, VT, NewDuplane128);
}
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;
case ISD::ADD:
case ISD::SUB:
return performAddSubCombine(N, DCI, DAG);
case ISD::BUILD_VECTOR:
return performBuildVectorCombine(N, DCI, DAG);
case AArch64ISD::ANDS:
return performFlagSettingCombine(N, DCI, ISD::AND);
case AArch64ISD::ADC:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
return R;
return foldADCToCINC(N, DAG);
case AArch64ISD::SBC:
return foldOverflowCheck(N, DAG, /* IsAdd */ false);
case AArch64ISD::ADCS:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::ADC);
case AArch64ISD::SBCS:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
return performMulCombine(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
return performExtendCombine(N, DCI, DAG);
case ISD::SIGN_EXTEND_INREG:
return performSignExtendInRegCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::EXTRACT_SUBVECTOR:
return performExtractSubvectorCombine(N, DCI, DAG);
case ISD::INSERT_SUBVECTOR:
return performInsertSubvectorCombine(N, DCI, DAG);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
case ISD::SETCC:
return performSETCCCombine(N, DCI, DAG);
case ISD::LOAD:
if (performTBISimplification(N->getOperand(1), DCI, DAG))
return SDValue(N, 0);
break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case ISD::MSTORE:
return performMSTORECombine(N, DCI, DAG, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER:
return performMaskedGatherScatterCombine(N, DCI, DAG);
case ISD::VECTOR_SPLICE:
return performSVESpliceCombine(N, DAG);
case ISD::FP_EXTEND:
return performFPExtendCombine(N, DAG, DCI, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
case AArch64ISD::TBZ:
return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::DUP:
return performDUPCombine(N, DCI);
case AArch64ISD::DUPLANE128:
return performDupLane128Combine(N, DAG);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
return performSpliceCombine(N, DAG);
case AArch64ISD::UUNPKLO:
case AArch64ISD::UUNPKHI:
return performUnpackCombine(N, DAG, Subtarget);
case AArch64ISD::UZP1:
return performUzpCombine(N, DAG);
case AArch64ISD::SETCC_MERGE_ZERO:
return performSetccMergeZeroCombine(N, DCI);
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
case AArch64ISD::GLD1S_MERGE_ZERO:
case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
return performGLD1Combine(N, DAG);
case AArch64ISD::VASHR:
case AArch64ISD::VLSHR:
return performVectorShiftCombine(N, *this, DCI);
case AArch64ISD::SUNPKLO:
return performSunpkloCombine(N, DAG);
case AArch64ISD::BSP:
return performBSPExpandForSVE(
N, DAG, Subtarget, useSVEForFixedLengthVectorVT(N->getValueType(0)));
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DCI, Subtarget);
case ISD::VECREDUCE_ADD:
return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
case AArch64ISD::UADDV:
return performUADDVCombine(N, DAG);
case AArch64ISD::SMULL:
case AArch64ISD::UMULL:
return tryCombineLongOpWithDup(Intrinsic::not_intrinsic, N, DCI, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
return legalizeSVEGatherPrefetchOffsVec(N, DAG);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r:
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane:
return performNEONPostLDSTCombine(N, DCI, DAG);
case Intrinsic::aarch64_sve_ldnt1:
return performLDNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_ld1rq:
return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ld1ro:
return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1:
return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnf1:
return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1:
return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1:
return performST1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1:
return performSTNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_index:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
case Intrinsic::aarch64_sve_ld1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_sxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
case Intrinsic::aarch64_sve_st1_scatter_index:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
return performScatterStoreCombine(N, DAG,
AArch64ISD::SST1_SXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
return performScatterStoreCombine(N, DAG,
AArch64ISD::SST1_UXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
case Intrinsic::aarch64_sve_tuple_get: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Src1 = N->getOperand(2);
SDValue Idx = N->getOperand(3);
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
EVT ResVT = N->getValueType(0);
uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
SDValue Val =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
return DAG.getMergeValues({Val, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_set: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Tuple = N->getOperand(2);
SDValue Idx = N->getOperand(3);
SDValue Vec = N->getOperand(4);
EVT TupleVT = Tuple.getValueType();
uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
uint64_t NumLanes =
Vec.getValueType().getVectorElementCount().getKnownMinValue();
if ((TupleLanes % NumLanes) != 0)
report_fatal_error("invalid tuple vector!");
uint64_t NumVecs = TupleLanes / NumLanes;
SmallVector<SDValue, 4> Opnds;
for (unsigned I = 0; I < NumVecs; ++I) {
if (I == IdxConst)
Opnds.push_back(Vec);
else {
SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
Vec.getValueType(), Tuple, ExtIdx));
}
}
SDValue Concat =
DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
return DAG.getMergeValues({Concat, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_create2:
case Intrinsic::aarch64_sve_tuple_create3:
case Intrinsic::aarch64_sve_tuple_create4: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SmallVector<SDValue, 4> Opnds;
for (unsigned I = 2; I < N->getNumOperands(); ++I)
Opnds.push_back(N->getOperand(I));
EVT VT = Opnds[0].getValueType();
EVT EltVT = VT.getVectorElementType();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VT.getVectorElementCount() *
(N->getNumOperands() - 2));
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
return DAG.getMergeValues({Concat, Chain}, DL);
}
case Intrinsic::aarch64_sve_ld2:
case Intrinsic::aarch64_sve_ld3:
case Intrinsic::aarch64_sve_ld4: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Mask = N->getOperand(2);
SDValue BasePtr = N->getOperand(3);
SDValue LoadOps[] = {Chain, Mask, BasePtr};
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
SDValue Result =
LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
return DAG.getMergeValues({Result, Chain}, DL);
}
case Intrinsic::aarch64_rndr:
case Intrinsic::aarch64_rndrrs: {
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
auto Register =
(IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
: AArch64SysReg::RNDRRS);
SDLoc DL(N);
SDValue A = DAG.getNode(
AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
SDValue B = DAG.getNode(
AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
return DAG.getMergeValues(
{A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
}
default:
break;
}
break;
case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}
return SDValue();
}
// Check if the return value is used as only a return value, as otherwise
// we can't perform a tail-call. In particular, we need to check for
// target ISD nodes that are returns and any other "odd" constructs
// that the generic analysis code won't necessarily catch.
bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (SDNode *Node : Copy->uses()) {
if (Node->getOpcode() != AArch64ISD::RET_FLAG)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
// Return whether an instruction can potentially be optimized to a tail
// call. This will cause the optimizers to attempt to move, or duplicate,
// return instructions to help enable tail call optimizations for this
// instruction.
bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
bool &IsInc,
SelectionDAG &DAG) const {
if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
return false;
Base = Op->getOperand(0);
// All of the indexed addressing mode instructions take a signed 9-bit
// immediate offset, i.e. in the range [-256, 255].
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
int64_t RHSC = RHS->getSExtValue();
if (Op->getOpcode() == ISD::SUB)
RHSC = -(uint64_t)RHSC;
if (!isInt<9>(RHSC))
return false;
IsInc = (Op->getOpcode() == ISD::ADD);
Offset = Op->getOperand(1);
return true;
}
return false;
}
bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
} else
return false;
bool IsInc;
if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
return false;
AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
return true;
}
bool AArch64TargetLowering::getPostIndexedAddressParts(
SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
} else
return false;
bool IsInc;
if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
return false;
// Post-indexing updates the base, so it's not a valid transform
// if that's not the same as the load's pointer.
if (Ptr != Base)
return false;
AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}
void AArch64TargetLowering::ReplaceBITCASTResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
SDValue Op = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = Op.getValueType();
if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"Expected fp->int bitcast!");
// Bitcasting between unpacked vector types of different element counts is
// not a NOP because the live elements are laid out differently.
// 01234567
// e.g. nxv2i32 = XX??XX??
// nxv4f16 = X?X?X?X?
if (VT.getVectorElementCount() != SrcVT.getVectorElementCount())
return;
SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
return;
}
if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
return;
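// Bitcast the f16/bf16 via f32: insert the value as the 'hsub' subregister of
// a wider register, bitcast that to i32, then truncate to i16.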
Op = SDValue(
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
DAG.getUNDEF(MVT::i32), Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
static void ReplaceAddWithADDP(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.is256BitVector() ||
(VT.getScalarType().isFloatingPoint() &&
!N->getFlags().hasAllowReassociation()) ||
(VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()))
return;
SDValue X = N->getOperand(0);
auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(1));
if (!Shuf) {
Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
X = N->getOperand(1);
if (!Shuf)
return;
}
if (Shuf->getOperand(0) != X || !Shuf->getOperand(1)->isUndef())
return;
// Check the mask is 1,0,3,2,5,4,...
ArrayRef<int> Mask = Shuf->getMask();
for (int I = 0, E = Mask.size(); I < E; I++)
if (Mask[I] != (I % 2 == 0 ? I + 1 : I - 1))
return;
SDLoc DL(N);
auto LoHi = DAG.SplitVector(X, DL);
assert(LoHi.first.getValueType() == LoHi.second.getValueType());
SDValue Addp = DAG.getNode(AArch64ISD::ADDP, N, LoHi.first.getValueType(),
LoHi.first, LoHi.second);
// Shuffle the elements back into order.
SmallVector<int> NMask;
for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I < E; I++) {
NMask.push_back(I);
NMask.push_back(I);
}
Results.push_back(
DAG.getVectorShuffle(VT, DL,
DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Addp,
DAG.getUNDEF(LoHi.first.getValueType())),
DAG.getUNDEF(VT), NMask));
}
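// Split the operand vector in half, combine the halves elementwise with
// InterOp (e.g. ISD::ADD), then reduce the result with the across-vector
// operation AcrossOp (e.g. AArch64ISD::SADDV).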
static void ReplaceReductionResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned InterOp,
unsigned AcrossOp) {
EVT LoVT, HiVT;
SDValue Lo, Hi;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
Results.push_back(SplitVal);
}
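// Split an i128 value into its low and high i64 halves.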
static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
DAG.getNode(ISD::SRL, DL, MVT::i128, N,
DAG.getConstant(64, DL, MVT::i64)));
return std::make_pair(Lo, Hi);
}
void AArch64TargetLowering::ReplaceExtractSubVectorResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
// Common code will handle these just fine.
if (!InVT.isScalableVector() || !InVT.isInteger())
return;
SDLoc DL(N);
EVT VT = N->getValueType(0);
// The following checks bail if this is not a halving operation.
ElementCount ResEC = VT.getVectorElementCount();
if (InVT.getVectorElementCount() != (ResEC * 2))
return;
auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!CIndex)
return;
unsigned Index = CIndex->getZExtValue();
if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
return;
unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
}
// Create an even/odd pair of X registers holding integer value V.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
dl, MVT::i64);
if (DAG.getDataLayout().isBigEndian())
std::swap (VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
static void ReplaceCMP_SWAP_128Results(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
assert(N->getValueType(0) == MVT::i128 &&
"AtomicCmpSwap on types less than 128 should be legal");
MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
// LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
// so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
SDValue Ops[] = {
createGPRPairNode(DAG, N->getOperand(2)), // Compare value
createGPRPairNode(DAG, N->getOperand(3)), // Store value
N->getOperand(1), // Ptr
N->getOperand(0), // Chain in
};
unsigned Opcode;
switch (MemOp->getMergedOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CASPX;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CASPAX;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CASPLX;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CASPALX;
break;
default:
llvm_unreachable("Unexpected ordering!");
}
MachineSDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
DAG.setNodeMemRefs(CmpSwap, {MemOp});
unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
if (DAG.getDataLayout().isBigEndian())
std::swap(SubReg1, SubReg2);
SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0));
SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0));
Results.push_back(
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
Results.push_back(SDValue(CmpSwap, 1)); // Chain out
return;
}
unsigned Opcode;
switch (MemOp->getMergedOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CMP_SWAP_128_RELEASE;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CMP_SWAP_128;
break;
default:
llvm_unreachable("Unexpected ordering!");
}
auto Desired = splitInt128(N->getOperand(2), DAG);
auto New = splitInt128(N->getOperand(3), DAG);
SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
New.first, New.second, N->getOperand(0)};
SDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
Ops);
DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
Results.push_back(SDValue(CmpSwap, 3));
}
void AArch64TargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this");
case ISD::BITCAST:
ReplaceBITCASTResults(N, Results, DAG);
return;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
return;
case ISD::ADD:
case ISD::FADD:
ReplaceAddWithADDP(N, Results, DAG, Subtarget);
return;
case ISD::CTPOP:
case ISD::PARITY:
if (SDValue Result = LowerCTPOP_PARITY(SDValue(N, 0), DAG))
Results.push_back(Result);
return;
case AArch64ISD::SADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
return;
case AArch64ISD::UADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
return;
case AArch64ISD::SMINV:
ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
return;
case AArch64ISD::UMINV:
ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
return;
case AArch64ISD::SMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
return;
case AArch64ISD::UMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
// Let normal code take care of it by not adding anything to Results.
return;
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
case ISD::ATOMIC_LOAD:
case ISD::LOAD: {
assert(SDValue(N, 0).getValueType() == MVT::i128 &&
"unexpected load's value type");
MemSDNode *LoadNode = cast<MemSDNode>(N);
if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
LoadNode->getMemoryVT() != MVT::i128) {
// Non-volatile, non-atomic loads are optimized later in AArch64's load/store
// optimizer.
return;
}
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::LDP, SDLoc(N),
DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
{LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
LoadNode->getMemOperand());
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
Result.getValue(0), Result.getValue(1));
Results.append({Pair, Result.getValue(2) /* Chain */});
return;
}
case ISD::EXTRACT_SUBVECTOR:
ReplaceExtractSubVectorResults(N, Results, DAG);
return;
case ISD::INSERT_SUBVECTOR:
case ISD::CONCAT_VECTORS:
// Custom lowering has been requested for INSERT_SUBVECTOR and
// CONCAT_VECTORS -- but delegate to common code for result type
// legalisation
return;
case ISD::INTRINSIC_WO_CHAIN: {
EVT VT = N->getValueType(0);
assert((VT == MVT::i8 || VT == MVT::i16) &&
"custom lowering for unexpected type");
ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default:
return;
case Intrinsic::aarch64_sve_clasta_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
N->getOperand(1), Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_clastb_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
N->getOperand(1), Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lasta: {
SDLoc DL(N);
auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lastb: {
SDLoc DL(N);
auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
}
}
}
}
bool AArch64TargetLowering::useLoadStackGuardNode() const {
if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
return TargetLowering::useLoadStackGuardNode();
return true;
}
unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are three or more FDIVs.
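// For example (illustrative): x/d, y/d, z/d becomes r = 1.0/d; x*r; y*r; z*r,
// trading three divisions for one division and three multiplications.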
return 3;
}
TargetLoweringBase::LegalizeTypeAction
AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
// During type legalization, we prefer to widen v1i8, v1i16, v1i32 (and
// v1f32) to v8i8, v4i16, v2i32 (and v2f32) instead of promoting.
if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
VT == MVT::v1f32)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
// In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
// provided the address is 16-byte aligned.
bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
if (!Subtarget->hasLSE2())
return false;
if (auto LI = dyn_cast<LoadInst>(I))
return LI->getType()->getPrimitiveSizeInBits() == 128 &&
LI->getAlign() >= Align(16);
if (auto SI = dyn_cast<StoreInst>(I))
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
SI->getAlign() >= Align(16);
return false;
}
bool AArch64TargetLowering::shouldInsertFencesForAtomic(
const Instruction *I) const {
return isOpSuitableForLDPSTP(I);
}
// Loads and stores smaller than 128 bits are already atomic; larger ones
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
TargetLoweringBase::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
if (Size != 128 || isOpSuitableForLDPSTP(SI))
return AtomicExpansionKind::None;
return AtomicExpansionKind::Expand;
}
// Loads and stores smaller than 128 bits are already atomic; larger ones
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
if (Size != 128 || isOpSuitableForLDPSTP(LI))
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement atomicrmw without spilling. If the target address is also on the
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::LLSC;
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
if (Subtarget->hasLSE())
return AtomicExpansionKind::None;
if (Subtarget->outlineAtomics()) {
// [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
// Don't outline them unless
// (1) high level <atomic> support approved:
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
// (2) low level libgcc and compiler-rt support implemented by:
// min/max outline atomics helpers
if (AI->getOperation() != AtomicRMWInst::Min &&
AI->getOperation() != AtomicRMWInst::Max &&
AI->getOperation() != AtomicRMWInst::UMin &&
AI->getOperation() != AtomicRMWInst::UMax) {
return AtomicExpansionKind::None;
}
}
}
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement atomicrmw without spilling. If the target address is also on the
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::LLSC;
}
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// If subtarget has LSE, leave cmpxchg intact for codegen.
if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::None;
// 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
// it.
unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
if (Size > 64)
return AtomicExpansionKind::None;
return AtomicExpansionKind::LLSC;
}
Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
Type *ValueTy, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsAcquire = isAcquireOrStronger(Ord);
// Since i128 isn't legal and intrinsics don't get type-lowered, the ldxp
// intrinsic must return {i64, i64} and we have to recombine them into a
// single i128 here.
if (ValueTy->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
Function *Ldxr = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
}
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
CallInst *CI = Builder.CreateCall(Ldxr, Addr);
CI->addParamAttr(
0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy));
Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
return Builder.CreateBitCast(Trunc, ValueTy);
}
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilderBase &Builder) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}
Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsRelease = isReleaseOrStronger(Ord);
// Since the intrinsics must have legal type, the i128 intrinsics take two
// parameters: "i64, i64". We must marshal Val into the appropriate form
// before the call.
if (Val->getType()->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
Function *Stxr = Intrinsic::getDeclaration(M, Int);
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
Val = Builder.CreateBitCast(Val, IntValTy);
CallInst *CI = Builder.CreateCall(
Stxr, {Builder.CreateZExtOrBitCast(
Val, Stxr->getFunctionType()->getParamType(0)),
Addr});
CI->addParamAttr(1, Attribute::get(Builder.getContext(),
Attribute::ElementType, Val->getType()));
return CI;
}
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const {
if (!Ty->isArrayTy()) {
const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
return TySize.isScalable() && TySize.getKnownMinSize() > 128;
}
// All non-aggregate members of the type must have the same type.
SmallVector<EVT> ValueVTs;
ComputeValueVTs(*this, DL, Ty, ValueVTs);
return is_splat(ValueVTs);
}
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
EVT) const {
return false;
}
static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
return IRB.CreatePointerCast(
IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
Offset),
IRB.getInt8PtrTy()->getPointerTo(0));
}
Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the stack cookie. See the definition
// of TLS_SLOT_STACK_GUARD in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x28);
// Fuchsia is similar.
// <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x10);
return TargetLowering::getIRStackGuard(IRB);
}
void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
Type::getInt8PtrTy(M.getContext()));
// MSVC CRT has a function to validate security cookie.
FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
"__security_check_cookie", Type::getVoidTy(M.getContext()),
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::Win64);
F->addParamAttr(0, Attribute::AttrKind::InReg);
}
return;
}
TargetLowering::insertSSPDeclarations(M);
}
Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
// MSVC CRT has a global variable holding security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getGlobalVariable("__security_cookie");
return TargetLowering::getSDagStackGuard(M);
}
Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getFunction("__security_check_cookie");
return TargetLowering::getSSPStackGuardCheck(M);
}
Value *
AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x48);
// Fuchsia is similar.
// <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x8);
return TargetLowering::getSafeStackPointerLocation(IRB);
}
bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
// Only sink 'and' mask to cmp use block if it is masking a single bit, since
// this likely allows the and/cmp/br to fold into a single tbz instruction. It
// may be beneficial to sink in other cases, but we would have to check that
// the cmp would not get folded into the br to form a cbz for these to be
// beneficial.
ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
return false;
return Mask->getValue().isPowerOf2();
}
bool AArch64TargetLowering::
shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const {
// Does baseline recommend not to perform the fold by default?
if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
return false;
// Else, if this is a vector shift, prefer 'shl'.
return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
}
bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
SDNode *N) const {
if (DAG.getMachineFunction().getFunction().hasMinSize() &&
!Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
return false;
return true;
}
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in AArch64FunctionInfo.
AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
AFI->setIsSplitCSR(true);
}
void AArch64TargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (AArch64::GPR64RegClass.contains(*I))
RC = &AArch64::GPR64RegClass;
else if (AArch64::FPR64RegClass.contains(*I))
RC = &AArch64::FPR64RegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions; it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
assert(Entry->getParent()->getFunction().hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on AArch64 is expensive. However, when aggressively
// optimizing for code size, we prefer to use a div instruction, as it is
// usually smaller than the alternative sequence.
// The exception to this is vector division. Since AArch64 doesn't have vector
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
return OptSize && !VT.isVector();
}
bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
// We want inc-of-add for scalars and sub-of-not for vectors.
return VT.isScalarInteger();
}
bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
EVT VT) const {
// v8f16 without fp16 needs to be extended to v8f32, which is more difficult
// to legalize.
if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
return false;
return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT);
}
bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
}
unsigned
AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
return getPointerTy(DL).getSizeInBits();
return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
}
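// For reference, the 3*64 + 2*32 bits above correspond to the AAPCS64
// va_list layout (Darwin and Windows instead use a single char* cursor):
//   struct va_list { void *__stack; void *__gr_top; void *__vr_top;
//                    int __gr_offs; int __vr_offs; };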
void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
// If we have any vulnerable SVE stack objects then the stack protector
// needs to be placed at the top of the SVE stack area, as the SVE locals
// are placed above the other locals, so we allocate it as if it were a
// scalable vector.
// FIXME: It may be worthwhile having a specific interface for this rather
// than doing it here in finalizeLowering.
if (MFI.hasStackProtectorIndex()) {
for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
if (MFI.getStackID(i) == TargetStackID::ScalableVector &&
MFI.getObjectSSPLayout(i) != MachineFrameInfo::SSPLK_None) {
MFI.setStackID(MFI.getStackProtectorIndex(),
TargetStackID::ScalableVector);
MFI.setObjectAlignment(MFI.getStackProtectorIndex(), Align(16));
break;
}
}
}
MFI.computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
// Unlike X86, we let frame lowering assign offsets to all catch objects.
bool AArch64TargetLowering::needsFixedCatchObjects() const {
return false;
}
bool AArch64TargetLowering::shouldLocalize(
const MachineInstr &MI, const TargetTransformInfo *TTI) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_GLOBAL_VALUE: {
// On Darwin, TLS global vars get selected into function calls, which
// we don't want localized, as they can get moved into the middle of
// another call sequence.
const GlobalValue &GV = *MI.getOperand(1).getGlobal();
if (GV.isThreadLocal() && Subtarget->isTargetMachO())
return false;
break;
}
// If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
// localizable.
case AArch64::ADRP:
case AArch64::G_ADD_LOW:
return true;
default:
break;
}
return TargetLoweringBase::shouldLocalize(MI, TTI);
}
bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
if (isa<ScalableVectorType>(Inst.getType()))
return true;
for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
return true;
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isa<ScalableVectorType>(AI->getAllocatedType()))
return true;
}
return false;
}
// Return the largest legal scalable vector type that matches VT's element type.
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for SVE container");
case MVT::i8:
return EVT(MVT::nxv16i8);
case MVT::i16:
return EVT(MVT::nxv8i16);
case MVT::i32:
return EVT(MVT::nxv4i32);
case MVT::i64:
return EVT(MVT::nxv2i64);
case MVT::f16:
return EVT(MVT::nxv8f16);
case MVT::f32:
return EVT(MVT::nxv4f32);
case MVT::f64:
return EVT(MVT::nxv2f64);
}
}
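// For example, a legal v4i32 maps to the nxv4i32 container: the fixed-length
// value occupies the low 128 bits of the scalable register and the remainder
// (if any) is unused.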
// Return a PTRUE with active lanes corresponding to the extent of VT.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(VT.getVectorNumElements());
assert(PgPattern && "Unexpected element count for SVE predicate");
// For vectors that are exactly getMaxSVEVectorSizeInBits in size, we can use
// AArch64SVEPredPattern::all, which can enable the use of unpredicated
// variants of instructions when available.
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
if (MaxSVESize && MinSVESize == MaxSVESize &&
MaxSVESize == VT.getSizeInBits())
PgPattern = AArch64SVEPredPattern::all;
MVT MaskVT;
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for SVE predicate");
case MVT::i8:
MaskVT = MVT::nxv16i1;
break;
case MVT::i16:
case MVT::f16:
MaskVT = MVT::nxv8i1;
break;
case MVT::i32:
case MVT::f32:
MaskVT = MVT::nxv4i1;
break;
case MVT::i64:
case MVT::f64:
MaskVT = MVT::nxv2i1;
break;
}
return getPTrue(DAG, DL, MaskVT, *PgPattern);
}
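// For example (a sketch): a v8i32 input yields PTRUE <Pg>.s, vl8; if the
// subtarget guarantees exactly a 256-bit SVE register, the whole register is
// covered, so PTRUE <Pg>.s, all is used instead, enabling unpredicated
// instruction variants.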
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal scalable vector!");
auto PredTy = VT.changeVectorElementType(MVT::i1);
return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
}
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
if (VT.isFixedLengthVector())
return getPredicateForFixedLengthVector(DAG, DL, VT);
return getPredicateForScalableVector(DAG, DL, VT);
}
// Grow V to consume an entire SVE register.
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
assert(VT.isScalableVector() &&
"Expected to convert into a scalable vector!");
assert(V.getValueType().isFixedLengthVector() &&
"Expected a fixed length vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
assert(VT.isFixedLengthVector() &&
"Expected to convert into a fixed length vector!");
assert(V.getValueType().isScalableVector() &&
"Expected a scalable vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
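// A sketch of the round trip between the two representations, assuming a
// v4i32 value V and its nxv4i32 container:
//   Scalable = INSERT_SUBVECTOR undef:nxv4i32, V, 0    // grow
//   Fixed    = EXTRACT_SUBVECTOR Scalable, 0           // shrink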
// Convert all fixed length vector loads larger than NEON to masked_loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<LoadSDNode>(Op);
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT LoadVT = ContainerVT;
EVT MemVT = Load->getMemoryVT();
auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
LoadVT = ContainerVT.changeTypeToInteger();
MemVT = MemVT.changeTypeToInteger();
}
SDValue NewLoad = DAG.getMaskedLoad(
LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
Load->getAddressingMode(), Load->getExtensionType());
SDValue Result = NewLoad;
if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
EVT ExtendVT = ContainerVT.changeVectorElementType(
Load->getMemoryVT().getVectorElementType());
Result = getSVESafeBitCast(ExtendVT, Result, DAG);
Result = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
Pg, Result, DAG.getUNDEF(ContainerVT));
}
Result = convertFromScalableVector(DAG, VT, Result);
SDValue MergedValues[2] = {Result, NewLoad.getValue(1)};
return DAG.getMergeValues(MergedValues, DL);
}
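// A sketch of the transformation above, assuming v8i32 (container nxv4i32):
//   (v8i32 (load p))
//     -> NewLoad = masked_load nxv4i32 p, ptrue(vl8), undef
//     -> merge_values (extract_subvector NewLoad, 0), NewLoad.chain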
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG) {
SDLoc DL(Mask);
EVT InVT = Mask.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
if (ISD::isBuildVectorAllOnes(Mask.getNode()))
return Pg;
auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
auto Op2 = DAG.getConstant(0, DL, ContainerVT);
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, Pg.getValueType(),
{Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
}
// Convert fixed length vector masked loads larger than NEON to SVE masked
// loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<MaskedLoadSDNode>(Op);
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);
SDValue PassThru;
bool IsPassThruZeroOrUndef = false;
if (Load->getPassThru()->isUndef()) {
PassThru = DAG.getUNDEF(ContainerVT);
IsPassThruZeroOrUndef = true;
} else {
if (ContainerVT.isInteger())
PassThru = DAG.getConstant(0, DL, ContainerVT);
else
PassThru = DAG.getConstantFP(0, DL, ContainerVT);
if (isZerosVector(Load->getPassThru().getNode()))
IsPassThruZeroOrUndef = true;
}
SDValue NewLoad = DAG.getMaskedLoad(
ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
Load->getAddressingMode(), Load->getExtensionType());
SDValue Result = NewLoad;
if (!IsPassThruZeroOrUndef) {
SDValue OldPassThru =
convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
Result = DAG.getSelect(DL, ContainerVT, Mask, Result, OldPassThru);
}
Result = convertFromScalableVector(DAG, VT, Result);
SDValue MergedValues[2] = {Result, NewLoad.getValue(1)};
return DAG.getMergeValues(MergedValues, DL);
}
// Convert all fixed length vector stores larger than NEON to masked_stores.
SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Store = cast<StoreSDNode>(Op);
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT MemVT = Store->getMemoryVT();
auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
EVT TruncVT = ContainerVT.changeVectorElementType(
Store->getMemoryVT().getVectorElementType());
MemVT = MemVT.changeTypeToInteger();
NewValue = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, TruncVT, Pg,
NewValue, DAG.getTargetConstant(0, DL, MVT::i64),
DAG.getUNDEF(TruncVT));
NewValue =
getSVESafeBitCast(ContainerVT.changeTypeToInteger(), NewValue, DAG);
}
return DAG.getMaskedStore(Store->getChain(), DL, NewValue,
Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
Store->getMemOperand(), Store->getAddressingMode(),
Store->isTruncatingStore());
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto *Store = cast<MaskedStoreSDNode>(Op);
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
return DAG.getMaskedStore(
Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
Mask, Store->getMemoryVT(), Store->getMemOperand(),
Store->getAddressingMode(), Store->isTruncatingStore());
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
bool Negated;
uint64_t SplatVal;
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32);
SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT);
SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2);
if (Negated)
Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
return convertFromScalableVector(DAG, VT, Res);
}
// Scalable vector i32/i64 DIV is supported.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode);
// Scalable vector i8/i16 DIV is not supported. Promote it to i32.
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
// If this is not a full vector, extend, div, and truncate it.
EVT WidenedVT = VT.widenIntegerVectorElementType(*DAG.getContext());
if (DAG.getTargetLoweringInfo().isTypeLegal(WidenedVT)) {
unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(0));
SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(1));
SDValue Div = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0, Op1);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
}
// Convert the operands to scalable vectors.
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
// Extend the scalable operands.
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
// Convert back to fixed vectors so the DIV can be further lowered.
Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
Op0Hi, Op1Hi);
// Convert again to scalable vectors to truncate.
ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
ResultLo, ResultHi);
return convertFromScalableVector(DAG, VT, ScalableResult);
}
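// As a concrete example of the unpack path above (taken when the widened
// type is not itself legal): a v16i8 sdiv is split with SUNPKLO/SUNPKHI into
// two v8i16 halves, each half recurses through this lowering until the legal
// i32 DIV is reached, and the two results are renarrowed with UZP1.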
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
Val = convertToScalableVector(DAG, ContainerVT, Val);
bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
// Repeatedly unpack Val until the result is of the desired element type.
switch (ContainerVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unimplemented container type");
case MVT::nxv16i8:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
if (VT.getVectorElementType() == MVT::i16)
break;
LLVM_FALLTHROUGH;
case MVT::nxv8i16:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
if (VT.getVectorElementType() == MVT::i32)
break;
LLVM_FALLTHROUGH;
case MVT::nxv4i32:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
break;
}
return convertFromScalableVector(DAG, VT, Val);
}
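// For example, zero-extending from i32 elements to i64 (container nxv4i32)
// takes a single UUNPKLO step: nxv4i32 -> nxv2i64, after which the result is
// shrunk back to the fixed-length type.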
SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
Val = convertToScalableVector(DAG, ContainerVT, Val);
// Repeatedly truncate Val until the result is of the desired element type.
switch (ContainerVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unimplemented container type");
case MVT::nxv2i64:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
if (VT.getVectorElementType() == MVT::i32)
break;
LLVM_FALLTHROUGH;
case MVT::nxv4i32:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
if (VT.getVectorElementType() == MVT::i16)
break;
LLVM_FALLTHROUGH;
case MVT::nxv8i16:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
break;
}
return convertFromScalableVector(DAG, VT, Val);
}
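// For example, truncating i64 elements to i32: the nxv2i64 value is bitcast
// to nxv4i32 and UZP1 keeps the even-numbered elements, i.e. the low half of
// each original i64 lane (little-endian layout).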
SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT InVT = Op.getOperand(0).getValueType();
assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
EVT InVT = Op.getOperand(0).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
Op.getOperand(1), Op.getOperand(2));
return convertFromScalableVector(DAG, VT, ScalableRes);
}
// Convert vector operation 'Op' to an equivalent predicated operation whereby
// the original operation's type is used to construct a suitable predicate.
// NOTE: The results for inactive lanes are undefined.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SelectionDAG &DAG,
unsigned NewOp) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
auto Pg = getPredicateForVector(DAG, DL, VT);
if (VT.isFixedLengthVector()) {
assert(isTypeLegal(VT) && "Expected only legal fixed-width types");
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
if (isa<CondCodeSDNode>(V)) {
Operands.push_back(V);
continue;
}
if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
EVT VTArg = VTNode->getVT().getVectorElementType();
EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
Operands.push_back(DAG.getValueType(NewVTArg));
continue;
}
assert(isTypeLegal(V.getValueType()) &&
"Expected only legal fixed-width types");
Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(ContainerVT));
auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
assert((!V.getValueType().isVector() ||
V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");
Operands.push_back(V);
}
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(VT));
return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
}
// If a fixed length vector operation has no side effects when applied to
// undefined elements, we can safely use scalable vectors to perform the same
// operation without needing to worry about predication.
SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(useSVEForFixedLengthVectorVT(VT) &&
"Only expected to lower fixed length vector operation!");
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Ops;
for (const SDValue &V : Op->op_values()) {
assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
// Pass through non-vector operands.
if (!V.getValueType().isVector()) {
Ops.push_back(V);
continue;
}
// "cast" fixed length vector to a scalable vector.
assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
"Only fixed length vectors are supported!");
Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue AccOp = ScalarOp.getOperand(0);
SDValue VecOp = ScalarOp.getOperand(1);
EVT SrcVT = VecOp.getValueType();
EVT ResVT = SrcVT.getVectorElementType();
EVT ContainerVT = SrcVT;
if (SrcVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
// Convert operands to Scalable.
AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), AccOp, Zero);
// Perform reduction.
SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
Pg, AccOp, VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
}
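// A sketch of the sequence emitted above for vecreduce_seq_fadd(Acc, v4f32 V):
//   Acc0 = INSERT_VECTOR_ELT undef:nxv4f32, Acc, 0
//   Rdx  = FADDA_PRED ptrue(vl4), Acc0, V'   // strictly-ordered accumulate
//   Res  = EXTRACT_VECTOR_ELT Rdx, 0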
SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
SelectionDAG &DAG) const {
SDLoc DL(ReduceOp);
SDValue Op = ReduceOp.getOperand(0);
EVT OpVT = Op.getValueType();
EVT VT = ReduceOp.getValueType();
if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
return SDValue();
SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
switch (ReduceOp.getOpcode()) {
default:
return SDValue();
case ISD::VECREDUCE_OR:
if (isAllActivePredicate(DAG, Pg) && OpVT == MVT::nxv16i1)
// The predicate can be 'Op' because
// vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);
else
return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
case ISD::VECREDUCE_AND: {
Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
}
case ISD::VECREDUCE_XOR: {
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
if (OpVT == MVT::nxv1i1) {
// Emulate a CNTP on .Q using .D and a different governing predicate.
Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Pg);
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Op);
}
SDValue Cntp =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
}
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();
if (useSVEForFixedLengthVectorVT(
SrcVT,
/*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
// UADDV always returns an i64 result.
EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
SrcVT.getVectorElementType();
EVT RdxVT = SrcVT;
if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
RdxVT = getPackedSVEVectorVT(ResVT);
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
Rdx, DAG.getConstant(0, DL, MVT::i64));
// The VEC_REDUCE nodes expect an element-sized result.
if (ResVT != ScalarOp.getValueType())
Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
return Res;
}
SDValue
AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
EVT InVT = Op.getOperand(1).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
// Convert the mask to a predicate. (NOTE: We don't need to worry about
// inactive lanes since VSELECT is safe when given undefined elements.)
EVT MaskVT = Op.getOperand(0).getValueType();
EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
Mask = DAG.getNode(ISD::TRUNCATE, DL,
MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
Mask, Op1, Op2);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getOperand(0).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
assert(useSVEForFixedLengthVectorVT(InVT) &&
"Only expected to lower fixed length vector operation!");
assert(Op.getValueType() == InVT.changeTypeToInteger() &&
"Expected integer result of the same bit length as the inputs!");
auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
EVT CmpVT = Pg.getValueType();
auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
{Pg, Op1, Op2, Op.getOperand(2)});
EVT PromoteVT = ContainerVT.changeTypeToInteger();
auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
return convertFromScalableVector(DAG, Op.getValueType(), Promote);
}
SDValue
AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
auto SrcOp = Op.getOperand(0);
EVT VT = Op.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT =
getContainerForFixedLengthVector(DAG, SrcOp.getValueType());
SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp);
Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
return convertFromScalableVector(DAG, VT, Op);
}
SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
unsigned NumOperands = Op->getNumOperands();
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
auto SrcOp1 = Op.getOperand(0);
auto SrcOp2 = Op.getOperand(1);
EVT VT = Op.getValueType();
EVT SrcVT = SrcOp1.getValueType();
if (NumOperands > 2) {
SmallVector<SDValue, 4> Ops;
EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
for (unsigned I = 0; I < NumOperands; I += 2)
Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
Op->getOperand(I), Op->getOperand(I + 1)));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
}
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);
Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
return convertFromScalableVector(DAG, VT, Op);
}
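// For example, (concat_vectors v4i32 A, v4i32 B) becomes
//   SPLICE ptrue(vl4), A', B'
// where the four active lanes take all of A and the elements of B are
// appended immediately after them.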
SDValue
AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
SDValue Pg = getPredicateForVector(DAG, DL, VT);
EVT SrcVT = Val.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT ExtendVT = ContainerVT.changeVectorElementType(
SrcVT.getVectorElementType());
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
Val = getSVESafeBitCast(ExtendVT, Val, DAG);
Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
Pg, Val, DAG.getUNDEF(ContainerVT));
return convertFromScalableVector(DAG, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
EVT RoundVT = ContainerSrcVT.changeVectorElementType(
VT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
Op.getOperand(1), DAG.getUNDEF(RoundVT));
Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
// It is safe to use a larger-than-specified operand because we have just
// unpacked the data, hence the upper bits are zero.
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeVectorElementType(
ContainerDstVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT, Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
: AArch64ISD::FCVTZU_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
EVT CvtVT = ContainerDstVT.changeVectorElementType(
ContainerSrcVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(CvtVT, Val, DAG);
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
// It is safe to use a larger-than-specified result since an fp_to_int whose
// result doesn't fit into the destination is undefined.
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
}
}
SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
auto ShuffleMask = SVN->getMask();
SDLoc DL(Op);
SDValue Op1 = Op.getOperand(0);
SDValue Op2 = Op.getOperand(1);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
bool ReverseEXT = false;
unsigned Imm;
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
Imm == VT.getVectorNumElements() - 1) {
if (ReverseEXT)
std::swap(Op1, Op2);
EVT ScalarTy = VT.getVectorElementType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
ScalarTy = MVT::i32;
SDValue Scalar = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
return convertFromScalableVector(DAG, VT, Op);
}
for (unsigned LaneSize : {64U, 32U, 16U}) {
if (isREVMask(ShuffleMask, VT, LaneSize)) {
EVT NewVT =
getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize));
unsigned RevOp;
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 8)
RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
else if (EltSz == 16)
RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
else
RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
Op = LowerToPredicatedOp(Op, DAG, RevOp);
Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
return convertFromScalableVector(DAG, VT, Op);
}
}
unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op2));
if (isTRNMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op1));
if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
}
// Functions like isZIPMask return true when an ISD::VECTOR_SHUFFLE's mask
// represents the same logical operation as performed by a ZIP instruction. In
// isolation these functions do not mean the ISD::VECTOR_SHUFFLE is exactly
// equivalent to an AArch64 instruction. There's the extra component of
// ISD::VECTOR_SHUFFLE's value type to consider. Prior to SVE these functions
// only operated on 64/128-bit vector types that have a direct mapping to a
// target register, and so an exact mapping is implied.
// However, when using SVE for fixed length vectors, most legal vector types
// are actually sub-vectors of a larger SVE register. When mapping
// ISD::VECTOR_SHUFFLE to an SVE instruction, care must be taken to consider
// how the mask's indices translate. Specifically, when the mapping requires
// an exact meaning for a specific vector index (e.g. index X is the last
// vector element in the register) then such mappings are often only safe when
// the exact SVE register size is known. The main exception to this is when
// indices are logically relative to the first element of either
// ISD::VECTOR_SHUFFLE operand because these relative indices don't change
// when converting from fixed-length to scalable vector types (i.e. the start
// of a fixed length vector is always the start of a scalable vector).
unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
if (MinSVESize == MaxSVESize && MaxSVESize == VT.getSizeInBits()) {
if (ShuffleVectorInst::isReverseMask(ShuffleMask) && Op2.isUndef()) {
Op = DAG.getNode(ISD::VECTOR_REVERSE, DL, ContainerVT, Op1);
return convertFromScalableVector(DAG, VT, Op);
}
if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op2));
if (isUZPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op1));
if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
}
}
return SDValue();
}
SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getValueType();
assert(VT.isScalableVector() && isTypeLegal(VT) &&
InVT.isScalableVector() && isTypeLegal(InVT) &&
"Only expect to cast between legal scalable vector types!");
assert(VT.getVectorElementType() != MVT::i1 &&
InVT.getVectorElementType() != MVT::i1 &&
"For predicate bitcasts, use getSVEPredicateBitCast");
if (InVT == VT)
return Op;
EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
// Safe bitcasting between unpacked vector types of different element counts
// is currently unsupported because the code below is missing the work needed
// to ensure the result's elements live where they're supposed to within an
// SVE register.
// 01234567
// e.g. nxv2i32 = XX??XX??
// nxv4f16 = X?X?X?X?
assert((VT.getVectorElementCount() == InVT.getVectorElementCount() ||
VT == PackedVT || InVT == PackedInVT) &&
"Unexpected bitcast!");
// Pack input if required.
if (InVT != PackedInVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
// Unpack result if required.
if (VT != PackedVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
return Op;
}
bool AArch64TargetLowering::isAllActivePredicate(SelectionDAG &DAG,
SDValue N) const {
return ::isAllActivePredicate(DAG, N);
}
EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
return ::getPromotedVTForPredicate(VT);
}
bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth) const {
unsigned Opc = Op.getOpcode();
switch (Opc) {
case AArch64ISD::VSHL: {
// Match (VSHL (VLSHR Val X) X)
SDValue ShiftL = Op;
SDValue ShiftR = Op->getOperand(0);
if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
return false;
if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
return false;
unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
// Other cases can be handled as well, but this is not
// implemented.
if (ShiftRBits != ShiftLBits)
return false;
unsigned ScalarSize = Op.getScalarValueSizeInBits();
assert(ScalarSize > ShiftLBits && "Invalid shift imm");
APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
APInt UnusedBits = ~OriginalDemandedBits;
if ((ZeroBits & UnusedBits) != ZeroBits)
return false;
// All bits that are zeroed by (VSHL (VLSHR Val X) X) are not
// used - simplify to just Val.
return TLO.CombineTo(Op, ShiftR->getOperand(0));
}
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
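// For example, (VSHL (VLSHR X, 8), 8) forces the low 8 bits of every lane to
// zero; if no user demands any of those low bits, the shift pair is redundant
// and the node simplifies to X itself.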
bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
return Op.getOpcode() == AArch64ISD::DUP ||
(Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
TargetLowering::isTargetCanonicalConstantNode(Op);
}
bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(
unsigned Opc, LLT Ty1, LLT Ty2) const {
return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 1ba2e2f315ec..ff3bfe897869 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1,1175 +1,1175 @@
//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"
namespace llvm {
namespace AArch64ISD {
// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value. Please stick to the following naming convention:
//
// _MERGE_OP<n> The result value is a vector with inactive lanes equal
// to source operand OP<n>.
//
// _MERGE_ZERO The result value is a vector with inactive lanes
// actively zeroed.
//
// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal
// to the last source operand, whose only purpose is to be
// a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
// Pseudo for an ObjC call that gets emitted together with a special `mov
// x29, x29` marker instruction.
CALL_RVMARKER,
CALL_BTI, // Function call followed by a BTI instruction.
// Produces the full sequence of instructions for getting the thread pointer
// offset of a variable into X0, using the TLSDesc model.
TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
ADR, // ADR
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
// Offset Table, TLS record).
RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
BRCOND, // Conditional branch instruction; "b.cond".
CSEL,
CSINV, // Conditional select invert.
CSNEG, // Conditional select negate.
CSINC, // Conditional select increment.
// Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
// ELF.
THREAD_POINTER,
ADC,
SBC, // adc, sbc instructions
// Predicated instructions where inactive lanes produce undefined results.
ABDS_PRED,
ABDU_PRED,
FADD_PRED,
FDIV_PRED,
FMA_PRED,
FMAX_PRED,
FMAXNM_PRED,
FMIN_PRED,
FMINNM_PRED,
FMUL_PRED,
FSUB_PRED,
MUL_PRED,
MULHS_PRED,
MULHU_PRED,
SDIV_PRED,
SHL_PRED,
SMAX_PRED,
SMIN_PRED,
SRA_PRED,
SRL_PRED,
UDIV_PRED,
UMAX_PRED,
UMIN_PRED,
// Unpredicated vector instructions
BIC,
SRAD_MERGE_OP1,
// Predicated instructions with the result of inactive lanes provided by the
// last operand.
FABS_MERGE_PASSTHRU,
FCEIL_MERGE_PASSTHRU,
FFLOOR_MERGE_PASSTHRU,
FNEARBYINT_MERGE_PASSTHRU,
FNEG_MERGE_PASSTHRU,
FRECPX_MERGE_PASSTHRU,
FRINT_MERGE_PASSTHRU,
FROUND_MERGE_PASSTHRU,
FROUNDEVEN_MERGE_PASSTHRU,
FSQRT_MERGE_PASSTHRU,
FTRUNC_MERGE_PASSTHRU,
FP_ROUND_MERGE_PASSTHRU,
FP_EXTEND_MERGE_PASSTHRU,
UINT_TO_FP_MERGE_PASSTHRU,
SINT_TO_FP_MERGE_PASSTHRU,
FCVTZU_MERGE_PASSTHRU,
FCVTZS_MERGE_PASSTHRU,
SIGN_EXTEND_INREG_MERGE_PASSTHRU,
ZERO_EXTEND_INREG_MERGE_PASSTHRU,
ABS_MERGE_PASSTHRU,
NEG_MERGE_PASSTHRU,
SETCC_MERGE_ZERO,
// Arithmetic instructions which write flags.
ADDS,
SUBS,
ADCS,
SBCS,
ANDS,
// Conditional compares. Operands: left,right,falsecc,cc,flags
CCMP,
CCMN,
FCCMP,
// Floating point comparison
FCMP,
// Scalar extract
EXTR,
// Scalar-to-vector duplication
DUP,
DUPLANE8,
DUPLANE16,
DUPLANE32,
DUPLANE64,
DUPLANE128,
// Vector immediate moves
MOVI,
MOVIshift,
MOVIedit,
MOVImsl,
FMOV,
MVNIshift,
MVNImsl,
// Vector immediate ops
BICi,
ORRi,
// Vector bitwise select: similar to ISD::VSELECT but not all bits within an
// element must be identical.
BSP,
// Vector shuffles
ZIP1,
ZIP2,
UZP1,
UZP2,
TRN1,
TRN2,
REV16,
REV32,
REV64,
EXT,
SPLICE,
// Vector shift by scalar
VSHL,
VLSHR,
VASHR,
// Vector saturating/rounding shift by immediate
SQSHL_I,
UQSHL_I,
SQSHLU_I,
SRSHR_I,
URSHR_I,
// Vector shift by constant and insert
VSLI,
VSRI,
// Vector comparisons
CMEQ,
CMGE,
CMGT,
CMHI,
CMHS,
FCMEQ,
FCMGE,
FCMGT,
// Vector zero comparisons
CMEQz,
CMGEz,
CMGTz,
CMLEz,
CMLTz,
FCMEQz,
FCMGEz,
FCMGTz,
FCMLEz,
FCMLTz,
// Vector across-lanes addition
// Only the lower result lane is defined.
SADDV,
UADDV,
// Add Pairwise of two vectors
ADDP,
// Add Long Pairwise
SADDLP,
UADDLP,
// udot/sdot instructions
UDOT,
SDOT,
// Vector across-lanes min/max
// Only the lower result lane is defined.
SMINV,
UMINV,
SMAXV,
UMAXV,
SADDV_PRED,
UADDV_PRED,
SMAXV_PRED,
UMAXV_PRED,
SMINV_PRED,
UMINV_PRED,
ORV_PRED,
EORV_PRED,
ANDV_PRED,
// Vector bitwise insertion
BIT,
// Compare-and-branch
CBZ,
CBNZ,
TBZ,
TBNZ,
// Tail calls
TC_RETURN,
// Custom prefetch handling
PREFETCH,
// {s|u}int to FP within a FP register.
SITOF,
UITOF,
/// Natural vector cast. ISD::BITCAST is not natural in the big-endian
/// world w.r.t. vectors, which causes additional REV instructions to be
/// generated to compensate for the byte-swapping. But sometimes we do
/// need to re-interpret the data in SIMD vector registers in big-endian
/// mode without emitting such REV instructions.
NVCAST,
MRS, // MRS, also sets the flags via a glue.
SMULL,
UMULL,
// Reciprocal estimates and steps.
FRECPE,
FRECPS,
FRSQRTE,
FRSQRTS,
SUNPKHI,
SUNPKLO,
UUNPKHI,
UUNPKLO,
CLASTA_N,
CLASTB_N,
LASTA,
LASTB,
TBL,
// Floating-point reductions.
FADDA_PRED,
FADDV_PRED,
FMAXV_PRED,
FMAXNMV_PRED,
FMINV_PRED,
FMINNMV_PRED,
INSR,
PTEST,
PTRUE,
BITREVERSE_MERGE_PASSTHRU,
BSWAP_MERGE_PASSTHRU,
REVH_MERGE_PASSTHRU,
REVW_MERGE_PASSTHRU,
CTLZ_MERGE_PASSTHRU,
CTPOP_MERGE_PASSTHRU,
DUP_MERGE_PASSTHRU,
INDEX_VECTOR,
// Cast between vectors that have the same element type but differ in length.
REINTERPRET_CAST,
// Nodes to build an LD64B / ST64B 64-byte quantity out of i64s, and vice versa
LS64_BUILD,
LS64_EXTRACT,
LD1_MERGE_ZERO,
LD1S_MERGE_ZERO,
LDNF1_MERGE_ZERO,
LDNF1S_MERGE_ZERO,
LDFF1_MERGE_ZERO,
LDFF1S_MERGE_ZERO,
LD1RQ_MERGE_ZERO,
LD1RO_MERGE_ZERO,
// Structured loads.
SVE_LD2_MERGE_ZERO,
SVE_LD3_MERGE_ZERO,
SVE_LD4_MERGE_ZERO,
// Unsigned gather loads.
GLD1_MERGE_ZERO,
GLD1_SCALED_MERGE_ZERO,
GLD1_UXTW_MERGE_ZERO,
GLD1_SXTW_MERGE_ZERO,
GLD1_UXTW_SCALED_MERGE_ZERO,
GLD1_SXTW_SCALED_MERGE_ZERO,
GLD1_IMM_MERGE_ZERO,
// Signed gather loads
GLD1S_MERGE_ZERO,
GLD1S_SCALED_MERGE_ZERO,
GLD1S_UXTW_MERGE_ZERO,
GLD1S_SXTW_MERGE_ZERO,
GLD1S_UXTW_SCALED_MERGE_ZERO,
GLD1S_SXTW_SCALED_MERGE_ZERO,
GLD1S_IMM_MERGE_ZERO,
// Unsigned gather loads.
GLDFF1_MERGE_ZERO,
GLDFF1_SCALED_MERGE_ZERO,
GLDFF1_UXTW_MERGE_ZERO,
GLDFF1_SXTW_MERGE_ZERO,
GLDFF1_UXTW_SCALED_MERGE_ZERO,
GLDFF1_SXTW_SCALED_MERGE_ZERO,
GLDFF1_IMM_MERGE_ZERO,
// Signed gather loads.
GLDFF1S_MERGE_ZERO,
GLDFF1S_SCALED_MERGE_ZERO,
GLDFF1S_UXTW_MERGE_ZERO,
GLDFF1S_SXTW_MERGE_ZERO,
GLDFF1S_UXTW_SCALED_MERGE_ZERO,
GLDFF1S_SXTW_SCALED_MERGE_ZERO,
GLDFF1S_IMM_MERGE_ZERO,
// Non-temporal gather loads
GLDNT1_MERGE_ZERO,
GLDNT1_INDEX_MERGE_ZERO,
GLDNT1S_MERGE_ZERO,
// Contiguous masked store.
ST1_PRED,
// Scatter store
SST1_PRED,
SST1_SCALED_PRED,
SST1_UXTW_PRED,
SST1_SXTW_PRED,
SST1_UXTW_SCALED_PRED,
SST1_SXTW_SCALED_PRED,
SST1_IMM_PRED,
// Non-temporal scatter store
SSTNT1_PRED,
SSTNT1_INDEX_PRED,
// SME
RDSVL,
REVD_MERGE_PASSTHRU,
// Asserts that a function argument (i32) is zero-extended to i8 by
// the caller
ASSERT_ZEXT_BOOL,
// Strict (exception-raising) floating point comparison
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCMPE,
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
LD4post,
ST2post,
ST3post,
ST4post,
LD1x2post,
LD1x3post,
LD1x4post,
ST1x2post,
ST1x3post,
ST1x4post,
LD1DUPpost,
LD2DUPpost,
LD3DUPpost,
LD4DUPpost,
LD1LANEpost,
LD2LANEpost,
LD3LANEpost,
LD4LANEpost,
ST2LANEpost,
ST3LANEpost,
ST4LANEpost,
STG,
STZG,
ST2G,
STZ2G,
LDP,
STP,
STNP,
// Memory Operations
MOPS_MEMSET,
MOPS_MEMSET_TAGGING,
MOPS_MEMCOPY,
MOPS_MEMMOVE,
};
} // end namespace AArch64ISD
namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
RN = 0, // Round to Nearest
RP = 1, // Round towards Plus infinity
RM = 2, // Round towards Minus infinity
RZ = 3, // Round towards Zero
rmMask = 3 // Bit mask selecting rounding mode
};
// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
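// A minimal sketch of decoding the current mode from a raw FPCR value (a
// hypothetical helper, not part of this interface):
//   Rounding getRoundingMode(uint64_t FPCR) {
//     return static_cast<Rounding>((FPCR >> RoundingBitsPos) & rmMask);
//   }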
} // namespace AArch64
class AArch64Subtarget;
class AArch64TargetLowering : public TargetLowering {
public:
explicit AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI);
/// Control the following reassociation of operands: (op (op x, c1), y) -> (op
/// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
SDValue N1) const override;
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
// Returning i64 unconditionally here (i.e. even for ILP32) means that the
// *DAG* representation of pointers will always be 64-bits. They will be
// truncated and extended when transferred to memory, but the 64-bit DAG
// allows us to use AArch64's addressing modes much more easily.
return MVT::getIntegerVT(64);
}
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
TargetLoweringOpt &TLO) const override;
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
/// Returns true if the target allows unaligned memory accesses of the
/// specified type.
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;
/// LLT variant.
bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast = nullptr) const override;
/// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
/// This method returns a target specific FastISel object, or null if the
/// target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
/// Return true if the given shuffle mask can be codegen'd directly, or if it
/// should be stack expanded.
bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
/// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
/// shuffle mask can be codegen'd directly.
bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
/// Return the ISD::SETCC ValueType.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *EmitMopa(unsigned Opc, unsigned BaseReg, MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const override;
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
bool isProfitableToHoist(Instruction *I) const override;
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
bool lowerInterleavedLoad(LoadInst *LI,
ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices,
unsigned Factor) const override;
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
bool isLegalAddImmediate(int64_t) const override;
bool isLegalICmpImmediate(int64_t) const override;
bool isMulAddWithConstProfitable(SDValue AddNode,
SDValue ConstNode) const override;
bool shouldConsiderGEPOffsetSplit() const override;
EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
LLT getOptimalMemOpLLT(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I = nullptr) const override;
/// Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
bool generateFMAsInMachineCombiner(EVT VT,
CodeGenOpt::Level OptLevel) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
/// Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool MathUsed) const override {
// Using overflow ops for overflow checks only should be beneficial on
// AArch64.
return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
}
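// Illustrative sketch (not part of the patch) of the kind of source pattern
// this favours: a checked add maps onto ISD::SADDO, which AArch64 can select
// as a single ADDS plus a flags read instead of a widen-and-compare sequence.
// The helper name is hypothetical; __builtin_add_overflow is a GCC/Clang
// builtin.
static bool sketchSignedAddOverflows(int A, int B, int &Sum) {
return __builtin_add_overflow(A, B, &Sum); // true iff the add wraps
}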
Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
AtomicOrdering Ord) const override;
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
bool isOpSuitableForLDPSTP(const Instruction *I) const;
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const override;
/// If the target has a standard location for the stack protector cookie,
/// returns the address of that location. Otherwise, returns nullptr.
Value *getIRStackGuard(IRBuilderBase &IRB) const override;
void insertSSPDeclarations(Module &M) const override;
Value *getSDagStackGuard(const Module &M) const override;
Function *getSSPStackGuardCheck(const Module &M) const override;
/// If the target has a standard location for the unsafe stack pointer,
/// returns the address of that location. Otherwise, returns nullptr.
Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
// FIXME: This is a guess. Has this been defined yet?
return AArch64::X0;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
// FIXME: This is a guess. Has this been defined yet?
return AArch64::X1;
}
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
const MachineFunction &MF) const override {
// Do not merge to float value size (128 bits) if no implicit
// float attribute is set.
bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
if (NoFloat)
return (MemVT.getSizeInBits() <= 64);
return true;
}
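// A minimal standalone sketch of the gating logic above (illustrative, not
// part of the patch), assuming only the two inputs shown:
static bool sketchMayMergeStores(bool HasNoImplicitFloat, unsigned MergedBits) {
// Without implicit FP use we must not create the 128-bit FP/vector store a
// wider merge would need, so cap merged stores at 64 bits.
return !HasNoImplicitFloat || MergedBits <= 64;
}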
bool isCheapToSpeculateCttz() const override {
return true;
}
bool isCheapToSpeculateCtlz() const override {
return true;
}
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
bool hasAndNotCompare(SDValue V) const override {
// We can use bics for any scalar.
return V.getValueType().isScalarInteger();
}
bool hasAndNot(SDValue Y) const override {
EVT VT = Y.getValueType();
if (!VT.isVector())
return hasAndNotCompare(Y);
TypeSize TS = VT.getSizeInBits();
// TODO: We should be able to use bic/bif too for SVE.
return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
}
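// A minimal sketch (not part of the patch) of the combine hasAndNot enables:
// keeping the x & ~y form lets instruction selection emit a single BIC.
static uint64_t sketchBic(uint64_t X, uint64_t Y) {
return X & ~Y; // scalar BIC; NEON BIC handles 64/128-bit fixed vectors
}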
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const override;
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
bool shouldTransformSignedTruncationCheck(EVT XVT,
unsigned KeptBits) const override {
// For vectors, we don't have a preference.
if (XVT.isVector())
return false;
auto VTIsOk = [](EVT VT) -> bool {
return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
VT == MVT::i64;
};
// We are ok with KeptBitsVT being byte/word/dword, which is what SXT supports.
// XVT will be larger than KeptBitsVT.
MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
}
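// Worked example (illustrative, not part of the patch): with KeptBits == 8,
// an "X fits in a signed byte" range check is rewritten into the
// sign-extend-and-compare form that selects to SXTB + CMP.
static bool sketchFitsInSignedByte(int X) {
return static_cast<signed char>(X) == X; // sext(trunc X) == X
}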
bool preferIncOfAddToSubOfNot(EVT VT) const override;
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
bool hasBitPreservingFPLogic(EVT VT) const override {
// FIXME: Is this always true? It should be true for vectors at least.
return VT == MVT::f32 || VT == MVT::f64;
}
bool supportSplitCSR(MachineFunction *MF) const override {
return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
}
void initializeSplitCSR(MachineBasicBlock *Entry) const override;
void insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
bool supportSwiftError() const override {
return true;
}
/// Enable aggressive FMA fusion on targets that want it.
bool enableAggressiveFMAFusion(EVT VT) const override;
/// Returns the size of the platform's va_list object.
unsigned getVaListSizeInBits(const DataLayout &DL) const override;
/// Returns true if \p VecTy is a legal interleaved access type. This
/// function checks the vector element type and the overall width of the
/// vector.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
bool &UseScalable) const;
/// Returns the number of interleaved accesses that will be generated when
/// lowering accesses of the given type.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
bool UseScalable) const;
MachineMemOperand::Flags getTargetMMOFlags(
const Instruction &I) const override;
bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const override;
/// Used for exception handling on Win64.
bool needsFixedCatchObjects() const override;
bool fallBackToDAGISel(const Instruction &Inst) const override;
/// SVE code generation for fixed length vectors does not custom lower
/// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
/// merge. However, merging them creates a BUILD_VECTOR that is just as
/// illegal as the original, thus leading to an infinite legalisation loop.
/// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
/// vector types this override can be removed.
bool mergeStoresAfterLegalization(EVT VT) const override;
// If the platform/function should have a redzone, return the size in bytes.
unsigned getRedZoneSize(const Function &F) const {
if (F.hasFnAttribute(Attribute::NoRedZone))
return 0;
return 128;
}
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
EVT getPromotedVTForPredicate(EVT VT) const;
EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const override;
bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
bool isExtFreeImpl(const Instruction *Ext) const override;
void addTypeForNEON(MVT VT);
void addTypeForFixedLengthSVE(MVT VT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCall(CallLoweringInfo & /*CLI*/,
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SmallVectorImpl<CCValAssign> &RVLocs,
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
bool
isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
/// Finds the incoming stack arguments which overlap the given fixed stack
/// object and incorporates their load into the current chain. This prevents
/// an upcoming store from clobbering the stack argument before it's used.
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
MachineFrameInfo &MFI, int ClobberedFI) const;
bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
SDValue &Chain) const;
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
template <class NodeTy>
SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
template <class NodeTy>
SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
template <class NodeTy>
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
template <class NodeTy>
SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
SelectionDAG &DAG) const;
SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
unsigned NewOp) const;
SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
SDValue &Size,
SelectionDAG &DAG) const;
SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps, bool &UseOneConst,
bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const override;
SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
const DenormalMode &Mode) const override;
SDValue getSqrtResultForDenormInput(SDValue Operand,
SelectionDAG &DAG) const override;
unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
const char *LowerXConstraint(EVT ConstraintVT) const override;
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "Q")
return InlineAsm::Constraint_Q;
// FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
// followed by llvm_unreachable so we'll leave them unimplemented in
// the backend for now.
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
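// Illustrative use of the "Q" constraint from C (a sketch, not part of the
// patch; it only compiles when targeting AArch64): "Q" pins the operand to a
// single-base-register address with no offset, as exclusive loads require.
static int sketchLoadExclusive(int *P) {
int V;
asm volatile("ldxr %w0, %1" : "=r"(V) : "Q"(*P));
return V;
}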
bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, bool &IsInc,
SelectionDAG &DAG) const;
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
void ReplaceExtractSubVectorResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
void finalizeLowering(MachineFunction &MF) const override;
bool shouldLocalize(const MachineInstr &MI,
const TargetTransformInfo *TTI) const override;
bool SimplifyDemandedBitsForTargetNode(SDValue Op,
const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts,
KnownBits &Known,
TargetLoweringOpt &TLO,
unsigned Depth) const override;
bool isTargetCanonicalConstantNode(SDValue Op) const override;
// Normally SVE is only used for vectors whose byte size does not fit within
// a NEON vector. This changes when OverrideNEON is true, allowing SVE to be
// used for 64-bit and 128-bit vectors as well.
bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
// With the exception of data-predicate transitions, no instructions are
// required to cast between legal scalable vector types. However:
// 1. Packed and unpacked types have different bit lengths, meaning BITCAST
// is not universally usable.
// 2. Most unpacked integer types are not legal and thus integer extends
// cannot be used to convert between unpacked and packed types.
// These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
// to transition between unpacked and packed types of the same element type,
// with BITCAST used otherwise.
// This function does not handle predicate bitcasts.
SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
LLT Ty2) const override;
};
namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
} // end namespace AArch64
} // end namespace llvm
#endif
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f7d139adc63b..f6b7d1ffc6d2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1,12973 +1,12975 @@
//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Custom DAG lowering for SI
//
//===----------------------------------------------------------------------===//
#include "SIISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;
#define DEBUG_TYPE "si-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool> DisableLoopAlignment(
"amdgpu-disable-loop-alignment",
cl::desc("Do not align and prefetch loops"),
cl::init(false));
static cl::opt<bool> UseDivergentRegisterIndexing(
"amdgpu-use-divergent-register-indexing",
cl::Hidden,
cl::desc("Use indirect register addressing for divergent indexes"),
cl::init(false));
static bool hasFP32Denormals(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
return Info->getMode().allFP32Denormals();
}
static bool hasFP64FP16Denormals(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
return Info->getMode().allFP64FP16Denormals();
}
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
return AMDGPU::SGPR0 + Reg;
}
}
llvm_unreachable("Cannot allocate sgpr");
}
SITargetLowering::SITargetLowering(const TargetMachine &TM,
const GCNSubtarget &STI)
: AMDGPUTargetLowering(TM, STI),
Subtarget(&STI) {
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
const SIRegisterInfo *TRI = STI.getRegisterInfo();
const TargetRegisterClass *V64RegClass = TRI->getVGPR64Class();
addRegisterClass(MVT::f64, V64RegClass);
addRegisterClass(MVT::v2f32, V64RegClass);
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
addRegisterClass(MVT::v3f32, TRI->getVGPRClassForBitWidth(96));
addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v4f32, TRI->getVGPRClassForBitWidth(128));
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
addRegisterClass(MVT::v5f32, TRI->getVGPRClassForBitWidth(160));
addRegisterClass(MVT::v6i32, &AMDGPU::SGPR_192RegClass);
addRegisterClass(MVT::v6f32, TRI->getVGPRClassForBitWidth(192));
addRegisterClass(MVT::v3i64, &AMDGPU::SGPR_192RegClass);
addRegisterClass(MVT::v3f64, TRI->getVGPRClassForBitWidth(192));
addRegisterClass(MVT::v7i32, &AMDGPU::SGPR_224RegClass);
addRegisterClass(MVT::v7f32, TRI->getVGPRClassForBitWidth(224));
addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v8f32, TRI->getVGPRClassForBitWidth(256));
addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));
addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));
addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v8f64, TRI->getVGPRClassForBitWidth(512));
addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
if (Subtarget->has16BitInsts()) {
addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
// Unless there are also VOP3P operations, no operations are really legal.
addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v8i16, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v8f16, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v16i16, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v16f16, &AMDGPU::SGPR_256RegClass);
}
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
addRegisterClass(MVT::v32f32, TRI->getVGPRClassForBitWidth(1024));
computeRegisterProperties(Subtarget->getRegisterInfo());
// The boolean content concept here is too inflexible. Compares only ever
// really produce a 1-bit result. Any copy/extend from these will turn into a
// select, and zext/1 or sext/-1 are equally cheap. Arbitrarily choose 0/1, as
// it's what most targets use.
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent);
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD,
{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
MVT::v32i32},
Custom);
setOperationAction(ISD::STORE,
{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
MVT::v32i32},
Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
setTruncStoreAction(MVT::v32i32, MVT::v32i16, Expand);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);
setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Expand);
setTruncStoreAction(MVT::v3i64, MVT::v3i16, Expand);
setTruncStoreAction(MVT::v3i64, MVT::v3i32, Expand);
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Expand);
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Expand);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Expand);
setTruncStoreAction(MVT::v16i64, MVT::v16i32, Expand);
setOperationAction(ISD::GlobalAddress, {MVT::i32, MVT::i64}, Custom);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
setOperationAction(ISD::SELECT_CC,
{MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1}, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Promote);
setOperationAction(ISD::SETCC, {MVT::v2i1, MVT::v4i1}, Expand);
AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
setOperationAction(ISD::TRUNCATE,
{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32},
Expand);
setOperationAction(ISD::FP_ROUND,
{MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG,
{MVT::v2i1, MVT::v4i1, MVT::v2i8, MVT::v4i8, MVT::v2i16,
MVT::v3i16, MVT::v4i16, MVT::Other},
Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC,
{MVT::i1, MVT::i32, MVT::i64, MVT::f32, MVT::f64}, Expand);
setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Legal);
setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i32, Legal);
setOperationAction({ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}, MVT::i64,
Expand);
#if 0
setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i64, Legal);
#endif
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
for (MVT VT :
{MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64,
MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v3i64, MVT::v3f64,
MVT::v6i32, MVT::v6f32, MVT::v4i64, MVT::v4f64, MVT::v8i64,
MVT::v8f64, MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16,
MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
case ISD::STORE:
case ISD::BUILD_VECTOR:
case ISD::BITCAST:
+ case ISD::UNDEF:
case ISD::EXTRACT_VECTOR_ELT:
case ISD::INSERT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR:
case ISD::SCALAR_TO_VECTOR:
break;
case ISD::INSERT_SUBVECTOR:
case ISD::CONCAT_VECTORS:
setOperationAction(Op, VT, Custom);
break;
default:
setOperationAction(Op, VT, Expand);
break;
}
}
}
setOperationAction(ISD::FP_EXTEND, MVT::v4f32, Expand);
// TODO: For dynamic 64-bit vector inserts/extracts, should emit a pseudo that
// is expanded to avoid having two separate loops in case the index is a VGPR.
// Most operations are naturally 32-bit vector operations. We only support
// load and store of i64 vectors, so promote v2i64 vector operations to v4i32.
for (MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) {
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v4i32);
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v4i32);
}
for (MVT Vec64 : { MVT::v3i64, MVT::v3f64 }) {
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v6i32);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v6i32);
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v6i32);
}
for (MVT Vec64 : { MVT::v4i64, MVT::v4f64 }) {
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v8i32);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v8i32);
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v8i32);
}
for (MVT Vec64 : { MVT::v8i64, MVT::v8f64 }) {
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v16i32);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v16i32);
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v16i32);
}
for (MVT Vec64 : { MVT::v16i64, MVT::v16f64 }) {
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v32i32);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v32i32);
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v32i32);
}
setOperationAction(ISD::VECTOR_SHUFFLE,
{MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32},
Expand);
setOperationAction(ISD::BUILD_VECTOR, {MVT::v4f16, MVT::v4i16}, Custom);
// Avoid stack access for these.
// TODO: Generalize to more vector types.
setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT},
{MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
MVT::v4i16, MVT::v4f16},
Custom);
// Deal with vec3 vector operations when widened to vec4.
setOperationAction(ISD::INSERT_SUBVECTOR,
{MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32}, Custom);
// Deal with vec5/6/7 vector operations when widened to vec8.
setOperationAction(ISD::INSERT_SUBVECTOR,
{MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32},
Custom);
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
// and output demarshalling
setOperationAction(ISD::ATOMIC_CMP_SWAP, {MVT::i32, MVT::i64}, Custom);
// We can't return success/failure, only the old value;
// let LLVM add the comparison.
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, {MVT::i32, MVT::i64},
Expand);
if (Subtarget->hasFlatAddressSpace())
setOperationAction(ISD::ADDRSPACECAST, {MVT::i32, MVT::i64}, Custom);
setOperationAction(ISD::BITREVERSE, {MVT::i32, MVT::i64}, Legal);
// FIXME: This should be narrowed to i32, but that only happens if i64 is
// illegal.
// FIXME: Should lower sub-i32 bswaps to bit-ops without v_perm_b32.
setOperationAction(ISD::BSWAP, {MVT::i64, MVT::i32}, Legal);
// This is s_memtime on SI and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Custom);
if (Subtarget->has16BitInsts()) {
setOperationAction({ISD::FPOW, ISD::FPOWI}, MVT::f16, Promote);
setOperationAction({ISD::FLOG, ISD::FEXP, ISD::FLOG10}, MVT::f16, Custom);
}
if (Subtarget->hasMadMacF32Insts())
setOperationAction(ISD::FMAD, MVT::f32, Legal);
if (!Subtarget->hasBFI())
// fcopysign can be done in a single instruction with BFI.
setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
if (!Subtarget->hasBCNT(32))
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasBCNT(64))
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
if (Subtarget->hasFFBH())
setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
if (Subtarget->hasFFBL())
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
// We only really have 32-bit BFE instructions (and 16-bit on VI).
//
// On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any
// effort to match them now. We want this to be false for i64 cases when the
// extraction isn't restricted to the upper or lower half. Ideally we would
// have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that
// span the midpoint are probably relatively rare, so don't worry about them
// for now.
if (Subtarget->hasBFE())
setHasExtractBitsInsn(true);
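// Illustrative sketch (not part of the patch) of what a 32-bit BFE extracts,
// assuming 1 <= Width, Width < 32, and Offset + Width <= 32:
auto SketchBFEU32 = [](uint32_t Src, unsigned Offset, unsigned Width) {
return (Src >> Offset) & ((1u << Width) - 1u); // Width bits from Offset
};
(void)SketchBFEU32; // illustration only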
// Clamp modifier on add/sub
if (Subtarget->hasIntClamp())
setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Legal);
if (Subtarget->hasAddNoCarry())
setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, {MVT::i16, MVT::i32},
Legal);
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, {MVT::f32, MVT::f64},
Custom);
// These are really only legal for ieee_mode functions. We should be avoiding
// them for functions that don't have ieee_mode enabled, so just say they are
// legal.
setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE},
{MVT::f32, MVT::f64}, Legal);
if (Subtarget->haveRoundOpsF64())
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FRINT}, MVT::f64, Legal);
else
setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR},
MVT::f64, Custom);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction({ISD::FSIN, ISD::FCOS, ISD::FDIV}, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
if (Subtarget->has16BitInsts()) {
setOperationAction({ISD::Constant, ISD::SMIN, ISD::SMAX, ISD::UMIN,
ISD::UMAX, ISD::UADDSAT, ISD::USUBSAT},
MVT::i16, Legal);
AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32);
setOperationAction({ISD::ROTR, ISD::ROTL, ISD::SELECT_CC, ISD::BR_CC},
MVT::i16, Expand);
setOperationAction({ISD::SIGN_EXTEND, ISD::SDIV, ISD::UDIV, ISD::SREM,
ISD::UREM, ISD::BITREVERSE, ISD::CTTZ,
ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
ISD::CTPOP},
MVT::i16, Promote);
setOperationAction(ISD::LOAD, MVT::i16, Custom);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::i16, Promote);
AddPromotedToType(ISD::FP16_TO_FP, MVT::i16, MVT::i32);
setOperationAction(ISD::FP_TO_FP16, MVT::i16, Promote);
AddPromotedToType(ISD::FP_TO_FP16, MVT::i16, MVT::i32);
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::i16, Custom);
// F16 - Constant Actions.
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
// F16 - Load/Store Actions.
setOperationAction(ISD::LOAD, MVT::f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::f16, MVT::i16);
setOperationAction(ISD::STORE, MVT::f16, Promote);
AddPromotedToType(ISD::STORE, MVT::f16, MVT::i16);
// F16 - VOP1 Actions.
setOperationAction(
{ISD::FP_ROUND, ISD::FCOS, ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND},
MVT::f16, Custom);
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i16, Custom);
setOperationAction(
{ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP},
MVT::f16, Promote);
// F16 - VOP2 Actions.
setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand);
setOperationAction(ISD::FDIV, MVT::f16, Custom);
// F16 - VOP3 Actions.
setOperationAction(ISD::FMA, MVT::f16, Legal);
if (STI.hasMadF16())
setOperationAction(ISD::FMAD, MVT::f16, Legal);
for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16,
MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
case ISD::STORE:
case ISD::BUILD_VECTOR:
case ISD::BITCAST:
+ case ISD::UNDEF:
case ISD::EXTRACT_VECTOR_ELT:
case ISD::INSERT_VECTOR_ELT:
case ISD::INSERT_SUBVECTOR:
case ISD::EXTRACT_SUBVECTOR:
case ISD::SCALAR_TO_VECTOR:
break;
case ISD::CONCAT_VECTORS:
setOperationAction(Op, VT, Custom);
break;
default:
setOperationAction(Op, VT, Expand);
break;
}
}
}
// v_perm_b32 can handle either of these.
setOperationAction(ISD::BSWAP, {MVT::i16, MVT::v2i16}, Legal);
setOperationAction(ISD::BSWAP, MVT::v4i16, Custom);
// XXX - Do these do anything? Vector constants turn into build_vector.
setOperationAction(ISD::Constant, {MVT::v2i16, MVT::v2f16}, Legal);
setOperationAction(ISD::UNDEF, {MVT::v2i16, MVT::v2f16}, Legal);
setOperationAction(ISD::STORE, MVT::v2i16, Promote);
AddPromotedToType(ISD::STORE, MVT::v2i16, MVT::i32);
setOperationAction(ISD::STORE, MVT::v2f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f16, MVT::i32);
setOperationAction(ISD::LOAD, MVT::v2i16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2i16, MVT::i32);
setOperationAction(ISD::LOAD, MVT::v2f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f16, MVT::i32);
setOperationAction(ISD::AND, MVT::v2i16, Promote);
AddPromotedToType(ISD::AND, MVT::v2i16, MVT::i32);
setOperationAction(ISD::OR, MVT::v2i16, Promote);
AddPromotedToType(ISD::OR, MVT::v2i16, MVT::i32);
setOperationAction(ISD::XOR, MVT::v2i16, Promote);
AddPromotedToType(ISD::XOR, MVT::v2i16, MVT::i32);
setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::v2i32);
setOperationAction(ISD::LOAD, MVT::v4f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f16, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::v4i16, Promote);
AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::v4f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f16, MVT::v2i32);
setOperationAction(ISD::LOAD, MVT::v8i16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8i16, MVT::v4i32);
setOperationAction(ISD::LOAD, MVT::v8f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8f16, MVT::v4i32);
setOperationAction(ISD::STORE, MVT::v8i16, Promote);
AddPromotedToType(ISD::STORE, MVT::v8i16, MVT::v4i32);
setOperationAction(ISD::STORE, MVT::v8f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v8f16, MVT::v4i32);
setOperationAction(ISD::LOAD, MVT::v16i16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16i16, MVT::v8i32);
setOperationAction(ISD::LOAD, MVT::v16f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16f16, MVT::v8i32);
setOperationAction(ISD::STORE, MVT::v16i16, Promote);
AddPromotedToType(ISD::STORE, MVT::v16i16, MVT::v8i32);
setOperationAction(ISD::STORE, MVT::v16f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v16f16, MVT::v8i32);
setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
MVT::v2i32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand);
setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
MVT::v4i32, Expand);
setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
MVT::v8i32, Expand);
if (!Subtarget->hasVOP3PInsts())
setOperationAction(ISD::BUILD_VECTOR, {MVT::v2i16, MVT::v2f16}, Custom);
setOperationAction(ISD::FNEG, MVT::v2f16, Legal);
// This isn't really legal, but this avoids the legalizer unrolling it (and
// allows matching fneg (fabs x) patterns)
setOperationAction(ISD::FABS, MVT::v2f16, Legal);
setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, MVT::f16, Custom);
setOperationAction({ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE}, MVT::f16, Legal);
setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE},
{MVT::v4f16, MVT::v8f16, MVT::v16f16}, Custom);
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM},
{MVT::v4f16, MVT::v8f16, MVT::v16f16}, Expand);
for (MVT Vec16 : {MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
setOperationAction(
{ISD::BUILD_VECTOR, ISD::EXTRACT_VECTOR_ELT, ISD::SCALAR_TO_VECTOR},
Vec16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec16, Expand);
}
}
if (Subtarget->hasVOP3PInsts()) {
setOperationAction({ISD::ADD, ISD::SUB, ISD::MUL, ISD::SHL, ISD::SRL,
ISD::SRA, ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX,
ISD::UADDSAT, ISD::USUBSAT, ISD::SADDSAT, ISD::SSUBSAT},
MVT::v2i16, Legal);
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FMINNUM_IEEE,
ISD::FMAXNUM_IEEE, ISD::FCANONICALIZE},
MVT::v2f16, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, {MVT::v2i16, MVT::v2f16},
Custom);
setOperationAction(ISD::VECTOR_SHUFFLE,
{MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16,
MVT::v16f16, MVT::v16i16},
Custom);
for (MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16})
// Split vector operations.
setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL, ISD::ADD, ISD::SUB,
ISD::MUL, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
ISD::UADDSAT, ISD::SADDSAT, ISD::USUBSAT,
ISD::SSUBSAT},
VT, Custom);
for (MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16})
// Split vector operations.
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FCANONICALIZE},
VT, Custom);
setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, {MVT::v2f16, MVT::v4f16},
Custom);
setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
setOperationAction(ISD::SELECT, {MVT::v4i16, MVT::v4f16}, Custom);
if (Subtarget->hasPackedFP32Ops()) {
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FNEG},
MVT::v2f32, Legal);
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA},
{MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v32f32},
Custom);
}
}
setOperationAction({ISD::FNEG, ISD::FABS}, MVT::v4f16, Custom);
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::SELECT, MVT::v2i16, Promote);
AddPromotedToType(ISD::SELECT, MVT::v2i16, MVT::i32);
setOperationAction(ISD::SELECT, MVT::v2f16, Promote);
AddPromotedToType(ISD::SELECT, MVT::v2f16, MVT::i32);
} else {
// Legalization hack.
setOperationAction(ISD::SELECT, {MVT::v2i16, MVT::v2f16}, Custom);
setOperationAction({ISD::FNEG, ISD::FABS}, MVT::v2f16, Custom);
}
setOperationAction(ISD::SELECT,
{MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16},
Custom);
setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom);
if (Subtarget->hasMad64_32())
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN,
{MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
MVT::v2i16, MVT::v2f16},
Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN,
{MVT::v2f16, MVT::v2i16, MVT::v3f16, MVT::v3i16,
MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::Other, MVT::f16,
MVT::i16, MVT::i8},
Custom);
setOperationAction(ISD::INTRINSIC_VOID,
{MVT::Other, MVT::v2i16, MVT::v2f16, MVT::v3i16,
MVT::v3f16, MVT::v4f16, MVT::v4i16, MVT::f16, MVT::i16,
MVT::i8},
Custom);
setTargetDAGCombine({ISD::ADD,
ISD::ADDCARRY,
ISD::SUB,
ISD::SUBCARRY,
ISD::FADD,
ISD::FSUB,
ISD::FMINNUM,
ISD::FMAXNUM,
ISD::FMINNUM_IEEE,
ISD::FMAXNUM_IEEE,
ISD::FMA,
ISD::SMIN,
ISD::SMAX,
ISD::UMIN,
ISD::UMAX,
ISD::SETCC,
ISD::AND,
ISD::OR,
ISD::XOR,
ISD::SINT_TO_FP,
ISD::UINT_TO_FP,
ISD::FCANONICALIZE,
ISD::SCALAR_TO_VECTOR,
ISD::ZERO_EXTEND,
ISD::SIGN_EXTEND_INREG,
ISD::EXTRACT_VECTOR_ELT,
ISD::INSERT_VECTOR_ELT});
// All memory operations. Some folding on the pointer operand is done to help
// match the constant offsets in the addressing modes.
setTargetDAGCombine({ISD::LOAD,
ISD::STORE,
ISD::ATOMIC_LOAD,
ISD::ATOMIC_STORE,
ISD::ATOMIC_CMP_SWAP,
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
ISD::ATOMIC_SWAP,
ISD::ATOMIC_LOAD_ADD,
ISD::ATOMIC_LOAD_SUB,
ISD::ATOMIC_LOAD_AND,
ISD::ATOMIC_LOAD_OR,
ISD::ATOMIC_LOAD_XOR,
ISD::ATOMIC_LOAD_NAND,
ISD::ATOMIC_LOAD_MIN,
ISD::ATOMIC_LOAD_MAX,
ISD::ATOMIC_LOAD_UMIN,
ISD::ATOMIC_LOAD_UMAX,
ISD::ATOMIC_LOAD_FADD,
ISD::INTRINSIC_VOID,
ISD::INTRINSIC_W_CHAIN});
// FIXME: In other contexts we pretend this is a per-function property.
setStackPointerRegisterToSaveRestore(AMDGPU::SGPR32);
setSchedulingPreference(Sched::RegPressure);
}
const GCNSubtarget *SITargetLowering::getSubtarget() const {
return Subtarget;
}
//===----------------------------------------------------------------------===//
// TargetLowering queries
//===----------------------------------------------------------------------===//
// v_mad_mix* support a conversion from f16 to f32.
//
// There is only one special case, when denormals are enabled, that we don't
// currently handle where this would still be OK to use.
bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
EVT DestVT, EVT SrcVT) const {
return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
DestVT.getScalarType() == MVT::f32 &&
SrcVT.getScalarType() == MVT::f16 &&
// TODO: This probably only requires no input flushing?
!hasFP32Denormals(DAG.getMachineFunction());
}
bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
LLT DestTy, LLT SrcTy) const {
return ((Opcode == TargetOpcode::G_FMAD && Subtarget->hasMadMixInsts()) ||
(Opcode == TargetOpcode::G_FMA && Subtarget->hasFmaMixInsts())) &&
DestTy.getScalarSizeInBits() == 32 &&
SrcTy.getScalarSizeInBits() == 16 &&
// TODO: This probably only requires no input flushing?
!hasFP32Denormals(*MI.getMF());
}
bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
// SI has some legal vector types, but no legal vector operations. Say no
// shuffles are legal in order to prefer scalarizing some vector operations.
return false;
}
MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
if (CC == CallingConv::AMDGPU_KERNEL)
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
if (VT.isVector()) {
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
if (Size == 16) {
if (Subtarget->has16BitInsts())
return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
return VT.isInteger() ? MVT::i32 : MVT::f32;
}
if (Size < 16)
return Subtarget->has16BitInsts() ? MVT::i16 : MVT::i32;
return Size == 32 ? ScalarVT.getSimpleVT() : MVT::i32;
}
if (VT.getSizeInBits() > 32)
return MVT::i32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
if (CC == CallingConv::AMDGPU_KERNEL)
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
// FIXME: Should probably promote 8-bit vectors to i16.
if (Size == 16 && Subtarget->has16BitInsts())
return (NumElts + 1) / 2;
if (Size <= 32)
return NumElts;
if (Size > 32)
return NumElts * ((Size + 31) / 32);
} else if (VT.getSizeInBits() > 32)
return (VT.getSizeInBits() + 31) / 32;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
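// Standalone sketch (not part of the patch) of the wide-scalar arithmetic
// above: every scalar wider than 32 bits is split into 32-bit pieces, one
// register per piece. The helper name is hypothetical.
static unsigned sketchRegsForWideVector(unsigned NumElts, unsigned ScalarBits) {
unsigned PiecesPerElt = (ScalarBits + 31) / 32; // round up to dwords
return NumElts * PiecesPerElt;                  // e.g. v4i64 -> 4 * 2 = 8
}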
unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
LLVMContext &Context, CallingConv::ID CC,
EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const {
if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
// FIXME: We should fix the ABI to be the same on targets without 16-bit
// support, but unless we can properly handle 3-vectors, it will still be
// inconsistent.
if (Size == 16 && Subtarget->has16BitInsts()) {
RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
IntermediateVT = RegisterVT;
NumIntermediates = (NumElts + 1) / 2;
return NumIntermediates;
}
if (Size == 32) {
RegisterVT = ScalarVT.getSimpleVT();
IntermediateVT = RegisterVT;
NumIntermediates = NumElts;
return NumIntermediates;
}
if (Size < 16 && Subtarget->has16BitInsts()) {
// FIXME: Should probably form v2i16 pieces
RegisterVT = MVT::i16;
IntermediateVT = ScalarVT;
NumIntermediates = NumElts;
return NumIntermediates;
}
if (Size != 16 && Size <= 32) {
RegisterVT = MVT::i32;
IntermediateVT = ScalarVT;
NumIntermediates = NumElts;
return NumIntermediates;
}
if (Size > 32) {
RegisterVT = MVT::i32;
IntermediateVT = RegisterVT;
NumIntermediates = NumElts * ((Size + 31) / 32);
return NumIntermediates;
}
}
return TargetLowering::getVectorTypeBreakdownForCallingConv(
Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}
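// Worked example (illustrative, not part of the patch): breaking down v5f16
// for a non-kernel call with 16-bit insts takes the Size == 16 path above,
// giving RegisterVT = v2f16 and (5 + 1) / 2 = 3 packed-half intermediates.
static unsigned sketchPackedHalfPieces(unsigned NumElts) {
return (NumElts + 1) / 2; // v5f16 -> 3 v2f16 registers
}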
static EVT memVTFromImageData(Type *Ty, unsigned DMaskLanes) {
assert(DMaskLanes != 0);
if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
unsigned NumElts = std::min(DMaskLanes, VT->getNumElements());
return EVT::getVectorVT(Ty->getContext(),
EVT::getEVT(VT->getElementType()),
NumElts);
}
return EVT::getEVT(Ty);
}
// Peek through TFE struct returns to only use the data size.
static EVT memVTFromImageReturn(Type *Ty, unsigned DMaskLanes) {
auto *ST = dyn_cast<StructType>(Ty);
if (!ST)
return memVTFromImageData(Ty, DMaskLanes);
// Some intrinsics return an aggregate type - special-case these to work out
// the correct memVT.
//
// Only limited forms of aggregate type are currently expected.
if (ST->getNumContainedTypes() != 2 ||
!ST->getContainedType(1)->isIntegerTy(32))
return EVT();
return memVTFromImageData(ST->getContainedType(0), DMaskLanes);
}
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
unsigned IntrID) const {
Info.flags = MachineMemOperand::MONone;
if (CI.hasMetadata(LLVMContext::MD_invariant_load))
Info.flags |= MachineMemOperand::MOInvariant;
if (const AMDGPU::RsrcIntrinsic *RsrcIntr =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
AttributeList Attr = Intrinsic::getAttributes(CI.getContext(),
(Intrinsic::ID)IntrID);
if (Attr.hasFnAttr(Attribute::ReadNone))
return false;
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
if (RsrcIntr->IsImage) {
Info.ptrVal = MFI->getImagePSV(TM);
Info.align.reset();
} else {
Info.ptrVal = MFI->getBufferPSV(TM);
}
Info.flags |= MachineMemOperand::MODereferenceable;
if (Attr.hasFnAttr(Attribute::ReadOnly)) {
unsigned DMaskLanes = 4;
if (RsrcIntr->IsImage) {
const AMDGPU::ImageDimIntrinsicInfo *Intr
= AMDGPU::getImageDimIntrinsicInfo(IntrID);
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
if (!BaseOpcode->Gather4) {
// If this isn't a gather, we may have excess loaded elements in the
// IR type. Check the dmask for the real number of elements loaded.
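// For example (illustrative): dmask = 0b0101 selects two of the four
// channels, so DMaskLanes = 2 and the memVT below becomes a two-element
// vector.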
unsigned DMask
= cast<ConstantInt>(CI.getArgOperand(0))->getZExtValue();
DMaskLanes = DMask == 0 ? 1 : countPopulation(DMask);
}
Info.memVT = memVTFromImageReturn(CI.getType(), DMaskLanes);
} else
Info.memVT = EVT::getEVT(CI.getType());
// FIXME: What does alignment mean for an image?
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.flags |= MachineMemOperand::MOLoad;
} else if (Attr.hasFnAttr(Attribute::WriteOnly)) {
Info.opc = ISD::INTRINSIC_VOID;
Type *DataTy = CI.getArgOperand(0)->getType();
if (RsrcIntr->IsImage) {
unsigned DMask = cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue();
unsigned DMaskLanes = DMask == 0 ? 1 : countPopulation(DMask);
Info.memVT = memVTFromImageData(DataTy, DMaskLanes);
} else
Info.memVT = EVT::getEVT(DataTy);
Info.flags |= MachineMemOperand::MOStore;
} else {
// Atomic
Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
// XXX - Should this be volatile without known ordering?
Info.flags |= MachineMemOperand::MOVolatile;
switch (IntrID) {
default:
break;
case Intrinsic::amdgcn_raw_buffer_load_lds:
case Intrinsic::amdgcn_struct_buffer_load_lds: {
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
return true;
}
}
}
return true;
}
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
if (!Vol->isZero())
Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_buffer_atomic_fadd: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
Info.ptrVal = MFI->getBufferPSV(TM);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
if (!Vol || !Vol->isZero())
Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
if (!Vol->isZero())
Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_global_atomic_csub: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
Info.ptrVal = MFI->getImagePSV(TM);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable;
return true;
}
case Intrinsic::amdgcn_global_atomic_fadd:
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_ds_gws_init:
case Intrinsic::amdgcn_ds_gws_barrier:
case Intrinsic::amdgcn_ds_gws_sema_v:
case Intrinsic::amdgcn_ds_gws_sema_br:
case Intrinsic::amdgcn_ds_gws_sema_p:
case Intrinsic::amdgcn_ds_gws_sema_release_all: {
Info.opc = ISD::INTRINSIC_VOID;
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.ptrVal = MFI->getGWSPSV(TM);
// This is an abstract access, but we need to specify a type and size.
Info.memVT = MVT::i32;
Info.size = 4;
Info.align = Align(4);
if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
Info.flags |= MachineMemOperand::MOLoad;
else
Info.flags |= MachineMemOperand::MOStore;
return true;
}
case Intrinsic::amdgcn_global_load_lds: {
Info.opc = ISD::INTRINSIC_VOID;
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
default:
return false;
}
}
bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
SmallVectorImpl<Value*> &Ops,
Type *&AccessTy) const {
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax:
case Intrinsic::amdgcn_global_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_global_atomic_csub: {
Value *Ptr = II->getArgOperand(0);
AccessTy = II->getType();
Ops.push_back(Ptr);
return true;
}
default:
return false;
}
}
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
if (!Subtarget->hasFlatInstOffsets()) {
// Flat instructions do not have offsets, and only have the register
// address.
return AM.BaseOffs == 0 && AM.Scale == 0;
}
return AM.Scale == 0 &&
(AM.BaseOffs == 0 ||
Subtarget->getInstrInfo()->isLegalFLATOffset(
AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS, SIInstrFlags::FLAT));
}
bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
if (Subtarget->hasFlatGlobalInsts())
return AM.Scale == 0 &&
(AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS,
SIInstrFlags::FlatGlobal));
if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
// Assume that we will use FLAT for all global memory accesses
// on VI.
// FIXME: This assumption is currently wrong. On VI we still use
// MUBUF instructions for the r + i addressing mode. As currently
// implemented, the MUBUF instructions only work on buffers < 4GB.
// It may be possible to support > 4GB buffers with MUBUF instructions,
// by setting the stride value in the resource descriptor which would
// increase the size limit to (stride * 4GB). However, this is risky,
// because it has never been validated.
return isLegalFlatAddressingMode(AM);
}
return isLegalMUBUFAddressingMode(AM);
}
bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
// MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
// additionally can do r + r + i with addr64. 32-bit has more addressing
// mode options. Depending on the resource constant, it can also do
// (i64 r0) + (i32 r1) * (i14 i).
//
// Private arrays end up using a scratch buffer most of the time, so also
// assume those use MUBUF instructions. Scratch loads / stores are currently
// implemented as mubuf instructions with the offen bit set, so they are
// slightly different from the normal addr64.
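// For example (illustrative): base + 4095 fits the 12-bit immediate offset
// checked below, while base + 4096 does not; 2 * r + r is rejected in the
// switch over AM.Scale.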
if (!SIInstrInfo::isLegalMUBUFImmOffset(AM.BaseOffs))
return false;
// FIXME: Since we can split immediate into soffset and immediate offset,
// would it make sense to allow any immediate?
switch (AM.Scale) {
case 0: // r + i or just i, depending on HasBaseReg.
return true;
case 1:
return true; // We have r + r or r + i.
case 2:
if (AM.HasBaseReg) {
// Reject 2 * r + r.
return false;
}
// Allow 2 * r as r + r
// Or 2 * r + i is allowed as r + r + i.
return true;
default: // Don't allow n * r
return false;
}
}
bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS, Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
if (AS == AMDGPUAS::GLOBAL_ADDRESS)
return isLegalGlobalAddressingMode(AM);
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
if (AM.BaseOffs % 4 != 0)
return isLegalMUBUFAddressingMode(AM);
// There are no SMRD extloads, so if we have to do a small type access we
// will use a MUBUF load.
// FIXME?: We also need to do this if unaligned, but we don't know the
// alignment here.
if (Ty->isSized() && DL.getTypeStoreSize(Ty) < 4)
return isLegalGlobalAddressingMode(AM);
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
// SMRD instructions have an 8-bit, dword offset on SI.
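// For example (illustrative): BaseOffs = 1020 encodes as 1020 / 4 = 255,
// the largest offset this check accepts.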
if (!isUInt<8>(AM.BaseOffs / 4))
return false;
} else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) {
// On CI+, this can also be a 32-bit literal constant offset. If it fits
// in 8-bits, it can use a smaller encoding.
if (!isUInt<32>(AM.BaseOffs / 4))
return false;
} else if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
if (!isUInt<20>(AM.BaseOffs))
return false;
} else
llvm_unreachable("unhandled generation");
if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;
if (AM.Scale == 1 && AM.HasBaseReg)
return true;
return false;
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
return isLegalMUBUFAddressingMode(AM);
} else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
AS == AMDGPUAS::REGION_ADDRESS) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
// an 8-bit dword offset but we don't know the alignment here.
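// For example (illustrative): BaseOffs = 65535 is accepted here, while
// 65536 is not.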
if (!isUInt<16>(AM.BaseOffs))
return false;
if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;
if (AM.Scale == 1 && AM.HasBaseReg)
return true;
return false;
} else if (AS == AMDGPUAS::FLAT_ADDRESS ||
AS == AMDGPUAS::UNKNOWN_ADDRESS_SPACE) {
// For an unknown address space, this usually means that this is for some
// reason being used for pure arithmetic, and not based on some addressing
// computation. We don't have instructions that compute pointers with any
// addressing modes, so treat them as having no offset like flat
// instructions.
return isLegalFlatAddressingMode(AM);
}
// Assume a user alias of global for unknown address spaces.
return isLegalGlobalAddressingMode(AM);
}
bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
const MachineFunction &MF) const {
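// Illustrative summary of the limits checked below: 4 dwords (128 bits)
// for global/flat, the subtarget's maximum private element size for
// scratch, and 2 dwords (64 bits) for LDS/region.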
if (AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) {
return (MemVT.getSizeInBits() <= 4 * 32);
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize();
return (MemVT.getSizeInBits() <= MaxPrivateBits);
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
return (MemVT.getSizeInBits() <= 2 * 32);
}
return true;
}
bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags, bool *IsFast) const {
if (IsFast)
*IsFast = false;
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// Check if alignment requirements for ds_read/write instructions are
// disabled.
if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4))
return false;
Align RequiredAlignment(PowerOf2Ceil(Size/8)); // Natural alignment.
if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
Alignment < RequiredAlignment)
return false;
// Either the alignment requirements are "enabled", or there is an
// unaligned-LDS-access-related hardware bug even though alignment
// requirements are "disabled". In either case, we need to check for proper
// alignment requirements.
//
switch (Size) {
case 64:
// SI has a hardware bug in the LDS / GDS bounds checking: if the base
// address is negative, then the instruction is incorrectly treated as
// out-of-bounds even if base + offsets is in bounds. Split vectorized
// loads here to avoid emitting ds_read2_b32. We may re-combine the
// load later in the SILoadStoreOptimizer.
if (!Subtarget->hasUsableDSOffset() && Alignment < Align(8))
return false;
// 8-byte accesses via ds_read/write_b64 require 8-byte alignment, but we
// can do a 4-byte aligned, 8-byte access in a single operation using
// ds_read2/write2_b32 with adjacent offsets.
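// For example (illustrative sketch): an 8-byte LDS access at a 4-byte
// aligned address N can be selected as ds_read2_b32 with dword offsets
// N/4 and N/4 + 1.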
RequiredAlignment = Align(4);
if (Subtarget->hasUnalignedDSAccessEnabled()) {
// We will either select ds_read_b64/ds_write_b64 or ds_read2_b32/
// ds_write2_b32 depending on the alignment. In either case with either
// alignment there is no faster way of doing this.
if (IsFast)
*IsFast = true;
return true;
}
break;
case 96:
if (!Subtarget->hasDS96AndDS128())
return false;
// 12-byte accesses via ds_read/write_b96 require 16-byte alignment on
// gfx8 and older.
if (Subtarget->hasUnalignedDSAccessEnabled()) {
// Naturally aligned access is fastest. However, also report it as Fast
// if memory is aligned to less than a DWORD. A narrow load or store will
// be just as slow as a single ds_read_b96/ds_write_b96, but there will
// be more of them, so overall we pay less of a penalty by issuing a
// single instruction.
if (IsFast)
*IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
return true;
}
break;
case 128:
if (!Subtarget->hasDS96AndDS128() || !Subtarget->useDS128())
return false;
// 16-byte accesses via ds_read/write_b128 require 16-byte alignment on
// gfx8 and older, but we can do an 8-byte aligned, 16-byte access in a
// single operation using ds_read2/write2_b64.
RequiredAlignment = Align(8);
if (Subtarget->hasUnalignedDSAccessEnabled()) {
// Naturally aligned access is fastest. However, also report it as Fast
// if memory is aligned to less than a DWORD. A narrow load or store will
// be just as slow as a single ds_read_b128/ds_write_b128, but there
// will be more of them, so overall we pay less of a penalty by issuing a
// single instruction.
if (IsFast)
*IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
return true;
}
break;
default:
if (Size > 32)
return false;
break;
}
if (IsFast)
*IsFast = Alignment >= RequiredAlignment;
return Alignment >= RequiredAlignment ||
Subtarget->hasUnalignedDSAccessEnabled();
}
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
bool AlignedBy4 = Alignment >= Align(4);
if (IsFast)
*IsFast = AlignedBy4;
return AlignedBy4 ||
Subtarget->enableFlatScratch() ||
Subtarget->hasUnalignedScratchAccess();
}
// FIXME: We have to be conservative here and assume that flat operations
// will access scratch. If we had access to the IR function, then we
// could determine if any private memory was used in the function.
if (AddrSpace == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasUnalignedScratchAccess()) {
bool AlignedBy4 = Alignment >= Align(4);
if (IsFast)
*IsFast = AlignedBy4;
return AlignedBy4;
}
if (Subtarget->hasUnalignedBufferAccessEnabled()) {
// If we have a uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
// Accesses can really be issued as 1-byte aligned or 4-byte aligned, so
// 2-byte alignment is worse than 1 unless doing a 2-byte access.
*IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
Alignment >= Align(4) : Alignment != Align(2);
}
return true;
}
// Values smaller than a dword must be aligned.
if (Size < 32)
return false;
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
// byte-address are ignored, thus forcing Dword alignment.
// This applies to private, global, and constant memory.
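// For example (illustrative): a dword access at address 0x1002 is treated
// by the hardware as an access at 0x1000.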
if (IsFast)
*IsFast = true;
return Size >= 32 && Alignment >= Align(4);
}
bool SITargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *IsFast) const {
bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
Alignment, Flags, IsFast);
if (Allow && IsFast && Subtarget->hasUnalignedDSAccessEnabled() &&
(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
// Lie that it is fast if +unaligned-access-mode is passed so that DS
// accesses get vectorized. We can use ds_read2_b*/ds_write2_b* instructions
// on misaligned data, which is faster than a pair of
// ds_read_b*/ds_write_b* which would be equally misaligned.
// This is only used by the common passes, selection always calls the
// allowsMisalignedMemoryAccessesImpl version.
*IsFast = true;
}
return Allow;
}
EVT SITargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
// FIXME: Should account for address space here.
// The default fallback uses the private pointer size as a guess for a type to
// use. Make sure we switch these to 64-bit accesses.
if (Op.size() >= 16 &&
Op.isDstAligned(Align(4))) // XXX: Should only do for global
return MVT::v4i32;
if (Op.size() >= 8 && Op.isDstAligned(Align(4)))
return MVT::v2i32;
// Use the default.
return MVT::Other;
}
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
return MemNode->getMemOperand()->getFlags() & MONoClobber;
}
bool SITargetLowering::isNonGlobalAddrSpace(unsigned AS) {
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS ||
AS == AMDGPUAS::PRIVATE_ADDRESS;
}
bool SITargetLowering::isFreeAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
// Flat -> global is no-op
if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
return true;
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
}
bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
return AMDGPUInstrInfo::isUniformMMO(MemNode->getMemOperand());
}
TargetLoweringBase::LegalizeTypeAction
SITargetLowering::getPreferredVectorAction(MVT VT) const {
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
VT.getScalarType().bitsLE(MVT::i16))
return VT.isPow2VectorType() ? TypeSplitVector : TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
// FIXME: Could be smarter if called for vector constants.
return true;
}
bool SITargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
// TODO: Add more cases that are cheap.
return Index == 0;
}
bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
if (Subtarget->has16BitInsts() && VT == MVT::i16) {
switch (Op) {
case ISD::LOAD:
case ISD::STORE:
// These operations are done with 32-bit instructions anyway.
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::SELECT:
// TODO: Extensions?
return true;
default:
return false;
}
}
// SimplifySetCC uses this function to determine whether or not it should
// create setcc with i1 operands. We don't have instructions for i1 setcc.
if (VT == MVT::i1 && Op == ISD::SETCC)
return false;
return TargetLowering::isTypeDesirableForOp(Op, VT);
}
SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
uint64_t Offset) const {
const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const ArgDescriptor *InputPtrReg;
const TargetRegisterClass *RC;
LLT ArgTy;
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
std::tie(InputPtrReg, RC, ArgTy) =
Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
// We may not have the kernarg segment argument if we have no kernel
// arguments.
if (!InputPtrReg)
return DAG.getConstant(0, SL, PtrVT);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
return DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Offset));
}
SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
const SDLoc &SL) const {
uint64_t Offset = getImplicitParameterOffset(DAG.getMachineFunction(),
FIRST_IMPLICIT);
return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset);
}
SDValue SITargetLowering::getLDSKernelId(SelectionDAG &DAG,
const SDLoc &SL) const {
Function &F = DAG.getMachineFunction().getFunction();
Optional<uint32_t> KnownSize =
AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
if (KnownSize.has_value())
return DAG.getConstant(KnownSize.value(), SL, MVT::i32);
return SDValue();
}
SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Val,
bool Signed,
const ISD::InputArg *Arg) const {
// First, if it is a widened vector, narrow it.
if (VT.isVector() &&
VT.getVectorNumElements() != MemVT.getVectorNumElements()) {
EVT NarrowedVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(),
VT.getVectorNumElements());
Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, NarrowedVT, Val,
DAG.getConstant(0, SL, MVT::i32));
}
// Then convert the vector elements or scalar value.
if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
VT.bitsLT(MemVT)) {
unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
Val = DAG.getNode(Opc, SL, MemVT, Val, DAG.getValueType(VT));
}
if (MemVT.isFloatingPoint())
Val = getFPExtOrFPRound(DAG, Val, SL, VT);
else if (Signed)
Val = DAG.getSExtOrTrunc(Val, SL, VT);
else
Val = DAG.getZExtOrTrunc(Val, SL, VT);
return Val;
}
SDValue SITargetLowering::lowerKernargMemParameter(
SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain,
uint64_t Offset, Align Alignment, bool Signed,
const ISD::InputArg *Arg) const {
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
// Try to avoid using an extload by loading earlier than the argument address,
// and extracting the relevant bits. The load should hopefully be merged with
// the previous argument.
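// For example (illustrative): a 2-byte argument at Offset = 6 gives
// AlignDownOffset = 4 and OffsetDiff = 2, so we load the aligned i32 at
// offset 4 and shift right by 2 * 8 = 16 bits to extract the value.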
if (MemVT.getStoreSize() < 4 && Alignment < 4) {
// TODO: Handle align < 4 and size >= 4 (can happen with packed structs).
int64_t AlignDownOffset = alignDown(Offset, 4);
int64_t OffsetDiff = Offset - AlignDownOffset;
EVT IntVT = MemVT.changeTypeToInteger();
// TODO: If we passed in the base kernel offset we could have a better
// alignment than 4, but we don't really need it.
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset);
SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr, PtrInfo, Align(4),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
SDValue ShiftAmt = DAG.getConstant(OffsetDiff * 8, SL, MVT::i32);
SDValue Extract = DAG.getNode(ISD::SRL, SL, MVT::i32, Load, ShiftAmt);
SDValue ArgVal = DAG.getNode(ISD::TRUNCATE, SL, IntVT, Extract);
ArgVal = DAG.getNode(ISD::BITCAST, SL, MemVT, ArgVal);
ArgVal = convertArgType(DAG, VT, MemVT, SL, ArgVal, Signed, Arg);
return DAG.getMergeValues({ ArgVal, Load.getValue(1) }, SL);
}
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Alignment,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
}
SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
const SDLoc &SL, SDValue Chain,
const ISD::InputArg &Arg) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
if (Arg.Flags.isByVal()) {
unsigned Size = Arg.Flags.getByValSize();
int FrameIdx = MFI.CreateFixedObject(Size, VA.getLocMemOffset(), false);
return DAG.getFrameIndex(FrameIdx, MVT::i32);
}
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = VA.getValVT().getStoreSize();
int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
SDValue ArgValue;
// For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
MVT MemVT = VA.getValVT();
switch (VA.getLocInfo()) {
default:
break;
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
case CCValAssign::ZExt:
ExtType = ISD::ZEXTLOAD;
break;
case CCValAssign::AExt:
ExtType = ISD::EXTLOAD;
break;
}
ArgValue = DAG.getExtLoad(
ExtType, SL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MemVT);
return ArgValue;
}
SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
const SIMachineFunctionInfo &MFI,
EVT VT,
AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
const ArgDescriptor *Reg;
const TargetRegisterClass *RC;
LLT Ty;
std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
if (!Reg) {
if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) {
// It's possible for a kernarg intrinsic call to appear in a kernel with
// no allocated segment, in which case we do not add the user sgpr
// argument, so just return null.
return DAG.getConstant(0, SDLoc(), VT);
}
// It's undefined behavior if a function marked with the amdgpu-no-*
// attributes uses the corresponding intrinsic.
return DAG.getUNDEF(VT);
}
return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
}
static void processPSInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
CallingConv::ID CallConv,
ArrayRef<ISD::InputArg> Ins, BitVector &Skipped,
FunctionType *FType,
SIMachineFunctionInfo *Info) {
for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
const ISD::InputArg *Arg = &Ins[I];
assert((!Arg->VT.isVector() || Arg->VT.getScalarSizeInBits() == 16) &&
"vector type argument should have been split");
// First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS &&
!Arg->Flags.isInReg() && PSInputNum <= 15) {
bool SkipArg = !Arg->Used && !Info->isPSInputAllocated(PSInputNum);
// Inconveniently only the first part of the split is marked as isSplit,
// so skip to the end. We only want to increment PSInputNum once for the
// entire split argument.
if (Arg->Flags.isSplit()) {
while (!Arg->Flags.isSplitEnd()) {
assert((!Arg->VT.isVector() ||
Arg->VT.getScalarSizeInBits() == 16) &&
"unexpected vector split in ps argument type");
if (!SkipArg)
Splits.push_back(*Arg);
Arg = &Ins[++I];
}
}
if (SkipArg) {
// We can safely skip PS inputs.
Skipped.set(Arg->getOrigArgIndex());
++PSInputNum;
continue;
}
Info->markPSInputAllocated(PSInputNum);
if (Arg->Used)
Info->markPSInputEnabled(PSInputNum);
++PSInputNum;
}
Splits.push_back(*Arg);
}
}
// Allocate special inputs passed in VGPRs.
void SITargetLowering::allocateSpecialEntryInputVGPRs(CCState &CCInfo,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
const LLT S32 = LLT::scalar(32);
MachineRegisterInfo &MRI = MF.getRegInfo();
if (Info.hasWorkItemIDX()) {
Register Reg = AMDGPU::VGPR0;
MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
unsigned Mask = (Subtarget->hasPackedTID() &&
Info.hasWorkItemIDY()) ? 0x3ff : ~0u;
Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg, Mask));
}
if (Info.hasWorkItemIDY()) {
assert(Info.hasWorkItemIDX());
if (Subtarget->hasPackedTID()) {
Info.setWorkItemIDY(ArgDescriptor::createRegister(AMDGPU::VGPR0,
0x3ff << 10));
} else {
unsigned Reg = AMDGPU::VGPR1;
MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
}
}
if (Info.hasWorkItemIDZ()) {
assert(Info.hasWorkItemIDX() && Info.hasWorkItemIDY());
if (Subtarget->hasPackedTID()) {
Info.setWorkItemIDZ(ArgDescriptor::createRegister(AMDGPU::VGPR0,
0x3ff << 20));
} else {
unsigned Reg = AMDGPU::VGPR2;
MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
}
}
}
// Try to allocate a VGPR at the end of the argument list, or, if no argument
// VGPRs are left, allocate a stack slot.
// If \p Mask is given, it indicates the bitfield position in the register.
// If \p Arg is given, use it with the new \p Mask instead of allocating a new
// register.
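// For example (illustrative): with packed TIDs the X, Y and Z workitem IDs
// share one VGPR using the masks 0x3ff, 0x3ff << 10 and 0x3ff << 20 (see
// allocateSpecialInputVGPRsFixed below).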
static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask = ~0u,
ArgDescriptor Arg = ArgDescriptor()) {
if (Arg.isSet())
return ArgDescriptor::createArg(Arg, Mask);
ArrayRef<MCPhysReg> ArgVGPRs
= makeArrayRef(AMDGPU::VGPR_32RegClass.begin(), 32);
unsigned RegIdx = CCInfo.getFirstUnallocated(ArgVGPRs);
if (RegIdx == ArgVGPRs.size()) {
// Spill to stack required.
int64_t Offset = CCInfo.AllocateStack(4, Align(4));
return ArgDescriptor::createStack(Offset, Mask);
}
unsigned Reg = ArgVGPRs[RegIdx];
Reg = CCInfo.AllocateReg(Reg);
assert(Reg != AMDGPU::NoRegister);
MachineFunction &MF = CCInfo.getMachineFunction();
Register LiveInVReg = MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
MF.getRegInfo().setType(LiveInVReg, LLT::scalar(32));
return ArgDescriptor::createRegister(Reg, Mask);
}
static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo,
const TargetRegisterClass *RC,
unsigned NumArgRegs) {
ArrayRef<MCPhysReg> ArgSGPRs = makeArrayRef(RC->begin(), 32);
unsigned RegIdx = CCInfo.getFirstUnallocated(ArgSGPRs);
if (RegIdx == ArgSGPRs.size())
report_fatal_error("ran out of SGPRs for arguments");
unsigned Reg = ArgSGPRs[RegIdx];
Reg = CCInfo.AllocateReg(Reg);
assert(Reg != AMDGPU::NoRegister);
MachineFunction &MF = CCInfo.getMachineFunction();
MF.addLiveIn(Reg, RC);
return ArgDescriptor::createRegister(Reg);
}
// If this has a fixed position, we should still allocate the register in the
// CCInfo state. Technically we could get away without this for values passed
// outside of the normal argument range.
static void allocateFixedSGPRInputImpl(CCState &CCInfo,
const TargetRegisterClass *RC,
MCRegister Reg) {
Reg = CCInfo.AllocateReg(Reg);
assert(Reg != AMDGPU::NoRegister);
MachineFunction &MF = CCInfo.getMachineFunction();
MF.addLiveIn(Reg, RC);
}
static void allocateSGPR32Input(CCState &CCInfo, ArgDescriptor &Arg) {
if (Arg) {
allocateFixedSGPRInputImpl(CCInfo, &AMDGPU::SGPR_32RegClass,
Arg.getRegister());
} else
Arg = allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_32RegClass, 32);
}
static void allocateSGPR64Input(CCState &CCInfo, ArgDescriptor &Arg) {
if (Arg) {
allocateFixedSGPRInputImpl(CCInfo, &AMDGPU::SGPR_64RegClass,
Arg.getRegister());
} else
Arg = allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16);
}
/// Allocate implicit function VGPR arguments at the end of allocated user
/// arguments.
void SITargetLowering::allocateSpecialInputVGPRs(
CCState &CCInfo, MachineFunction &MF,
const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const {
const unsigned Mask = 0x3ff;
ArgDescriptor Arg;
if (Info.hasWorkItemIDX()) {
Arg = allocateVGPR32Input(CCInfo, Mask);
Info.setWorkItemIDX(Arg);
}
if (Info.hasWorkItemIDY()) {
Arg = allocateVGPR32Input(CCInfo, Mask << 10, Arg);
Info.setWorkItemIDY(Arg);
}
if (Info.hasWorkItemIDZ())
Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo, Mask << 20, Arg));
}
/// Allocate implicit function VGPR arguments in fixed registers.
void SITargetLowering::allocateSpecialInputVGPRsFixed(
CCState &CCInfo, MachineFunction &MF,
const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const {
Register Reg = CCInfo.AllocateReg(AMDGPU::VGPR31);
if (!Reg)
report_fatal_error("failed to allocated VGPR for implicit arguments");
const unsigned Mask = 0x3ff;
Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg, Mask));
Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg, Mask << 10));
Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg, Mask << 20));
}
void SITargetLowering::allocateSpecialInputSGPRs(
CCState &CCInfo,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
// TODO: Unify handling with private memory pointers.
if (Info.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
// constant offset from the kernarg segment.
if (Info.hasImplicitArgPtr())
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
if (Info.hasDispatchID())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
// flat_scratch_init is not applicable for non-kernel functions.
if (Info.hasWorkGroupIDX())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);
if (Info.hasWorkGroupIDY())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);
if (Info.hasWorkGroupIDZ())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
if (Info.hasLDSKernelId())
allocateSGPR32Input(CCInfo, ArgInfo.LDSKernelId);
}
// Allocate special inputs passed in user SGPRs.
void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
if (Info.hasImplicitBufferPtr()) {
Register ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
if (Info.hasPrivateSegmentBuffer()) {
Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
if (Info.hasDispatchPtr()) {
Register DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
if (Info.hasKernargSegmentPtr()) {
MachineRegisterInfo &MRI = MF.getRegInfo();
Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
CCInfo.AllocateReg(InputPtrReg);
Register VReg = MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
MRI.setType(VReg, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
}
if (Info.hasDispatchID()) {
Register DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
if (Info.hasFlatScratchInit() && !getSubtarget()->isAmdPalOS()) {
Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
}
if (Info.hasLDSKernelId()) {
Register Reg = Info.addLDSKernelId();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
// these from the dispatch pointer.
}
// Allocate special input registers that are initialized per-wave.
void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
MachineFunction &MF,
SIMachineFunctionInfo &Info,
CallingConv::ID CallConv,
bool IsShader) const {
if (Subtarget->hasUserSGPRInit16Bug() && !IsShader) {
// Note: user SGPRs are handled by the front-end for graphics shaders
// Pad up the used user SGPRs with dead inputs.
unsigned CurrentUserSGPRs = Info.getNumUserSGPRs();
// Note we do not count the PrivateSegmentWaveByteOffset. We do not want to
// rely on it to reach 16, since if we end up with no stack usage it will
// not really be added.
unsigned NumRequiredSystemSGPRs = Info.hasWorkGroupIDX() +
Info.hasWorkGroupIDY() +
Info.hasWorkGroupIDZ() +
Info.hasWorkGroupInfo();
for (unsigned i = NumRequiredSystemSGPRs + CurrentUserSGPRs; i < 16; ++i) {
Register Reg = Info.addReservedUserSGPR();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
}
if (Info.hasWorkGroupIDX()) {
Register Reg = Info.addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupIDY()) {
Register Reg = Info.addWorkGroupIDY();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupIDZ()) {
Register Reg = Info.addWorkGroupIDZ();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupInfo()) {
Register Reg = Info.addWorkGroupInfo();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasPrivateSegmentWaveByteOffset()) {
// Scratch wave offset passed in system SGPR.
unsigned PrivateSegmentWaveByteOffsetReg;
if (IsShader) {
PrivateSegmentWaveByteOffsetReg =
Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
// This is true if the scratch wave byte offset doesn't have a fixed
// location.
if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
}
} else
PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
}
assert(!Subtarget->hasUserSGPRInit16Bug() || IsShader ||
Info.getNumPreloadedSGPRs() >= 16);
}
static void reservePrivateMemoryRegs(const TargetMachine &TM,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) {
// Now that we've figured out where the scratch register inputs are, see if
// we should reserve the arguments and use them directly.
MachineFrameInfo &MFI = MF.getFrameInfo();
bool HasStackObjects = MFI.hasStackObjects();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
// Record that we know we have non-spill stack objects so we don't need to
// check all stack objects later.
if (HasStackObjects)
Info.setHasNonSpillStackObjects(true);
// Everything live out of a block is spilled with fast regalloc, so it's
// almost certain that spilling will be required.
if (TM.getOptLevel() == CodeGenOpt::None)
HasStackObjects = true;
// For now assume stack access is needed in any callee functions, so we need
// the scratch registers to pass in.
bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
if (!ST.enableFlatScratch()) {
if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.getFunction())) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
// SGPR inputs. We can reserve those and use them directly.
Register PrivateSegmentBufferReg =
Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
Info.setScratchRSrcReg(PrivateSegmentBufferReg);
} else {
unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
// We tentatively reserve the last registers (skipping the last registers
// which may contain VCC, FLAT_SCR, and XNACK). After register allocation,
// we'll replace these with the ones immediately after those which were
// really allocated. In the prologue copies will be inserted from the
// argument to these reserved registers.
// Without HSA, relocations are used for the scratch pointer and the
// buffer resource setup is always inserted in the prologue. Scratch wave
// offset is still in an input SGPR.
Info.setScratchRSrcReg(ReservedBufferReg);
}
}
MachineRegisterInfo &MRI = MF.getRegInfo();
// For entry functions we have to set up the stack pointer if we use it,
// whereas non-entry functions get this "for free". This means there is no
// intrinsic advantage to using S32 over S34 in cases where we do not have
// calls but do need a frame pointer (i.e. if we are requested to have one
// because frame pointer elimination is disabled). To keep things simple we
// only ever use S32 as the call ABI stack pointer, and so using it does not
// imply we need a separate frame pointer.
//
// Try to use s32 as the SP, but move it if it would interfere with input
// arguments. This won't work with calls though.
//
// FIXME: Move SP to avoid any possible inputs, or find a way to spill input
// registers.
if (!MRI.isLiveIn(AMDGPU::SGPR32)) {
Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
} else {
assert(AMDGPU::isShader(MF.getFunction().getCallingConv()));
if (MFI.hasCalls())
report_fatal_error("call in graphics shader with too many input SGPRs");
for (unsigned Reg : AMDGPU::SGPR_32RegClass) {
if (!MRI.isLiveIn(Reg)) {
Info.setStackPtrOffsetReg(Reg);
break;
}
}
if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
report_fatal_error("failed to find register for SP");
}
// hasFP should be accurate for entry functions even before the frame is
// finalized, because it does not rely on the known stack size, only
// properties like whether variable sized objects are present.
if (ST.getFrameLowering()->hasFP(MF)) {
Info.setFrameOffsetReg(AMDGPU::SGPR33);
}
}
bool SITargetLowering::supportSplitCSR(MachineFunction *MF) const {
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
return !Info->isEntryFunction();
}
void SITargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
}
void SITargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (AMDGPU::SReg_64RegClass.contains(*I))
RC = &AMDGPU::SGPR_64RegClass;
else if (AMDGPU::SReg_32RegClass.contains(*I))
RC = &AMDGPU::SGPR_32RegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
SDValue SITargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
const Function &Fn = MF.getFunction();
FunctionType *FType = MF.getFunction().getFunctionType();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
if (Subtarget->isAmdHsaOS() && AMDGPU::isGraphics(CallConv)) {
DiagnosticInfoUnsupported NoGraphicsHSA(
Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
DAG.getContext()->diagnose(NoGraphicsHSA);
return DAG.getEntryNode();
}
Info->allocateModuleLDSGlobal(Fn);
SmallVector<ISD::InputArg, 16> Splits;
SmallVector<CCValAssign, 16> ArgLocs;
BitVector Skipped(Ins.size());
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
bool IsGraphics = AMDGPU::isGraphics(CallConv);
bool IsKernel = AMDGPU::isKernel(CallConv);
bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
if (IsGraphics) {
assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() &&
(!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) &&
!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
!Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
!Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
}
if (CallConv == CallingConv::AMDGPU_PS) {
processPSInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
// At least one interpolation mode must be enabled or else the GPU will
// hang.
//
// Check PSInputAddr instead of PSInputEnable. The idea is that if the user
// set PSInputAddr, the user wants to enable some bits after the compilation
// based on run-time states. Since we can't know what the final PSInputEna
// will look like, we shouldn't do anything here; the user should take
// responsibility for the correct programming.
//
// Otherwise, the following restrictions apply:
// - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
// - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
// enabled too.
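// For example (illustrative): PSInputAddr = 0x800 (POS_W_FLOAT only) has
// no PERSP_*/LINEAR_* bit set, so the check below force-enables input 0
// and allocates VGPR0/VGPR1.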
if ((Info->getPSInputAddr() & 0x7F) == 0 ||
((Info->getPSInputAddr() & 0xF) == 0 && Info->isPSInputAllocated(11))) {
CCInfo.AllocateReg(AMDGPU::VGPR0);
CCInfo.AllocateReg(AMDGPU::VGPR1);
Info->markPSInputAllocated(0);
Info->markPSInputEnabled(0);
}
if (Subtarget->isAmdPalOS()) {
// For isAmdPalOS, the user does not enable some bits after compilation
// based on run-time states; the register values being generated here are
// the final ones set in hardware. Therefore we need to apply the
// workaround to PSInputAddr and PSInputEnable together. (The case where
// a bit is set in PSInputAddr but not PSInputEnable is where the
// frontend set up an input arg for a particular interpolation mode, but
// nothing uses that input arg. Really we should have an earlier pass
// that removes such an arg.)
unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
if ((PsInputBits & 0x7F) == 0 ||
((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
Info->markPSInputEnabled(
countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
}
} else if (IsKernel) {
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
Splits.append(Ins.begin(), Ins.end());
}
if (IsEntryFunc) {
allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
} else if (!IsGraphics) {
// For the fixed ABI, pass workitem IDs in the last argument register.
allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}
if (IsKernel) {
analyzeFormalArgumentsCompute(CCInfo, Ins);
} else {
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, isVarArg);
CCInfo.AnalyzeFormalArguments(Splits, AssignFn);
}
SmallVector<SDValue, 16> Chains;
// FIXME: This is the minimum kernel argument alignment. We should improve
// this to the maximum alignment of the arguments.
//
// FIXME: Alignment of explicit arguments totally broken with non-0 explicit
// kern arg offset.
const Align KernelArgBaseAlign = Align(16);
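// For example (illustrative): an argument at byte offset 4 gets
// commonAlignment(16, 4) = Align(4), while one at offset 32 keeps
// Align(16).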
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
if (Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) {
InVals.push_back(DAG.getUNDEF(Arg.VT));
continue;
}
CCValAssign &VA = ArgLocs[ArgIdx++];
MVT VT = VA.getLocVT();
if (IsEntryFunc && VA.isMemLoc()) {
VT = Ins[i].VT;
EVT MemVT = VA.getLocVT();
const uint64_t Offset = VA.getLocMemOffset();
Align Alignment = commonAlignment(KernelArgBaseAlign, Offset);
if (Arg.Flags.isByRef()) {
SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, Chain, Offset);
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
if (!TM.isNoopAddrSpaceCast(AMDGPUAS::CONSTANT_ADDRESS,
Arg.Flags.getPointerAddrSpace())) {
Ptr = DAG.getAddrSpaceCast(DL, VT, Ptr, AMDGPUAS::CONSTANT_ADDRESS,
Arg.Flags.getPointerAddrSpace());
}
InVals.push_back(Ptr);
continue;
}
SDValue Arg = lowerKernargMemParameter(
DAG, VT, MemVT, DL, Chain, Offset, Alignment, Ins[i].Flags.isSExt(), &Ins[i]);
Chains.push_back(Arg.getValue(1));
auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
ParamTy && (ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
ParamTy->getAddressSpace() == AMDGPUAS::REGION_ADDRESS)) {
// On SI local pointers are just offsets into LDS, so they always
// fit in 16 bits. On CI and newer they could potentially be
// real pointers, so we can't guarantee their size.
Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
DAG.getValueType(MVT::i16));
}
InVals.push_back(Arg);
continue;
} else if (!IsEntryFunc && VA.isMemLoc()) {
SDValue Val = lowerStackParameter(DAG, VA, DL, Chain, Arg);
InVals.push_back(Val);
if (!Arg.Flags.isByVal())
Chains.push_back(Val.getValue(1));
continue;
}
assert(VA.isRegLoc() && "Parameter must be in a register!");
Register Reg = VA.getLocReg();
const TargetRegisterClass *RC = nullptr;
if (AMDGPU::VGPR_32RegClass.contains(Reg))
RC = &AMDGPU::VGPR_32RegClass;
else if (AMDGPU::SGPR_32RegClass.contains(Reg))
RC = &AMDGPU::SGPR_32RegClass;
else
llvm_unreachable("Unexpected register class in LowerFormalArguments!");
EVT ValVT = VA.getValVT();
Reg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
if (Arg.Flags.isSRet()) {
// The return object should be reasonably addressable.
// FIXME: This helps when the return is a real sret. If it is an
// automatically inserted sret (i.e. CanLowerReturn returns false), an
// extra copy is inserted in SelectionDAGBuilder which obscures this.
unsigned NumBits
= 32 - getSubtarget()->getKnownHighZeroBitsForFrameIndex();
Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits)));
}
// If this is an 8 or 16-bit value, it is really passed promoted
// to 32 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
break;
case CCValAssign::SExt:
Val = DAG.getNode(ISD::AssertSext, DL, VT, Val,
DAG.getValueType(ValVT));
Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
break;
case CCValAssign::ZExt:
Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
DAG.getValueType(ValVT));
Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
break;
case CCValAssign::AExt:
Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
break;
default:
llvm_unreachable("Unknown loc info!");
}
InVals.push_back(Val);
}
// Start adding system SGPRs.
if (IsEntryFunc) {
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);
} else {
CCInfo.AllocateReg(Info->getScratchRSrcReg());
if (!IsGraphics)
allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
}
auto &ArgUsageInfo =
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
ArgUsageInfo.setFuncArgInfo(Fn, Info->getArgInfo());
unsigned StackArgSize = CCInfo.getNextStackOffset();
Info->setBytesInStackArgArea(StackArgSize);
return Chains.empty() ? Chain :
DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
// TODO: If return values can't fit in registers, we should return as many as
// possible in registers before passing on stack.
bool SITargetLowering::CanLowerReturn(
CallingConv::ID CallConv,
MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
// Replacing returns with sret/stack usage doesn't make sense for shaders.
// FIXME: Also sort of a workaround for custom vector splitting in LowerReturn
// for shaders. Vector types should be explicitly handled by CC.
if (AMDGPU::isEntryFunctionCC(CallConv))
return true;
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg));
}
SDValue
SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
if (AMDGPU::isKernel(CallConv)) {
return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
OutVals, DL, DAG);
}
bool IsShader = AMDGPU::isShader(CallConv);
Info->setIfReturnsVoid(Outs.empty());
bool IsWaveEnd = Info->returnsVoid() && IsShader;
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 48> RVLocs;
SmallVector<ISD::OutputArg, 48> Splits;
// CCState - Info about the registers and stack slots.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 48> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Copy the result values into the output registers.
for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
++I, ++RealRVLocIdx) {
CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
// TODO: Partially return in registers if return values don't fit.
SDValue Arg = OutVals[RealRVLocIdx];
// Copied from other backends.
switch (VA.getLocInfo()) {
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
default:
llvm_unreachable("Unknown loc info!");
}
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
// FIXME: Does sret work properly?
if (!Info->isEntryFunction()) {
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
for (; *I; ++I) {
if (AMDGPU::SReg_64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else if (AMDGPU::SReg_32RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i32));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
}
// Update chain and glue.
RetOps[0] = Chain;
if (Flag.getNode())
RetOps.push_back(Flag);
unsigned Opc = AMDGPUISD::ENDPGM;
if (!IsWaveEnd)
Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
SDValue SITargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool IsThisReturn,
SDValue ThisVal) const {
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv, IsVarArg);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
SDValue Val;
if (VA.isRegLoc()) {
Val = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
} else if (VA.isMemLoc()) {
report_fatal_error("TODO: return values in memory");
} else
llvm_unreachable("unknown argument location type");
switch (VA.getLocInfo()) {
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
case CCValAssign::ZExt:
Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val,
DAG.getValueType(VA.getValVT()));
Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
break;
case CCValAssign::SExt:
Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val,
DAG.getValueType(VA.getValVT()));
Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
break;
case CCValAssign::AExt:
Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
break;
default:
llvm_unreachable("Unknown loc info!");
}
InVals.push_back(Val);
}
return Chain;
}
// Add code to pass the special inputs required by the features in use,
// separate from the explicit user arguments present in the IR.
void SITargetLowering::passSpecialInputs(
CallLoweringInfo &CLI,
CCState &CCInfo,
const SIMachineFunctionInfo &Info,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains,
SDValue Chain) const {
// If we don't have a call site, this was a call inserted by
// legalization. These can never use special inputs.
if (!CLI.CB)
return;
SelectionDAG &DAG = CLI.DAG;
const SDLoc &DL = CLI.DL;
const Function &F = DAG.getMachineFunction().getFunction();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const AMDGPUFunctionArgInfo &CallerArgInfo = Info.getArgInfo();
const AMDGPUFunctionArgInfo *CalleeArgInfo
= &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
if (const Function *CalleeFunc = CLI.CB->getCalledFunction()) {
auto &ArgUsageInfo =
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
CalleeArgInfo = &ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
}
// TODO: Unify with private memory register handling. This is complicated by
// the fact that at least in kernels, the input argument is not necessarily
// in the same location as the input.
static constexpr std::pair<AMDGPUFunctionArgInfo::PreloadedValue,
StringLiteral> ImplicitAttrs[] = {
{AMDGPUFunctionArgInfo::DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
{AMDGPUFunctionArgInfo::QUEUE_PTR, "amdgpu-no-queue-ptr"},
{AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
{AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"},
{AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
{AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
{AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
{AMDGPUFunctionArgInfo::LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id"},
};
for (auto Attr : ImplicitAttrs) {
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT ArgTy;
AMDGPUFunctionArgInfo::PreloadedValue InputID = Attr.first;
// If the call site carries the corresponding "amdgpu-no-*" attribute, the
// callee does not need this input, so skip copying the value.
if (CLI.CB->hasFnAttr(Attr.second))
continue;
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(InputID);
if (!OutgoingArg)
continue;
const ArgDescriptor *IncomingArg;
const TargetRegisterClass *IncomingArgRC;
LLT Ty;
std::tie(IncomingArg, IncomingArgRC, Ty) =
CallerArgInfo.getPreloadedValue(InputID);
assert(IncomingArgRC == ArgRC);
// All special arguments are ints for now.
EVT ArgVT = TRI->getSpillSize(*ArgRC) == 8 ? MVT::i64 : MVT::i32;
SDValue InputReg;
if (IncomingArg) {
InputReg = loadInputValue(DAG, ArgRC, ArgVT, DL, *IncomingArg);
} else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
// The implicit arg ptr is special because it doesn't have a corresponding
// input for kernels, and is computed from the kernarg segment pointer.
InputReg = getImplicitArgPtr(DAG, DL);
} else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
Optional<uint32_t> Id = AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
if (Id.has_value()) {
InputReg = DAG.getConstant(Id.value(), DL, ArgVT);
} else {
InputReg = DAG.getUNDEF(ArgVT);
}
} else {
// We may have proven the input wasn't needed, although the ABI still
// requires it. We just need to allocate the register appropriately.
InputReg = DAG.getUNDEF(ArgVT);
}
if (OutgoingArg->isRegister()) {
RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
report_fatal_error("failed to allocate implicit input argument");
} else {
unsigned SpecialArgOffset =
CCInfo.AllocateStack(ArgVT.getStoreSize(), Align(4));
SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
SpecialArgOffset);
MemOpChains.push_back(ArgStore);
}
}
// Pack the workitem IDs into a single register, or pass them as-is if they
// are already packed.
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT Ty;
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
if (!OutgoingArg)
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
if (!OutgoingArg)
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
if (!OutgoingArg)
return;
const ArgDescriptor *IncomingArgX = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X));
const ArgDescriptor *IncomingArgY = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y));
const ArgDescriptor *IncomingArgZ = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z));
SDValue InputReg;
SDLoc SL;
const bool NeedWorkItemIDX = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-x");
const bool NeedWorkItemIDY = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-y");
const bool NeedWorkItemIDZ = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-z");
// If the incoming IDs are not packed, we need to pack them.
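// A sketch of the packed layout assumed by the shifts below:
//   bits [9:0]   workitem ID X
//   bits [19:10] workitem ID Y
//   bits [29:20] workitem ID Z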
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
NeedWorkItemIDX) {
if (Subtarget->getMaxWorkitemID(F, 0) != 0) {
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
} else {
InputReg = DAG.getConstant(0, DL, MVT::i32);
}
}
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
NeedWorkItemIDY && Subtarget->getMaxWorkitemID(F, 1) != 0) {
SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
DAG.getShiftAmountConstant(10, MVT::i32, SL));
InputReg = InputReg.getNode() ?
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
}
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
NeedWorkItemIDZ && Subtarget->getMaxWorkitemID(F, 2) != 0) {
SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
DAG.getShiftAmountConstant(20, MVT::i32, SL));
InputReg = InputReg.getNode() ?
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Z) : Z;
}
if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
// We're in a situation where the outgoing function requires the workitem
// ID, but the calling function does not have it (e.g. a graphics function
// calling a C-calling-convention function). This is illegal, but we need
// to produce something.
InputReg = DAG.getUNDEF(MVT::i32);
} else {
// The workitem IDs are already packed, so any present incoming argument
// will carry all of the required fields.
ArgDescriptor IncomingArg = ArgDescriptor::createArg(
IncomingArgX ? *IncomingArgX :
IncomingArgY ? *IncomingArgY :
*IncomingArgZ, ~0u);
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, IncomingArg);
}
}
if (OutgoingArg->isRegister()) {
if (InputReg)
RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
CCInfo.AllocateReg(OutgoingArg->getRegister());
} else {
unsigned SpecialArgOffset = CCInfo.AllocateStack(4, Align(4));
if (InputReg) {
SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
SpecialArgOffset);
MemOpChains.push_back(ArgStore);
}
}
}
static bool canGuaranteeTCO(CallingConv::ID CC) {
return CC == CallingConv::Fast;
}
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::C:
case CallingConv::AMDGPU_Gfx:
return true;
default:
return canGuaranteeTCO(CC);
}
}
bool SITargetLowering::isEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
return false;
// For a divergent call target, we need to do a waterfall loop over the
// possible callees which precludes us from using a simple jump.
if (Callee->isDivergent())
return false;
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
// Kernels aren't callable and don't have a live-in return address, so it
// doesn't make sense to do a tail call from entry functions.
if (!CallerPreserved)
return false;
bool CCMatch = CallerCC == CalleeCC;
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
return false;
}
// TODO: Can we handle var args?
if (IsVarArg)
return false;
for (const Argument &Arg : CallerF.args()) {
if (Arg.hasByValAttr())
return false;
}
LLVMContext &Ctx = *DAG.getContext();
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, Ctx, Ins,
CCAssignFnForCall(CalleeCC, IsVarArg),
CCAssignFnForCall(CallerCC, IsVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// Nothing more to check if the callee is taking no arguments.
if (Outs.empty())
return true;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, Ctx);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, IsVarArg));
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
// If the stack arguments for this call do not fit into our own save area,
// then the call cannot be made a tail call.
// TODO: Is this really necessary?
if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
return parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals);
}
bool SITargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!CI->isTailCall())
return false;
const Function *ParentFn = CI->getParent()->getParent();
if (AMDGPU::isEntryFunctionCC(ParentFn->getCallingConv()))
return false;
return true;
}
// The wave scratch offset register is used as the global base pointer.
SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
const SDLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
bool IsSibCall = false;
bool IsThisReturn = false;
MachineFunction &MF = DAG.getMachineFunction();
if (Callee.isUndef() || isNullConstant(Callee)) {
if (!CLI.IsTailCall) {
for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I)
InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT));
}
return Chain;
}
if (IsVarArg) {
return lowerUnhandledCall(CLI, InVals,
"unsupported call to variadic function ");
}
if (!CLI.CB)
report_fatal_error("unsupported libcall legalization");
if (IsTailCall && MF.getTarget().Options.GuaranteedTailCallOpt) {
return lowerUnhandledCall(CLI, InVals,
"unsupported required tail call to function ");
}
if (AMDGPU::isShader(CallConv)) {
// Note the issue is with the CC of the called function, not of the call
// itself.
return lowerUnhandledCall(CLI, InVals,
"unsupported call to a shader function ");
}
if (AMDGPU::isShader(MF.getFunction().getCallingConv()) &&
CallConv != CallingConv::AMDGPU_Gfx) {
// Only allow calls with specific calling conventions.
return lowerUnhandledCall(CLI, InVals,
"unsupported calling convention for call from "
"graphics shader of function ");
}
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
}
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
// A sibling call is one where we're under the usual C ABI and not planning
// to change that, but can still do a tail call:
if (!TailCallOpt && IsTailCall)
IsSibCall = true;
if (IsTailCall)
++NumTailCalls;
}
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
if (CallConv != CallingConv::AMDGPU_Gfx) {
// With a fixed ABI, allocate fixed registers before user arguments.
passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
}
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibCall) {
// Since we're not changing the ABI to make this a tail call, the memory
// operands are already available in the caller's incoming argument space.
NumBytes = 0;
}
// FPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
// by this amount for a tail call. In a sibling call it must be 0 because the
// caller will deallocate the entire stack and the callee still expects its
// arguments to begin at SP+0. Completely unused for non-tail calls.
int32_t FPDiff = 0;
MachineFrameInfo &MFI = MF.getFrameInfo();
// Adjust the stack pointer for the new arguments. These operations are
// automatically eliminated by the prolog/epilog pass.
if (!IsSibCall) {
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
if (!Subtarget->enableFlatScratch()) {
SmallVector<SDValue, 4> CopyFromChains;
// In the HSA case, this should be an identity copy.
SDValue ScratchRSrcReg
= DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32);
RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
CopyFromChains.push_back(ScratchRSrcReg.getValue(1));
Chain = DAG.getTokenFactor(DL, CopyFromChains);
}
}
MVT PtrVT = MVT::i32;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::FPExt:
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
default:
llvm_unreachable("Unknown loc info!");
}
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
assert(VA.isMemLoc());
SDValue DstAddr;
MachinePointerInfo DstInfo;
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset;
SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT);
MaybeAlign Alignment;
if (IsTailCall) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
unsigned OpSize = Flags.isByVal() ?
Flags.getByValSize() : VA.getValVT().getStoreSize();
// FIXME: We can have better than the minimum byval required alignment.
Alignment =
Flags.isByVal()
? Flags.getNonZeroByValAlign()
: commonAlignment(Subtarget->getStackAlignment(), Offset);
Offset = Offset + FPDiff;
int FI = MFI.CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
// clobbered.
// FIXME: Why is this really necessary? This seems to just result in a
// lot of code to copy the stack and write them back to the same
// locations, which are supposed to be immutable?
Chain = addTokenForArgument(Chain, DAG, MFI, FI);
} else {
// Stores to the argument stack area are relative to the stack pointer.
SDValue SP = DAG.getCopyFromReg(Chain, DL, Info->getStackPtrOffsetReg(),
MVT::i32);
DstAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, SP, PtrOff);
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
Alignment =
commonAlignment(Subtarget->getStackAlignment(), LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i32);
SDValue Cpy =
DAG.getMemcpy(Chain, DL, DstAddr, Arg, SizeNode,
Outs[i].Flags.getNonZeroByValAlign(),
/*isVol = */ false, /*AlwaysInline = */ true,
/*isTailCall = */ false, DstInfo,
MachinePointerInfo(AMDGPUAS::PRIVATE_ADDRESS));
MemOpChains.push_back(Cpy);
} else {
SDValue Store =
DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Alignment);
MemOpChains.push_back(Store);
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (auto &RegToPass : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
RegToPass.second, InFlag);
InFlag = Chain.getValue(1);
}
// We don't usually want to end the call-sequence here because we would tidy
// the frame up *after* the call, however in the ABI-changing tail-call case
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getTargetConstant(NumBytes, DL, MVT::i32),
DAG.getTargetConstant(0, DL, MVT::i32),
InFlag, DL);
InFlag = Chain.getValue(1);
}
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
// Add a redundant copy of the callee global which will not be legalized, as
// we need direct access to the callee later.
if (GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = GSD->getGlobal();
Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
} else {
Ops.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
}
if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
// into the call.
for (auto &RegToPass : RegsToPass) {
Ops.push_back(DAG.getRegister(RegToPass.first,
RegToPass.second.getValueType()));
}
// Add a register mask operand representing the call-preserved registers.
auto *TRI = static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// If we're doing a tail call, use a TC_RETURN here rather than an
// actual call instruction.
if (IsTailCall) {
MFI.setHasTailCall();
return DAG.getNode(AMDGPUISD::TC_RETURN, DL, NodeTys, Ops);
}
// Returns a chain and a flag for retval copy to use.
SDValue Call = DAG.getNode(AMDGPUISD::CALL, DL, NodeTys, Ops);
Chain = Call.getValue(0);
InFlag = Call.getValue(1);
uint64_t CalleePopBytes = NumBytes;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(0, DL, MVT::i32),
DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32),
InFlag, DL);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
InVals, IsThisReturn,
IsThisReturn ? OutVals[0] : SDValue());
}
// This is identical to the default implementation in ExpandDYNAMIC_STACKALLOC,
// except for applying the wave size scale to the increment amount.
SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(
SDValue Op, SelectionDAG &DAG) const {
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SDLoc dl(Op);
EVT VT = Op.getValueType();
SDValue Tmp1 = Op;
SDValue Tmp2 = Op.getValue(1);
SDValue Tmp3 = Op.getOperand(2);
SDValue Chain = Tmp1.getOperand(0);
Register SPReg = Info->getStackPtrOffsetReg();
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
MaybeAlign Alignment = cast<ConstantSDNode>(Tmp3)->getMaybeAlignValue();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const TargetFrameLowering *TFL = ST.getFrameLowering();
unsigned Opc =
TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ?
ISD::ADD : ISD::SUB;
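// The stack pointer tracks the per-wave scratch offset, so a per-lane
// allocation must be scaled by the wavefront size. For example (a sketch),
// on a wave64 subtarget an 8-byte per-lane alloca advances SP by
// 8 << 6 == 512 bytes.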
SDValue ScaledSize = DAG.getNode(
ISD::SHL, dl, VT, Size,
DAG.getConstant(ST.getWavefrontSizeLog2(), dl, MVT::i32));
Align StackAlign = TFL->getStackAlign();
Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value
if (Alignment && *Alignment > StackAlign) {
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
DAG.getConstant(-(uint64_t)Alignment->value()
<< ST.getWavefrontSizeLog2(),
dl, VT));
}
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
Tmp2 = DAG.getCALLSEQ_END(
Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
return DAG.getMergeValues({Tmp1, Tmp2}, dl);
}
SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
// We only handle constant sizes here to allow non-entry block, static sized
// allocas. A truly dynamic value is more difficult to support because we
// don't know if the size value is uniform or not. If the size isn't uniform,
// we would need to do a wave reduction to get the maximum size to know how
// much to increment the uniform stack pointer.
SDValue Size = Op.getOperand(1);
if (isa<ConstantSDNode>(Size))
return lowerDYNAMIC_STACKALLOCImpl(Op, DAG); // Use "generic" expansion.
return AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(Op, DAG);
}
Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<Register>(RegName)
.Case("m0", AMDGPU::M0)
.Case("exec", AMDGPU::EXEC)
.Case("exec_lo", AMDGPU::EXEC_LO)
.Case("exec_hi", AMDGPU::EXEC_HI)
.Case("flat_scratch", AMDGPU::FLAT_SCR)
.Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
.Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
.Default(Register());
if (Reg == AMDGPU::NoRegister) {
report_fatal_error(Twine("invalid register name \""
+ StringRef(RegName) + "\"."));
}
if (!Subtarget->hasFlatScrRegister() &&
Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
report_fatal_error(Twine("invalid register \""
+ StringRef(RegName) + "\" for subtarget."));
}
switch (Reg) {
case AMDGPU::M0:
case AMDGPU::EXEC_LO:
case AMDGPU::EXEC_HI:
case AMDGPU::FLAT_SCR_LO:
case AMDGPU::FLAT_SCR_HI:
if (VT.getSizeInBits() == 32)
return Reg;
break;
case AMDGPU::EXEC:
case AMDGPU::FLAT_SCR:
if (VT.getSizeInBits() == 64)
return Reg;
break;
default:
llvm_unreachable("missing register type checking");
}
report_fatal_error(Twine("invalid type for register \""
+ StringRef(RegName) + "\"."));
}
// If kill is not the last instruction, split the block so kill is always a
// proper terminator.
MachineBasicBlock *
SITargetLowering::splitKillBlock(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineBasicBlock *SplitBB = BB->splitAt(MI, false /*UpdateLiveIns*/);
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode()));
return SplitBB;
}
// Split block \p MBB at \p MI so as to insert a loop. If \p InstInLoop is
// true, \p MI will be the only instruction in the loop body block. Otherwise,
// it will be the first instruction in the remainder block.
//
/// \returns { LoopBody, Remainder }
static std::pair<MachineBasicBlock *, MachineBasicBlock *>
splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop) {
MachineFunction *MF = MBB.getParent();
MachineBasicBlock::iterator I(&MI);
// To insert the loop we need to split the block. Move everything after this
// point to a new block, and insert a new empty block between the two.
MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
MachineFunction::iterator MBBI(MBB);
++MBBI;
MF->insert(MBBI, LoopBB);
MF->insert(MBBI, RemainderBB);
LoopBB->addSuccessor(LoopBB);
LoopBB->addSuccessor(RemainderBB);
// Move the rest of the block into a new block.
RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
if (InstInLoop) {
auto Next = std::next(I);
// Move instruction to loop body.
LoopBB->splice(LoopBB->begin(), &MBB, I, Next);
// Move the rest of the block.
RemainderBB->splice(RemainderBB->begin(), &MBB, Next, MBB.end());
} else {
RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
}
MBB.addSuccessor(LoopBB);
return std::make_pair(LoopBB, RemainderBB);
}
/// Insert \p MI into a BUNDLE with an S_WAITCNT 0 immediately following it.
void SITargetLowering::bundleInstWithWaitcnt(MachineInstr &MI) const {
MachineBasicBlock *MBB = MI.getParent();
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
auto I = MI.getIterator();
auto E = std::next(I);
BuildMI(*MBB, E, MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(0);
MIBundleBuilder Bundler(*MBB, I, E);
finalizeBundle(*MBB, Bundler.begin());
}
MachineBasicBlock *
SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
MachineBasicBlock *BB) const {
const DebugLoc &DL = MI.getDebugLoc();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
MachineBasicBlock *LoopBB;
MachineBasicBlock *RemainderBB;
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
// Apparently kill flags are only valid if the def is in the same block?
if (MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0))
Src->setIsKill(false);
std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true);
MachineBasicBlock::iterator I = LoopBB->end();
const unsigned EncodedReg = AMDGPU::Hwreg::encodeHwreg(
AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1);
// Clear TRAP_STS.MEM_VIOL
BuildMI(*LoopBB, LoopBB->begin(), DL, TII->get(AMDGPU::S_SETREG_IMM32_B32))
.addImm(0)
.addImm(EncodedReg);
bundleInstWithWaitcnt(MI);
Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
// Load and check TRAP_STS.MEM_VIOL
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), Reg)
.addImm(EncodedReg);
// FIXME: Do we need to use an isel pseudo that may clobber scc?
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_CMP_LG_U32))
.addReg(Reg, RegState::Kill)
.addImm(0);
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
.addMBB(LoopBB);
return RemainderBB;
}
// Do a v_movrels_b32 or v_movreld_b32 for each unique value of \p IdxReg in the
// wavefront. If the value is uniform and just happens to be in a VGPR, this
// will only do one iteration. In the worst case, this will loop 64 times.
//
// TODO: Just use v_readlane_b32 if we know the VGPR has a uniform value.
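// Roughly, the emitted waterfall loop looks like this (a sketch, not exact
// MIR; the real code also carries PHIs for the result and the saved exec):
//
//   loop:
//     CurrentIdx = v_readfirstlane_b32 Idx
//     Cond       = v_cmp_eq_u32 CurrentIdx, Idx
//     NewExec    = s_and_saveexec exec, Cond
//     M0 (or an SGPR in GPR-index mode) = CurrentIdx + Offset
//     ... indexed access runs for the matching lanes ...
//     exec = exec ^ NewExec   ; retire the lanes just handled
//     s_cbranch_execnz loop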
static MachineBasicBlock::iterator
emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI,
MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
const DebugLoc &DL, const MachineOperand &Idx,
unsigned InitReg, unsigned ResultReg, unsigned PhiReg,
unsigned InitSaveExecReg, int Offset, bool UseGPRIdxMode,
Register &SGPRIdxReg) {
MachineFunction *MF = OrigBB.getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineBasicBlock::iterator I = LoopBB.begin();
const TargetRegisterClass *BoolRC = TRI->getBoolRC();
Register PhiExec = MRI.createVirtualRegister(BoolRC);
Register NewExec = MRI.createVirtualRegister(BoolRC);
Register CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
Register CondReg = MRI.createVirtualRegister(BoolRC);
BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiReg)
.addReg(InitReg)
.addMBB(&OrigBB)
.addReg(ResultReg)
.addMBB(&LoopBB);
BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiExec)
.addReg(InitSaveExecReg)
.addMBB(&OrigBB)
.addReg(NewExec)
.addMBB(&LoopBB);
// Read the next variant <- also loop target.
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentIdxReg)
.addReg(Idx.getReg(), getUndefRegState(Idx.isUndef()));
// Compare the just read M0 value to all possible Idx values.
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e64), CondReg)
.addReg(CurrentIdxReg)
.addReg(Idx.getReg(), 0, Idx.getSubReg());
// Update EXEC, save the original EXEC value to VCC.
BuildMI(LoopBB, I, DL, TII->get(ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32
: AMDGPU::S_AND_SAVEEXEC_B64),
NewExec)
.addReg(CondReg, RegState::Kill);
MRI.setSimpleHint(NewExec, CondReg);
if (UseGPRIdxMode) {
if (Offset == 0) {
SGPRIdxReg = CurrentIdxReg;
} else {
SGPRIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SGPRIdxReg)
.addReg(CurrentIdxReg, RegState::Kill)
.addImm(Offset);
}
} else {
// Move index from VCC into M0
if (Offset == 0) {
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addReg(CurrentIdxReg, RegState::Kill);
} else {
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
.addReg(CurrentIdxReg, RegState::Kill)
.addImm(Offset);
}
}
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
MachineInstr *InsertPt =
BuildMI(LoopBB, I, DL, TII->get(ST.isWave32() ? AMDGPU::S_XOR_B32_term
: AMDGPU::S_XOR_B64_term), Exec)
.addReg(Exec)
.addReg(NewExec);
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
// s_cbranch_scc0?
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.addMBB(&LoopBB);
return InsertPt->getIterator();
}
// This has slightly sub-optimal register allocation when the source vector is
// killed by the read. The register allocator does not understand that the
// kill is per-workitem, so the vector is kept alive for the whole loop; we
// therefore end up not reusing a subregister from it and use one more VGPR
// than necessary. This VGPR was saved back when this was expanded after
// register allocation.
static MachineBasicBlock::iterator
loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI,
unsigned InitResultReg, unsigned PhiReg, int Offset,
bool UseGPRIdxMode, Register &SGPRIdxReg) {
MachineFunction *MF = MBB.getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
Register DstReg = MI.getOperand(0).getReg();
Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
Register TmpExec = MRI.createVirtualRegister(BoolXExecRC);
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), TmpExec);
// Save the EXEC mask
BuildMI(MBB, I, DL, TII->get(MovExecOpc), SaveExec)
.addReg(Exec);
MachineBasicBlock *LoopBB;
MachineBasicBlock *RemainderBB;
std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, MBB, false);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
auto InsPt = emitLoadM0FromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, *Idx,
InitResultReg, DstReg, PhiReg, TmpExec,
Offset, UseGPRIdxMode, SGPRIdxReg);
MachineBasicBlock* LandingPad = MF->CreateMachineBasicBlock();
MachineFunction::iterator MBBI(LoopBB);
++MBBI;
MF->insert(MBBI, LandingPad);
LoopBB->removeSuccessor(RemainderBB);
LandingPad->addSuccessor(RemainderBB);
LoopBB->addSuccessor(LandingPad);
MachineBasicBlock::iterator First = LandingPad->begin();
BuildMI(*LandingPad, First, DL, TII->get(MovExecOpc), Exec)
.addReg(SaveExec);
return InsPt;
}
// Returns {subreg index, remaining offset}.
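// For example (a sketch), with a 256-bit (8 x 32-bit) super-register:
// Offset 3 yields {sub3, 0}, while the out-of-bounds Offset 10 yields
// {sub0, 10} and is left for the dynamic-index expansion to handle.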
static std::pair<unsigned, int>
computeIndirectRegAndOffset(const SIRegisterInfo &TRI,
const TargetRegisterClass *SuperRC,
unsigned VecReg,
int Offset) {
int NumElts = TRI.getRegSizeInBits(*SuperRC) / 32;
// Skip out of bounds offsets, or else we would end up using an undefined
// register.
if (Offset >= NumElts || Offset < 0)
return std::make_pair(AMDGPU::sub0, Offset);
return std::make_pair(SIRegisterInfo::getSubRegFromChannel(Offset), 0);
}
static void setM0ToIndexFromSGPR(const SIInstrInfo *TII,
MachineRegisterInfo &MRI, MachineInstr &MI,
int Offset) {
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
assert(Idx->getReg() != AMDGPU::NoRegister);
if (Offset == 0) {
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0).add(*Idx);
} else {
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
.add(*Idx)
.addImm(Offset);
}
}
static Register getIndirectSGPRIdx(const SIInstrInfo *TII,
MachineRegisterInfo &MRI, MachineInstr &MI,
int Offset) {
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
if (Offset == 0)
return Idx->getReg();
Register Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp)
.add(*Idx)
.addImm(Offset);
return Tmp;
}
static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
MachineBasicBlock &MBB,
const GCNSubtarget &ST) {
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
Register Dst = MI.getOperand(0).getReg();
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
Register SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg();
int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
const TargetRegisterClass *VecRC = MRI.getRegClass(SrcReg);
const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg());
unsigned SubReg;
std::tie(SubReg, Offset)
= computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset);
const bool UseGPRIdxMode = ST.useVGPRIndexMode();
// Check for a SGPR index.
if (TII->getRegisterInfo().isSGPRClass(IdxRC)) {
MachineBasicBlock::iterator I(&MI);
const DebugLoc &DL = MI.getDebugLoc();
if (UseGPRIdxMode) {
// TODO: Look at the uses to avoid the copy. This may require rescheduling
// to avoid interfering with other uses, so probably requires a new
// optimization pass.
Register Idx = getIndirectSGPRIdx(TII, MRI, MI, Offset);
const MCInstrDesc &GPRIDXDesc =
TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), true);
BuildMI(MBB, I, DL, GPRIDXDesc, Dst)
.addReg(SrcReg)
.addReg(Idx)
.addImm(SubReg);
} else {
setM0ToIndexFromSGPR(TII, MRI, MI, Offset);
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
.addReg(SrcReg, 0, SubReg)
.addReg(SrcReg, RegState::Implicit);
}
MI.eraseFromParent();
return &MBB;
}
// Control flow needs to be inserted if indexing with a VGPR.
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
Register PhiReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register InitReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg);
Register SGPRIdxReg;
auto InsPt = loadM0FromVGPR(TII, MBB, MI, InitReg, PhiReg, Offset,
UseGPRIdxMode, SGPRIdxReg);
MachineBasicBlock *LoopBB = InsPt->getParent();
if (UseGPRIdxMode) {
const MCInstrDesc &GPRIDXDesc =
TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), true);
BuildMI(*LoopBB, InsPt, DL, GPRIDXDesc, Dst)
.addReg(SrcReg)
.addReg(SGPRIdxReg)
.addImm(SubReg);
} else {
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
.addReg(SrcReg, 0, SubReg)
.addReg(SrcReg, RegState::Implicit);
}
MI.eraseFromParent();
return LoopBB;
}
static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
MachineBasicBlock &MBB,
const GCNSubtarget &ST) {
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
Register Dst = MI.getOperand(0).getReg();
const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val);
int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
const TargetRegisterClass *VecRC = MRI.getRegClass(SrcVec->getReg());
const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg());
// This can be an immediate, but will be folded later.
assert(Val->getReg());
unsigned SubReg;
std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC,
SrcVec->getReg(),
Offset);
const bool UseGPRIdxMode = ST.useVGPRIndexMode();
if (Idx->getReg() == AMDGPU::NoRegister) {
MachineBasicBlock::iterator I(&MI);
const DebugLoc &DL = MI.getDebugLoc();
assert(Offset == 0);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dst)
.add(*SrcVec)
.add(*Val)
.addImm(SubReg);
MI.eraseFromParent();
return &MBB;
}
// Check for a SGPR index.
if (TII->getRegisterInfo().isSGPRClass(IdxRC)) {
MachineBasicBlock::iterator I(&MI);
const DebugLoc &DL = MI.getDebugLoc();
if (UseGPRIdxMode) {
Register Idx = getIndirectSGPRIdx(TII, MRI, MI, Offset);
const MCInstrDesc &GPRIDXDesc =
TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
BuildMI(MBB, I, DL, GPRIDXDesc, Dst)
.addReg(SrcVec->getReg())
.add(*Val)
.addReg(Idx)
.addImm(SubReg);
} else {
setM0ToIndexFromSGPR(TII, MRI, MI, Offset);
const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo(
TRI.getRegSizeInBits(*VecRC), 32, false);
BuildMI(MBB, I, DL, MovRelDesc, Dst)
.addReg(SrcVec->getReg())
.add(*Val)
.addImm(SubReg);
}
MI.eraseFromParent();
return &MBB;
}
// Control flow needs to be inserted if indexing with a VGPR.
if (Val->isReg())
MRI.clearKillFlags(Val->getReg());
const DebugLoc &DL = MI.getDebugLoc();
Register PhiReg = MRI.createVirtualRegister(VecRC);
Register SGPRIdxReg;
auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg, Offset,
UseGPRIdxMode, SGPRIdxReg);
MachineBasicBlock *LoopBB = InsPt->getParent();
if (UseGPRIdxMode) {
const MCInstrDesc &GPRIDXDesc =
TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
BuildMI(*LoopBB, InsPt, DL, GPRIDXDesc, Dst)
.addReg(PhiReg)
.add(*Val)
.addReg(SGPRIdxReg)
.addImm(AMDGPU::sub0);
} else {
const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo(
TRI.getRegSizeInBits(*VecRC), 32, false);
BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
.addReg(PhiReg)
.add(*Val)
.addImm(AMDGPU::sub0);
}
MI.eraseFromParent();
return LoopBB;
}
MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
switch (MI.getOpcode()) {
case AMDGPU::S_UADDO_PSEUDO:
case AMDGPU::S_USUBO_PSEUDO: {
const DebugLoc &DL = MI.getDebugLoc();
MachineOperand &Dest0 = MI.getOperand(0);
MachineOperand &Dest1 = MI.getOperand(1);
MachineOperand &Src0 = MI.getOperand(2);
MachineOperand &Src1 = MI.getOperand(3);
unsigned Opc = (MI.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
? AMDGPU::S_ADD_I32
: AMDGPU::S_SUB_I32;
BuildMI(*BB, MI, DL, TII->get(Opc), Dest0.getReg()).add(Src0).add(Src1);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CSELECT_B64), Dest1.getReg())
.addImm(1)
.addImm(0);
MI.eraseFromParent();
return BB;
}
case AMDGPU::S_ADD_U64_PSEUDO:
case AMDGPU::S_SUB_U64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const TargetRegisterClass *BoolRC = TRI->getBoolRC();
const DebugLoc &DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
MachineOperand &Src0 = MI.getOperand(1);
MachineOperand &Src1 = MI.getOperand(2);
Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0).add(Src0Sub0).add(Src1Sub0);
BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1).add(Src0Sub1).add(Src1Sub1);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
.addReg(DestSub1)
.addImm(AMDGPU::sub1);
MI.eraseFromParent();
return BB;
}
case AMDGPU::V_ADD_U64_PSEUDO:
case AMDGPU::V_SUB_U64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const DebugLoc &DL = MI.getDebugLoc();
bool IsAdd = (MI.getOpcode() == AMDGPU::V_ADD_U64_PSEUDO);
MachineOperand &Dest = MI.getOperand(0);
MachineOperand &Src0 = MI.getOperand(1);
MachineOperand &Src1 = MI.getOperand(2);
if (IsAdd && ST.hasLshlAddB64()) {
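// With a shift amount of 0, v_lshl_add_u64 computes (Src0 << 0) + Src1,
// i.e. a plain 64-bit add in a single instruction.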
auto Add = BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_LSHL_ADD_U64_e64),
Dest.getReg())
.add(Src0)
.addImm(0)
.add(Src1);
TII->legalizeOperands(*Add);
MI.eraseFromParent();
return BB;
}
const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register CarryReg = MRI.createVirtualRegister(CarryRC);
Register DeadCarryReg = MRI.createVirtualRegister(CarryRC);
const TargetRegisterClass *Src0RC = Src0.isReg()
? MRI.getRegClass(Src0.getReg())
: &AMDGPU::VReg_64RegClass;
const TargetRegisterClass *Src1RC = Src1.isReg()
? MRI.getRegClass(Src1.getReg())
: &AMDGPU::VReg_64RegClass;
const TargetRegisterClass *Src0SubRC =
TRI->getSubRegClass(Src0RC, AMDGPU::sub0);
const TargetRegisterClass *Src1SubRC =
TRI->getSubRegClass(Src1RC, AMDGPU::sub1);
MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
MachineOperand SrcReg1Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
MachineOperand SrcReg0Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
MachineOperand SrcReg1Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
MachineInstr *LoHalf = BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0)
.addReg(CarryReg, RegState::Define)
.add(SrcReg0Sub0)
.add(SrcReg1Sub0)
.addImm(0); // clamp bit
unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
MachineInstr *HiHalf =
BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1)
.addReg(DeadCarryReg, RegState::Define | RegState::Dead)
.add(SrcReg0Sub1)
.add(SrcReg1Sub1)
.addReg(CarryReg, RegState::Kill)
.addImm(0); // clamp bit
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
.addReg(DestSub1)
.addImm(AMDGPU::sub1);
TII->legalizeOperands(*LoHalf);
TII->legalizeOperands(*HiHalf);
MI.eraseFromParent();
return BB;
}
case AMDGPU::S_ADD_CO_PSEUDO:
case AMDGPU::S_SUB_CO_PSEUDO: {
// This pseudo can only be selected from a uniform add/subcarry node.
// All of its VGPR operands are therefore assumed to be splat vectors.
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineBasicBlock::iterator MII = MI;
const DebugLoc &DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
MachineOperand &CarryDest = MI.getOperand(1);
MachineOperand &Src0 = MI.getOperand(2);
MachineOperand &Src1 = MI.getOperand(3);
MachineOperand &Src2 = MI.getOperand(4);
unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
? AMDGPU::S_ADDC_U32
: AMDGPU::S_SUBB_U32;
if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) {
Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0)
.addReg(Src0.getReg());
Src0.setReg(RegOp0);
}
if (Src1.isReg() && TRI->isVectorRegister(MRI, Src1.getReg())) {
Register RegOp1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp1)
.addReg(Src1.getReg());
Src1.setReg(RegOp1);
}
Register RegOp2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
if (TRI->isVectorRegister(MRI, Src2.getReg())) {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp2)
.addReg(Src2.getReg());
Src2.setReg(RegOp2);
}
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
unsigned WaveSize = TRI->getRegSizeInBits(*Src2RC);
assert(WaveSize == 64 || WaveSize == 32);
if (WaveSize == 64) {
if (ST.hasScalarCompareEq64()) {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
.addReg(Src2.getReg())
.addImm(0);
} else {
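// Without a 64-bit scalar compare, test Src2 != 0 by OR-ing the two
// 32-bit halves together and comparing the result against zero.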
const TargetRegisterClass *SubRC =
TRI->getSubRegClass(Src2RC, AMDGPU::sub0);
MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
MII, MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
Register Src2_32 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_OR_B32), Src2_32)
.add(Src2Sub0)
.add(Src2Sub1);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
.addReg(Src2_32, RegState::Kill)
.addImm(0);
}
} else {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMPK_LG_U32))
.addReg(Src2.getReg())
.addImm(0);
}
BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);
unsigned SelOpc =
(WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
BuildMI(*BB, MII, DL, TII->get(SelOpc), CarryDest.getReg())
.addImm(-1)
.addImm(0);
MI.eraseFromParent();
return BB;
}
case AMDGPU::SI_INIT_M0: {
BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.add(MI.getOperand(0));
MI.eraseFromParent();
return BB;
}
case AMDGPU::GET_GROUPSTATICSIZE: {
assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.add(MI.getOperand(0))
.addImm(MFI->getLDSSize());
MI.eraseFromParent();
return BB;
}
case AMDGPU::SI_INDIRECT_SRC_V1:
case AMDGPU::SI_INDIRECT_SRC_V2:
case AMDGPU::SI_INDIRECT_SRC_V4:
case AMDGPU::SI_INDIRECT_SRC_V8:
case AMDGPU::SI_INDIRECT_SRC_V16:
case AMDGPU::SI_INDIRECT_SRC_V32:
return emitIndirectSrc(MI, *BB, *getSubtarget());
case AMDGPU::SI_INDIRECT_DST_V1:
case AMDGPU::SI_INDIRECT_DST_V2:
case AMDGPU::SI_INDIRECT_DST_V4:
case AMDGPU::SI_INDIRECT_DST_V8:
case AMDGPU::SI_INDIRECT_DST_V16:
case AMDGPU::SI_INDIRECT_DST_V32:
return emitIndirectDst(MI, *BB, *getSubtarget());
case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
case AMDGPU::SI_KILL_I1_PSEUDO:
return splitKillBlock(MI, BB);
case AMDGPU::V_CNDMASK_B64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
Register Dst = MI.getOperand(0).getReg();
Register Src0 = MI.getOperand(1).getReg();
Register Src1 = MI.getOperand(2).getReg();
const DebugLoc &DL = MI.getDebugLoc();
Register SrcCond = MI.getOperand(3).getReg();
Register DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
Register SrcCondCopy = MRI.createVirtualRegister(CondRC);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
.addReg(SrcCond);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
.addImm(0)
.addReg(Src0, 0, AMDGPU::sub0)
.addImm(0)
.addReg(Src1, 0, AMDGPU::sub0)
.addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
.addImm(0)
.addReg(Src0, 0, AMDGPU::sub1)
.addImm(0)
.addReg(Src1, 0, AMDGPU::sub1)
.addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst)
.addReg(DstLo)
.addImm(AMDGPU::sub0)
.addReg(DstHi)
.addImm(AMDGPU::sub1);
MI.eraseFromParent();
return BB;
}
case AMDGPU::SI_BR_UNDEF: {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
MachineInstr *Br = BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
.add(MI.getOperand(0));
Br->getOperand(1).setIsUndef(true); // read undef SCC
MI.eraseFromParent();
return BB;
}
case AMDGPU::ADJCALLSTACKUP:
case AMDGPU::ADJCALLSTACKDOWN: {
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
MachineInstrBuilder MIB(*MF, &MI);
MIB.addReg(Info->getStackPtrOffsetReg(), RegState::ImplicitDefine)
.addReg(Info->getStackPtrOffsetReg(), RegState::Implicit);
return BB;
}
case AMDGPU::SI_CALL_ISEL: {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
unsigned ReturnAddrReg = TII->getRegisterInfo().getReturnAddressReg(*MF);
MachineInstrBuilder MIB;
MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg);
for (const MachineOperand &MO : MI.operands())
MIB.add(MO);
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return BB;
}
case AMDGPU::V_ADD_CO_U32_e32:
case AMDGPU::V_SUB_CO_U32_e32:
case AMDGPU::V_SUBREV_CO_U32_e32: {
// TODO: Define distinct V_*_I32_Pseudo instructions instead.
const DebugLoc &DL = MI.getDebugLoc();
unsigned Opc = MI.getOpcode();
bool NeedClampOperand = false;
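// If the e32 form is not available on this subtarget, fall back to the
// VOP3 (e64) encoding, which carries an explicit clamp operand.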
if (TII->pseudoToMCOpcode(Opc) == -1) {
Opc = AMDGPU::getVOPe64(Opc);
NeedClampOperand = true;
}
auto I = BuildMI(*BB, MI, DL, TII->get(Opc), MI.getOperand(0).getReg());
if (TII->isVOP3(*I)) {
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
I.addReg(TRI->getVCC(), RegState::Define);
}
I.add(MI.getOperand(1))
.add(MI.getOperand(2));
if (NeedClampOperand)
I.addImm(0); // clamp bit for e64 encoding
TII->legalizeOperands(*I);
MI.eraseFromParent();
return BB;
}
case AMDGPU::V_ADDC_U32_e32:
case AMDGPU::V_SUBB_U32_e32:
case AMDGPU::V_SUBBREV_U32_e32:
// These instructions have an implicit use of vcc which counts towards the
// constant bus limit.
TII->legalizeOperands(MI);
return BB;
case AMDGPU::DS_GWS_INIT:
case AMDGPU::DS_GWS_SEMA_BR:
case AMDGPU::DS_GWS_BARRIER:
TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::data0);
LLVM_FALLTHROUGH;
case AMDGPU::DS_GWS_SEMA_V:
case AMDGPU::DS_GWS_SEMA_P:
case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
// An s_waitcnt 0 is required to be the instruction immediately following.
if (getSubtarget()->hasGWSAutoReplay()) {
bundleInstWithWaitcnt(MI);
return BB;
}
return emitGWSMemViolTestLoop(MI, BB);
case AMDGPU::S_SETREG_B32: {
// Try to optimize cases that only set the denormal mode or rounding mode.
//
// If the s_setreg_b32 fully sets all of the bits in the rounding mode or
// denormal mode to a constant, we can use s_round_mode or s_denorm_mode
// instead.
//
// FIXME: This could be a predicate on the immediate, but tablegen doesn't
// allow a no-side-effect instruction in the output of a side-effecting
// pattern.
unsigned ID, Offset, Width;
AMDGPU::Hwreg::decodeHwreg(MI.getOperand(1).getImm(), ID, Offset, Width);
if (ID != AMDGPU::Hwreg::ID_MODE)
return BB;
const unsigned WidthMask = maskTrailingOnes<unsigned>(Width);
const unsigned SetMask = WidthMask << Offset;
if (getSubtarget()->hasDenormModeInst()) {
unsigned SetDenormOp = 0;
unsigned SetRoundOp = 0;
// The dedicated instructions can only set the whole denorm or round mode
// at once, not a subset of bits in either.
if (SetMask ==
(AMDGPU::Hwreg::FP_ROUND_MASK | AMDGPU::Hwreg::FP_DENORM_MASK)) {
// If this fully sets both the round and denorm mode, emit the two
// dedicated instructions for these.
SetRoundOp = AMDGPU::S_ROUND_MODE;
SetDenormOp = AMDGPU::S_DENORM_MODE;
} else if (SetMask == AMDGPU::Hwreg::FP_ROUND_MASK) {
SetRoundOp = AMDGPU::S_ROUND_MODE;
} else if (SetMask == AMDGPU::Hwreg::FP_DENORM_MASK) {
SetDenormOp = AMDGPU::S_DENORM_MODE;
}
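// For example (a sketch): a move of 0x21 into the full 8-bit FP mode
// field becomes s_round_mode 0x1 followed by s_denorm_mode 0x2.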
if (SetRoundOp || SetDenormOp) {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
unsigned ImmVal = Def->getOperand(1).getImm();
if (SetRoundOp) {
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetRoundOp))
.addImm(ImmVal & 0xf);
// If we also have the denorm mode, get just the denorm mode bits.
ImmVal >>= 4;
}
if (SetDenormOp) {
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetDenormOp))
.addImm(ImmVal & 0xf);
}
MI.eraseFromParent();
return BB;
}
}
}
// If only FP bits are touched, use the no-side-effects pseudo.
if ((SetMask & (AMDGPU::Hwreg::FP_ROUND_MASK |
AMDGPU::Hwreg::FP_DENORM_MASK)) == SetMask)
MI.setDesc(TII->get(AMDGPU::S_SETREG_B32_mode));
return BB;
}
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
}
bool SITargetLowering::hasBitPreservingFPLogic(EVT VT) const {
return isTypeLegal(VT.getScalarType());
}
bool SITargetLowering::hasAtomicFaddRtnForTy(SDValue &Op) const {
switch (Op.getValue(0).getSimpleValueType().SimpleTy) {
case MVT::f32:
return Subtarget->hasAtomicFaddRtnInsts();
case MVT::v2f16:
case MVT::f64:
return Subtarget->hasGFX90AInsts();
default:
return false;
}
}
bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
// This currently forces unfolding various combinations of fsub into fma with
// free fneg'd operands. As long as we have fast FMA (controlled by
// isFMAFasterThanFMulAndFAdd), we should perform these.
// When fma is quarter rate, for f64 where add / sub are at best half rate,
// most of these combines appear to be cycle neutral but save on instruction
// count / code size.
return true;
}
bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; }
EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
EVT VT) const {
if (!VT.isVector()) {
return MVT::i1;
}
return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
}
MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT VT) const {
// TODO: Should i16 always be used if legal? For now it would force VALU
// shifts.
return (VT == MVT::i16) ? MVT::i16 : MVT::i32;
}
LLT SITargetLowering::getPreferredShiftAmountTy(LLT Ty) const {
return (Ty.getScalarSizeInBits() <= 16 && Subtarget->has16BitInsts())
? Ty.changeElementSize(16)
: Ty.changeElementSize(32);
}
// Answering this is somewhat tricky and depends on the specific device, since
// different devices have different rates for fma and for f64 operations in
// general.
//
// v_fma_f64 and v_mul_f64 always take the same number of cycles as each other
// regardless of which device (although the number of cycles differs between
// devices), so it is always profitable for f64.
//
// v_fma_f32 takes 4 or 16 cycles depending on the device, so it is profitable
// only on full rate devices. Normally, we should prefer selecting v_mad_f32
// which we can always do even without fused FP ops since it returns the same
// result as the separate operations and since it is always full
// rate. Therefore, we lie and report that it is not faster for f32. v_mad_f32,
// however, does not support denormals, so we do report fma as faster if we
// have a fast fma device and require denormals.
//
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
VT = VT.getScalarType();
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32: {
// If mad is not available this depends only on if f32 fma is full rate.
if (!Subtarget->hasMadMacF32Insts())
return Subtarget->hasFastFMAF32();
// Otherwise f32 mad is always full rate and returns the same result as
// the separate operations so should be preferred over fma.
// However, it does not support denormals.
if (hasFP32Denormals(MF))
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
// If the subtarget has v_fmac_f32, that's just as good as v_mac_f32.
return Subtarget->hasFastFMAF32() && Subtarget->hasDLInsts();
}
case MVT::f64:
return true;
case MVT::f16:
return Subtarget->has16BitInsts() && hasFP64FP16Denormals(MF);
default:
break;
}
return false;
}
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
LLT Ty) const {
switch (Ty.getScalarSizeInBits()) {
case 16:
return isFMAFasterThanFMulAndFAdd(MF, MVT::f16);
case 32:
return isFMAFasterThanFMulAndFAdd(MF, MVT::f32);
case 64:
return isFMAFasterThanFMulAndFAdd(MF, MVT::f64);
default:
break;
}
return false;
}
bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
if (!Ty.isScalar())
return false;
if (Ty.getScalarSizeInBits() == 16)
return Subtarget->hasMadF16() && !hasFP64FP16Denormals(*MI.getMF());
if (Ty.getScalarSizeInBits() == 32)
return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF());
return false;
}
bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG,
const SDNode *N) const {
// TODO: Check future ftz flag
// v_mad_f32/v_mac_f32 do not support denormals.
EVT VT = N->getValueType(0);
if (VT == MVT::f32)
return Subtarget->hasMadMacF32Insts() &&
!hasFP32Denormals(DAG.getMachineFunction());
if (VT == MVT::f16) {
return Subtarget->hasMadF16() &&
!hasFP64FP16Denormals(DAG.getMachineFunction());
}
return false;
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
// Work around LegalizeDAG doing the wrong thing and fully scalarizing if the
// wider vector type is legal.
SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert(VT == MVT::v4f16 || VT == MVT::v4i16);
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
SDLoc SL(Op);
SDValue OpLo = DAG.getNode(Opc, SL, Lo.getValueType(), Lo,
Op->getFlags());
SDValue OpHi = DAG.getNode(Opc, SL, Hi.getValueType(), Hi,
Op->getFlags());
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
}
// Work around LegalizeDAG doing the wrong thing and fully scalarizing if the
// wider vector type is legal.
SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
VT == MVT::v32f32);
SDValue Lo0, Hi0;
std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
SDValue Lo1, Hi1;
std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
SDLoc SL(Op);
SDValue OpLo = DAG.getNode(Opc, SL, Lo0.getValueType(), Lo0, Lo1,
Op->getFlags());
SDValue OpHi = DAG.getNode(Opc, SL, Hi0.getValueType(), Hi0, Hi1,
Op->getFlags());
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
}
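// For example, splitBinaryVectorOp turns (fadd v4f16 %a, %b) into two v2f16
// fadds whose results are concatenated back into a v4f16.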
SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 ||
VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
VT == MVT::v32f32);
SDValue Lo0, Hi0;
SDValue Op0 = Op.getOperand(0);
std::tie(Lo0, Hi0) = Op0.getValueType().isVector()
? DAG.SplitVectorOperand(Op.getNode(), 0)
: std::make_pair(Op0, Op0);
SDValue Lo1, Hi1;
std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
SDValue Lo2, Hi2;
std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2);
SDLoc SL(Op);
auto ResVT = DAG.GetSplitDestVTs(VT);
SDValue OpLo = DAG.getNode(Opc, SL, ResVT.first, Lo0, Lo1, Lo2,
Op->getFlags());
SDValue OpHi = DAG.getNode(Opc, SL, ResVT.second, Hi0, Hi1, Hi2,
Op->getFlags());
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
}
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::LOAD: {
SDValue Result = LowerLOAD(Op, DAG);
assert((!Result.getNode() ||
Result.getNode()->getNumValues() == 2) &&
"Load should return a value and a chain");
return Result;
}
case ISD::FSIN:
case ISD::FCOS:
return LowerTrig(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::FDIV: return LowerFDIV(Op, DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerATOMIC_CMP_SWAP(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::GlobalAddress: {
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
return LowerGlobalAddress(MFI, Op, DAG);
}
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return lowerINSERT_SUBVECTOR(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SCALAR_TO_VECTOR:
return lowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
case ISD::FPTRUNC_ROUND: {
unsigned Opc;
SDLoc DL(Op);
if (Op.getOperand(0)->getValueType(0) != MVT::f32)
return SDValue();
// Get the rounding mode from the last operand
int RoundMode = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (RoundMode == (int)RoundingMode::TowardPositive)
Opc = AMDGPUISD::FPTRUNC_ROUND_UPWARD;
else if (RoundMode == (int)RoundingMode::TowardNegative)
Opc = AMDGPUISD::FPTRUNC_ROUND_DOWNWARD;
else
return SDValue();
return DAG.getNode(Opc, DL, Op.getNode()->getVTList(), Op->getOperand(0));
}
case ISD::TRAP:
return lowerTRAP(Op, DAG);
case ISD::DEBUGTRAP:
return lowerDEBUGTRAP(Op, DAG);
case ISD::FABS:
case ISD::FNEG:
case ISD::FCANONICALIZE:
case ISD::BSWAP:
return splitUnaryVectorOp(Op, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM:
return lowerFMINNUM_FMAXNUM(Op, DAG);
case ISD::FMA:
return splitTernaryVectorOp(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::FADD:
case ISD::FMUL:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
case ISD::UADDSAT:
case ISD::USUBSAT:
case ISD::SADDSAT:
case ISD::SSUBSAT:
return splitBinaryVectorOp(Op, DAG);
case ISD::SMULO:
case ISD::UMULO:
return lowerXMULO(Op, DAG);
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
return lowerXMUL_LOHI(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
return SDValue();
}
// Used for D16: Casts the result of an instruction into the right vector,
// packing values if loads return unpacked values.
static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT,
const SDLoc &DL,
SelectionDAG &DAG, bool Unpacked) {
if (!LoadVT.isVector())
return Result;
// Cast back to the original packed type or to a larger type that is a
// multiple of 32 bits for D16. Widening the return type is required for
// legalization.
EVT FittingLoadVT = LoadVT;
if ((LoadVT.getVectorNumElements() % 2) == 1) {
FittingLoadVT =
EVT::getVectorVT(*DAG.getContext(), LoadVT.getVectorElementType(),
LoadVT.getVectorNumElements() + 1);
}
if (Unpacked) { // From v2i32/v4i32 back to v2f16/v4f16.
// Truncate to v2i16/v4i16.
EVT IntLoadVT = FittingLoadVT.changeTypeToInteger();
// Work around the legalizer not scalarizing truncate after vector op
// legalization by truncating the extracted elements individually instead
// of creating an intermediate vector truncate.
SmallVector<SDValue, 4> Elts;
DAG.ExtractVectorElements(Result, Elts);
for (SDValue &Elt : Elts)
Elt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Elt);
// Pad illegal v1i16/v3f16 to v4i16
if ((LoadVT.getVectorNumElements() % 2) == 1)
Elts.push_back(DAG.getUNDEF(MVT::i16));
Result = DAG.getBuildVector(IntLoadVT, DL, Elts);
// Bitcast to original type (v2f16/v4f16).
return DAG.getNode(ISD::BITCAST, DL, FittingLoadVT, Result);
}
// Cast back to the original packed type.
return DAG.getNode(ISD::BITCAST, DL, FittingLoadVT, Result);
}
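// For example, with unpacked D16 a v4f16 load comes back as v4i32 with one
// half value in the low 16 bits of each dword; adjustLoadValueTypeImpl
// truncates each lane to i16, rebuilds a v4i16, and bitcasts the result to
// v4f16.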
SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
MemSDNode *M,
SelectionDAG &DAG,
ArrayRef<SDValue> Ops,
bool IsIntrinsic) const {
SDLoc DL(M);
bool Unpacked = Subtarget->hasUnpackedD16VMem();
EVT LoadVT = M->getValueType(0);
EVT EquivLoadVT = LoadVT;
if (LoadVT.isVector()) {
if (Unpacked) {
EquivLoadVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
LoadVT.getVectorNumElements());
} else if ((LoadVT.getVectorNumElements() % 2) == 1) {
// Widen v3f16 to legal type
EquivLoadVT =
EVT::getVectorVT(*DAG.getContext(), LoadVT.getVectorElementType(),
LoadVT.getVectorNumElements() + 1);
}
}
// Change from v4f16/v2f16 to EquivLoadVT.
SDVTList VTList = DAG.getVTList(EquivLoadVT, MVT::Other);
SDValue Load
= DAG.getMemIntrinsicNode(
IsIntrinsic ? (unsigned)ISD::INTRINSIC_W_CHAIN : Opcode, DL,
VTList, Ops, M->getMemoryVT(),
M->getMemOperand());
SDValue Adjusted = adjustLoadValueTypeImpl(Load, LoadVT, DL, DAG, Unpacked);
return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL);
}
SDValue SITargetLowering::lowerIntrinsicLoad(MemSDNode *M, bool IsFormat,
SelectionDAG &DAG,
ArrayRef<SDValue> Ops) const {
SDLoc DL(M);
EVT LoadVT = M->getValueType(0);
EVT EltType = LoadVT.getScalarType();
EVT IntVT = LoadVT.changeTypeToInteger();
bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
unsigned Opc =
IsFormat ? AMDGPUISD::BUFFER_LOAD_FORMAT : AMDGPUISD::BUFFER_LOAD;
if (IsD16) {
return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, M, DAG, Ops);
}
// Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
if (!IsD16 && !LoadVT.isVector() && EltType.getSizeInBits() < 32)
return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
if (isTypeLegal(LoadVT)) {
return getMemIntrinsicNode(Opc, DL, M->getVTList(), Ops, IntVT,
M->getMemOperand(), DAG);
}
EVT CastVT = getEquivalentMemType(*DAG.getContext(), LoadVT);
SDVTList VTList = DAG.getVTList(CastVT, MVT::Other);
SDValue MemNode = getMemIntrinsicNode(Opc, DL, VTList, Ops, CastVT,
M->getMemOperand(), DAG);
return DAG.getMergeValues(
{DAG.getNode(ISD::BITCAST, DL, LoadVT, MemNode), MemNode.getValue(1)},
DL);
}
static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
unsigned CondCode = CD->getZExtValue();
if (!ICmpInst::isIntPredicate(static_cast<ICmpInst::Predicate>(CondCode)))
return DAG.getUNDEF(VT);
ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
SDLoc DL(N);
EVT CmpVT = LHS.getValueType();
if (CmpVT == MVT::i16 && !TLI.isTypeLegal(MVT::i16)) {
unsigned PromoteOp = ICmpInst::isSigned(IcInput) ?
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
LHS = DAG.getNode(PromoteOp, DL, MVT::i32, LHS);
RHS = DAG.getNode(PromoteOp, DL, MVT::i32, RHS);
}
ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, DL, CCVT, LHS, RHS,
DAG.getCondCode(CCOpcode));
if (VT.bitsEq(CCVT))
return SetCC;
return DAG.getZExtOrTrunc(SetCC, DL, VT);
}
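// For example, lowerICMPIntrinsic turns llvm.amdgcn.icmp(i32 %a, i32 %b,
// i32 32), where 32 encodes ICmpInst::ICMP_EQ, into (AMDGPUISD::SETCC %a, %b,
// seteq), whose wave-sized mask result is then zero-extended or truncated to
// the intrinsic's return type.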
static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
unsigned CondCode = CD->getZExtValue();
if (!FCmpInst::isFPPredicate(static_cast<FCmpInst::Predicate>(CondCode)))
return DAG.getUNDEF(VT);
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
EVT CmpVT = Src0.getValueType();
SDLoc SL(N);
if (CmpVT == MVT::f16 && !TLI.isTypeLegal(CmpVT)) {
Src0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
Src1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
}
FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src0,
Src1, DAG.getCondCode(CCOpcode));
if (VT.bitsEq(CCVT))
return SetCC;
return DAG.getZExtOrTrunc(SetCC, SL, VT);
}
static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(1);
SDLoc SL(N);
if (Src.getOpcode() == ISD::SETCC) {
// (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...)
return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0),
Src.getOperand(1), Src.getOperand(2));
}
if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
// (ballot 0) -> 0
if (Arg->isZero())
return DAG.getConstant(0, SL, VT);
// (ballot 1) -> EXEC/EXEC_LO
if (Arg->isOne()) {
Register Exec;
if (VT.getScalarSizeInBits() == 32)
Exec = AMDGPU::EXEC_LO;
else if (VT.getScalarSizeInBits() == 64)
Exec = AMDGPU::EXEC;
else
return SDValue();
return DAG.getCopyFromReg(DAG.getEntryNode(), SL, Exec, VT);
}
}
// (ballot (i1 $src)) -> (AMDGPUISD::SETCC (i32 (zext $src)) (i32 0)
// ISD::SETNE)
return DAG.getNode(
AMDGPUISD::SETCC, SL, VT, DAG.getZExtOrTrunc(Src, SL, MVT::i32),
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
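// For example, on a wave64 target lowerBALLOTIntrinsic turns ballot(i1 true)
// into a copy from EXEC (i64), while a ballot of a setcc forwards the compare
// operands into a divergent AMDGPUISD::SETCC, yielding one result bit per
// active lane.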
void SITargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
switch (N->getOpcode()) {
case ISD::INSERT_VECTOR_ELT: {
if (SDValue Res = lowerINSERT_VECTOR_ELT(SDValue(N, 0), DAG))
Results.push_back(Res);
return;
}
case ISD::EXTRACT_VECTOR_ELT: {
if (SDValue Res = lowerEXTRACT_VECTOR_ELT(SDValue(N, 0), DAG))
Results.push_back(Res);
return;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IID) {
case Intrinsic::amdgcn_cvt_pkrtz: {
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
SDLoc SL(N);
SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32,
Src0, Src1);
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
return;
}
case Intrinsic::amdgcn_cvt_pknorm_i16:
case Intrinsic::amdgcn_cvt_pknorm_u16:
case Intrinsic::amdgcn_cvt_pk_i16:
case Intrinsic::amdgcn_cvt_pk_u16: {
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
SDLoc SL(N);
unsigned Opcode;
if (IID == Intrinsic::amdgcn_cvt_pknorm_i16)
Opcode = AMDGPUISD::CVT_PKNORM_I16_F32;
else if (IID == Intrinsic::amdgcn_cvt_pknorm_u16)
Opcode = AMDGPUISD::CVT_PKNORM_U16_F32;
else if (IID == Intrinsic::amdgcn_cvt_pk_i16)
Opcode = AMDGPUISD::CVT_PK_I16_I32;
else
Opcode = AMDGPUISD::CVT_PK_U16_U32;
EVT VT = N->getValueType(0);
if (isTypeLegal(VT))
Results.push_back(DAG.getNode(Opcode, SL, VT, Src0, Src1));
else {
SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
}
return;
}
}
break;
}
case ISD::INTRINSIC_W_CHAIN: {
if (SDValue Res = LowerINTRINSIC_W_CHAIN(SDValue(N, 0), DAG)) {
if (Res.getOpcode() == ISD::MERGE_VALUES) {
// FIXME: Hacky
for (unsigned I = 0; I < Res.getNumOperands(); I++) {
Results.push_back(Res.getOperand(I));
}
} else {
Results.push_back(Res);
Results.push_back(Res.getValue(1));
}
return;
}
break;
}
case ISD::SELECT: {
SDLoc SL(N);
EVT VT = N->getValueType(0);
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
SDValue LHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(2));
EVT SelectVT = NewVT;
if (NewVT.bitsLT(MVT::i32)) {
LHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, LHS);
RHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, RHS);
SelectVT = MVT::i32;
}
SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, SelectVT,
N->getOperand(0), LHS, RHS);
if (NewVT != SelectVT)
NewSelect = DAG.getNode(ISD::TRUNCATE, SL, NewVT, NewSelect);
Results.push_back(DAG.getNode(ISD::BITCAST, SL, VT, NewSelect));
return;
}
case ISD::FNEG: {
if (N->getValueType(0) != MVT::v2f16)
break;
SDLoc SL(N);
SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::i32, N->getOperand(0));
SDValue Op = DAG.getNode(ISD::XOR, SL, MVT::i32,
BC,
DAG.getConstant(0x80008000, SL, MVT::i32));
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Op));
return;
}
case ISD::FABS: {
if (N->getValueType(0) != MVT::v2f16)
break;
SDLoc SL(N);
SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::i32, N->getOperand(0));
SDValue Op = DAG.getNode(ISD::AND, SL, MVT::i32,
BC,
DAG.getConstant(0x7fff7fff, SL, MVT::i32));
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Op));
return;
}
default:
break;
}
}
/// Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) {
SDNode *Parent = Value.getNode();
for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
I != E; ++I) {
if (I.getUse().get() != Value)
continue;
if (I->getOpcode() == Opcode)
return *I;
}
return nullptr;
}
unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
case Intrinsic::amdgcn_if:
return AMDGPUISD::IF;
case Intrinsic::amdgcn_else:
return AMDGPUISD::ELSE;
case Intrinsic::amdgcn_loop:
return AMDGPUISD::LOOP;
case Intrinsic::amdgcn_end_cf:
llvm_unreachable("should not occur");
default:
return 0;
}
}
// break, if_break, else_break are all only used as inputs to loop, not
// directly as branch conditions.
return 0;
}
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
return (GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
// FIXME: Either avoid relying on address space here or change the default
// address space for functions to avoid the explicit check.
return (GV->getValueType()->isFunctionTy() ||
!isNonGlobalAddrSpace(GV->getAddressSpace())) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const {
return !shouldEmitFixup(GV) && !shouldEmitGOTReloc(GV);
}
bool SITargetLowering::shouldUseLDSConstAddress(const GlobalValue *GV) const {
if (!GV->hasExternalLinkage())
return true;
const auto OS = getTargetMachine().getTargetTriple().getOS();
return OS == Triple::AMDHSA || OS == Triple::AMDPAL;
}
/// This transforms the control flow intrinsics to get the branch destination
/// as the last parameter, and also switches the branch target with BR if the
/// need arises.
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SelectionDAG &DAG) const {
SDLoc DL(BRCOND);
SDNode *Intr = BRCOND.getOperand(1).getNode();
SDValue Target = BRCOND.getOperand(2);
SDNode *BR = nullptr;
SDNode *SetCC = nullptr;
if (Intr->getOpcode() == ISD::SETCC) {
// As long as we negate the condition everything is fine
SetCC = Intr;
Intr = SetCC->getOperand(0).getNode();
} else {
// Get the target from BR if we don't negate the condition
BR = findUser(BRCOND, ISD::BR);
assert(BR && "brcond missing unconditional branch user");
Target = BR->getOperand(1);
}
unsigned CFNode = isCFIntrinsic(Intr);
if (CFNode == 0) {
// This is a uniform branch so we don't need to legalize.
return BRCOND;
}
bool HaveChain = Intr->getOpcode() == ISD::INTRINSIC_VOID ||
Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN;
assert(!SetCC ||
(SetCC->getConstantOperandVal(1) == 1 &&
cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
ISD::SETNE));
// operands of the new intrinsic call
SmallVector<SDValue, 4> Ops;
if (HaveChain)
Ops.push_back(BRCOND.getOperand(0));
Ops.append(Intr->op_begin() + (HaveChain ? 2 : 1), Intr->op_end());
Ops.push_back(Target);
ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
// build the new intrinsic call
SDNode *Result = DAG.getNode(CFNode, DL, DAG.getVTList(Res), Ops).getNode();
if (!HaveChain) {
SDValue Ops[] = {
SDValue(Result, 0),
BRCOND.getOperand(0)
};
Result = DAG.getMergeValues(Ops, DL).getNode();
}
if (BR) {
// Give the branch instruction our target
SDValue Ops[] = {
BR->getOperand(0),
BRCOND.getOperand(2)
};
SDValue NewBR = DAG.getNode(ISD::BR, DL, BR->getVTList(), Ops);
DAG.ReplaceAllUsesWith(BR, NewBR.getNode());
}
SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
// Copy the intrinsic results to registers
for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
if (!CopyToReg)
continue;
Chain = DAG.getCopyToReg(
Chain, DL,
CopyToReg->getOperand(1),
SDValue(Result, i - 1),
SDValue());
DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
}
// Remove the old intrinsic from the chain
DAG.ReplaceAllUsesOfValueWith(
SDValue(Intr, Intr->getNumValues() - 1),
Intr->getOperand(0));
return Chain;
}
SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Checking the depth
if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0)
return DAG.getConstant(0, DL, VT);
MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
// Check for kernel and shader functions
if (Info->isEntryFunction())
return DAG.getConstant(0, DL, VT);
MachineFrameInfo &MFI = MF.getFrameInfo();
// There is a call to @llvm.returnaddress in this function
MFI.setReturnAddressIsTaken(true);
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
// Get the return address reg and mark it as an implicit live-in
Register Reg = MF.addLiveIn(TRI->getReturnAddressReg(MF), getRegClassFor(VT, Op.getNode()->isDivergent()));
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
SDValue Op,
const SDLoc &DL,
EVT VT) const {
return Op.getValueType().bitsLE(VT) ?
DAG.getNode(ISD::FP_EXTEND, DL, VT, Op) :
DAG.getNode(ISD::FP_ROUND, DL, VT, Op,
DAG.getTargetConstant(0, DL, MVT::i32));
}
SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f16 &&
"Do not know how to custom lower FP_ROUND for non-f16 type");
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
if (SrcVT != MVT::f64)
return Op;
SDLoc DL(Op);
SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
bool IsIEEEMode = Info->getMode().IEEE;
// FIXME: Assert during selection that this is only selected for
// ieee_mode. Currently a combine can produce the ieee version for non-ieee
// mode functions, but this happens to be OK since it's only done in cases
// where it is known that there is no sNaN.
if (IsIEEEMode)
return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16)
return splitBinaryVectorOp(Op, DAG);
return Op;
}
SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
bool isSigned = Op.getOpcode() == ISD::SMULO;
if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
const APInt &C = RHSC->getAPIntValue();
// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
if (C.isPowerOf2()) {
// smulo(x, signed_min) is the same as umulo(x, signed_min).
bool UseArithShift = isSigned && !C.isMinSignedValue();
SDValue ShiftAmt = DAG.getConstant(C.logBase2(), SL, MVT::i32);
SDValue Result = DAG.getNode(ISD::SHL, SL, VT, LHS, ShiftAmt);
SDValue Overflow = DAG.getSetCC(SL, MVT::i1,
DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
SL, VT, Result, ShiftAmt),
LHS, ISD::SETNE);
return DAG.getMergeValues({ Result, Overflow }, SL);
}
}
SDValue Result = DAG.getNode(ISD::MUL, SL, VT, LHS, RHS);
SDValue Top = DAG.getNode(isSigned ? ISD::MULHS : ISD::MULHU,
SL, VT, LHS, RHS);
SDValue Sign = isSigned
? DAG.getNode(ISD::SRA, SL, VT, Result,
DAG.getConstant(VT.getScalarSizeInBits() - 1, SL, MVT::i32))
: DAG.getConstant(0, SL, VT);
SDValue Overflow = DAG.getSetCC(SL, MVT::i1, Top, Sign, ISD::SETNE);
return DAG.getMergeValues({ Result, Overflow }, SL);
}
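// Worked example for the power-of-two path in lowerXMULO above: umulo(%x, 8)
// becomes { %x << 3, ((%x << 3) >> 3) != %x }; the logical shift round-trip
// changes the value exactly when the multiply overflowed.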
SDValue SITargetLowering::lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const {
if (Op->isDivergent()) {
// Select to V_MAD_[IU]64_[IU]32.
return Op;
}
if (Subtarget->hasSMulHi()) {
// Expand to S_MUL_I32 + S_MUL_HI_[IU]32.
return SDValue();
}
// The multiply is uniform but we would have to use V_MUL_HI_[IU]32 to
// calculate the high part, so we might as well do the whole thing with
// V_MAD_[IU]64_[IU]32.
return Op;
}
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->isTrapHandlerEnabled() ||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return lowerTrapEndpgm(Op, DAG);
if (Optional<uint8_t> HsaAbiVer = AMDGPU::getHsaAbiVersion(Subtarget)) {
switch (*HsaAbiVer) {
case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return lowerTrapHsaQueuePtr(Op, DAG);
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
return Subtarget->supportsGetDoorbellID() ?
lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG);
}
}
llvm_unreachable("Unknown trap handler");
}
SDValue SITargetLowering::lowerTrapEndpgm(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
}
SDValue SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT,
const SDLoc &DL, Align Alignment, ImplicitParameter Param) const {
MachineFunction &MF = DAG.getMachineFunction();
uint64_t Offset = getImplicitParameterOffset(MF, Param);
SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), Offset);
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
return DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, PtrInfo, Alignment,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
}
SDValue SITargetLowering::lowerTrapHsaQueuePtr(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
SDValue QueuePtr;
// For code object version 5, QueuePtr is passed through implicit kernarg.
if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
QueuePtr =
loadImplicitKernelArgument(DAG, MVT::i64, SL, Align(8), QUEUE_PTR);
} else {
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
if (UserSGPR == AMDGPU::NoRegister) {
// We probably are in a function incorrectly marked with
// amdgpu-no-queue-ptr. This is undefined. We don't want to delete the
// trap, so just use a null pointer.
QueuePtr = DAG.getConstant(0, SL, MVT::i64);
} else {
QueuePtr = CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, UserSGPR,
MVT::i64);
}
}
SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
QueuePtr, SDValue());
uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSATrap);
SDValue Ops[] = {
ToReg,
DAG.getTargetConstant(TrapID, SL, MVT::i16),
SGPR01,
ToReg.getValue(1)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}
SDValue SITargetLowering::lowerTrapHsa(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSATrap);
SDValue Ops[] = {
Chain,
DAG.getTargetConstant(TrapID, SL, MVT::i16)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}
SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
MachineFunction &MF = DAG.getMachineFunction();
if (!Subtarget->isTrapHandlerEnabled() ||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
DiagnosticInfoUnsupported NoTrap(MF.getFunction(),
"debugtrap handler not supported",
Op.getDebugLoc(),
DS_Warning);
LLVMContext &Ctx = MF.getFunction().getContext();
Ctx.diagnose(NoTrap);
return Chain;
}
uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSADebugTrap);
SDValue Ops[] = {
Chain,
DAG.getTargetConstant(TrapID, SL, MVT::i16)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}
SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
// FIXME: Use inline constants (src_{shared, private}_base) instead.
if (Subtarget->hasApertureRegs()) {
unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
unsigned Encoding =
AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
SDValue EncodingImm = DAG.getTargetConstant(Encoding, DL, MVT::i16);
SDValue ApertureReg = SDValue(
DAG.getMachineNode(AMDGPU::S_GETREG_B32, DL, MVT::i32, EncodingImm), 0);
SDValue ShiftAmount = DAG.getTargetConstant(WidthM1 + 1, DL, MVT::i32);
return DAG.getNode(ISD::SHL, DL, MVT::i32, ApertureReg, ShiftAmount);
}
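// With aperture registers, the branch above expands roughly to (shared base
// case, using the OFFSET/WIDTH_M1 constants assumed above):
//   s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
//   s_lshl_b32 s0, s0, 16
// leaving the 32-bit aperture base in s0.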
// For code object version 5, private_base and shared_base are passed through
// implicit kernargs.
if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
ImplicitParameter Param =
(AS == AMDGPUAS::LOCAL_ADDRESS) ? SHARED_BASE : PRIVATE_BASE;
return loadImplicitKernelArgument(DAG, MVT::i32, DL, Align(4), Param);
}
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
if (UserSGPR == AMDGPU::NoRegister) {
// We probably are in a function incorrectly marked with
// amdgpu-no-queue-ptr. This is undefined.
return DAG.getUNDEF(MVT::i32);
}
SDValue QueuePtr = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
SDValue Ptr =
DAG.getObjectPtrOffset(DL, QueuePtr, TypeSize::Fixed(StructOffset));
// TODO: Use custom target PseudoSourceValue.
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available, and it is not clear how we would get it.
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
commonAlignment(Align(64), StructOffset),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
}
/// Return true if the value is a known valid address, such that a null check is
/// not necessary.
static bool isKnownNonNull(SDValue Val, SelectionDAG &DAG,
const AMDGPUTargetMachine &TM, unsigned AddrSpace) {
if (isa<FrameIndexSDNode>(Val) || isa<GlobalAddressSDNode>(Val) ||
isa<BasicBlockSDNode>(Val))
return true;
if (auto *ConstVal = dyn_cast<ConstantSDNode>(Val))
return ConstVal->getSExtValue() != TM.getNullPointerValue(AddrSpace);
// TODO: Search through arithmetic, handle arguments and loads
// marked nonnull.
return false;
}
SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
SDValue Src = ASC->getOperand(0);
SDValue FlatNullPtr = DAG.getConstant(0, SL, MVT::i64);
unsigned SrcAS = ASC->getSrcAddressSpace();
const AMDGPUTargetMachine &TM =
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
// flat -> local/private
if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
unsigned DestAS = ASC->getDestAddressSpace();
if (DestAS == AMDGPUAS::LOCAL_ADDRESS ||
DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
if (isKnownNonNull(Src, DAG, TM, SrcAS))
return Ptr;
unsigned NullVal = TM.getNullPointerValue(DestAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
return DAG.getNode(ISD::SELECT, SL, MVT::i32, NonNull, Ptr,
SegmentNullPtr);
}
}
// local/private -> flat
if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
if (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), SL, DAG);
SDValue CvtPtr =
DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture);
CvtPtr = DAG.getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr);
if (isKnownNonNull(Src, DAG, TM, SrcAS))
return CvtPtr;
unsigned NullVal = TM.getNullPointerValue(SrcAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull
= DAG.getSetCC(SL, MVT::i1, Src, SegmentNullPtr, ISD::SETNE);
return DAG.getNode(ISD::SELECT, SL, MVT::i64, NonNull, CvtPtr,
FlatNullPtr);
}
}
if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
Op.getValueType() == MVT::i64) {
const SIMachineFunctionInfo *Info =
DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
SDValue Hi = DAG.getConstant(Info->get32BitAddressHighBits(), SL, MVT::i32);
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Hi);
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
}
if (ASC->getDestAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
Src.getValueType() == MVT::i64)
return DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
// global <-> flat are no-ops and never emitted.
const MachineFunction &MF = DAG.getMachineFunction();
DiagnosticInfoUnsupported InvalidAddrSpaceCast(
MF.getFunction(), "invalid addrspacecast", SL.getDebugLoc());
DAG.getContext()->diagnose(InvalidAddrSpaceCast);
return DAG.getUNDEF(ASC->getValueType(0));
}
// This lowers an INSERT_SUBVECTOR by extracting the individual elements from
// the small vector and inserting them into the big vector. That is better than
// the default expansion of doing it via a stack slot. Even though the use of
// the stack slot would be optimized away afterwards, the stack slot itself
// remains.
SDValue SITargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
SDValue Ins = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
EVT VecVT = Vec.getValueType();
EVT InsVT = Ins.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned InsNumElts = InsVT.getVectorNumElements();
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDLoc SL(Op);
for (unsigned I = 0; I != InsNumElts; ++I) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Ins,
DAG.getConstant(I, SL, MVT::i32));
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, VecVT, Vec, Elt,
DAG.getConstant(IdxVal + I, SL, MVT::i32));
}
return Vec;
}
SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
SDValue InsVal = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned VecSize = VecVT.getSizeInBits();
unsigned EltSize = EltVT.getSizeInBits();
SDLoc SL(Op);
// Specially handle the case of v4i16 with static indexing.
unsigned NumElts = VecVT.getVectorNumElements();
auto KIdx = dyn_cast<ConstantSDNode>(Idx);
if (NumElts == 4 && EltSize == 16 && KIdx) {
SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec);
SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BCVec,
DAG.getConstant(0, SL, MVT::i32));
SDValue HiHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BCVec,
DAG.getConstant(1, SL, MVT::i32));
SDValue LoVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, LoHalf);
SDValue HiVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, HiHalf);
unsigned Idx = KIdx->getZExtValue();
bool InsertLo = Idx < 2;
SDValue InsHalf = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, MVT::v2i16,
InsertLo ? LoVec : HiVec,
DAG.getNode(ISD::BITCAST, SL, MVT::i16, InsVal),
DAG.getConstant(InsertLo ? Idx : (Idx - 2), SL, MVT::i32));
InsHalf = DAG.getNode(ISD::BITCAST, SL, MVT::i32, InsHalf);
SDValue Concat = InsertLo ?
DAG.getBuildVector(MVT::v2i32, SL, { InsHalf, HiHalf }) :
DAG.getBuildVector(MVT::v2i32, SL, { LoHalf, InsHalf });
return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat);
}
// Static indexing does not lower to stack access, and hence there is no need
// for special custom lowering to avoid stack access.
if (isa<ConstantSDNode>(Idx))
return SDValue();
// Avoid stack access for dynamic indexing by custom lowering to
// v_bfi_b32 (v_bfm_b32 16, (shl idx, 16)), val, vec
assert(VecSize <= 64 && "Expected target vector size to be <= 64 bits");
MVT IntVT = MVT::getIntegerVT(VecSize);
// Convert vector index to bit-index and get the required bit mask.
assert(isPowerOf2_32(EltSize));
SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, ScaleFactor);
SDValue BFM = DAG.getNode(ISD::SHL, SL, IntVT,
DAG.getConstant(0xffff, SL, IntVT),
ScaledIdx);
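// Worked example: inserting into element 2 of a v4i16 gives ScaledIdx =
// 2 << 4 = 32 and BFM = 0xffff << 32, i.e. a mask covering bits [47:32] of
// the 64-bit integer view of the vector.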
// 1. Create a congruent vector with the target value in each element.
SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
DAG.getSplatBuildVector(VecVT, SL, InsVal));
// 2. Mask off all other indices except the required index within (1).
SDValue LHS = DAG.getNode(ISD::AND, SL, IntVT, BFM, ExtVal);
// 3. Mask off the required index within the target vector.
SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
SDValue RHS = DAG.getNode(ISD::AND, SL, IntVT,
DAG.getNOT(SL, BFM, IntVT), BCVec);
// 4. Get (2) and (3) ORed into the target vector.
SDValue BFI = DAG.getNode(ISD::OR, SL, IntVT, LHS, RHS);
return DAG.getNode(ISD::BITCAST, SL, VecVT, BFI);
}
SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
EVT ResultVT = Op.getValueType();
SDValue Vec = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
EVT VecVT = Vec.getValueType();
unsigned VecSize = VecVT.getSizeInBits();
EVT EltVT = VecVT.getVectorElementType();
DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
// Make sure we do any optimizations that will make it easier to fold
// source modifiers before obscuring it with bit operations.
// XXX - Why doesn't this get called when vector_shuffle is expanded?
if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
return Combined;
if (VecSize == 128 || VecSize == 256) {
SDValue Lo, Hi;
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
if (VecSize == 128) {
SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
Lo = DAG.getBitcast(LoVT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
DAG.getConstant(0, SL, MVT::i32)));
Hi = DAG.getBitcast(HiVT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
DAG.getConstant(1, SL, MVT::i32)));
} else {
assert(VecSize == 256);
SDValue V2 = DAG.getBitcast(MVT::v4i64, Vec);
SDValue Parts[4];
for (unsigned P = 0; P < 4; ++P) {
Parts[P] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
DAG.getConstant(P, SL, MVT::i32));
}
Lo = DAG.getBitcast(LoVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i64,
Parts[0], Parts[1]));
Hi = DAG.getBitcast(HiVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i64,
Parts[2], Parts[3]));
}
EVT IdxVT = Idx.getValueType();
unsigned NElem = VecVT.getVectorNumElements();
assert(isPowerOf2_32(NElem));
SDValue IdxMask = DAG.getConstant(NElem / 2 - 1, SL, IdxVT);
SDValue NewIdx = DAG.getNode(ISD::AND, SL, IdxVT, Idx, IdxMask);
SDValue Half = DAG.getSelectCC(SL, Idx, IdxMask, Hi, Lo, ISD::SETUGT);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Half, NewIdx);
}
assert(VecSize <= 64);
MVT IntVT = MVT::getIntegerVT(VecSize);
// If Vec is just a SCALAR_TO_VECTOR, then use the scalar integer directly.
SDValue VecBC = peekThroughBitcasts(Vec);
if (VecBC.getOpcode() == ISD::SCALAR_TO_VECTOR) {
SDValue Src = VecBC.getOperand(0);
Src = DAG.getBitcast(Src.getValueType().changeTypeToInteger(), Src);
Vec = DAG.getAnyExtOrTrunc(Src, SL, IntVT);
}
unsigned EltSize = EltVT.getSizeInBits();
assert(isPowerOf2_32(EltSize));
SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
// Convert vector index to bit-index (* EltSize)
SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, ScaleFactor);
SDValue BC = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
SDValue Elt = DAG.getNode(ISD::SRL, SL, IntVT, BC, ScaledIdx);
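// For example, extracting element 3 of a v4i16 shifts the 64-bit integer
// view right by 3 << 4 = 48 bits; the low 16 bits of Elt then hold the
// requested element.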
if (ResultVT == MVT::f16) {
SDValue Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i16, Elt);
return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
}
return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT);
}
static bool elementPairIsContiguous(ArrayRef<int> Mask, int Elt) {
assert(Elt % 2 == 0);
return Mask[Elt + 1] == Mask[Elt] + 1 && (Mask[Elt] % 2 == 0);
}
SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
EVT ResultVT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
EVT PackVT = ResultVT.isInteger() ? MVT::v2i16 : MVT::v2f16;
EVT EltVT = PackVT.getVectorElementType();
int SrcNumElts = Op.getOperand(0).getValueType().getVectorNumElements();
// vector_shuffle <0,1,6,7> lhs, rhs
// -> concat_vectors (extract_subvector lhs, 0), (extract_subvector rhs, 2)
//
// vector_shuffle <6,7,2,3> lhs, rhs
// -> concat_vectors (extract_subvector rhs, 2), (extract_subvector lhs, 2)
//
// vector_shuffle <6,7,0,1> lhs, rhs
// -> concat_vectors (extract_subvector rhs, 2), (extract_subvector lhs, 0)
// Avoid scalarizing when both halves are reading from consecutive elements.
SmallVector<SDValue, 4> Pieces;
for (int I = 0, N = ResultVT.getVectorNumElements(); I != N; I += 2) {
if (elementPairIsContiguous(SVN->getMask(), I)) {
const int Idx = SVN->getMaskElt(I);
int VecIdx = Idx < SrcNumElts ? 0 : 1;
int EltIdx = Idx < SrcNumElts ? Idx : Idx - SrcNumElts;
SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL,
PackVT, SVN->getOperand(VecIdx),
DAG.getConstant(EltIdx, SL, MVT::i32));
Pieces.push_back(SubVec);
} else {
const int Idx0 = SVN->getMaskElt(I);
const int Idx1 = SVN->getMaskElt(I + 1);
int VecIdx0 = Idx0 < SrcNumElts ? 0 : 1;
int VecIdx1 = Idx1 < SrcNumElts ? 0 : 1;
int EltIdx0 = Idx0 < SrcNumElts ? Idx0 : Idx0 - SrcNumElts;
int EltIdx1 = Idx1 < SrcNumElts ? Idx1 : Idx1 - SrcNumElts;
SDValue Vec0 = SVN->getOperand(VecIdx0);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec0, DAG.getConstant(EltIdx0, SL, MVT::i32));
SDValue Vec1 = SVN->getOperand(VecIdx1);
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec1, DAG.getConstant(EltIdx1, SL, MVT::i32));
Pieces.push_back(DAG.getBuildVector(PackVT, SL, { Elt0, Elt1 }));
}
}
return DAG.getNode(ISD::CONCAT_VECTORS, SL, ResultVT, Pieces);
}
SDValue SITargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue SVal = Op.getOperand(0);
EVT ResultVT = Op.getValueType();
EVT SValVT = SVal.getValueType();
SDValue UndefVal = DAG.getUNDEF(SValVT);
SDLoc SL(Op);
SmallVector<SDValue, 8> VElts;
VElts.push_back(SVal);
for (int I = 1, E = ResultVT.getVectorNumElements(); I < E; ++I)
VElts.push_back(UndefVal);
return DAG.getBuildVector(ResultVT, SL, VElts);
}
SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
EVT VT = Op.getValueType();
if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
VT == MVT::v8i16 || VT == MVT::v8f16) {
EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
VT.getVectorNumElements() / 2);
MVT HalfIntVT = MVT::getIntegerVT(HalfVT.getSizeInBits());
// Turn into pair of packed build_vectors.
// TODO: Special case for constants that can be materialized with s_mov_b64.
SmallVector<SDValue, 4> LoOps, HiOps;
for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I != E; ++I) {
LoOps.push_back(Op.getOperand(I));
HiOps.push_back(Op.getOperand(I + E));
}
SDValue Lo = DAG.getBuildVector(HalfVT, SL, LoOps);
SDValue Hi = DAG.getBuildVector(HalfVT, SL, HiOps);
SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Lo);
SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Hi);
SDValue Blend = DAG.getBuildVector(MVT::getVectorVT(HalfIntVT, 2), SL,
{ CastLo, CastHi });
return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
}
if (VT == MVT::v16i16 || VT == MVT::v16f16) {
EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
VT.getVectorNumElements() / 4);
MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits());
SmallVector<SDValue, 4> Parts[4];
for (unsigned I = 0, E = VT.getVectorNumElements() / 4; I != E; ++I) {
for (unsigned P = 0; P < 4; ++P)
Parts[P].push_back(Op.getOperand(I + P * E));
}
SDValue Casts[4];
for (unsigned P = 0; P < 4; ++P) {
SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]);
Casts[P] = DAG.getNode(ISD::BITCAST, SL, QuarterIntVT, Vec);
}
SDValue Blend =
DAG.getBuildVector(MVT::getVectorVT(QuarterIntVT, 4), SL, Casts);
return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
}
assert(VT == MVT::v2f16 || VT == MVT::v2i16);
assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
// Avoid adding defined bits with the zero_extend.
if (Hi.isUndef()) {
Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
}
Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
DAG.getConstant(16, SL, MVT::i32));
if (Lo.isUndef())
return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
return DAG.getNode(ISD::BITCAST, SL, VT, Or);
}
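// For example, without VOP3P lowerBUILD_VECTOR emits build_vector (f16 %lo,
// f16 %hi) as (or (zext (bitcast %lo)), (shl (zext (bitcast %hi)), 16)),
// bitcast back to v2f16.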
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitGOTReloc(GA->getGlobal());
}
static SDValue
buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
const SDLoc &DL, int64_t Offset, EVT PtrVT,
unsigned GAFlags = SIInstrInfo::MO_NONE) {
assert(isInt<32>(Offset + 4) && "32-bit offset is expected!");
// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
// lowered to the following code sequence:
//
// For constant address space:
// s_getpc_b64 s[0:1]
// s_add_u32 s0, s0, $symbol
// s_addc_u32 s1, s1, 0
//
// s_getpc_b64 returns the address of the s_add_u32 instruction and then
// a fixup or relocation is emitted to replace $symbol with a literal
// constant, which is a pc-relative offset from the encoding of the $symbol
// operand to the global variable.
//
// For global address space:
// s_getpc_b64 s[0:1]
// s_add_u32 s0, s0, $symbol@{gotpc}rel32@lo
// s_addc_u32 s1, s1, $symbol@{gotpc}rel32@hi
//
// s_getpc_b64 returns the address of the s_add_u32 instruction and then
// fixups or relocations are emitted to replace $symbol@*@lo and
// $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
// which is a 64-bit pc-relative offset from the encoding of the $symbol
// operand to the global variable.
//
// What we want here is an offset from the value returned by s_getpc
// (which is the address of the s_add_u32 instruction) to the global
// variable, but since the encoding of $symbol starts 4 bytes after the start
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
// small. This requires us to add 4 to the global variable offset in order to
// compute the correct address. Similarly for the s_addc_u32 instruction, the
// encoding of $symbol starts 12 bytes after the start of the s_add_u32
// instruction.
SDValue PtrLo =
DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags);
SDValue PtrHi;
if (GAFlags == SIInstrInfo::MO_NONE) {
PtrHi = DAG.getTargetConstant(0, DL, MVT::i32);
} else {
PtrHi =
DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 12, GAFlags + 1);
}
return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, PtrLo, PtrHi);
}
SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
SDLoc DL(GSD);
EVT PtrVT = Op.getValueType();
const GlobalValue *GV = GSD->getGlobal();
if ((GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
shouldUseLDSConstAddress(GV)) ||
GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
GV->hasExternalLinkage()) {
Type *Ty = GV->getValueType();
// HIP uses an unsized array `extern __shared__ T s[]` or a similar
// zero-sized type in other languages to declare dynamic shared memory
// whose size is not known at compile time. Such arrays are allocated by
// the runtime and placed directly after the statically allocated ones.
// They all share the same offset.
if (DAG.getDataLayout().getTypeAllocSize(Ty).isZero()) {
assert(PtrVT == MVT::i32 && "32-bit pointer is expected.");
// Adjust alignment for that dynamic shared memory array.
MFI->setDynLDSAlign(DAG.getDataLayout(), *cast<GlobalVariable>(GV));
return SDValue(
DAG.getMachineNode(AMDGPU::GET_GROUPSTATICSIZE, DL, PtrVT), 0);
}
}
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
}
if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, GSD->getOffset(),
SIInstrInfo::MO_ABS32_LO);
return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
}
if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
else if (shouldEmitPCReloc(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
SIInstrInfo::MO_REL32);
SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
SIInstrInfo::MO_GOTPCREL32);
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
Align Alignment = DataLayout.getABITypeAlign(PtrTy);
MachinePointerInfo PtrInfo
= MachinePointerInfo::getGOT(DAG.getMachineFunction());
return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Alignment,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
}
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain,
const SDLoc &DL, SDValue V) const {
// We can't use S_MOV_B32 directly, because there is no way to specify m0 as
// the destination register.
//
// We can't use CopyToReg, because MachineCSE won't combine COPY instructions,
// so we will end up with redundant moves to m0.
//
// We use a pseudo to ensure we emit s_mov_b32 with m0 as the direct result.
// A Null SDValue creates a glue result.
SDNode *M0 = DAG.getMachineNode(AMDGPU::SI_INIT_M0, DL, MVT::Other, MVT::Glue,
V, Chain);
return SDValue(M0, 0);
}
SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
SDValue Op,
MVT VT,
unsigned Offset) const {
SDLoc SL(Op);
SDValue Param = lowerKernargMemParameter(
DAG, MVT::i32, MVT::i32, SL, DAG.getEntryNode(), Offset, Align(4), false);
// The local size values will have the hi 16-bits as zero.
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
DAG.getValueType(VT));
}
static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
EVT VT) {
DiagnosticInfoUnsupported BadIntrin(DAG.getMachineFunction().getFunction(),
"non-hsa intrinsic with hsa target",
DL.getDebugLoc());
DAG.getContext()->diagnose(BadIntrin);
return DAG.getUNDEF(VT);
}
static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
EVT VT) {
DiagnosticInfoUnsupported BadIntrin(DAG.getMachineFunction().getFunction(),
"intrinsic not supported on subtarget",
DL.getDebugLoc());
DAG.getContext()->diagnose(BadIntrin);
return DAG.getUNDEF(VT);
}
static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
ArrayRef<SDValue> Elts) {
assert(!Elts.empty());
MVT Type;
unsigned NumElts = Elts.size();
if (NumElts <= 8) {
Type = MVT::getVectorVT(MVT::f32, NumElts);
} else {
assert(Elts.size() <= 16);
Type = MVT::v16f32;
NumElts = 16;
}
SmallVector<SDValue, 16> VecElts(NumElts);
for (unsigned i = 0; i < Elts.size(); ++i) {
SDValue Elt = Elts[i];
if (Elt.getValueType() != MVT::f32)
Elt = DAG.getBitcast(MVT::f32, Elt);
VecElts[i] = Elt;
}
for (unsigned i = Elts.size(); i < NumElts; ++i)
VecElts[i] = DAG.getUNDEF(MVT::f32);
if (NumElts == 1)
return VecElts[0];
return DAG.getBuildVector(Type, DL, VecElts);
}
static SDValue padEltsToUndef(SelectionDAG &DAG, const SDLoc &DL, EVT CastVT,
SDValue Src, int ExtraElts) {
EVT SrcVT = Src.getValueType();
SmallVector<SDValue, 8> Elts;
if (SrcVT.isVector())
DAG.ExtractVectorElements(Src, Elts);
else
Elts.push_back(Src);
SDValue Undef = DAG.getUNDEF(SrcVT.getScalarType());
while (ExtraElts--)
Elts.push_back(Undef);
return DAG.getBuildVector(CastVT, DL, Elts);
}
// Reconstruct the required return value for an image load intrinsic.
// This is more complicated due to the optional use of TexFailCtrl, which
// means the required return type is an aggregate.
static SDValue constructRetValue(SelectionDAG &DAG,
MachineSDNode *Result,
ArrayRef<EVT> ResultTypes,
bool IsTexFail, bool Unpacked, bool IsD16,
int DMaskPop, int NumVDataDwords,
const SDLoc &DL) {
// Determine the required return type. This is the same regardless of the
// IsTexFail flag.
EVT ReqRetVT = ResultTypes[0];
int ReqRetNumElts = ReqRetVT.isVector() ? ReqRetVT.getVectorNumElements() : 1;
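// With packed D16 two half elements share one dword, so halve the counts;
// unpacked D16 and non-D16 results use one dword per element.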
int NumDataDwords = (!IsD16 || Unpacked) ?
ReqRetNumElts : (ReqRetNumElts + 1) / 2;
int MaskPopDwords = (!IsD16 || Unpacked) ?
DMaskPop : (DMaskPop + 1) / 2;
MVT DataDwordVT = NumDataDwords == 1 ?
MVT::i32 : MVT::getVectorVT(MVT::i32, NumDataDwords);
MVT MaskPopVT = MaskPopDwords == 1 ?
MVT::i32 : MVT::getVectorVT(MVT::i32, MaskPopDwords);
SDValue Data(Result, 0);
SDValue TexFail;
if (DMaskPop > 0 && Data.getValueType() != MaskPopVT) {
SDValue ZeroIdx = DAG.getConstant(0, DL, MVT::i32);
if (MaskPopVT.isVector()) {
Data = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskPopVT,
SDValue(Result, 0), ZeroIdx);
} else {
Data = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskPopVT,
SDValue(Result, 0), ZeroIdx);
}
}
if (DataDwordVT.isVector())
Data = padEltsToUndef(DAG, DL, DataDwordVT, Data,
NumDataDwords - MaskPopDwords);
if (IsD16)
Data = adjustLoadValueTypeImpl(Data, ReqRetVT, DL, DAG, Unpacked);
EVT LegalReqRetVT = ReqRetVT;
if (!ReqRetVT.isVector()) {
if (!Data.getValueType().isInteger())
Data = DAG.getNode(ISD::BITCAST, DL,
Data.getValueType().changeTypeToInteger(), Data);
Data = DAG.getNode(ISD::TRUNCATE, DL, ReqRetVT.changeTypeToInteger(), Data);
} else {
// We need to widen the return vector to a legal type
if ((ReqRetVT.getVectorNumElements() % 2) == 1 &&
ReqRetVT.getVectorElementType().getSizeInBits() == 16) {
LegalReqRetVT =
EVT::getVectorVT(*DAG.getContext(), ReqRetVT.getVectorElementType(),
ReqRetVT.getVectorNumElements() + 1);
}
}
Data = DAG.getNode(ISD::BITCAST, DL, LegalReqRetVT, Data);
if (IsTexFail) {
TexFail =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, SDValue(Result, 0),
DAG.getConstant(MaskPopDwords, DL, MVT::i32));
return DAG.getMergeValues({Data, TexFail, SDValue(Result, 1)}, DL);
}
if (Result->getNumValues() == 1)
return Data;
return DAG.getMergeValues({Data, SDValue(Result, 1)}, DL);
}
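// Decode the TexFailCtrl immediate: bit 0 enables TFE and bit 1 enables
// LWE. Returns false if any unknown bits are set.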
static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
SDValue *LWE, bool &IsTexFail) {
auto TexFailCtrlConst = cast<ConstantSDNode>(TexFailCtrl.getNode());
uint64_t Value = TexFailCtrlConst->getZExtValue();
if (Value) {
IsTexFail = true;
}
SDLoc DL(TexFailCtrlConst);
*TFE = DAG.getTargetConstant((Value & 0x1) ? 1 : 0, DL, MVT::i32);
Value &= ~(uint64_t)0x1;
*LWE = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
Value &= ~(uint64_t)0x2;
return Value == 0;
}
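// Pack a run of 16-bit image address operands into dwords, pairing adjacent
// operands into v2 vectors and any-extending operands that must remain
// unpaired (the final operand and odd gradient components).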
static void packImage16bitOpsToDwords(SelectionDAG &DAG, SDValue Op,
MVT PackVectorVT,
SmallVectorImpl<SDValue> &PackedAddrs,
unsigned DimIdx, unsigned EndIdx,
unsigned NumGradients) {
SDLoc DL(Op);
for (unsigned I = DimIdx; I < EndIdx; I++) {
SDValue Addr = Op.getOperand(I);
// Gradients are packed with undef for each coordinate.
// In <hi 16 bit>,<lo 16 bit> notation, the registers look like this:
// 1D: undef,dx/dh; undef,dx/dv
// 2D: dy/dh,dx/dh; dy/dv,dx/dv
// 3D: dy/dh,dx/dh; undef,dz/dh; dy/dv,dx/dv; undef,dz/dv
if (((I + 1) >= EndIdx) ||
((NumGradients / 2) % 2 == 1 && (I == DimIdx + (NumGradients / 2) - 1 ||
I == DimIdx + NumGradients - 1))) {
if (Addr.getValueType() != MVT::i16)
Addr = DAG.getBitcast(MVT::i16, Addr);
Addr = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Addr);
} else {
Addr = DAG.getBuildVector(PackVectorVT, DL, {Addr, Op.getOperand(I + 1)});
I++;
}
Addr = DAG.getBitcast(MVT::f32, Addr);
PackedAddrs.push_back(Addr);
}
}
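// Lower an image (MIMG) intrinsic: work out the dmask and vdata dword
// count, pack 16-bit address and gradient operands, choose between NSA and
// contiguous addressing, select the per-generation MIMG opcode, and finally
// reconstruct the expected aggregate return value.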
SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG, bool WithChain) const {
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
unsigned IntrOpcode = Intr->BaseOpcode;
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
SmallVector<EVT, 3> ResultTypes(Op->values());
SmallVector<EVT, 3> OrigResultTypes(Op->values());
bool IsD16 = false;
bool IsG16 = false;
bool IsA16 = false;
SDValue VData;
int NumVDataDwords;
bool AdjustRetType = false;
// Offset of intrinsic arguments
const unsigned ArgOffset = WithChain ? 2 : 1;
unsigned DMask;
unsigned DMaskLanes = 0;
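// Image atomics carry no explicit dmask operand; derive the dmask and the
// number of vdata dwords from the width of the data operand instead.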
if (BaseOpcode->Atomic) {
VData = Op.getOperand(2);
bool Is64Bit = VData.getValueType() == MVT::i64;
if (BaseOpcode->AtomicX2) {
SDValue VData2 = Op.getOperand(3);
VData = DAG.getBuildVector(Is64Bit ? MVT::v2i64 : MVT::v2i32, DL,
{VData, VData2});
if (Is64Bit)
VData = DAG.getBitcast(MVT::v4i32, VData);
ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
DMask = Is64Bit ? 0xf : 0x3;
NumVDataDwords = Is64Bit ? 4 : 2;
} else {
DMask = Is64Bit ? 0x3 : 0x1;
NumVDataDwords = Is64Bit ? 2 : 1;
}
} else {
auto *DMaskConst =
cast<ConstantSDNode>(Op.getOperand(ArgOffset + Intr->DMaskIndex));
DMask = DMaskConst->getZExtValue();
DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
if (BaseOpcode->Store) {
VData = Op.getOperand(2);
MVT StoreVT = VData.getSimpleValueType();
if (StoreVT.getScalarType() == MVT::f16) {
if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
VData = handleD16VData(VData, DAG, true);
}
NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32;
} else {
// Work out the number of dwords based on the dmask popcount, the underlying
// type, and whether packing is supported.
MVT LoadVT = ResultTypes[0].getSimpleVT();
if (LoadVT.getScalarType() == MVT::f16) {
if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
}
// Confirm that the return type is large enough for the dmask specified
if ((LoadVT.isVector() && LoadVT.getVectorNumElements() < DMaskLanes) ||
(!LoadVT.isVector() && DMaskLanes > 1))
return Op;
// The sq block of gfx8 and gfx9 does not estimate register use correctly
// for d16 image_gather4, image_gather4_l, and image_gather4_lz
// instructions.
if (IsD16 && !Subtarget->hasUnpackedD16VMem() &&
!(BaseOpcode->Gather4 && Subtarget->hasImageGather4D16Bug()))
NumVDataDwords = (DMaskLanes + 1) / 2;
else
NumVDataDwords = DMaskLanes;
AdjustRetType = true;
}
}
unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd;
SmallVector<SDValue, 4> VAddrs;
// Check for 16-bit addresses or derivatives and pack them if so.
MVT VAddrVT =
Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();
MVT VAddrScalarVT = VAddrVT.getScalarType();
MVT GradPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
VAddrVT = Op.getOperand(ArgOffset + Intr->CoordStart).getSimpleValueType();
VAddrScalarVT = VAddrVT.getScalarType();
MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
// Push back extra arguments.
for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) {
if (IsA16 && (Op.getOperand(ArgOffset + I).getValueType() == MVT::f16)) {
assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument");
// Special handling of bias when A16 is on. Bias is of type half but
// occupies a full 32-bit dword.
SDValue Bias = DAG.getBuildVector(
MVT::v2f16, DL,
{Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});
VAddrs.push_back(Bias);
} else {
assert((!IsA16 || Intr->NumBiasArgs == 0 || I != Intr->BiasIndex) &&
"Bias needs to be converted to 16 bit in A16 mode");
VAddrs.push_back(Op.getOperand(ArgOffset + I));
}
}
if (BaseOpcode->Gradients && !ST->hasG16() && (IsA16 != IsG16)) {
// 16-bit gradients are supported, but they are tied to the A16 control,
// so both gradients and addresses must be 16-bit.
LLVM_DEBUG(
dbgs() << "Failed to lower image intrinsic: 16 bit addresses "
"require 16 bit args for both gradients and addresses\n");
return Op;
}
if (IsA16) {
if (!ST->hasA16()) {
LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
"support 16 bit addresses\n");
return Op;
}
}
// We've dealt with incorrect input, so we know that if IsA16 or IsG16
// is set then we have to compress/pack operands (either addresses,
// gradients, or both).
// In the case where A16 and gradients are tied (no G16 support), we have
// already verified that both IsA16 and IsG16 are true.
if (BaseOpcode->Gradients && IsG16 && ST->hasG16()) {
// Activate g16
const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
IntrOpcode = G16MappingInfo->G16; // set new opcode to variant with _g16
}
// Add gradients (packed or unpacked)
if (IsG16) {
// Pack the gradients
packImage16bitOpsToDwords(DAG, Op, GradPackVectorVT, VAddrs,
ArgOffset + Intr->GradientStart,
ArgOffset + Intr->CoordStart, Intr->NumGradients);
} else {
for (unsigned I = ArgOffset + Intr->GradientStart;
I < ArgOffset + Intr->CoordStart; I++)
VAddrs.push_back(Op.getOperand(I));
}
// Add addresses (packed or unpacked)
if (IsA16) {
packImage16bitOpsToDwords(DAG, Op, AddrPackVectorVT, VAddrs,
ArgOffset + Intr->CoordStart, VAddrEnd,
0 /* No gradients */);
} else {
// Add uncompressed address
for (unsigned I = ArgOffset + Intr->CoordStart; I < VAddrEnd; I++)
VAddrs.push_back(Op.getOperand(I));
}
// If the register allocator cannot place the address registers contiguously
// without introducing moves, then using the non-sequential address encoding
// is always preferable, since it saves VALU instructions and is usually
// neutral or better in terms of code size.
//
// However, we currently have no way of hinting to the register allocator that
// MIMG addresses should be placed contiguously when it is possible to do so,
// so force non-NSA for the common 2-address case as a heuristic.
//
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.
//
// TODO: we can actually allow partial NSA where the final register is a
// contiguous set of the remaining addresses.
// This could help where there are more addresses than supported.
bool UseNSA = ST->hasFeature(AMDGPU::FeatureNSAEncoding) &&
VAddrs.size() >= 3 &&
VAddrs.size() <= (unsigned)ST->getNSAMaxSize();
SDValue VAddr;
if (!UseNSA)
VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
SDValue Unorm;
if (!BaseOpcode->Sampler) {
Unorm = True;
} else {
auto UnormConst =
cast<ConstantSDNode>(Op.getOperand(ArgOffset + Intr->UnormIndex));
Unorm = UnormConst->getZExtValue() ? True : False;
}
SDValue TFE;
SDValue LWE;
SDValue TexFail = Op.getOperand(ArgOffset + Intr->TexFailCtrlIndex);
bool IsTexFail = false;
if (!parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail))
return Op;
if (IsTexFail) {
if (!DMaskLanes) {
// Expect an error flag since TFC is on and dmask is 0. Force dmask to be
// at least 1; otherwise the instruction will fail.
DMask = 0x1;
DMaskLanes = 1;
NumVDataDwords = 1;
}
NumVDataDwords += 1;
AdjustRetType = true;
}
// Something earlier tagged the return type as needing adjustment. This
// happens if the instruction is a load or has TexFailCtrl flags set.
if (AdjustRetType) {
// NumVDataDwords reflects the true number of dwords required in the return type
if (DMaskLanes == 0 && !BaseOpcode->Store) {
// This is a no-op load. It can be eliminated.
SDValue Undef = DAG.getUNDEF(Op.getValueType());
if (isa<MemSDNode>(Op))
return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
return Undef;
}
EVT NewVT = NumVDataDwords > 1 ?
EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumVDataDwords)
: MVT::i32;
ResultTypes[0] = NewVT;
if (ResultTypes.size() == 3) {
// The original result was an aggregate type used for TexFailCtrl results.
// The actual instruction returns a vector type, which has now been
// created. Remove the aggregate result.
ResultTypes.erase(&ResultTypes[1]);
}
}
unsigned CPol = cast<ConstantSDNode>(
Op.getOperand(ArgOffset + Intr->CachePolicyIndex))->getZExtValue();
if (BaseOpcode->Atomic)
CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
if (CPol & ~AMDGPU::CPol::ALL)
return Op;
SmallVector<SDValue, 26> Ops;
if (BaseOpcode->Store || BaseOpcode->Atomic)
Ops.push_back(VData); // vdata
if (UseNSA)
append_range(Ops, VAddrs);
else
Ops.push_back(VAddr);
Ops.push_back(Op.getOperand(ArgOffset + Intr->RsrcIndex));
if (BaseOpcode->Sampler)
Ops.push_back(Op.getOperand(ArgOffset + Intr->SampIndex));
Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
if (IsGFX10Plus)
Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
Ops.push_back(Unorm);
Ops.push_back(DAG.getTargetConstant(CPol, DL, MVT::i32));
Ops.push_back(IsA16 && // r128, a16 for gfx9
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
if (IsGFX10Plus)
Ops.push_back(IsA16 ? True : False);
if (!Subtarget->hasGFX90AInsts()) {
Ops.push_back(TFE); //tfe
} else if (cast<ConstantSDNode>(TFE)->getZExtValue()) {
report_fatal_error("TFE is not supported on this GPU");
}
Ops.push_back(LWE); // lwe
if (!IsGFX10Plus)
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
Ops.push_back(IsD16 ? True : False);
if (isa<MemSDNode>(Op))
Ops.push_back(Op.getOperand(0)); // chain
int NumVAddrDwords =
UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32;
int Opcode = -1;
if (IsGFX11Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx11Default,
NumVDataDwords, NumVAddrDwords);
} else if (IsGFX10Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx10NSA
: AMDGPU::MIMGEncGfx10Default,
NumVDataDwords, NumVAddrDwords);
} else {
if (Subtarget->hasGFX90AInsts()) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
report_fatal_error(
"requested image instruction is not supported on this GPU");
}
if (Opcode == -1 &&
Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
NumVDataDwords, NumVAddrDwords);
}
assert(Opcode != -1);
MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
if (auto MemOp = dyn_cast<MemSDNode>(Op)) {
MachineMemOperand *MemRef = MemOp->getMemOperand();
DAG.setNodeMemRefs(NewNode, {MemRef});
}
if (BaseOpcode->AtomicX2) {
SmallVector<SDValue, 1> Elt;
DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
}
if (BaseOpcode->Store)
return SDValue(NewNode, 0);
return constructRetValue(DAG, NewNode,
OrigResultTypes, IsTexFail,
Subtarget->hasUnpackedD16VMem(), IsD16,
DMaskLanes, NumVDataDwords, DL);
}
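// Lower an s_buffer_load intrinsic. A uniform offset selects an SMEM load
// (widening vec3 results to vec4); a divergent offset falls back to one or
// more MUBUF loads of up to four dwords each, assuming an unswizzled buffer.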
SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
SDValue Offset, SDValue CachePolicy,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const DataLayout &DataLayout = DAG.getDataLayout();
Align Alignment =
DataLayout.getABITypeAlign(VT.getTypeForEVT(*DAG.getContext()));
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
VT.getStoreSize(), Alignment);
if (!Offset->isDivergent()) {
SDValue Ops[] = {
Rsrc,
Offset, // Offset
CachePolicy
};
// Widen vec3 load to vec4.
if (VT.isVector() && VT.getVectorNumElements() == 3) {
EVT WidenedVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
auto WidenedOp = DAG.getMemIntrinsicNode(
AMDGPUISD::SBUFFER_LOAD, DL, DAG.getVTList(WidenedVT), Ops, WidenedVT,
MF.getMachineMemOperand(MMO, 0, WidenedVT.getStoreSize()));
auto Subvector = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WidenedOp,
DAG.getVectorIdxConstant(0, DL));
return Subvector;
}
return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_LOAD, DL,
DAG.getVTList(VT), Ops, VT, MMO);
}
// We have a divergent offset. Emit a MUBUF buffer load instead. We can
// assume that the buffer is unswizzled.
SmallVector<SDValue, 4> Loads;
unsigned NumLoads = 1;
MVT LoadVT = VT.getSimpleVT();
unsigned NumElts = LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
assert((LoadVT.getScalarType() == MVT::i32 ||
LoadVT.getScalarType() == MVT::f32));
if (NumElts == 8 || NumElts == 16) {
NumLoads = NumElts / 4;
LoadVT = MVT::getVectorVT(LoadVT.getScalarType(), 4);
}
SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
SDValue Ops[] = {
DAG.getEntryNode(), // Chain
Rsrc, // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
{}, // voffset
{}, // soffset
{}, // offset
CachePolicy, // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
// Use the alignment to ensure that the required offsets will fit into the
// immediate offsets.
setBufferOffsets(Offset, DAG, &Ops[3],
NumLoads > 1 ? Align(16 * NumLoads) : Align(4));
uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
for (unsigned i = 0; i < NumLoads; ++i) {
Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
Loads.push_back(getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList, Ops,
LoadVT, MMO, DAG));
}
if (NumElts == 8 || NumElts == 16)
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Loads);
return Loads[0];
}
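// Lower a workitem ID query. If the dimension is known to be degenerate the
// result folds to the constant 0; otherwise load the input VGPR and, for
// unpacked IDs, attach an AssertZext with the known value range.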
SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
unsigned Dim,
const ArgDescriptor &Arg) const {
SDLoc SL(Op);
MachineFunction &MF = DAG.getMachineFunction();
unsigned MaxID = Subtarget->getMaxWorkitemID(MF.getFunction(), Dim);
if (MaxID == 0)
return DAG.getConstant(0, SL, MVT::i32);
SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()), Arg);
// Don't bother inserting AssertZext for packed IDs since we're emitting the
// masking operations anyway.
//
// TODO: We could assert the top bit is 0 for the source copy.
if (Arg.isMasked())
return Val;
// Preserve the known bits after expansion to a copy.
EVT SmallVT =
EVT::getIntegerVT(*DAG.getContext(), 32 - countLeadingZeros(MaxID));
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Val,
DAG.getValueType(SmallVT));
}
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto MFI = MF.getInfo<SIMachineFunctionInfo>();
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
// TODO: Should this propagate fast-math-flags?
switch (IntrinsicID) {
case Intrinsic::amdgcn_implicit_buffer_ptr: {
if (getSubtarget()->isAmdHsaOrMesa(MF.getFunction()))
return emitNonHSAIntrinsicError(DAG, DL, VT);
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
}
case Intrinsic::amdgcn_dispatch_ptr:
case Intrinsic::amdgcn_queue_ptr: {
if (!Subtarget->isAmdHsaOrMesa(MF.getFunction())) {
DiagnosticInfoUnsupported BadIntrin(
MF.getFunction(), "unsupported hsa intrinsic without hsa target",
DL.getDebugLoc());
DAG.getContext()->diagnose(BadIntrin);
return DAG.getUNDEF(VT);
}
auto RegID = IntrinsicID == Intrinsic::amdgcn_dispatch_ptr ?
AMDGPUFunctionArgInfo::DISPATCH_PTR : AMDGPUFunctionArgInfo::QUEUE_PTR;
return getPreloadedValue(DAG, *MFI, VT, RegID);
}
case Intrinsic::amdgcn_implicitarg_ptr: {
if (MFI->isEntryFunction())
return getImplicitArgPtr(DAG, DL);
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
if (!AMDGPU::isKernel(MF.getFunction().getCallingConv())) {
// This only makes sense to call in a kernel, so just lower to null.
return DAG.getConstant(0, DL, VT);
}
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
}
case Intrinsic::amdgcn_dispatch_id: {
return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::DISPATCH_ID);
}
case Intrinsic::amdgcn_rcp:
return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq:
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq_legacy:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return SDValue();
case Intrinsic::amdgcn_rcp_legacy:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq_clamp: {
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Type *Type = VT.getTypeForEVT(*DAG.getContext());
APFloat Max = APFloat::getLargest(Type->getFltSemantics());
APFloat Min = APFloat::getLargest(Type->getFltSemantics(), true);
SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
DAG.getConstantFP(Max, DL, VT));
return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
DAG.getConstantFP(Min, DL, VT));
}
case Intrinsic::r600_read_ngroups_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_X, Align(4),
false);
case Intrinsic::r600_read_ngroups_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_Y, Align(4),
false);
case Intrinsic::r600_read_ngroups_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_Z, Align(4),
false);
case Intrinsic::r600_read_global_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_X,
Align(4), false);
case Intrinsic::r600_read_global_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Y,
Align(4), false);
case Intrinsic::r600_read_global_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Z,
Align(4), false);
case Intrinsic::r600_read_local_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_X);
case Intrinsic::r600_read_local_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Y);
case Intrinsic::r600_read_local_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
case Intrinsic::amdgcn_workgroup_id_x:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
case Intrinsic::amdgcn_workgroup_id_y:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
case Intrinsic::amdgcn_workgroup_id_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
case Intrinsic::amdgcn_lds_kernel_id: {
if (MFI->isEntryFunction())
return getLDSKernelId(DAG, DL);
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
}
case Intrinsic::amdgcn_workitem_id_x:
return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y:
return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
case Intrinsic::amdgcn_workitem_id_z:
return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
case Intrinsic::amdgcn_wavefrontsize:
return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
SDLoc(Op), MVT::i32);
case Intrinsic::amdgcn_s_buffer_load: {
unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
if (CPol & ~AMDGPU::CPol::ALL)
return Op;
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
DAG);
}
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
case Intrinsic::amdgcn_sin:
return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_cos:
return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_mul_u24:
return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_mul_i24:
return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_log_clamp: {
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return SDValue();
return emitRemovedIntrinsicError(DAG, DL, VT);
}
case Intrinsic::amdgcn_ldexp:
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT,
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_fract:
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_class:
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_div_fmas:
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(4));
case Intrinsic::amdgcn_div_fixup:
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_div_scale: {
const ConstantSDNode *Param = cast<ConstantSDNode>(Op.getOperand(3));
// Translate to the operands expected by the machine instruction. The
// first source operand must match one of the other source operands; the
// constant third intrinsic argument selects whether that is the numerator
// or the denominator.
SDValue Numerator = Op.getOperand(1);
SDValue Denominator = Op.getOperand(2);
// Note this operand order is the opposite of the machine instruction's,
// which is s0.f = Quotient, s1.f = Denominator, s2.f = Numerator. The
// intrinsic has the numerator as the first operand to match a normal
// division operation.
SDValue Src0 = Param->isAllOnes() ? Numerator : Denominator;
return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
Denominator, Numerator);
}
case Intrinsic::amdgcn_icmp: {
// There is a Pat that handles this variant, so return it as-is.
if (Op.getOperand(1).getValueType() == MVT::i1 &&
Op.getConstantOperandVal(2) == 0 &&
Op.getConstantOperandVal(3) == ICmpInst::Predicate::ICMP_NE)
return Op;
return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fcmp: {
return lowerFCMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_ballot:
return lowerBALLOTIntrinsic(*this, Op.getNode(), DAG);
case Intrinsic::amdgcn_fmed3:
return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_fdot2:
return DAG.getNode(AMDGPUISD::FDOT2, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(4));
case Intrinsic::amdgcn_fmul_legacy:
return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_sffbh:
return DAG.getNode(AMDGPUISD::FFBH_I32, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_sbfe:
return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_ubfe:
return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_cvt_pkrtz:
case Intrinsic::amdgcn_cvt_pknorm_i16:
case Intrinsic::amdgcn_cvt_pknorm_u16:
case Intrinsic::amdgcn_cvt_pk_i16:
case Intrinsic::amdgcn_cvt_pk_u16: {
// FIXME: Stop adding cast if v2f16/v2i16 are legal.
EVT VT = Op.getValueType();
unsigned Opcode;
if (IntrinsicID == Intrinsic::amdgcn_cvt_pkrtz)
Opcode = AMDGPUISD::CVT_PKRTZ_F16_F32;
else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_i16)
Opcode = AMDGPUISD::CVT_PKNORM_I16_F32;
else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_u16)
Opcode = AMDGPUISD::CVT_PKNORM_U16_F32;
else if (IntrinsicID == Intrinsic::amdgcn_cvt_pk_i16)
Opcode = AMDGPUISD::CVT_PK_I16_I32;
else
Opcode = AMDGPUISD::CVT_PK_U16_U32;
if (isTypeLegal(VT))
return DAG.getNode(Opcode, DL, VT, Op.getOperand(1), Op.getOperand(2));
SDValue Node = DAG.getNode(Opcode, DL, MVT::i32,
Op.getOperand(1), Op.getOperand(2));
return DAG.getNode(ISD::BITCAST, DL, VT, Node);
}
case Intrinsic::amdgcn_fmad_ftz:
return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_if_break:
return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT,
Op->getOperand(1), Op->getOperand(2)), 0);
case Intrinsic::amdgcn_groupstaticsize: {
Triple::OSType OS = getTargetMachine().getTargetTriple().getOS();
if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
return Op;
const Module *M = MF.getFunction().getParent();
const GlobalValue *GV =
M->getNamedValue(Intrinsic::getName(Intrinsic::amdgcn_groupstaticsize));
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
SIInstrInfo::MO_ABS32_LO);
return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
}
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private: {
SDLoc SL(Op);
unsigned AS = (IntrinsicID == Intrinsic::amdgcn_is_shared) ?
AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS;
SDValue Aperture = getSegmentAperture(AS, SL, DAG);
SDValue SrcVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32,
Op.getOperand(1));
SDValue SrcHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, SrcVec,
DAG.getConstant(1, SL, MVT::i32));
return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ);
}
case Intrinsic::amdgcn_perm:
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_reloc_constant: {
Module *M = const_cast<Module *>(MF.getFunction().getParent());
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
auto RelocSymbol = cast<GlobalVariable>(
M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));
SDValue GA = DAG.getTargetGlobalAddress(RelocSymbol, DL, MVT::i32, 0,
SIInstrInfo::MO_ABS32_LO);
return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
}
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
return lowerImage(Op, ImageDimIntr, DAG, false);
return Op;
}
}
/// Update \p MMO based on the offset inputs to an intrinsic.
static void updateBufferMMO(MachineMemOperand *MMO, SDValue VOffset,
SDValue SOffset, SDValue Offset,
SDValue VIndex = SDValue()) {
if (!isa<ConstantSDNode>(VOffset) || !isa<ConstantSDNode>(SOffset) ||
!isa<ConstantSDNode>(Offset)) {
// The combined offset is not known to be constant, so we cannot represent
// it in the MMO. Give up.
MMO->setValue((Value *)nullptr);
return;
}
if (VIndex && (!isa<ConstantSDNode>(VIndex) ||
!cast<ConstantSDNode>(VIndex)->isZero())) {
// The strided index component of the address is not known to be zero, so we
// cannot represent it in the MMO. Give up.
MMO->setValue((Value *)nullptr);
return;
}
MMO->setOffset(cast<ConstantSDNode>(VOffset)->getSExtValue() +
cast<ConstantSDNode>(SOffset)->getSExtValue() +
cast<ConstantSDNode>(Offset)->getSExtValue());
}
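// Lower a raw buffer atomic intrinsic to the given AMDGPUISD buffer atomic
// node. Raw variants have no vindex, so vindex is 0 and idxen is clear.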
SDValue SITargetLowering::lowerRawBufferAtomicIntrin(SDValue Op,
SelectionDAG &DAG,
unsigned NewOpcode) const {
SDLoc DL(Op);
SDValue VData = Op.getOperand(2);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
VData, // vdata
Op.getOperand(3), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6]);
EVT MemVT = VData.getValueType();
return DAG.getMemIntrinsicNode(NewOpcode, DL, Op->getVTList(), Ops, MemVT,
M->getMemOperand());
}
// Return a value to use for the idxen operand by examining the vindex operand.
static unsigned getIdxEn(SDValue VIndex) {
if (auto VIndexC = dyn_cast<ConstantSDNode>(VIndex))
// No need to set idxen if vindex is known to be zero.
return VIndexC->getZExtValue() != 0;
return 1;
}
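// Lower a struct buffer atomic intrinsic. Struct variants carry an explicit
// vindex operand, so idxen is always set.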
SDValue
SITargetLowering::lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
unsigned NewOpcode) const {
SDLoc DL(Op);
SDValue VData = Op.getOperand(2);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Offsets.first, // voffset
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
EVT MemVT = VData.getValueType();
return DAG.getMemIntrinsicNode(NewOpcode, DL, Op->getVTList(), Ops, MemVT,
M->getMemOperand());
}
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDLoc DL(Op);
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue Chain = M->getOperand(0);
SDValue M0 = M->getOperand(2);
SDValue Value = M->getOperand(3);
unsigned IndexOperand = M->getConstantOperandVal(7);
unsigned WaveRelease = M->getConstantOperandVal(8);
unsigned WaveDone = M->getConstantOperandVal(9);
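// The index operand packs several fields: bits [5:0] hold the ordered count
// index and, on gfx10+, bits [27:24] hold the dword count. Any other set
// bits are rejected below.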
unsigned OrderedCountIndex = IndexOperand & 0x3f;
IndexOperand &= ~0x3f;
unsigned CountDw = 0;
if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) {
CountDw = (IndexOperand >> 24) & 0xf;
IndexOperand &= ~(0xf << 24);
if (CountDw < 1 || CountDw > 4) {
report_fatal_error(
"ds_ordered_count: dword count must be between 1 and 4");
}
}
if (IndexOperand)
report_fatal_error("ds_ordered_count: bad index operand");
if (WaveDone && !WaveRelease)
report_fatal_error("ds_ordered_count: wave_done requires wave_release");
unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
unsigned ShaderType =
SIInstrInfo::getDSShaderTypeValue(DAG.getMachineFunction());
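// Pack the configuration into the 16-bit DS offset: Offset0 (low byte)
// holds the ordered count index scaled to a byte offset; Offset1 (high
// byte) holds wave_release (bit 0), wave_done (bit 1), the shader type
// (pre-gfx11), the op, and the dword count minus one (gfx10+).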
unsigned Offset0 = OrderedCountIndex << 2;
unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
Offset1 |= (CountDw - 1) << 6;
if (Subtarget->getGeneration() < AMDGPUSubtarget::GFX11)
Offset1 |= ShaderType << 2;
unsigned Offset = Offset0 | (Offset1 << 8);
SDValue Ops[] = {
Chain,
Value,
DAG.getTargetConstant(Offset, DL, MVT::i16),
copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue
};
return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL,
M->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
case Intrinsic::amdgcn_ds_fadd: {
MemSDNode *M = cast<MemSDNode>(Op);
unsigned Opc = ISD::ATOMIC_LOAD_FADD;
return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(),
M->getOperand(0), M->getOperand(2), M->getOperand(3),
M->getMemOperand());
}
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
MemSDNode *M = cast<MemSDNode>(Op);
unsigned Opc;
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
Opc = AMDGPUISD::ATOMIC_INC;
break;
case Intrinsic::amdgcn_atomic_dec:
Opc = AMDGPUISD::ATOMIC_DEC;
break;
case Intrinsic::amdgcn_ds_fmin:
Opc = AMDGPUISD::ATOMIC_LOAD_FMIN;
break;
case Intrinsic::amdgcn_ds_fmax:
Opc = AMDGPUISD::ATOMIC_LOAD_FMAX;
break;
default:
llvm_unreachable("Unknown intrinsic!");
}
SDValue Ops[] = {
M->getOperand(0), // Chain
M->getOperand(2), // Ptr
M->getOperand(3) // Value
};
return DAG.getMemIntrinsicNode(Opc, SDLoc(Op), M->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
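// Legacy buffer intrinsics take a single combined offset operand;
// setBufferOffsets splits it into the voffset/soffset/immediate fields
// below.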
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format: {
unsigned Glc = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(3));
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5], Ops[2]);
EVT LoadVT = Op.getValueType();
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
// Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
if (LoadVT.getScalarType() == MVT::i8 ||
LoadVT.getScalarType() == MVT::i16)
return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
M->getMemOperand(), DAG);
}
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format: {
const bool IsFormat = IntrID == Intrinsic::amdgcn_raw_buffer_load_format;
auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(4), // soffset
Offsets.second, // offset
Op.getOperand(5), // cachepolicy, swizzled buffer
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5]);
return lowerIntrinsicLoad(M, IsFormat, DAG, Ops);
}
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format: {
const bool IsFormat = IntrID == Intrinsic::amdgcn_struct_buffer_load_format;
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy, swizzled buffer
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5], Ops[2]);
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
}
case Intrinsic::amdgcn_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(3));
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
Op.getOperand(4), // voffset
Op.getOperand(5), // soffset
Op.getOperand(6), // offset
DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
DAG);
}
case Intrinsic::amdgcn_raw_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(4), // soffset
Offsets.second, // offset
Op.getOperand(5), // format
Op.getOperand(6), // cachepolicy, swizzled buffer
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
DAG);
}
case Intrinsic::amdgcn_struct_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy, swizzled buffer
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
DAG);
}
case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add:
case Intrinsic::amdgcn_buffer_atomic_sub:
case Intrinsic::amdgcn_buffer_atomic_csub:
case Intrinsic::amdgcn_buffer_atomic_smin:
case Intrinsic::amdgcn_buffer_atomic_umin:
case Intrinsic::amdgcn_buffer_atomic_smax:
case Intrinsic::amdgcn_buffer_atomic_umax:
case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_buffer_atomic_xor:
case Intrinsic::amdgcn_buffer_atomic_fadd: {
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(4));
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
unsigned Opcode = 0;
switch (IntrID) {
case Intrinsic::amdgcn_buffer_atomic_swap:
Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
break;
case Intrinsic::amdgcn_buffer_atomic_add:
Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
break;
case Intrinsic::amdgcn_buffer_atomic_sub:
Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
break;
case Intrinsic::amdgcn_buffer_atomic_csub:
Opcode = AMDGPUISD::BUFFER_ATOMIC_CSUB;
break;
case Intrinsic::amdgcn_buffer_atomic_smin:
Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
break;
case Intrinsic::amdgcn_buffer_atomic_umin:
Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
break;
case Intrinsic::amdgcn_buffer_atomic_smax:
Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
break;
case Intrinsic::amdgcn_buffer_atomic_umax:
Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
break;
case Intrinsic::amdgcn_buffer_atomic_and:
Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
break;
case Intrinsic::amdgcn_buffer_atomic_or:
Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
break;
case Intrinsic::amdgcn_buffer_atomic_xor:
Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
break;
case Intrinsic::amdgcn_buffer_atomic_fadd:
if (!Op.getValue(0).use_empty() && !hasAtomicFaddRtnForTy(Op)) {
DiagnosticInfoUnsupported
NoFpRet(DAG.getMachineFunction().getFunction(),
"return versions of fp atomics not supported",
DL.getDebugLoc(), DS_Error);
DAG.getContext()->diagnose(NoFpRet);
return SDValue();
}
Opcode = AMDGPUISD::BUFFER_ATOMIC_FADD;
break;
default:
llvm_unreachable("unhandled atomic opcode");
}
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
M->getMemOperand());
}
case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SWAP);
case Intrinsic::amdgcn_raw_buffer_atomic_add:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_ADD);
case Intrinsic::amdgcn_raw_buffer_atomic_sub:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SUB);
case Intrinsic::amdgcn_raw_buffer_atomic_smin:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_umin:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_UMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_smax:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_umax:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_UMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_and:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_AND);
case Intrinsic::amdgcn_raw_buffer_atomic_or:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_OR);
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_XOR);
case Intrinsic::amdgcn_raw_buffer_atomic_inc:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_INC);
case Intrinsic::amdgcn_raw_buffer_atomic_dec:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_DEC);
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_SWAP);
case Intrinsic::amdgcn_struct_buffer_atomic_add:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_ADD);
case Intrinsic::amdgcn_struct_buffer_atomic_sub:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SUB);
case Intrinsic::amdgcn_struct_buffer_atomic_smin:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_SMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_umin:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_UMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_smax:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_SMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_umax:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_UMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_and:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_AND);
case Intrinsic::amdgcn_struct_buffer_atomic_or:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_OR);
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_XOR);
case Intrinsic::amdgcn_struct_buffer_atomic_inc:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_INC);
case Intrinsic::amdgcn_struct_buffer_atomic_dec:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_DEC);
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(5));
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
Op.getOperand(3), // cmp
Op.getOperand(4), // rsrc
Op.getOperand(5), // vindex
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7], Ops[4]);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap: {
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
Op.getOperand(3), // cmp
Op.getOperand(4), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7]);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap: {
auto Offsets = splitBufferOffsets(Op.getOperand(6), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
Op.getOperand(3), // cmp
Op.getOperand(4), // rsrc
Op.getOperand(5), // vindex
Offsets.first, // voffset
Op.getOperand(7), // soffset
Offsets.second, // offset
Op.getOperand(8), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7], Ops[4]);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
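// Lower amdgcn.image.bvh.intersect.ray to an IMAGE_BVH*_INTERSECT_RAY MIMG
// instruction. The address layout depends on the node pointer width
// (i32 vs. i64), on whether the ray direction is f16 (A16), and on whether
// the NSA encoding is available.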
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue NodePtr = M->getOperand(2);
SDValue RayExtent = M->getOperand(3);
SDValue RayOrigin = M->getOperand(4);
SDValue RayDir = M->getOperand(5);
SDValue RayInvDir = M->getOperand(6);
SDValue TDescr = M->getOperand(7);
assert(NodePtr.getValueType() == MVT::i32 ||
NodePtr.getValueType() == MVT::i64);
assert(RayDir.getValueType() == MVT::v3f16 ||
RayDir.getValueType() == MVT::v3f32);
if (!Subtarget->hasGFX10_AEncoding()) {
emitRemovedIntrinsicError(DAG, DL, Op.getValueType());
return SDValue();
}
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
const bool Is64 = NodePtr.getValueType() == MVT::i64;
const unsigned NumVDataDwords = 4;
const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
const bool UseNSA =
Subtarget->hasNSAEncoding() && NumVAddrs <= Subtarget->getNSAMaxSize();
const unsigned BaseOpcodes[2][2] = {
{AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
{AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
int Opcode;
if (UseNSA) {
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx10NSA,
NumVDataDwords, NumVAddrDwords);
} else {
Opcode =
AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default
: AMDGPU::MIMGEncGfx10Default,
NumVDataDwords, PowerOf2Ceil(NumVAddrDwords));
}
assert(Opcode != -1);
SmallVector<SDValue, 16> Ops;
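// packLanes pushes the first three lanes of a vector operand. 32-bit lanes
// are pushed as individual dwords; f16 lanes are paired into dwords, with
// IsAligned indicating whether the operand starts on a dword boundary or
// must first complete the half-dword left over from the previous operand.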
auto packLanes = [&DAG, &Ops, &DL] (SDValue Op, bool IsAligned) {
SmallVector<SDValue, 3> Lanes;
DAG.ExtractVectorElements(Op, Lanes, 0, 3);
if (Lanes[0].getValueSizeInBits() == 32) {
for (unsigned I = 0; I < 3; ++I)
Ops.push_back(DAG.getBitcast(MVT::i32, Lanes[I]));
} else {
if (IsAligned) {
Ops.push_back(
DAG.getBitcast(MVT::i32,
DAG.getBuildVector(MVT::v2f16, DL,
{ Lanes[0], Lanes[1] })));
Ops.push_back(Lanes[2]);
} else {
SDValue Elt0 = Ops.pop_back_val();
Ops.push_back(
DAG.getBitcast(MVT::i32,
DAG.getBuildVector(MVT::v2f16, DL,
{ Elt0, Lanes[0] })));
Ops.push_back(
DAG.getBitcast(MVT::i32,
DAG.getBuildVector(MVT::v2f16, DL,
{ Lanes[1], Lanes[2] })));
}
}
};
if (UseNSA && IsGFX11Plus) {
Ops.push_back(NodePtr);
Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
Ops.push_back(RayOrigin);
if (IsA16) {
SmallVector<SDValue, 3> DirLanes, InvDirLanes, MergedLanes;
DAG.ExtractVectorElements(RayDir, DirLanes, 0, 3);
DAG.ExtractVectorElements(RayInvDir, InvDirLanes, 0, 3);
for (unsigned I = 0; I < 3; ++I) {
MergedLanes.push_back(DAG.getBitcast(
MVT::i32, DAG.getBuildVector(MVT::v2f16, DL,
{DirLanes[I], InvDirLanes[I]})));
}
Ops.push_back(DAG.getBuildVector(MVT::v3i32, DL, MergedLanes));
} else {
Ops.push_back(RayDir);
Ops.push_back(RayInvDir);
}
} else {
if (Is64)
DAG.ExtractVectorElements(DAG.getBitcast(MVT::v2i32, NodePtr), Ops, 0,
2);
else
Ops.push_back(NodePtr);
Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
packLanes(RayOrigin, true);
packLanes(RayDir, true);
packLanes(RayInvDir, false);
}
if (!UseNSA) {
// Build a single vector containing all the operands so far prepared.
if (NumVAddrDwords > 8) {
SDValue Undef = DAG.getUNDEF(MVT::i32);
Ops.append(16 - Ops.size(), Undef);
}
assert(Ops.size() == 8 || Ops.size() == 16);
SDValue MergedOps = DAG.getBuildVector(
Ops.size() == 16 ? MVT::v16i32 : MVT::v8i32, DL, Ops);
Ops.clear();
Ops.push_back(MergedOps);
}
Ops.push_back(TDescr);
if (IsA16)
Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
Ops.push_back(M->getChain());
auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops);
MachineMemOperand *MemRef = M->getMemOperand();
DAG.setNodeMemRefs(NewNode, {MemRef});
return SDValue(NewNode, 0);
}
case Intrinsic::amdgcn_global_atomic_fadd:
if (!Op.getValue(0).use_empty() && !Subtarget->hasGFX90AInsts()) {
DiagnosticInfoUnsupported
NoFpRet(DAG.getMachineFunction().getFunction(),
"return versions of fp atomics not supported",
DL.getDebugLoc(), DS_Error);
DAG.getContext()->diagnose(NoFpRet);
return SDValue();
}
LLVM_FALLTHROUGH;
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue Ops[] = {
M->getOperand(0), // Chain
M->getOperand(2), // Ptr
M->getOperand(3) // Value
};
unsigned Opcode = 0;
switch (IntrID) {
case Intrinsic::amdgcn_global_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fadd: {
EVT VT = Op.getOperand(3).getValueType();
return DAG.getAtomic(ISD::ATOMIC_LOAD_FADD, DL, VT,
DAG.getVTList(VT, MVT::Other), Ops,
M->getMemOperand());
}
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmin: {
Opcode = AMDGPUISD::ATOMIC_LOAD_FMIN;
break;
}
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmax: {
Opcode = AMDGPUISD::ATOMIC_LOAD_FMAX;
break;
}
default:
llvm_unreachable("unhandled atomic opcode");
}
return DAG.getMemIntrinsicNode(Opcode, SDLoc(Op),
M->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
return lowerImage(Op, ImageDimIntr, DAG, true);
return SDValue();
}
}
// Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type
// to dwordx4 on subtargets (SI) that lack dwordx3 loads and stores.
SDValue SITargetLowering::getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL,
SDVTList VTList,
ArrayRef<SDValue> Ops, EVT MemVT,
MachineMemOperand *MMO,
SelectionDAG &DAG) const {
EVT VT = VTList.VTs[0];
EVT WidenedVT = VT;
EVT WidenedMemVT = MemVT;
if (!Subtarget->hasDwordx3LoadStores() &&
(WidenedVT == MVT::v3i32 || WidenedVT == MVT::v3f32)) {
WidenedVT = EVT::getVectorVT(*DAG.getContext(),
WidenedVT.getVectorElementType(), 4);
WidenedMemVT = EVT::getVectorVT(*DAG.getContext(),
WidenedMemVT.getVectorElementType(), 4);
MMO = DAG.getMachineFunction().getMachineMemOperand(MMO, 0, 16);
}
assert(VTList.NumVTs == 2);
SDVTList WidenedVTList = DAG.getVTList(WidenedVT, VTList.VTs[1]);
auto NewOp = DAG.getMemIntrinsicNode(Opcode, DL, WidenedVTList, Ops,
WidenedMemVT, MMO);
if (WidenedVT != VT) {
auto Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, NewOp,
DAG.getVectorIdxConstant(0, DL));
NewOp = DAG.getMergeValues({ Extract, SDValue(NewOp.getNode(), 1) }, DL);
}
return NewOp;
}
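// Prepare D16 vector data for a buffer or image store. On targets with
// unpacked D16 VMEM each 16-bit element is zero-extended into its own
// dword; otherwise work around the gfx8.1 image-store bug by repacking into
// i32 pairs, or widen odd-sized vectors such as v3f16 to an even width.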
SDValue SITargetLowering::handleD16VData(SDValue VData, SelectionDAG &DAG,
bool ImageStore) const {
EVT StoreVT = VData.getValueType();
// No change for f16 and legal vector D16 types.
if (!StoreVT.isVector())
return VData;
SDLoc DL(VData);
unsigned NumElements = StoreVT.getVectorNumElements();
if (Subtarget->hasUnpackedD16VMem()) {
// We need to unpack the packed data to store.
EVT IntStoreVT = StoreVT.changeTypeToInteger();
SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
EVT EquivStoreVT =
EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElements);
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, EquivStoreVT, IntVData);
return DAG.UnrollVectorOp(ZExt.getNode());
}
// The sq block of gfx8.1 does not estimate register use correctly for d16
// image store instructions. The data operand is computed as if it were not a
// d16 image instruction.
if (ImageStore && Subtarget->hasImageStoreD16Bug()) {
// Bitcast to i16
EVT IntStoreVT = StoreVT.changeTypeToInteger();
SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
// Decompose into scalars
SmallVector<SDValue, 4> Elts;
DAG.ExtractVectorElements(IntVData, Elts);
// Group pairs of i16 into v2i16 and bitcast to i32
SmallVector<SDValue, 4> PackedElts;
for (unsigned I = 0; I < Elts.size() / 2; I += 1) {
SDValue Pair =
DAG.getBuildVector(MVT::v2i16, DL, {Elts[I * 2], Elts[I * 2 + 1]});
SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair);
PackedElts.push_back(IntPair);
}
if ((NumElements % 2) == 1) {
// Handle v3i16
unsigned I = Elts.size() / 2;
SDValue Pair = DAG.getBuildVector(MVT::v2i16, DL,
{Elts[I * 2], DAG.getUNDEF(MVT::i16)});
SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair);
PackedElts.push_back(IntPair);
}
// Pad using UNDEF
PackedElts.resize(Elts.size(), DAG.getUNDEF(MVT::i32));
// Build final vector
EVT VecVT =
EVT::getVectorVT(*DAG.getContext(), MVT::i32, PackedElts.size());
return DAG.getBuildVector(VecVT, DL, PackedElts);
}
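// Widen v3 types (e.g. v3f16) to the 4-element type by bitcasting to an
// integer of the store size (i48 for v3f16), zero-extending to the widened
// integer (i64) and bitcasting back (v4f16); the extra element is zero pad.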
if (NumElements == 3) {
EVT IntStoreVT =
EVT::getIntegerVT(*DAG.getContext(), StoreVT.getStoreSizeInBits());
SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
EVT WidenedStoreVT = EVT::getVectorVT(
*DAG.getContext(), StoreVT.getVectorElementType(), NumElements + 1);
EVT WidenedIntVT = EVT::getIntegerVT(*DAG.getContext(),
WidenedStoreVT.getStoreSizeInBits());
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenedIntVT, IntVData);
return DAG.getNode(ISD::BITCAST, DL, WidenedStoreVT, ZExt);
}
assert(isTypeLegal(StoreVT));
return VData;
}
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
MachineFunction &MF = DAG.getMachineFunction();
switch (IntrinsicID) {
case Intrinsic::amdgcn_exp_compr: {
if (!Subtarget->hasCompressedExport()) {
DiagnosticInfoUnsupported BadIntrin(
DAG.getMachineFunction().getFunction(),
"intrinsic not supported on subtarget", DL.getDebugLoc());
DAG.getContext()->diagnose(BadIntrin);
}
SDValue Src0 = Op.getOperand(4);
SDValue Src1 = Op.getOperand(5);
// Hack around illegal type on SI by directly selecting it.
if (isTypeLegal(Src0.getValueType()))
return SDValue();
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
SDValue Undef = DAG.getUNDEF(MVT::f32);
const SDValue Ops[] = {
Op.getOperand(2), // tgt
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), // src0
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), // src1
Undef, // src2
Undef, // src3
Op.getOperand(7), // vm
DAG.getTargetConstant(1, DL, MVT::i1), // compr
Op.getOperand(3), // en
Op.getOperand(0) // Chain
};
unsigned Opc = Done->isZero() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
}
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
unsigned WGSize = ST.getFlatWorkGroupSizes(MF.getFunction()).second;
if (WGSize <= ST.getWavefrontSize())
return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
Op.getOperand(0)), 0);
}
return SDValue();
}
case Intrinsic::amdgcn_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
unsigned Glc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(11))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(4));
SDValue Ops[] = {
Chain,
VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Op.getOperand(5), // voffset
Op.getOperand(6), // soffset
Op.getOperand(7), // offset
DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
MemSDNode *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_struct_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Chain,
VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Offsets.first, // voffset
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // format
Op.getOperand(8), // cachepolicy, swizzled buffer
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
MemSDNode *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_raw_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Chain,
VData, // vdata
Op.getOperand(3), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy, swizzled buffer
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
MemSDNode *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_buffer_store:
case Intrinsic::amdgcn_buffer_store_format: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
unsigned Glc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(4));
SDValue Ops[] = {
Chain,
VData,
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
EVT VDataType = VData.getValueType().getScalarType();
if (VDataType == MVT::i8 || VDataType == MVT::i16)
return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format: {
const bool IsFormat =
IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format;
SDValue VData = Op.getOperand(2);
EVT VDataVT = VData.getValueType();
EVT EltType = VDataVT.getScalarType();
bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
if (IsD16) {
VData = handleD16VData(VData, DAG);
VDataVT = VData.getValueType();
}
if (!isTypeLegal(VDataVT)) {
VData =
DAG.getNode(ISD::BITCAST, DL,
getEquivalentMemType(*DAG.getContext(), VDataVT), VData);
}
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Chain,
VData,
Op.getOperand(3), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy, swizzled buffer
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc =
IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6]);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32)
return handleByteShortBufferStores(DAG, VDataVT, DL, Ops, M);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store_format: {
const bool IsFormat =
IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format;
SDValue VData = Op.getOperand(2);
EVT VDataVT = VData.getValueType();
EVT EltType = VDataVT.getScalarType();
bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
if (IsD16) {
VData = handleD16VData(VData, DAG);
VDataVT = VData.getValueType();
}
if (!isTypeLegal(VDataVT)) {
VData =
DAG.getNode(ISD::BITCAST, DL,
getEquivalentMemType(*DAG.getContext(), VDataVT), VData);
}
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Chain,
VData,
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Offsets.first, // voffset
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy, swizzled buffer
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
EVT VDataType = VData.getValueType().getScalarType();
if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32)
return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_raw_buffer_load_lds:
case Intrinsic::amdgcn_struct_buffer_load_lds: {
unsigned Opc;
bool HasVIndex = IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds;
unsigned OpOffset = HasVIndex ? 1 : 0;
SDValue VOffset = Op.getOperand(5 + OpOffset);
auto CVOffset = dyn_cast<ConstantSDNode>(VOffset);
bool HasVOffset = !CVOffset || !CVOffset->isZero();
unsigned Size = Op->getConstantOperandVal(4);
switch (Size) {
default:
return SDValue();
case 1:
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
: AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
: HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
: AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
break;
case 2:
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
: AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
: HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
: AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
break;
case 4:
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
: AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
: AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
break;
}
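// The opcode suffix encodes the addressing mode: BOTHEN uses both vindex and
// voffset, IDXEN only vindex, OFFEN only voffset, and OFFSET neither (just
// the immediate offset).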
SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SmallVector<SDValue, 8> Ops;
if (HasVIndex && HasVOffset)
Ops.push_back(DAG.getBuildVector(MVT::v2i32, DL,
{ Op.getOperand(5), // VIndex
VOffset }));
else if (HasVIndex)
Ops.push_back(Op.getOperand(5));
else if (HasVOffset)
Ops.push_back(VOffset);
Ops.push_back(Op.getOperand(2)); // rsrc
Ops.push_back(Op.getOperand(6 + OpOffset)); // soffset
Ops.push_back(Op.getOperand(7 + OpOffset)); // imm offset
unsigned Aux = Op.getConstantOperandVal(8 + OpOffset);
Ops.push_back(
DAG.getTargetConstant(Aux & AMDGPU::CPol::ALL, DL, MVT::i8)); // cpol
Ops.push_back(
DAG.getTargetConstant((Aux >> 3) & 1, DL, MVT::i8)); // swz
Ops.push_back(M0Val.getValue(0)); // Chain
Ops.push_back(M0Val.getValue(1)); // Glue
auto *M = cast<MemSDNode>(Op);
MachineMemOperand *LoadMMO = M->getMemOperand();
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
LoadPtrI.Offset = Op->getConstantOperandVal(7 + OpOffset);
MachinePointerInfo StorePtrI = LoadPtrI;
StorePtrI.V = nullptr;
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
auto F = LoadMMO->getFlags() &
~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
LoadMMO = MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
Size, LoadMMO->getBaseAlign());
MachineMemOperand *StoreMMO =
MF.getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
sizeof(int32_t), LoadMMO->getBaseAlign());
auto Load = DAG.getMachineNode(Opc, DL, M->getVTList(), Ops);
DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO});
return SDValue(Load, 0);
}
case Intrinsic::amdgcn_global_load_lds: {
unsigned Opc;
unsigned Size = Op->getConstantOperandVal(4);
switch (Size) {
default:
return SDValue();
case 1:
Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
break;
case 2:
Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
break;
case 4:
Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
break;
}
auto *M = cast<MemSDNode>(Op);
SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SmallVector<SDValue, 6> Ops;
SDValue Addr = Op.getOperand(2); // Global ptr
SDValue VOffset;
// Try to split SAddr and VOffset. Global and LDS pointers share the same
// immediate offset, so we cannot use a regular SelectGlobalSAddr().
if (Addr->isDivergent() && Addr.getOpcode() == ISD::ADD) {
SDValue LHS = Addr.getOperand(0);
SDValue RHS = Addr.getOperand(1);
if (LHS->isDivergent())
std::swap(LHS, RHS);
if (!LHS->isDivergent() && RHS.getOpcode() == ISD::ZERO_EXTEND &&
RHS.getOperand(0).getValueType() == MVT::i32) {
// add (i64 sgpr), (zero_extend (i32 vgpr))
Addr = LHS;
VOffset = RHS.getOperand(0);
}
}
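// e.g. for Addr = (add (i64 s[0:1]), (zext (i32 v0))), the uniform base
// s[0:1] becomes the SAddr operand below and v0 becomes the VOffset.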
Ops.push_back(Addr);
if (!Addr->isDivergent()) {
Opc = AMDGPU::getGlobalSaddrOp(Opc);
if (!VOffset)
VOffset = SDValue(
DAG.getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32,
DAG.getTargetConstant(0, DL, MVT::i32)), 0);
Ops.push_back(VOffset);
}
Ops.push_back(Op.getOperand(5)); // Offset
Ops.push_back(Op.getOperand(6)); // CPol
Ops.push_back(M0Val.getValue(0)); // Chain
Ops.push_back(M0Val.getValue(1)); // Glue
MachineMemOperand *LoadMMO = M->getMemOperand();
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
LoadPtrI.Offset = Op->getConstantOperandVal(5);
MachinePointerInfo StorePtrI = LoadPtrI;
LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
auto F = LoadMMO->getFlags() &
~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
LoadMMO = MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
Size, LoadMMO->getBaseAlign());
MachineMemOperand *StoreMMO =
MF.getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
sizeof(int32_t), Align(4));
auto Load = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO});
return SDValue(Load, 0);
}
case Intrinsic::amdgcn_end_cf:
return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
Op->getOperand(2), Chain), 0);
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
return lowerImage(Op, ImageDimIntr, DAG, true);
return Op;
}
}
}
// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
// offset (the offset that is included in bounds checking and swizzling, to be
// split between the instruction's voffset and immoffset fields) and soffset
// (the offset that is excluded from bounds checking and swizzling, to go in
// the instruction's soffset field). This function takes the first kind of
// offset and figures out how to split it between voffset and immoffset.
std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
SDValue Offset, SelectionDAG &DAG) const {
SDLoc DL(Offset);
const unsigned MaxImm = 4095;
SDValue N0 = Offset;
ConstantSDNode *C1 = nullptr;
if ((C1 = dyn_cast<ConstantSDNode>(N0)))
N0 = SDValue();
else if (DAG.isBaseWithConstantOffset(N0)) {
C1 = cast<ConstantSDNode>(N0.getOperand(1));
N0 = N0.getOperand(0);
}
if (C1) {
unsigned ImmOffset = C1->getZExtValue();
// If the immediate value is too big for the immoffset field, keep only the
// low 12 bits (the value modulo 4096) in the immoffset field, so that the
// value that is copied/added for the voffset field is a multiple of 4096 and
// stands more chance of being CSEd with the copy/add for another similar
// load/store.
// However, do not do that rounding down to a multiple of 4096 if that is a
// negative number, as it appears to be illegal to have a negative offset
// in the vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
Overflow += ImmOffset;
ImmOffset = 0;
}
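// For example, a combined offset of 4100 becomes ImmOffset = 4 with
// Overflow = 4096 going to the voffset, while 0x80000004 would round down to
// 0x80000000, which is negative as an i32, so the whole 0x80000004 goes to
// the voffset instead and ImmOffset becomes 0.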
C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32));
if (Overflow) {
auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
if (!N0)
N0 = OverflowVal;
else {
SDValue Ops[] = { N0, OverflowVal };
N0 = DAG.getNode(ISD::ADD, DL, MVT::i32, Ops);
}
}
}
if (!N0)
N0 = DAG.getConstant(0, DL, MVT::i32);
if (!C1)
C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32));
return {N0, SDValue(C1, 0)};
}
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
// pointed to by Offsets.
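// For example (hypothetical split; the exact values depend on
// AMDGPU::splitMUBUFOffset and the subtarget), a constant offset too large
// for the 12-bit instoffset field may have its excess moved into soffset,
// while a base-plus-constant offset keeps the base in voffset and splits
// only the constant part.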
void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
SelectionDAG &DAG, SDValue *Offsets,
Align Alignment) const {
SDLoc DL(CombinedOffset);
if (auto C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
uint32_t Imm = C->getZExtValue();
uint32_t SOffset, ImmOffset;
if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget,
Alignment)) {
Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
return;
}
}
if (DAG.isBaseWithConstantOffset(CombinedOffset)) {
SDValue N0 = CombinedOffset.getOperand(0);
SDValue N1 = CombinedOffset.getOperand(1);
uint32_t SOffset, ImmOffset;
int Offset = cast<ConstantSDNode>(N1)->getSExtValue();
if (Offset >= 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
Subtarget, Alignment)) {
Offsets[0] = N0;
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
return;
}
}
Offsets[0] = CombinedOffset;
Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
}
// Handle 8 bit and 16 bit buffer loads
SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG,
EVT LoadVT, SDLoc DL,
ArrayRef<SDValue> Ops,
MemSDNode *M) const {
EVT IntVT = LoadVT.changeTypeToInteger();
unsigned Opc = (LoadVT.getScalarType() == MVT::i8) ?
AMDGPUISD::BUFFER_LOAD_UBYTE : AMDGPUISD::BUFFER_LOAD_USHORT;
SDVTList ResList = DAG.getVTList(MVT::i32, MVT::Other);
SDValue BufferLoad = DAG.getMemIntrinsicNode(Opc, DL, ResList,
Ops, IntVT,
M->getMemOperand());
SDValue LoadVal = DAG.getNode(ISD::TRUNCATE, DL, IntVT, BufferLoad);
LoadVal = DAG.getNode(ISD::BITCAST, DL, LoadVT, LoadVal);
return DAG.getMergeValues({LoadVal, BufferLoad.getValue(1)}, DL);
}
// Handle 8 bit and 16 bit buffer stores
SDValue SITargetLowering::handleByteShortBufferStores(SelectionDAG &DAG,
EVT VDataType, SDLoc DL,
SDValue Ops[],
MemSDNode *M) const {
if (VDataType == MVT::f16)
Ops[1] = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Ops[1]);
SDValue BufferStoreExt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Ops[1]);
Ops[1] = BufferStoreExt;
unsigned Opc = (VDataType == MVT::i8) ? AMDGPUISD::BUFFER_STORE_BYTE :
AMDGPUISD::BUFFER_STORE_SHORT;
ArrayRef<SDValue> OpsRef = makeArrayRef(&Ops[0], 9);
return DAG.getMemIntrinsicNode(Opc, DL, M->getVTList(), OpsRef, VDataType,
M->getMemOperand());
}
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG,
ISD::LoadExtType ExtType, SDValue Op,
const SDLoc &SL, EVT VT) {
if (VT.bitsLT(Op.getValueType()))
return DAG.getNode(ISD::TRUNCATE, SL, VT, Op);
switch (ExtType) {
case ISD::SEXTLOAD:
return DAG.getNode(ISD::SIGN_EXTEND, SL, VT, Op);
case ISD::ZEXTLOAD:
return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, Op);
case ISD::EXTLOAD:
return DAG.getNode(ISD::ANY_EXTEND, SL, VT, Op);
case ISD::NON_EXTLOAD:
return Op;
}
llvm_unreachable("invalid ext type");
}
SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
if (Ld->getAlign() < Align(4) || Ld->isDivergent())
return SDValue();
// FIXME: Constant loads should all be marked invariant.
unsigned AS = Ld->getAddressSpace();
if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
(AS != AMDGPUAS::GLOBAL_ADDRESS || !Ld->isInvariant()))
return SDValue();
// Don't do this early, since it may interfere with adjacent load merging for
// illegal types. We can avoid losing alignment information for exotic types
// pre-legalize.
EVT MemVT = Ld->getMemoryVT();
if ((MemVT.isSimple() && !DCI.isAfterLegalizeDAG()) ||
MemVT.getSizeInBits() >= 32)
return SDValue();
SDLoc SL(Ld);
assert((!MemVT.isVector() || Ld->getExtensionType() == ISD::NON_EXTLOAD) &&
"unexpected vector extload");
// TODO: Drop only high part of range.
SDValue Ptr = Ld->getBasePtr();
SDValue NewLoad = DAG.getLoad(
ISD::UNINDEXED, ISD::NON_EXTLOAD, MVT::i32, SL, Ld->getChain(), Ptr,
Ld->getOffset(), Ld->getPointerInfo(), MVT::i32, Ld->getAlign(),
Ld->getMemOperand()->getFlags(), Ld->getAAInfo(),
nullptr); // Drop ranges
EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
if (MemVT.isFloatingPoint()) {
assert(Ld->getExtensionType() == ISD::NON_EXTLOAD &&
"unexpected fp extload");
TruncVT = MemVT.changeTypeToInteger();
}
SDValue Cvt = NewLoad;
if (Ld->getExtensionType() == ISD::SEXTLOAD) {
Cvt = DAG.getNode(ISD::SIGN_EXTEND_INREG, SL, MVT::i32, NewLoad,
DAG.getValueType(TruncVT));
} else if (Ld->getExtensionType() == ISD::ZEXTLOAD ||
Ld->getExtensionType() == ISD::NON_EXTLOAD) {
Cvt = DAG.getZeroExtendInReg(NewLoad, SL, TruncVT);
} else {
assert(Ld->getExtensionType() == ISD::EXTLOAD);
}
EVT VT = Ld->getValueType(0);
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
DCI.AddToWorklist(Cvt.getNode());
// We may need to handle exotic cases, such as i16->i64 extloads, so insert
// the appropriate extension from the 32-bit load.
Cvt = getLoadExtOrTrunc(DAG, Ld->getExtensionType(), Cvt, SL, IntVT);
DCI.AddToWorklist(Cvt.getNode());
// Handle conversion back to floating point if necessary.
Cvt = DAG.getNode(ISD::BITCAST, SL, VT, Cvt);
return DAG.getMergeValues({ Cvt, NewLoad.getValue(1) }, SL);
}
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
ISD::LoadExtType ExtType = Load->getExtensionType();
EVT MemVT = Load->getMemoryVT();
if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {
if (MemVT == MVT::i16 && isTypeLegal(MVT::i16))
return SDValue();
// FIXME: Copied from PPC
// First, load into 32 bits, then truncate to 1 bit.
SDValue Chain = Load->getChain();
SDValue BasePtr = Load->getBasePtr();
MachineMemOperand *MMO = Load->getMemOperand();
EVT RealMemVT = (MemVT == MVT::i1) ? MVT::i8 : MVT::i16;
SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
BasePtr, RealMemVT, MMO);
if (!MemVT.isVector()) {
SDValue Ops[] = {
DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewLD),
NewLD.getValue(1)
};
return DAG.getMergeValues(Ops, DL);
}
SmallVector<SDValue, 3> Elts;
for (unsigned I = 0, N = MemVT.getVectorNumElements(); I != N; ++I) {
SDValue Elt = DAG.getNode(ISD::SRL, DL, MVT::i32, NewLD,
DAG.getConstant(I, DL, MVT::i32));
Elts.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Elt));
}
SDValue Ops[] = {
DAG.getBuildVector(MemVT, DL, Elts),
NewLD.getValue(1)
};
return DAG.getMergeValues(Ops, DL);
}
if (!MemVT.isVector())
return SDValue();
assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
"Custom lowering for non-i32 vectors hasn't been implemented.");
Align Alignment = Load->getAlign();
unsigned AS = Load->getAddressSpace();
if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS &&
Alignment.value() < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) {
return SplitVectorLoad(Op, DAG);
}
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory,
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasMultiDwordFlatScratchAddressing())
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
if (!Op->isDivergent() && Alignment >= Align(4) && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
return WidenOrSplitVectorLoad(Op, DAG);
}
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
}
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUAS::GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
Load->isSimple() && isMemOpHasNoClobberedMemOperand(Load) &&
Alignment >= Align(4) && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
return WidenOrSplitVectorLoad(Op, DAG);
}
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
}
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUAS::GLOBAL_ADDRESS ||
AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v3 loads not supported on SI.
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
return WidenOrSplitVectorLoad(Op, DAG);
// v3 and v4 loads are supported for private and global memory.
return SDValue();
}
if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
// Depending on the setting of the private_element_size field in the
// resource descriptor, we can only make private accesses up to a certain
// size.
switch (Subtarget->getMaxPrivateElementSize()) {
case 4: {
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG);
return DAG.getMergeValues(Ops, DL);
}
case 8:
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
return SDValue();
case 16:
// Same as global/flat
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v3 loads not supported on SI.
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
return WidenOrSplitVectorLoad(Op, DAG);
return SDValue();
default:
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
bool Fast = false;
auto Flags = Load->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,
Load->getAlign(), Flags, &Fast) &&
Fast)
return SDValue();
if (MemVT.isVector())
return SplitVectorLoad(Op, DAG);
}
if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
MemVT, *Load->getMemOperand())) {
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
return DAG.getMergeValues(Ops, DL);
}
return SDValue();
}
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
return splitTernaryVectorOp(Op, DAG);
assert(VT.getSizeInBits() == 64);
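// Decompose the 64-bit select into two 32-bit selects over the low and high
// halves, bitcasting the operands through v2i32 to extract and rebuild them.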
SDLoc DL(Op);
SDValue Cond = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue One = DAG.getConstant(1, DL, MVT::i32);
SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero);
SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero);
SDValue Lo = DAG.getSelect(DL, MVT::i32, Cond, Lo0, Lo1);
SDValue Hi0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, One);
SDValue Hi1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, One);
SDValue Hi = DAG.getSelect(DL, MVT::i32, Cond, Hi0, Hi1);
SDValue Res = DAG.getBuildVector(MVT::v2i32, DL, {Lo, Hi});
return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}
// Catch division cases where we can use shortcuts with rcp and rsq
// instructions.
SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
bool AllowInaccurateRcp = Flags.hasApproximateFuncs();
// Without !fpmath accuracy information, we can't do more because we don't
// know exactly whether rcp is accurate enough to meet the !fpmath requirement.
if (!AllowInaccurateRcp)
return SDValue();
if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
if (CLHS->isExactlyValue(1.0)) {
// v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
// the CI documentation they have a worst-case error of 1 ulp.
// OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
// use it as long as we aren't trying to use denormals.
//
// v_rcp_f16 and v_rsq_f16 DO support denormals.
// 1.0 / sqrt(x) -> rsq(x)
// XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
// error seems really high at 2^29 ULP.
if (RHS.getOpcode() == ISD::FSQRT)
return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
// 1.0 / x -> rcp(x)
return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
}
// Same as for 1.0, but expand the sign out of the constant.
if (CLHS->isExactlyValue(-1.0)) {
// -1.0 / x -> rcp (fneg x)
SDValue FNegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
return DAG.getNode(AMDGPUISD::RCP, SL, VT, FNegRHS);
}
}
// Turn into multiply by the reciprocal.
// x / y -> x * (1.0 / y)
SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags);
}
SDValue SITargetLowering::lowerFastUnsafeFDIV64(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
bool AllowInaccurateDiv = Flags.hasApproximateFuncs() ||
DAG.getTarget().Options.UnsafeFPMath;
if (!AllowInaccurateDiv)
return SDValue();
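// Refine r = rcp(y) with two Newton-Raphson iterations, r' = r + r*(1 - y*r),
// then form ret = x*r and apply one residual correction:
// ret' = ret + r*(x - y*ret).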
SDValue NegY = DAG.getNode(ISD::FNEG, SL, VT, Y);
SDValue One = DAG.getConstantFP(1.0, SL, VT);
SDValue R = DAG.getNode(AMDGPUISD::RCP, SL, VT, Y);
SDValue Tmp0 = DAG.getNode(ISD::FMA, SL, VT, NegY, R, One);
R = DAG.getNode(ISD::FMA, SL, VT, Tmp0, R, R);
SDValue Tmp1 = DAG.getNode(ISD::FMA, SL, VT, NegY, R, One);
R = DAG.getNode(ISD::FMA, SL, VT, Tmp1, R, R);
SDValue Ret = DAG.getNode(ISD::FMUL, SL, VT, X, R);
SDValue Tmp2 = DAG.getNode(ISD::FMA, SL, VT, NegY, Ret, X);
return DAG.getNode(ISD::FMA, SL, VT, Tmp2, R, Ret);
}
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
EVT VT, SDValue A, SDValue B, SDValue GlueChain,
SDNodeFlags Flags) {
if (GlueChain->getNumValues() <= 1) {
return DAG.getNode(Opcode, SL, VT, A, B, Flags);
}
assert(GlueChain->getNumValues() == 3);
SDVTList VTList = DAG.getVTList(VT, MVT::Other, MVT::Glue);
switch (Opcode) {
default: llvm_unreachable("no chain equivalent for opcode");
case ISD::FMUL:
Opcode = AMDGPUISD::FMUL_W_CHAIN;
break;
}
return DAG.getNode(Opcode, SL, VTList,
{GlueChain.getValue(1), A, B, GlueChain.getValue(2)},
Flags);
}
static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
EVT VT, SDValue A, SDValue B, SDValue C,
SDValue GlueChain, SDNodeFlags Flags) {
if (GlueChain->getNumValues() <= 1) {
return DAG.getNode(Opcode, SL, VT, {A, B, C}, Flags);
}
assert(GlueChain->getNumValues() == 3);
SDVTList VTList = DAG.getVTList(VT, MVT::Other, MVT::Glue);
switch (Opcode) {
default: llvm_unreachable("no chain equivalent for opcode");
case ISD::FMA:
Opcode = AMDGPUISD::FMA_W_CHAIN;
break;
}
return DAG.getNode(Opcode, SL, VTList,
{GlueChain.getValue(1), A, B, C, GlueChain.getValue(2)},
Flags);
}
SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
return FastLowered;
SDLoc SL(Op);
SDValue Src0 = Op.getOperand(0);
SDValue Src1 = Op.getOperand(1);
SDValue CvtSrc0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
SDValue CvtSrc1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
SDValue RcpSrc1 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, CvtSrc1);
SDValue Quot = DAG.getNode(ISD::FMUL, SL, MVT::f32, CvtSrc0, RcpSrc1);
SDValue FPRoundFlag = DAG.getTargetConstant(0, SL, MVT::i32);
SDValue BestQuot = DAG.getNode(ISD::FP_ROUND, SL, MVT::f16, Quot, FPRoundFlag);
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f16, BestQuot, Src1, Src0);
}
// Faster 2.5 ULP division that does not support denormals.
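// If |rhs| exceeds 2^96 (K0 = 0x6f800000), rcp would flush to zero, so
// pre-scale rhs by 2^-32 (K1 = 0x2f800000) before the rcp and multiply the
// final product by the same factor: r3 * (lhs * rcp(rhs * r3)) == lhs / rhs.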
SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
const APFloat K0Val(BitsToFloat(0x6f800000));
const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
const APFloat K1Val(BitsToFloat(0x2f800000));
const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
// TODO: Should this propagate fast-math-flags?
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
// rcp does not support denormals.
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
}
// Returns immediate value for setting the F32 denorm mode when using the
// S_DENORM_MODE instruction.
static SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
const SDLoc &SL, const GCNSubtarget *ST) {
assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction())
? FP_DENORM_FLUSH_NONE
: FP_DENORM_FLUSH_IN_FLUSH_OUT;
int Mode = SPDenormMode | (DPDenormModeDefault << 2);
return DAG.getTargetConstant(Mode, SL, MVT::i32);
}
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
return FastLowered;
// The selection matcher assumes that anything with a chain selects to a
// mayRaiseFPException machine instruction.
// here, we need to explicitly report nofpexcept for the regular fdiv
// lowering.
SDNodeFlags Flags = Op->getFlags();
Flags.setNoFPExcept(true);
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
{RHS, RHS, LHS}, Flags);
SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
{LHS, RHS, LHS}, Flags);
// Denominator is scaled to not be denormal, so using rcp is ok.
SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32,
DenominatorScaled, Flags);
SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32,
DenominatorScaled, Flags);
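// Encode hwreg(MODE, 4, 2): ID_MODE selects the MODE register, and the
// offset of 4 with width 2 (WIDTH_M1 = 1) selects the two FP32 denorm bits
// within it.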
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
if (!HasFP32Denormals) {
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
// lowering. The chain dependence is insufficient, and we need glue. We do
// not need the glue variants in a strictfp function.
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDNode *EnableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue EnableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
DAG.getEntryNode(), EnableDenormValue).getNode();
} else {
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
SL, MVT::i32);
EnableDenorm =
DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
{EnableDenormValue, BitField, DAG.getEntryNode()});
}
SDValue Ops[3] = {
NegDivScale0,
SDValue(EnableDenorm, 0),
SDValue(EnableDenorm, 1)
};
NegDivScale0 = DAG.getMergeValues(Ops, SL);
}
SDValue Fma0 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0,
ApproxRcp, One, NegDivScale0, Flags);
SDValue Fma1 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp,
ApproxRcp, Fma0, Flags);
SDValue Mul = getFPBinOp(DAG, ISD::FMUL, SL, MVT::f32, NumeratorScaled,
Fma1, Fma1, Flags);
SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
NumeratorScaled, Mul, Flags);
SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32,
Fma2, Fma1, Mul, Fma2, Flags);
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
NumeratorScaled, Fma3, Flags);
if (!HasFP32Denormals) {
SDNode *DisableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue DisableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
Fma4.getValue(1), DisableDenormValue,
Fma4.getValue(2)).getNode();
} else {
const SDValue DisableDenormValue =
DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
DisableDenorm = DAG.getMachineNode(
AMDGPU::S_SETREG_B32, SL, MVT::Other,
{DisableDenormValue, BitField, Fma4.getValue(1), Fma4.getValue(2)});
}
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
SDValue(DisableDenorm, 0), DAG.getRoot());
DAG.setRoot(OutputChain);
}
SDValue Scale = NumeratorScaled.getValue(1);
SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32,
{Fma4, Fma1, Fma3, Scale}, Flags);
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS, Flags);
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = lowerFastUnsafeFDIV64(Op, DAG))
return FastLowered;
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);
SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);
SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);
SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);
SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);
SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
NegDivScale0, Mul, DivScale1);
SDValue Scale;
if (!Subtarget->hasUsableDivScaleConditionOutput()) {
// Workaround a hardware bug on SI where the condition output from div_scale
// is not usable.
const SDValue Hi = DAG.getConstant(1, SL, MVT::i32);
// Figure out which scale to use for div_fmas.
SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);
SDValue Scale0Hi
= DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi);
SDValue Scale1Hi
= DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi);
SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen);
} else {
Scale = DivScale1.getValue(1);
}
SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
Fma4, Fma3, Mul, Scale);
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X);
}
SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT == MVT::f32)
return LowerFDIV32(Op, DAG);
if (VT == MVT::f64)
return LowerFDIV64(Op, DAG);
if (VT == MVT::f16)
return LowerFDIV16(Op, DAG);
llvm_unreachable("Unexpected type for fdiv");
}
SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT VT = Store->getMemoryVT();
if (VT == MVT::i1) {
return DAG.getTruncStore(Store->getChain(), DL,
DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
Store->getBasePtr(), MVT::i1, Store->getMemOperand());
}
assert(VT.isVector() &&
Store->getValue().getValueType().getScalarType() == MVT::i32);
unsigned AS = Store->getAddressSpace();
if (Subtarget->hasLDSMisalignedBug() &&
AS == AMDGPUAS::FLAT_ADDRESS &&
Store->getAlign().value() < VT.getStoreSize() && VT.getSizeInBits() > 32) {
return SplitVectorStore(Op, DAG);
}
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory,
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasMultiDwordFlatScratchAddressing())
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = VT.getVectorNumElements();
if (AS == AMDGPUAS::GLOBAL_ADDRESS ||
AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorStore(Op, DAG);
// v3 stores not supported on SI.
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
return SplitVectorStore(Op, DAG);
if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
VT, *Store->getMemOperand()))
return expandUnalignedStore(Store, DAG);
return SDValue();
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
case 4:
return scalarizeVectorStore(Store, DAG);
case 8:
if (NumElements > 2)
return SplitVectorStore(Op, DAG);
return SDValue();
case 16:
if (NumElements > 4 ||
(NumElements == 3 && !Subtarget->enableFlatScratch()))
return SplitVectorStore(Op, DAG);
return SDValue();
default:
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
bool Fast = false;
auto Flags = Store->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,
Store->getAlign(), Flags, &Fast) &&
Fast)
return SDValue();
if (VT.isVector())
return SplitVectorStore(Op, DAG);
return expandUnalignedStore(Store, DAG);
}
// Probably an invalid store. If so we'll end up emitting a selection error.
return SDValue();
}
SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
SDValue TrigVal;
// Propagate fast-math flags so that the multiply we introduce can be folded
// if Arg is already the result of a multiply by constant.
auto Flags = Op->getFlags();
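// The hardware SIN/COS units take their input in units of 2*pi (turns
// rather than radians), hence the scale by 1/(2*pi) below. Subtargets with a
// reduced input range additionally use FRACT to bring the scaled argument
// into [0, 1).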
SDValue OneOver2Pi = DAG.getConstantFP(0.5 * numbers::inv_pi, DL, VT);
if (Subtarget->hasTrigReducedRange()) {
SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal, Flags);
} else {
TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
}
switch (Op.getOpcode()) {
case ISD::FCOS:
return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal, Flags);
case ISD::FSIN:
return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal, Flags);
default:
llvm_unreachable("Wrong trig opcode");
}
}
SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
AtomicSDNode *AtomicNode = cast<AtomicSDNode>(Op);
assert(AtomicNode->isCompareAndSwap());
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
if (!AMDGPU::isFlatGlobalAddrSpace(AS))
return Op;
// Non-local address spaces require custom lowering for atomic compare and
// swap; the new and old values are packed together into a v2i32, or a v2i64
// for the _X2 variants.
SDLoc DL(Op);
SDValue ChainIn = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
SDValue Old = Op.getOperand(2);
SDValue New = Op.getOperand(3);
EVT VT = Op.getValueType();
MVT SimpleVT = VT.getSimpleVT();
MVT VecType = MVT::getVectorVT(SimpleVT, 2);
SDValue NewOld = DAG.getBuildVector(VecType, DL, {New, Old});
SDValue Ops[] = { ChainIn, Addr, NewOld };
return DAG.getMemIntrinsicNode(AMDGPUISD::ATOMIC_CMP_SWAP, DL, Op->getVTList(),
Ops, VT, AtomicNode->getMemOperand());
}
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
EVT ScalarVT = VT.getScalarType();
if (ScalarVT != MVT::f32 && ScalarVT != MVT::f16)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
// TODO: We could try to match extracting the higher bytes, which would be
// easier if i8 vectors weren't promoted to i32 vectors, particularly after
// types are legalized. v4i8 -> v4f32 is probably the only case to worry
// about in practice.
if (DCI.isAfterLegalizeDAG() && SrcVT == MVT::i32) {
if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) {
SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, MVT::f32, Src);
DCI.AddToWorklist(Cvt.getNode());
// For the f16 case, fold to a cast to f32 and then cast back to f16.
if (ScalarVT != MVT::f32) {
Cvt = DAG.getNode(ISD::FP_ROUND, DL, VT, Cvt,
DAG.getTargetConstant(0, DL, MVT::i32));
}
return Cvt;
}
}
return SDValue();
}
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
// This is a variant of
// (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
//
// The normal DAG combiner will do this, but only if the add has one use,
// since otherwise duplicating the add would increase the number of
// instructions.
//
// This prevents us from seeing a constant offset that can be folded into a
// memory instruction's addressing mode. If we know the resulting add offset of
// a pointer can be folded into an addressing offset, we can replace the pointer
// operand with the add of new constant offset. This eliminates one of the uses,
// and may allow the remaining use to also be simplified.
//
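// For example, when the add has multiple uses:
//   (shl (add x, 16), 2) -> (add (shl x, 2), 64)
// so the +64 can later be folded into a load/store addressing mode.
//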
SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
unsigned AddrSpace,
EVT MemVT,
DAGCombinerInfo &DCI) const {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// We only do this to handle cases where it's profitable when there are
// multiple uses of the add, so defer to the standard combine.
if ((N0.getOpcode() != ISD::ADD && N0.getOpcode() != ISD::OR) ||
N0->hasOneUse())
return SDValue();
const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
if (!CN1)
return SDValue();
const ConstantSDNode *CAdd = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!CAdd)
return SDValue();
// If the resulting offset is too large, we can't fold it into the addressing
// mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
Type *Ty = MemVT.getTypeForEVT(*DCI.DAG.getContext());
AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = Offset.getSExtValue();
if (!isLegalAddressingMode(DCI.DAG.getDataLayout(), AM, Ty, AddrSpace))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
EVT VT = N->getValueType(0);
SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
SDValue COffset = DAG.getConstant(Offset, SL, VT);
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
(N0.getOpcode() == ISD::OR ||
N0->getFlags().hasNoUnsignedWrap()));
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset, Flags);
}
/// MemSDNode::getBasePtr() does not work for intrinsics, whose base pointer
/// operand needs to be offset past the chain and intrinsic ID. Theoretically
/// we would also need to check the specific intrinsic, but they all place the
/// pointer operand first.
static unsigned getBasePtrIndex(const MemSDNode *N) {
switch (N->getOpcode()) {
case ISD::STORE:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
return 2;
default:
return 1;
}
}
SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
unsigned PtrIdx = getBasePtrIndex(N);
SDValue Ptr = N->getOperand(PtrIdx);
// TODO: We could also do this for multiplies.
if (Ptr.getOpcode() == ISD::SHL) {
SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), N->getAddressSpace(),
N->getMemoryVT(), DCI);
if (NewPtr) {
SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
NewOps[PtrIdx] = NewPtr;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
}
return SDValue();
}
static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val) {
return (Opc == ISD::AND && (Val == 0 || Val == 0xffffffff)) ||
(Opc == ISD::OR && (Val == 0xffffffff || Val == 0)) ||
(Opc == ISD::XOR && Val == 0);
}
// Break up a 64-bit bitwise operation on a constant into two 32-bit and/or/xor
// operations. This will typically happen anyway for a VALU 64-bit and. This
// exposes other 32-bit integer combine opportunities since most 64-bit
// operations are decomposed this way. TODO: We won't want this for SALU
// especially if it is an inline immediate.
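// For example, (and x:i64, 0x00000000ffffffff) splits into an AND of the low
// half with -1 (which folds to the low half itself) and an AND of the high
// half with 0 (which folds to 0), eliminating the 64-bit operation.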
SDValue SITargetLowering::splitBinaryBitConstantOp(
DAGCombinerInfo &DCI,
const SDLoc &SL,
unsigned Opc, SDValue LHS,
const ConstantSDNode *CRHS) const {
uint64_t Val = CRHS->getZExtValue();
uint32_t ValLo = Lo_32(Val);
uint32_t ValHi = Hi_32(Val);
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if ((bitOpWithConstantIsReducible(Opc, ValLo) ||
bitOpWithConstantIsReducible(Opc, ValHi)) ||
(CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) {
// If we need to materialize a 64-bit immediate, it will be split up later
// anyway. Avoid creating the harder to understand 64-bit immediate
// materialization.
return splitBinaryBitConstantOpImpl(DCI, SL, Opc, LHS, ValLo, ValHi);
}
return SDValue();
}
// Returns true if the argument is a boolean value which is not serialized
// into memory or an argument, and so does not require a v_cndmask_b32 to be
// deserialized.
static bool isBoolSGPR(SDValue V) {
if (V.getValueType() != MVT::i1)
return false;
switch (V.getOpcode()) {
default:
break;
case ISD::SETCC:
case AMDGPUISD::FP_CLASS:
return true;
case ISD::AND:
case ISD::OR:
case ISD::XOR:
return isBoolSGPR(V.getOperand(0)) && isBoolSGPR(V.getOperand(1));
}
return false;
}
// If a constant has all zeroes or all ones within each byte return it.
// Otherwise return 0.
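// e.g. 0x00ff00ff is returned unchanged, while 0x00ff1234 returns 0 because
// its low two bytes are only partially set.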
static uint32_t getConstantPermuteMask(uint32_t C) {
// 0xff for any zero byte in the mask
uint32_t ZeroByteMask = 0;
if (!(C & 0x000000ff)) ZeroByteMask |= 0x000000ff;
if (!(C & 0x0000ff00)) ZeroByteMask |= 0x0000ff00;
if (!(C & 0x00ff0000)) ZeroByteMask |= 0x00ff0000;
if (!(C & 0xff000000)) ZeroByteMask |= 0xff000000;
uint32_t NonZeroByteMask = ~ZeroByteMask; // 0xff for any non-zero byte
if ((NonZeroByteMask & C) != NonZeroByteMask)
return 0; // Partial bytes selected.
return C;
}
// Check if a node selects whole bytes from its operand 0 starting at a byte
// boundary while masking the rest. Returns the select mask as used by
// v_perm_b32, or ~0 if it did not succeed.
// Note byte select encoding:
// value 0-3 selects corresponding source byte;
// value 0xc selects zero;
// value 0xff selects 0xff.
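// e.g. (and x, 0x0000ffff) yields the mask 0x0c0c0100, and (srl x, 16)
// yields 0x0c0c0302.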
static uint32_t getPermuteMask(SelectionDAG &DAG, SDValue V) {
assert(V.getValueSizeInBits() == 32);
if (V.getNumOperands() != 2)
return ~0;
ConstantSDNode *N1 = dyn_cast<ConstantSDNode>(V.getOperand(1));
if (!N1)
return ~0;
uint32_t C = N1->getZExtValue();
switch (V.getOpcode()) {
default:
break;
case ISD::AND:
if (uint32_t ConstMask = getConstantPermuteMask(C)) {
return (0x03020100 & ConstMask) | (0x0c0c0c0c & ~ConstMask);
}
break;
case ISD::OR:
if (uint32_t ConstMask = getConstantPermuteMask(C)) {
return (0x03020100 & ~ConstMask) | ConstMask;
}
break;
case ISD::SHL:
if (C % 8)
return ~0;
return uint32_t((0x030201000c0c0c0cull << C) >> 32);
case ISD::SRL:
if (C % 8)
return ~0;
return uint32_t(0x0c0c0c0c03020100ull >> C);
}
return ~0;
}
SDValue SITargetLowering::performAndCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.isBeforeLegalize())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
if (VT == MVT::i64 && CRHS) {
if (SDValue Split
= splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS))
return Split;
}
if (CRHS && VT == MVT::i32) {
// and (srl x, c), mask => shl (bfe x, nb + c, mask >> nb), nb
// nb = number of trailing zeroes in mask
// It can be optimized out using SDWA for GFX8+ in the SDWA peephole pass,
// given that we are selecting 8 or 16 bit fields starting at byte boundary.
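// e.g. (and (srl x, 8), 0xff00) -> (shl (bfe x, 16, 8), 8).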
uint64_t Mask = CRHS->getZExtValue();
unsigned Bits = countPopulation(Mask);
if (getSubtarget()->hasSDWA() && LHS->getOpcode() == ISD::SRL &&
(Bits == 8 || Bits == 16) && isShiftedMask_64(Mask) && !(Mask & 1)) {
if (auto *CShift = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) {
unsigned Shift = CShift->getZExtValue();
unsigned NB = CRHS->getAPIntValue().countTrailingZeros();
unsigned Offset = NB + Shift;
if ((Offset & (Bits - 1)) == 0) { // Starts at a byte or word boundary.
SDLoc SL(N);
SDValue BFE = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
LHS->getOperand(0),
DAG.getConstant(Offset, SL, MVT::i32),
DAG.getConstant(Bits, SL, MVT::i32));
EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
SDValue Ext = DAG.getNode(ISD::AssertZext, SL, VT, BFE,
DAG.getValueType(NarrowVT));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(LHS), VT, Ext,
DAG.getConstant(NB, SDLoc(CRHS), MVT::i32));
return Shl;
}
}
}
// and (perm x, y, c1), c2 -> perm x, y, permute_mask(c1, c2)
if (LHS.hasOneUse() && LHS.getOpcode() == AMDGPUISD::PERM &&
isa<ConstantSDNode>(LHS.getOperand(2))) {
uint32_t Sel = getConstantPermuteMask(Mask);
if (!Sel)
return SDValue();
// Select 0xc for all zero bytes
Sel = (LHS.getConstantOperandVal(2) & Sel) | (~Sel & 0x0c0c0c0c);
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, LHS.getOperand(0),
LHS.getOperand(1), DAG.getConstant(Sel, DL, MVT::i32));
}
}
// (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
// fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
if (LHS.getOpcode() == ISD::SETCC && RHS.getOpcode() == ISD::SETCC) {
ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
SDValue X = LHS.getOperand(0);
SDValue Y = RHS.getOperand(0);
if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X)
return SDValue();
if (LCC == ISD::SETO) {
if (X != LHS.getOperand(1))
return SDValue();
if (RCC == ISD::SETUNE) {
const ConstantFPSDNode *C1 = dyn_cast<ConstantFPSDNode>(RHS.getOperand(1));
if (!C1 || !C1->isInfinity() || C1->isNegative())
return SDValue();
const uint32_t Mask = SIInstrFlags::N_NORMAL |
SIInstrFlags::N_SUBNORMAL |
SIInstrFlags::N_ZERO |
SIInstrFlags::P_ZERO |
SIInstrFlags::P_SUBNORMAL |
SIInstrFlags::P_NORMAL;
static_assert(((~(SIInstrFlags::S_NAN |
SIInstrFlags::Q_NAN |
SIInstrFlags::N_INFINITY |
SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask,
"mask not equal");
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
X, DAG.getConstant(Mask, DL, MVT::i32));
}
}
}
if (RHS.getOpcode() == ISD::SETCC && LHS.getOpcode() == AMDGPUISD::FP_CLASS)
std::swap(LHS, RHS);
if (LHS.getOpcode() == ISD::SETCC && RHS.getOpcode() == AMDGPUISD::FP_CLASS &&
RHS.hasOneUse()) {
ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
// and (fcmp seto), (fp_class x, mask) -> fp_class x, mask & ~(p_nan | n_nan)
// and (fcmp setuo), (fp_class x, mask) -> fp_class x, mask & (p_nan | n_nan)
const ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
if ((LCC == ISD::SETO || LCC == ISD::SETUO) && Mask &&
(RHS.getOperand(0) == LHS.getOperand(0) &&
LHS.getOperand(0) == LHS.getOperand(1))) {
const unsigned OrdMask = SIInstrFlags::S_NAN | SIInstrFlags::Q_NAN;
unsigned NewMask = LCC == ISD::SETO ?
Mask->getZExtValue() & ~OrdMask :
Mask->getZExtValue() & OrdMask;
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1, RHS.getOperand(0),
DAG.getConstant(NewMask, DL, MVT::i32));
}
}
if (VT == MVT::i32 &&
(RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) {
// and x, (sext cc from i1) => select cc, x, 0
if (RHS.getOpcode() != ISD::SIGN_EXTEND)
std::swap(LHS, RHS);
if (isBoolSGPR(RHS.getOperand(0)))
return DAG.getSelect(SDLoc(N), MVT::i32, RHS.getOperand(0),
LHS, DAG.getConstant(0, SDLoc(N), MVT::i32));
}
// and (op x, c1), (op y, c2) -> perm x, y, permute_mask(c1, c2)
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
N->isDivergent() && TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
uint32_t LHSMask = getPermuteMask(DAG, LHS);
uint32_t RHSMask = getPermuteMask(DAG, RHS);
if (LHSMask != ~0u && RHSMask != ~0u) {
// Canonicalize the expression in an attempt to have fewer unique masks
// and therefore fewer registers used to hold the masks.
if (LHSMask > RHSMask) {
std::swap(LHSMask, RHSMask);
std::swap(LHS, RHS);
}
// Mark with 0xc each byte that reads an actual lane from its source operand.
// Bytes producing zero already hold 0xc, bytes producing 0xff hold 0xff, and
// real lane selectors are in the 0-3 range.
uint32_t LHSUsedLanes = ~(LHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
uint32_t RHSUsedLanes = ~(RHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
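// For example, LHSMask == 0x0c0c0100 yields LHSUsedLanes == 0x00000c0c:
// only the two low bytes read from the source.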
// Check if we need to combine values from the two sources within a byte.
if (!(LHSUsedLanes & RHSUsedLanes) &&
// If we select the high and low words, keep it for SDWA.
// TODO: teach SDWA to work with v_perm_b32 and remove the check.
!(LHSUsedLanes == 0x0c0c0000 && RHSUsedLanes == 0x00000c0c)) {
// Each byte of each mask is either a lane selector (0-3) or has higher bits
// set: 0xff for a constant 0xff byte, 0x0c for a constant zero byte. If
// either mask has 0x0c in a byte, the result byte must be 0x0c. Otherwise
// the mask byte that is not 0xff wins. ANDing both masks therefore gives the
// correct result, except that bytes mixing 0x0c with a lane selector must be
// corrected back to exactly 0x0c.
uint32_t Mask = LHSMask & RHSMask;
for (unsigned I = 0; I < 32; I += 8) {
uint32_t ByteSel = 0xff << I;
if ((LHSMask & ByteSel) == 0x0c || (RHSMask & ByteSel) == 0x0c)
Mask &= (0x0c << I) & 0xffffffff;
}
// Add 4 to each active LHS lane. It will not affect any existing 0xff
// or 0x0c.
uint32_t Sel = Mask | (LHSUsedLanes & 0x04040404);
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32,
LHS.getOperand(0), RHS.getOperand(0),
DAG.getConstant(Sel, DL, MVT::i32));
}
}
}
return SDValue();
}
SDValue SITargetLowering::performOrCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
if (VT == MVT::i1) {
// or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
SDValue Src = LHS.getOperand(0);
if (Src != RHS.getOperand(0))
return SDValue();
const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
if (!CLHS || !CRHS)
return SDValue();
// Only 10 bits are used.
static const uint32_t MaxMask = 0x3ff;
uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
Src, DAG.getConstant(NewMask, DL, MVT::i32));
}
return SDValue();
}
// or (perm x, y, c1), c2 -> perm x, y, permute_mask(c1, c2)
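// For example, (or (perm x, y, 0x0c0c0100), 0xffff0000) becomes
// (perm x, y, 0xffff0100).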
if (isa<ConstantSDNode>(RHS) && LHS.hasOneUse() &&
LHS.getOpcode() == AMDGPUISD::PERM &&
isa<ConstantSDNode>(LHS.getOperand(2))) {
uint32_t Sel = getConstantPermuteMask(N->getConstantOperandVal(1));
if (!Sel)
return SDValue();
Sel |= LHS.getConstantOperandVal(2);
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, LHS.getOperand(0),
LHS.getOperand(1), DAG.getConstant(Sel, DL, MVT::i32));
}
// or (op x, c1), (op y, c2) -> perm x, y, permute_mask(c1, c2)
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
N->isDivergent() && TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
uint32_t LHSMask = getPermuteMask(DAG, LHS);
uint32_t RHSMask = getPermuteMask(DAG, RHS);
if (LHSMask != ~0u && RHSMask != ~0u) {
// Canonicalize the expression in an attempt to have fewer unique masks
// and therefore fewer registers used to hold the masks.
if (LHSMask > RHSMask) {
std::swap(LHSMask, RHSMask);
std::swap(LHS, RHS);
}
// Mark with 0xc each byte that reads an actual lane from its source operand.
// Bytes producing zero already hold 0xc, bytes producing 0xff hold 0xff, and
// real lane selectors are in the 0-3 range.
uint32_t LHSUsedLanes = ~(LHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
uint32_t RHSUsedLanes = ~(RHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
// Check if we need to combine values from the two sources within a byte.
if (!(LHSUsedLanes & RHSUsedLanes) &&
// If we select the high and low words, keep it for SDWA.
// TODO: teach SDWA to work with v_perm_b32 and remove the check.
!(LHSUsedLanes == 0x0c0c0000 && RHSUsedLanes == 0x00000c0c)) {
// Kill zero bytes selected by the other mask. The zero selector value is 0xc.
LHSMask &= ~RHSUsedLanes;
RHSMask &= ~LHSUsedLanes;
// Add 4 to each active LHS lane
LHSMask |= LHSUsedLanes & 0x04040404;
// Combine masks
uint32_t Sel = LHSMask | RHSMask;
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32,
LHS.getOperand(0), RHS.getOperand(0),
DAG.getConstant(Sel, DL, MVT::i32));
}
}
}
if (VT != MVT::i64 || DCI.isBeforeLegalizeOps())
return SDValue();
// TODO: This could be a generic combine with a predicate for extracting the
// high half of an integer being free.
// (or i64:x, (zero_extend i32:y)) ->
// i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x)))
if (LHS.getOpcode() == ISD::ZERO_EXTEND &&
RHS.getOpcode() != ISD::ZERO_EXTEND)
std::swap(LHS, RHS);
if (RHS.getOpcode() == ISD::ZERO_EXTEND) {
SDValue ExtSrc = RHS.getOperand(0);
EVT SrcVT = ExtSrc.getValueType();
if (SrcVT == MVT::i32) {
SDLoc SL(N);
SDValue LowLHS, HiBits;
std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG);
SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
DCI.AddToWorklist(LowOr.getNode());
DCI.AddToWorklist(HiBits.getNode());
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
LowOr, HiBits);
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
}
}
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (CRHS) {
if (SDValue Split
= splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::OR,
N->getOperand(0), CRHS))
return Split;
}
return SDValue();
}
SDValue SITargetLowering::performXorCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (SDValue RV = reassociateScalarOps(N, DCI.DAG))
return RV;
EVT VT = N->getValueType(0);
if (VT != MVT::i64)
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
if (CRHS) {
if (SDValue Split
= splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS))
return Split;
}
return SDValue();
}
SDValue SITargetLowering::performZeroExtendCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (!Subtarget->has16BitInsts() ||
DCI.getDAGCombineLevel() < AfterLegalizeDAG)
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::i32)
return SDValue();
SDValue Src = N->getOperand(0);
if (Src.getValueType() != MVT::i16)
return SDValue();
return SDValue();
}
SDValue SITargetLowering::performSignExtendInRegCombine(SDNode *N,
DAGCombinerInfo &DCI)
const {
SDValue Src = N->getOperand(0);
auto *VTSign = cast<VTSDNode>(N->getOperand(1));
if (((Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_UBYTE &&
VTSign->getVT() == MVT::i8) ||
(Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_USHORT &&
VTSign->getVT() == MVT::i16)) &&
Src.hasOneUse()) {
auto *M = cast<MemSDNode>(Src);
SDValue Ops[] = {
Src.getOperand(0), // Chain
Src.getOperand(1), // rsrc
Src.getOperand(2), // vindex
Src.getOperand(3), // voffset
Src.getOperand(4), // soffset
Src.getOperand(5), // offset
Src.getOperand(6),
Src.getOperand(7)
};
// replace with BUFFER_LOAD_BYTE/SHORT
SDVTList ResList = DCI.DAG.getVTList(MVT::i32,
Src.getOperand(0).getValueType());
unsigned Opc = (Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_UBYTE) ?
AMDGPUISD::BUFFER_LOAD_BYTE : AMDGPUISD::BUFFER_LOAD_SHORT;
SDValue BufferLoadSignExt = DCI.DAG.getMemIntrinsicNode(Opc, SDLoc(N),
ResList,
Ops, M->getMemoryVT(),
M->getMemOperand());
return DCI.DAG.getMergeValues({BufferLoadSignExt,
BufferLoadSignExt.getValue(1)}, SDLoc(N));
}
return SDValue();
}
SDValue SITargetLowering::performClassCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue Mask = N->getOperand(1);
// fp_class x, 0 -> false
if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
if (CMask->isZero())
return DAG.getConstant(0, SDLoc(N), MVT::i1);
}
if (N->getOperand(0).isUndef())
return DAG.getUNDEF(MVT::i1);
return SDValue();
}
SDValue SITargetLowering::performRcpCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
if (N0.isUndef())
return N0;
if (VT == MVT::f32 && (N0.getOpcode() == ISD::UINT_TO_FP ||
N0.getOpcode() == ISD::SINT_TO_FP)) {
return DCI.DAG.getNode(AMDGPUISD::RCP_IFLAG, SDLoc(N), VT, N0,
N->getFlags());
}
if ((VT == MVT::f32 || VT == MVT::f16) && N0.getOpcode() == ISD::FSQRT) {
return DCI.DAG.getNode(AMDGPUISD::RSQ, SDLoc(N), VT,
N0.getOperand(0), N->getFlags());
}
return AMDGPUTargetLowering::performRcpCombine(N, DCI);
}
bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
unsigned MaxDepth) const {
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::FCANONICALIZE)
return true;
if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
auto F = CFP->getValueAPF();
if (F.isNaN() && F.isSignaling())
return false;
return !F.isDenormal() || denormalsEnabledForType(DAG, Op.getValueType());
}
// If source is a result of another standard FP operation it is already in
// canonical form.
if (MaxDepth == 0)
return false;
switch (Opcode) {
// These will flush denorms if required.
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FCEIL:
case ISD::FFLOOR:
case ISD::FMA:
case ISD::FMAD:
case ISD::FSQRT:
case ISD::FDIV:
case ISD::FREM:
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
case AMDGPUISD::FMUL_LEGACY:
case AMDGPUISD::FMAD_FTZ:
case AMDGPUISD::RCP:
case AMDGPUISD::RSQ:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::DIV_SCALE:
case AMDGPUISD::DIV_FMAS:
case AMDGPUISD::DIV_FIXUP:
case AMDGPUISD::FRACT:
case AMDGPUISD::LDEXP:
case AMDGPUISD::CVT_PKRTZ_F16_F32:
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
case AMDGPUISD::CVT_F32_UBYTE2:
case AMDGPUISD::CVT_F32_UBYTE3:
return true;
// These can/will be lowered or combined as bit operations, so their inputs
// need to be checked recursively.
case ISD::FNEG:
case ISD::FABS:
case ISD::FCOPYSIGN:
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
case ISD::FSIN:
case ISD::FCOS:
case ISD::FSINCOS:
return Op.getValueType().getScalarType() != MVT::f16;
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
case AMDGPUISD::CLAMP:
case AMDGPUISD::FMED3:
case AMDGPUISD::FMAX3:
case AMDGPUISD::FMIN3: {
// FIXME: Shouldn't treat the generic operations differently based on these.
// However, we aren't really required to flush the result from
// minnum/maxnum.
// snans will be quieted, so we only need to worry about denormals.
if (Subtarget->supportsMinMaxDenormModes() ||
denormalsEnabledForType(DAG, Op.getValueType()))
return true;
// Flushing may be required.
// On pre-GFX9 targets V_MIN_F32 and others do not flush denorms, so for
// such targets we need to check the inputs recursively.
// FIXME: Does this apply with clamp? It's implemented with max.
for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
if (!isCanonicalized(DAG, Op.getOperand(I), MaxDepth - 1))
return false;
}
return true;
}
case ISD::SELECT: {
return isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1) &&
isCanonicalized(DAG, Op.getOperand(2), MaxDepth - 1);
}
case ISD::BUILD_VECTOR: {
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
SDValue SrcOp = Op.getOperand(i);
if (!isCanonicalized(DAG, SrcOp, MaxDepth - 1))
return false;
}
return true;
}
case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR: {
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
}
case ISD::INSERT_VECTOR_ELT: {
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1) &&
isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1);
}
case ISD::UNDEF:
// Could be anything.
return false;
case ISD::BITCAST:
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
case ISD::TRUNCATE: {
// Hack around the mess we make when legalizing extract_vector_elt.
if (Op.getValueType() == MVT::i16) {
SDValue TruncSrc = Op.getOperand(0);
if (TruncSrc.getValueType() == MVT::i32 &&
TruncSrc.getOpcode() == ISD::BITCAST &&
TruncSrc.getOperand(0).getValueType() == MVT::v2f16) {
return isCanonicalized(DAG, TruncSrc.getOperand(0), MaxDepth - 1);
}
}
return false;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID
= cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
// TODO: Handle more intrinsics
switch (IntrinsicID) {
case Intrinsic::amdgcn_cvt_pkrtz:
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_fdot2:
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rsq:
case Intrinsic::amdgcn_rsq_clamp:
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_trig_preop:
return true;
default:
break;
}
LLVM_FALLTHROUGH;
}
default:
return denormalsEnabledForType(DAG, Op.getValueType()) &&
DAG.isKnownNeverSNaN(Op);
}
llvm_unreachable("invalid operation");
}
bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
unsigned MaxDepth) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineInstr *MI = MRI.getVRegDef(Reg);
unsigned Opcode = MI->getOpcode();
if (Opcode == AMDGPU::G_FCANONICALIZE)
return true;
Optional<FPValueAndVReg> FCR;
// Constant splat (can be padded with undef) or scalar constant.
if (mi_match(Reg, MRI, MIPatternMatch::m_GFCstOrSplat(FCR))) {
if (FCR->Value.isSignaling())
return false;
return !FCR->Value.isDenormal() ||
denormalsEnabledForType(MRI.getType(FCR->VReg), MF);
}
if (MaxDepth == 0)
return false;
switch (Opcode) {
case AMDGPU::G_FMINNUM_IEEE:
case AMDGPU::G_FMAXNUM_IEEE: {
if (Subtarget->supportsMinMaxDenormModes() ||
denormalsEnabledForType(MRI.getType(Reg), MF))
return true;
for (const MachineOperand &MO : llvm::drop_begin(MI->operands()))
if (!isCanonicalized(MO.getReg(), MF, MaxDepth - 1))
return false;
return true;
}
default:
return denormalsEnabledForType(MRI.getType(Reg), MF) &&
isKnownNeverSNaN(Reg, MRI);
}
llvm_unreachable("invalid operation");
}
// Constant fold canonicalize.
SDValue SITargetLowering::getCanonicalConstantFP(
SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
// Flush denormals to 0 if not enabled.
if (C.isDenormal() && !denormalsEnabledForType(DAG, VT))
return DAG.getConstantFP(0.0, SL, VT);
if (C.isNaN()) {
APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
if (C.isSignaling()) {
// Quiet a signaling NaN.
// FIXME: Is this supposed to preserve payload bits?
return DAG.getConstantFP(CanonicalQNaN, SL, VT);
}
// Make sure it is the canonical NaN bitpattern.
//
// TODO: Can we use -1 as the canonical NaN value since it's an inline
// immediate?
if (C.bitcastToAPInt() != CanonicalQNaN.bitcastToAPInt())
return DAG.getConstantFP(CanonicalQNaN, SL, VT);
}
// Already canonical.
return DAG.getConstantFP(C, SL, VT);
}
static bool vectorEltWillFoldAway(SDValue Op) {
return Op.isUndef() || isa<ConstantFPSDNode>(Op);
}
SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fcanonicalize undef -> qnan
if (N0.isUndef()) {
APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
return DAG.getConstantFP(QNaN, SDLoc(N), VT);
}
if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0))
return getCanonicalConstantFP(DAG, SDLoc(N), VT, CFP->getValueAPF());
// fcanonicalize (build_vector x, k) -> build_vector (fcanonicalize x),
// (fcanonicalize k)
//
// fcanonicalize (build_vector x, undef) -> build_vector (fcanonicalize x), 0
// TODO: This could be better with wider vectors that will be split to v2f16,
// and to consider uses since there aren't that many packed operations.
if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16 &&
isTypeLegal(MVT::v2f16)) {
SDLoc SL(N);
SDValue NewElts[2];
SDValue Lo = N0.getOperand(0);
SDValue Hi = N0.getOperand(1);
EVT EltVT = Lo.getValueType();
if (vectorEltWillFoldAway(Lo) || vectorEltWillFoldAway(Hi)) {
for (unsigned I = 0; I != 2; ++I) {
SDValue Op = N0.getOperand(I);
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
NewElts[I] = getCanonicalConstantFP(DAG, SL, EltVT,
CFP->getValueAPF());
} else if (Op.isUndef()) {
// Handled below based on what the other operand is.
NewElts[I] = Op;
} else {
NewElts[I] = DAG.getNode(ISD::FCANONICALIZE, SL, EltVT, Op);
}
}
// If one half is undef, and one is constant, prefer a splat vector rather
// than the normal qNaN. If it's a register, prefer 0.0 since that's
// cheaper to use and may be free with a packed operation.
if (NewElts[0].isUndef()) {
NewElts[0] = isa<ConstantFPSDNode>(NewElts[1]) ?
NewElts[1] : DAG.getConstantFP(0.0f, SL, EltVT);
}
if (NewElts[1].isUndef()) {
NewElts[1] = isa<ConstantFPSDNode>(NewElts[0]) ?
NewElts[0] : DAG.getConstantFP(0.0f, SL, EltVT);
}
return DAG.getBuildVector(VT, SL, NewElts);
}
}
unsigned SrcOpc = N0.getOpcode();
// If it's free to do so, push canonicalizes further up the source, which may
// find a canonical source.
//
// TODO: More opcodes. Note this is unsafe for the _ieee minnum/maxnum for
// sNaNs.
if (SrcOpc == ISD::FMINNUM || SrcOpc == ISD::FMAXNUM) {
auto *CRHS = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
if (CRHS && N0.hasOneUse()) {
SDLoc SL(N);
SDValue Canon0 = DAG.getNode(ISD::FCANONICALIZE, SL, VT,
N0.getOperand(0));
SDValue Canon1 = getCanonicalConstantFP(DAG, SL, VT, CRHS->getValueAPF());
DCI.AddToWorklist(Canon0.getNode());
return DAG.getNode(N0.getOpcode(), SL, VT, Canon0, Canon1);
}
}
return isCanonicalized(DAG, N0) ? N0 : SDValue();
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
switch (Opc) {
case ISD::FMAXNUM:
case ISD::FMAXNUM_IEEE:
return AMDGPUISD::FMAX3;
case ISD::SMAX:
return AMDGPUISD::SMAX3;
case ISD::UMAX:
return AMDGPUISD::UMAX3;
case ISD::FMINNUM:
case ISD::FMINNUM_IEEE:
return AMDGPUISD::FMIN3;
case ISD::SMIN:
return AMDGPUISD::SMIN3;
case ISD::UMIN:
return AMDGPUISD::UMIN3;
default:
llvm_unreachable("Not a min/max opcode");
}
}
SDValue SITargetLowering::performIntMed3ImmCombine(
SelectionDAG &DAG, const SDLoc &SL,
SDValue Op0, SDValue Op1, bool Signed) const {
ConstantSDNode *K1 = dyn_cast<ConstantSDNode>(Op1);
if (!K1)
return SDValue();
ConstantSDNode *K0 = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
if (!K0)
return SDValue();
if (Signed) {
if (K0->getAPIntValue().sge(K1->getAPIntValue()))
return SDValue();
} else {
if (K0->getAPIntValue().uge(K1->getAPIntValue()))
return SDValue();
}
EVT VT = K0->getValueType(0);
unsigned Med3Opc = Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3;
if (VT == MVT::i32 || (VT == MVT::i16 && Subtarget->hasMed3_16())) {
return DAG.getNode(Med3Opc, SL, VT,
Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
}
// If there isn't a 16-bit med3 operation, convert to 32-bit.
if (VT == MVT::i16) {
MVT NVT = MVT::i32;
unsigned ExtOp = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
SDValue Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
SDValue Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);
SDValue Med3 = DAG.getNode(Med3Opc, SL, NVT, Tmp1, Tmp2, Tmp3);
return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3);
}
return SDValue();
}
static ConstantFPSDNode *getSplatConstantFP(SDValue Op) {
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return C;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op)) {
if (ConstantFPSDNode *C = BV->getConstantFPSplatNode())
return C;
}
return nullptr;
}
SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Op0,
SDValue Op1) const {
ConstantFPSDNode *K1 = getSplatConstantFP(Op1);
if (!K1)
return SDValue();
ConstantFPSDNode *K0 = getSplatConstantFP(Op0.getOperand(1));
if (!K0)
return SDValue();
// Ordered >= (although NaN inputs should have folded away by now).
if (K0->getValueAPF() > K1->getValueAPF())
return SDValue();
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
// TODO: Check IEEE bit enabled?
EVT VT = Op0.getValueType();
if (Info->getMode().DX10Clamp) {
// If dx10_clamp is enabled, NaNs clamp to 0.0. This is the same as the
// hardware fmed3 behavior converting to a min.
// FIXME: Should this be allowing -0.0?
if (K1->isExactlyValue(1.0) && K0->isExactlyValue(0.0))
return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Op0.getOperand(0));
}
// med3 for f16 is only available on gfx9+, and not available for v2f16.
if (VT == MVT::f32 || (VT == MVT::f16 && Subtarget->hasMed3_16())) {
// This isn't safe with signaling NaNs because in IEEE mode, min/max on a
// signaling NaN gives a quiet NaN. The quiet NaN input to the min would
// then give the other result, which is different from med3 with a NaN
// input.
SDValue Var = Op0.getOperand(0);
if (!DAG.isKnownNeverSNaN(Var))
return SDValue();
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if ((!K0->hasOneUse() ||
TII->isInlineConstant(K0->getValueAPF().bitcastToAPInt())) &&
(!K1->hasOneUse() ||
TII->isInlineConstant(K1->getValueAPF().bitcastToAPInt()))) {
return DAG.getNode(AMDGPUISD::FMED3, SL, K0->getValueType(0),
Var, SDValue(K0, 0), SDValue(K1, 0));
}
}
return SDValue();
}
SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
unsigned Opc = N->getOpcode();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// Only do this if the inner op has one use since this will just increase
// register pressure for no benefit.
if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
!VT.isVector() &&
(VT == MVT::i32 || VT == MVT::f32 ||
((VT == MVT::f16 || VT == MVT::i16) && Subtarget->hasMin3Max3_16()))) {
// max(max(a, b), c) -> max3(a, b, c)
// min(min(a, b), c) -> min3(a, b, c)
if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
SDLoc DL(N);
return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
DL,
N->getValueType(0),
Op0.getOperand(0),
Op0.getOperand(1),
Op1);
}
// Try commuted.
// max(a, max(b, c)) -> max3(a, b, c)
// min(a, min(b, c)) -> min3(a, b, c)
if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
SDLoc DL(N);
return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
DL,
N->getValueType(0),
Op0,
Op1.getOperand(0),
Op1.getOperand(1));
}
}
// min(max(x, K0), K1), K0 < K1 -> med3(x, K0, K1)
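// For example, (smin (smax x, 5), 10) becomes (smed3 x, 5, 10), clamping x
// to [5, 10] in a single instruction.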
if (Opc == ISD::SMIN && Op0.getOpcode() == ISD::SMAX && Op0.hasOneUse()) {
if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, true))
return Med3;
}
if (Opc == ISD::UMIN && Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, false))
return Med3;
}
// fminnum(fmaxnum(x, K0), K1), K0 < K1 && !is_snan(x) -> fmed3(x, K0, K1)
if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) ||
(Opc == ISD::FMINNUM_IEEE && Op0.getOpcode() == ISD::FMAXNUM_IEEE) ||
(Opc == AMDGPUISD::FMIN_LEGACY &&
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
(VT == MVT::f32 || VT == MVT::f64 ||
(VT == MVT::f16 && Subtarget->has16BitInsts()) ||
(VT == MVT::v2f16 && Subtarget->hasVOP3PInsts())) &&
Op0.hasOneUse()) {
if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
return Res;
}
return SDValue();
}
static bool isClampZeroToOne(SDValue A, SDValue B) {
if (ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A)) {
if (ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B)) {
// FIXME: Should this be allowing -0.0?
return (CA->isExactlyValue(0.0) && CB->isExactlyValue(1.0)) ||
(CA->isExactlyValue(1.0) && CB->isExactlyValue(0.0));
}
}
return false;
}
// FIXME: Should only worry about snans for version with chain.
SDValue SITargetLowering::performFMed3Combine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
// v_med3_f32 and v_max_f32 behave identically wrt denorms, exceptions and
// NaNs. With a NaN input, the order of the operands may change the result.
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
SDValue Src0 = N->getOperand(0);
SDValue Src1 = N->getOperand(1);
SDValue Src2 = N->getOperand(2);
if (isClampZeroToOne(Src0, Src1)) {
// const_a, const_b, x -> clamp is safe in all cases including signaling
// nans.
// FIXME: Should this be allowing -0.0?
return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src2);
}
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
// FIXME: dx10_clamp behavior assumed in instcombine. Should we really bother
// handling no dx10-clamp?
if (Info->getMode().DX10Clamp) {
// If NaN is clamped to 0, we are free to reorder the inputs.
if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
std::swap(Src0, Src1);
if (isa<ConstantFPSDNode>(Src1) && !isa<ConstantFPSDNode>(Src2))
std::swap(Src1, Src2);
if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
std::swap(Src0, Src1);
if (isClampZeroToOne(Src1, Src2))
return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src0);
}
return SDValue();
}
SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SDValue Src0 = N->getOperand(0);
SDValue Src1 = N->getOperand(1);
if (Src0.isUndef() && Src1.isUndef())
return DCI.DAG.getUNDEF(N->getValueType(0));
return SDValue();
}
// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be
// expanded into a set of cmp/select instructions.
bool SITargetLowering::shouldExpandVectorDynExt(unsigned EltSize,
unsigned NumElem,
bool IsDivergentIdx,
const GCNSubtarget *Subtarget) {
if (UseDivergentRegisterIndexing)
return false;
unsigned VecSize = EltSize * NumElem;
// Sub-dword vectors with a total size of 2 dwords or less have a better
// implementation.
if (VecSize <= 64 && EltSize < 32)
return false;
// Always expand the remaining sub-dword cases, otherwise they will be
// lowered via memory.
if (EltSize < 32)
return true;
// Always do this if var-idx is divergent, otherwise it will become a loop.
if (IsDivergentIdx)
return true;
// Large vectors would yield too many compares and v_cndmask_b32 instructions.
unsigned NumInsts = NumElem /* Number of compares */ +
((EltSize + 31) / 32) * NumElem /* Number of cndmasks */;
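// For example, a v8i64 vector costs 8 compares plus 2 * 8 cndmasks,
// i.e. NumInsts == 24, which exceeds both limits below, so it is not
// expanded.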
// On some architectures (GFX9) movrel is not available and it's better
// to expand.
if (!Subtarget->hasMovrel())
return NumInsts <= 16;
// If movrel is available, use it instead of expanding for vectors of 8
// elements.
return NumInsts <= 15;
}
bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const {
SDValue Idx = N->getOperand(N->getNumOperands() - 1);
if (isa<ConstantSDNode>(Idx))
return false;
SDValue Vec = N->getOperand(0);
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned EltSize = EltVT.getSizeInBits();
unsigned NumElem = VecVT.getVectorNumElements();
return SITargetLowering::shouldExpandVectorDynExt(
EltSize, NumElem, Idx->isDivergent(), getSubtarget());
}
SDValue SITargetLowering::performExtractVectorEltCombine(
SDNode *N, DAGCombinerInfo &DCI) const {
SDValue Vec = N->getOperand(0);
SelectionDAG &DAG = DCI.DAG;
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
if ((Vec.getOpcode() == ISD::FNEG ||
Vec.getOpcode() == ISD::FABS) && allUsesHaveSourceMods(N)) {
SDLoc SL(N);
EVT EltVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec.getOperand(0), Idx);
return DAG.getNode(Vec.getOpcode(), SL, EltVT, Elt);
}
// ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
// =>
// Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
// Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx)
// ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt
if (Vec.hasOneUse() && DCI.isBeforeLegalize()) {
SDLoc SL(N);
EVT EltVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
unsigned Opc = Vec.getOpcode();
switch(Opc) {
default:
break;
// TODO: Support other binary operations.
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::ADD:
case ISD::UMIN:
case ISD::UMAX:
case ISD::SMIN:
case ISD::SMAX:
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMAXNUM_IEEE:
case ISD::FMINNUM_IEEE: {
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec.getOperand(0), Idx);
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec.getOperand(1), Idx);
DCI.AddToWorklist(Elt0.getNode());
DCI.AddToWorklist(Elt1.getNode());
return DAG.getNode(Opc, SL, EltVT, Elt0, Elt1, Vec->getFlags());
}
}
}
unsigned VecSize = VecVT.getSizeInBits();
unsigned EltSize = EltVT.getSizeInBits();
// EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
if (shouldExpandVectorDynExt(N)) {
SDLoc SL(N);
SDValue Idx = N->getOperand(1);
SDValue V;
for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
SDValue IC = DAG.getVectorIdxConstant(I, SL);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
if (I == 0)
V = Elt;
else
V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
}
return V;
}
if (!DCI.isBeforeLegalize())
return SDValue();
// Try to turn sub-dword accesses of vectors into accesses of the same 32-bit
// elements. This exposes more load reduction opportunities by replacing
// multiple small extract_vector_elements with a single 32-bit extract.
auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (isa<MemSDNode>(Vec) &&
EltSize <= 16 &&
EltVT.isByteSized() &&
VecSize > 32 &&
VecSize % 32 == 0 &&
Idx) {
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
unsigned BitIndex = Idx->getZExtValue() * EltSize;
unsigned EltIdx = BitIndex / 32;
unsigned LeftoverBitIdx = BitIndex % 32;
SDLoc SL(N);
SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
DCI.AddToWorklist(Cast.getNode());
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
DAG.getConstant(EltIdx, SL, MVT::i32));
DCI.AddToWorklist(Elt.getNode());
SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
DCI.AddToWorklist(Srl.getNode());
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, EltVT.changeTypeToInteger(), Srl);
DCI.AddToWorklist(Trunc.getNode());
return DAG.getNode(ISD::BITCAST, SL, EltVT, Trunc);
}
return SDValue();
}
SDValue
SITargetLowering::performInsertVectorEltCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(2);
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
// INSERT_VECTOR_ELT (<n x e>, var-idx)
// => BUILD_VECTOR n x select (e, const-idx)
if (!shouldExpandVectorDynExt(N))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
SDValue Ins = N->getOperand(1);
EVT IdxVT = Idx.getValueType();
SmallVector<SDValue, 16> Ops;
for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
SDValue IC = DAG.getConstant(I, SL, IdxVT);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
SDValue V = DAG.getSelectCC(SL, Idx, IC, Ins, Elt, ISD::SETEQ);
Ops.push_back(V);
}
return DAG.getBuildVector(VecVT, SL, Ops);
}
unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0,
const SDNode *N1) const {
EVT VT = N0->getValueType(0);
// Only do this if we are not trying to support denormals. v_mad_f32 does not
// support denormals ever.
if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) ||
(VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) &&
getSubtarget()->hasMadF16())) &&
isOperationLegal(ISD::FMAD, VT))
return ISD::FMAD;
const TargetOptions &Options = DAG.getTarget().Options;
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
(N0->getFlags().hasAllowContract() &&
N1->getFlags().hasAllowContract())) &&
isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
return ISD::FMA;
}
return 0;
}
// For a reassociatable opcode perform:
// op x, (op y, z) -> op (op x, z), y, if x and z are uniform
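// For example, with x and z uniform and y divergent,
// (add x, (add y, z)) -> (add (add x, z), y) keeps one addition on the
// scalar unit.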
SDValue SITargetLowering::reassociateScalarOps(SDNode *N,
SelectionDAG &DAG) const {
EVT VT = N->getValueType(0);
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
if (DAG.isBaseWithConstantOffset(SDValue(N, 0)))
return SDValue();
unsigned Opc = N->getOpcode();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (!(Op0->isDivergent() ^ Op1->isDivergent()))
return SDValue();
if (Op0->isDivergent())
std::swap(Op0, Op1);
if (Op1.getOpcode() != Opc || !Op1.hasOneUse())
return SDValue();
SDValue Op2 = Op1.getOperand(1);
Op1 = Op1.getOperand(0);
if (!(Op1->isDivergent() ^ Op2->isDivergent()))
return SDValue();
if (Op1->isDivergent())
std::swap(Op1, Op2);
SDLoc SL(N);
SDValue Add1 = DAG.getNode(Opc, SL, VT, Op0, Op1);
return DAG.getNode(Opc, SL, VT, Add1, Op2);
}
static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL,
EVT VT,
SDValue N0, SDValue N1, SDValue N2,
bool Signed) {
unsigned MadOpc = Signed ? AMDGPUISD::MAD_I64_I32 : AMDGPUISD::MAD_U64_U32;
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i1);
SDValue Mad = DAG.getNode(MadOpc, SL, VTs, N0, N1, N2);
return DAG.getNode(ISD::TRUNCATE, SL, VT, Mad);
}
// Fold (add (mul x, y), z) --> (mad_[iu]64_[iu]32 x, y, z) plus high
// multiplies, if any.
//
// Full 64-bit multiplies that feed into an addition are lowered here instead
// of using the generic expansion. The generic expansion ends up with
// a tree of ADD nodes that prevents us from using the "add" part of the
// MAD instruction. The expansion produced here results in a chain of ADDs
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::ADD);
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (VT.isVector())
return SDValue();
// S_MUL_HI_[IU]32 was added in gfx9, which allows us to keep the overall
// result in scalar registers for uniform values.
if (!N->isDivergent() && Subtarget->hasSMulHi())
return SDValue();
unsigned NumBits = VT.getScalarSizeInBits();
if (NumBits <= 32 || NumBits > 64)
return SDValue();
if (LHS.getOpcode() != ISD::MUL) {
assert(RHS.getOpcode() == ISD::MUL);
std::swap(LHS, RHS);
}
// Avoid the fold if it would unduly increase the number of multiplies due to
// multiple uses, except on hardware with full-rate multiply-add (which is
// part of full-rate 64-bit ops).
if (!Subtarget->hasFullRate64Ops()) {
unsigned NumUsers = 0;
for (SDNode *Use : LHS->uses()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
if (Use->getOpcode() != ISD::ADD)
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
// MUL + 3xADD + 3xADDC over 3xMAD.
++NumUsers;
if (NumUsers >= 3)
return SDValue();
}
}
SDValue MulLHS = LHS.getOperand(0);
SDValue MulRHS = LHS.getOperand(1);
SDValue AddRHS = RHS;
// Always check whether operands are small unsigned values, since that
// knowledge is useful in more cases. Check for small signed values only if
// doing so can unlock a shorter code sequence.
bool MulLHSUnsigned32 = numBitsUnsigned(MulLHS, DAG) <= 32;
bool MulRHSUnsigned32 = numBitsUnsigned(MulRHS, DAG) <= 32;
bool MulSignedLo = false;
if (!MulLHSUnsigned32 || !MulRHSUnsigned32) {
MulSignedLo = numBitsSigned(MulLHS, DAG) <= 32 &&
numBitsSigned(MulRHS, DAG) <= 32;
}
// The operands and final result all have the same number of bits. If
// operands need to be extended, they can be extended with garbage. The
// resulting garbage in the high bits of the mad_[iu]64_[iu]32 result is
// truncated away in the end.
if (VT != MVT::i64) {
MulLHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, MulLHS);
MulRHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, MulRHS);
AddRHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, AddRHS);
}
// The basic code generated is conceptually straightforward. Pseudo code:
//
// accum = mad_64_32 lhs.lo, rhs.lo, accum
// accum.hi = add (mul lhs.hi, rhs.lo), accum.hi
// accum.hi = add (mul lhs.lo, rhs.hi), accum.hi
//
// The second and third lines are optional, depending on whether the factors
// are {sign,zero}-extended or not.
//
// The actual DAG is noisier than the pseudo code, but only due to
// instructions that disassemble values into low and high parts, and
// assemble the final result.
SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
SDValue One = DAG.getConstant(1, SL, MVT::i32);
auto MulLHSLo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, MulLHS);
auto MulRHSLo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, MulRHS);
SDValue Accum =
getMad64_32(DAG, SL, MVT::i64, MulLHSLo, MulRHSLo, AddRHS, MulSignedLo);
if (!MulSignedLo && (!MulLHSUnsigned32 || !MulRHSUnsigned32)) {
auto AccumLo = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Accum, Zero);
auto AccumHi = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Accum, One);
if (!MulLHSUnsigned32) {
auto MulLHSHi =
DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, MulLHS, One);
SDValue MulHi = DAG.getNode(ISD::MUL, SL, MVT::i32, MulLHSHi, MulRHSLo);
AccumHi = DAG.getNode(ISD::ADD, SL, MVT::i32, MulHi, AccumHi);
}
if (!MulRHSUnsigned32) {
auto MulRHSHi =
DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, MulRHS, One);
SDValue MulHi = DAG.getNode(ISD::MUL, SL, MVT::i32, MulLHSLo, MulRHSHi);
AccumHi = DAG.getNode(ISD::ADD, SL, MVT::i32, MulHi, AccumHi);
}
Accum = DAG.getBuildVector(MVT::v2i32, SL, {AccumLo, AccumHi});
Accum = DAG.getBitcast(MVT::i64, Accum);
}
if (VT != MVT::i64)
Accum = DAG.getNode(ISD::TRUNCATE, SL, VT, Accum);
return Accum;
}
SDValue SITargetLowering::performAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL) {
if (Subtarget->hasMad64_32()) {
if (SDValue Folded = tryFoldToMad64_32(N, DCI))
return Folded;
}
return SDValue();
}
if (SDValue V = reassociateScalarOps(N, DAG)) {
return V;
}
if (VT != MVT::i32 || !DCI.isAfterLegalizeDAG())
return SDValue();
// add x, zext (setcc) => addcarry x, 0, setcc
// add x, sext (setcc) => subcarry x, 0, setcc
unsigned Opc = LHS.getOpcode();
if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND ||
Opc == ISD::ANY_EXTEND || Opc == ISD::ADDCARRY)
std::swap(RHS, LHS);
Opc = RHS.getOpcode();
switch (Opc) {
default: break;
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
auto Cond = RHS.getOperand(0);
// If this won't be a real VOPC output, we would still need to insert an
// extra instruction anyway.
if (!isBoolSGPR(Cond))
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::SUBCARRY : ISD::ADDCARRY;
return DAG.getNode(Opc, SL, VTList, Args);
}
case ISD::ADDCARRY: {
// add x, (addcarry y, 0, cc) => addcarry x, y, cc
auto C = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
if (!C || C->getZExtValue() != 0) break;
SDValue Args[] = { LHS, RHS.getOperand(0), RHS.getOperand(2) };
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), RHS->getVTList(), Args);
}
}
return SDValue();
}
SDValue SITargetLowering::performSubCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (VT != MVT::i32)
return SDValue();
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// sub x, zext (setcc) => subcarry x, 0, setcc
// sub x, sext (setcc) => addcarry x, 0, setcc
unsigned Opc = RHS.getOpcode();
switch (Opc) {
default: break;
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
auto Cond = RHS.getOperand(0);
// If this won't be a real VOPC output, we would still need to insert an
// extra instruction anyway.
if (!isBoolSGPR(Cond))
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
return DAG.getNode(Opc, SL, VTList, Args);
}
}
if (LHS.getOpcode() == ISD::SUBCARRY) {
// sub (subcarry x, 0, cc), y => subcarry x, y, cc
auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
if (!C || !C->isZero())
return SDValue();
SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);
}
return SDValue();
}
SDValue SITargetLowering::performAddCarrySubCarryCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (N->getValueType(0) != MVT::i32)
return SDValue();
auto C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C || C->getZExtValue() != 0)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
// addcarry (add x, y), 0, cc => addcarry x, y, cc
// subcarry (sub x, y), 0, cc => subcarry x, y, cc
unsigned LHSOpc = LHS.getOpcode();
unsigned Opc = N->getOpcode();
if ((LHSOpc == ISD::ADD && Opc == ISD::ADDCARRY) ||
(LHSOpc == ISD::SUB && Opc == ISD::SUBCARRY)) {
SDValue Args[] = { LHS.getOperand(0), LHS.getOperand(1), N->getOperand(2) };
return DAG.getNode(Opc, SDLoc(N), N->getVTList(), Args);
}
return SDValue();
}
SDValue SITargetLowering::performFAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// These should really be instruction patterns, but writing patterns with
// source modifiers is a pain.
// fadd (fadd (a, a), b) -> mad 2.0, a, b
if (LHS.getOpcode() == ISD::FADD) {
SDValue A = LHS.getOperand(0);
if (A == LHS.getOperand(1)) {
unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
if (FusedOp != 0) {
const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
return DAG.getNode(FusedOp, SL, VT, A, Two, RHS);
}
}
}
// fadd (b, fadd (a, a)) -> mad 2.0, a, b
if (RHS.getOpcode() == ISD::FADD) {
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
unsigned FusedOp = getFusedOpcode(DAG, N, RHS.getNode());
if (FusedOp != 0) {
const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
return DAG.getNode(FusedOp, SL, VT, A, Two, LHS);
}
}
}
return SDValue();
}
SDValue SITargetLowering::performFSubCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
EVT VT = N->getValueType(0);
assert(!VT.isVector());
// Try to get the fneg to fold into the source modifier. This undoes generic
// DAG combines and folds them into the mad.
//
// Only do this if we are not trying to support denormals. v_mad_f32 does
// not support denormals ever.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() == ISD::FADD) {
// (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
SDValue A = LHS.getOperand(0);
if (A == LHS.getOperand(1)) {
unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
if (FusedOp != 0){
const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
return DAG.getNode(FusedOp, SL, VT, A, Two, NegRHS);
}
}
}
if (RHS.getOpcode() == ISD::FADD) {
// (fsub c, (fadd a, a)) -> mad -2.0, a, c
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
unsigned FusedOp = getFusedOpcode(DAG, N, RHS.getNode());
if (FusedOp != 0){
const SDValue NegTwo = DAG.getConstantFP(-2.0, SL, VT);
return DAG.getNode(FusedOp, SL, VT, A, NegTwo, LHS);
}
}
}
return SDValue();
}
SDValue SITargetLowering::performFMACombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDLoc SL(N);
if (!Subtarget->hasDot7Insts() || VT != MVT::f32)
return SDValue();
// FMA((F32)S0.x, (F32)S1.x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
// FDOT2((V2F16)S0, (V2F16)S1, (F32)z)
SDValue Op1 = N->getOperand(0);
SDValue Op2 = N->getOperand(1);
SDValue FMA = N->getOperand(2);
if (FMA.getOpcode() != ISD::FMA ||
Op1.getOpcode() != ISD::FP_EXTEND ||
Op2.getOpcode() != ISD::FP_EXTEND)
return SDValue();
// fdot2_f32_f16 always flushes fp32 denormal operands and the output to
// zero, regardless of the denorm mode setting. Therefore,
// unsafe-fp-math/fp-contract is sufficient to allow generating fdot2.
const TargetOptions &Options = DAG.getTarget().Options;
if (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
(N->getFlags().hasAllowContract() &&
FMA->getFlags().hasAllowContract())) {
Op1 = Op1.getOperand(0);
Op2 = Op2.getOperand(0);
if (Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue Vec1 = Op1.getOperand(0);
SDValue Idx1 = Op1.getOperand(1);
SDValue Vec2 = Op2.getOperand(0);
SDValue FMAOp1 = FMA.getOperand(0);
SDValue FMAOp2 = FMA.getOperand(1);
SDValue FMAAcc = FMA.getOperand(2);
if (FMAOp1.getOpcode() != ISD::FP_EXTEND ||
FMAOp2.getOpcode() != ISD::FP_EXTEND)
return SDValue();
FMAOp1 = FMAOp1.getOperand(0);
FMAOp2 = FMAOp2.getOperand(0);
if (FMAOp1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
FMAOp2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue Vec3 = FMAOp1.getOperand(0);
SDValue Vec4 = FMAOp2.getOperand(0);
SDValue Idx2 = FMAOp1.getOperand(1);
if (Idx1 != Op2.getOperand(1) || Idx2 != FMAOp2.getOperand(1) ||
// Idx1 and Idx2 cannot be the same.
Idx1 == Idx2)
return SDValue();
if (Vec1 == Vec2 || Vec3 == Vec4)
return SDValue();
if (Vec1.getValueType() != MVT::v2f16 || Vec2.getValueType() != MVT::v2f16)
return SDValue();
if ((Vec1 == Vec3 && Vec2 == Vec4) ||
(Vec1 == Vec4 && Vec2 == Vec3)) {
return DAG.getNode(AMDGPUISD::FDOT2, SL, MVT::f32, Vec1, Vec2, FMAAcc,
DAG.getTargetConstant(0, SL, MVT::i1));
}
}
return SDValue();
}
SDValue SITargetLowering::performSetCCCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = LHS.getValueType();
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
auto CRHS = dyn_cast<ConstantSDNode>(RHS);
if (!CRHS) {
CRHS = dyn_cast<ConstantSDNode>(LHS);
if (CRHS) {
std::swap(LHS, RHS);
CC = getSetCCSwappedOperands(CC);
}
}
if (CRHS) {
if (VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND &&
isBoolSGPR(LHS.getOperand(0))) {
// setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1
// setcc (sext from i1 cc), -1, eq|sle|uge) => cc
// setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1
// setcc (sext from i1 cc), 0, ne|ugt|slt) => cc
if ((CRHS->isAllOnes() &&
(CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
(CRHS->isZero() &&
(CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(-1, SL, MVT::i1));
if ((CRHS->isAllOnes() &&
(CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
(CRHS->isZero() &&
(CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
return LHS.getOperand(0);
}
const APInt &CRHSVal = CRHS->getAPIntValue();
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
LHS.getOpcode() == ISD::SELECT &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isa<ConstantSDNode>(LHS.getOperand(2)) &&
LHS.getConstantOperandVal(1) != LHS.getConstantOperandVal(2) &&
isBoolSGPR(LHS.getOperand(0))) {
// Given CT != CF:
// setcc (select cc, CT, CF), CF, eq => xor cc, -1
// setcc (select cc, CT, CF), CF, ne => cc
// setcc (select cc, CT, CF), CT, ne => xor cc, -1
// setcc (select cc, CT, CF), CT, eq => cc
const APInt &CT = LHS.getConstantOperandAPInt(1);
const APInt &CF = LHS.getConstantOperandAPInt(2);
if ((CF == CRHSVal && CC == ISD::SETEQ) ||
(CT == CRHSVal && CC == ISD::SETNE))
return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(-1, SL, MVT::i1));
if ((CF == CRHSVal && CC == ISD::SETNE) ||
(CT == CRHSVal && CC == ISD::SETEQ))
return LHS.getOperand(0);
}
}
if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
VT != MVT::f16))
return SDValue();
// Match isinf/isfinite pattern
// (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
// (fcmp one (fabs x), inf) -> (fp_class x,
// (p_normal | n_normal | p_subnormal | n_subnormal | p_zero | n_zero))
if ((CC == ISD::SETOEQ || CC == ISD::SETONE) && LHS.getOpcode() == ISD::FABS) {
const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
if (!CRHS)
return SDValue();
const APFloat &APF = CRHS->getValueAPF();
if (APF.isInfinity() && !APF.isNegative()) {
const unsigned IsInfMask = SIInstrFlags::P_INFINITY |
SIInstrFlags::N_INFINITY;
const unsigned IsFiniteMask = SIInstrFlags::N_ZERO |
SIInstrFlags::P_ZERO |
SIInstrFlags::N_NORMAL |
SIInstrFlags::P_NORMAL |
SIInstrFlags::N_SUBNORMAL |
SIInstrFlags::P_SUBNORMAL;
unsigned Mask = CC == ISD::SETOEQ ? IsInfMask : IsFiniteMask;
return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(Mask, SL, MVT::i32));
}
}
return SDValue();
}
SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
SDValue Src = N->getOperand(0);
SDValue Shift = Src;
// TODO: Extend type shouldn't matter (assuming legal types).
if (Shift.getOpcode() == ISD::ZERO_EXTEND)
Shift = Shift.getOperand(0);
if (Shift.getOpcode() == ISD::SRL || Shift.getOpcode() == ISD::SHL) {
// cvt_f32_ubyte1 (shl x, 8) -> cvt_f32_ubyte0 x
// cvt_f32_ubyte3 (shl x, 16) -> cvt_f32_ubyte1 x
// cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x
// cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x
// cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
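// For example, (cvt_f32_ubyte0 (srl x, 24)) has Offset == 0 and
// ShiftOffset == 24, so it becomes (cvt_f32_ubyte3 x).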
if (auto *C = dyn_cast<ConstantSDNode>(Shift.getOperand(1))) {
SDValue Shifted = DAG.getZExtOrTrunc(Shift.getOperand(0),
SDLoc(Shift.getOperand(0)), MVT::i32);
unsigned ShiftOffset = 8 * Offset;
if (Shift.getOpcode() == ISD::SHL)
ShiftOffset -= C->getZExtValue();
else
ShiftOffset += C->getZExtValue();
if (ShiftOffset < 32 && (ShiftOffset % 8) == 0) {
return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + ShiftOffset / 8, SL,
MVT::f32, Shifted);
}
}
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedBits = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
if (TLI.SimplifyDemandedBits(Src, DemandedBits, DCI)) {
// We simplified Src. If this node is not dead, visit it again so it is
// folded properly.
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
// Handle (or x, (srl y, 8)) pattern when known bits are zero.
if (SDValue DemandedSrc =
TLI.SimplifyMultipleUseDemandedBits(Src, DemandedBits, DAG))
return DAG.getNode(N->getOpcode(), SL, MVT::f32, DemandedSrc);
return SDValue();
}
SDValue SITargetLowering::performClampCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
if (!CSrc)
return SDValue();
const MachineFunction &MF = DCI.DAG.getMachineFunction();
const APFloat &F = CSrc->getValueAPF();
APFloat Zero = APFloat::getZero(F.getSemantics());
if (F < Zero ||
(F.isNaN() && MF.getInfo<SIMachineFunctionInfo>()->getMode().DX10Clamp)) {
return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
}
APFloat One(F.getSemantics(), "1.0");
if (F > One)
return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
return SDValue(CSrc, 0);
}
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return SDValue();
switch (N->getOpcode()) {
case ISD::ADD:
return performAddCombine(N, DCI);
case ISD::SUB:
return performSubCombine(N, DCI);
case ISD::ADDCARRY:
case ISD::SUBCARRY:
return performAddCarrySubCarryCombine(N, DCI);
case ISD::FADD:
return performFAddCombine(N, DCI);
case ISD::FSUB:
return performFSubCombine(N, DCI);
case ISD::SETCC:
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMAXNUM_IEEE:
case ISD::FMINNUM_IEEE:
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN:
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY:
return performMinMaxCombine(N, DCI);
case ISD::FMA:
return performFMACombine(N, DCI);
case ISD::AND:
return performAndCombine(N, DCI);
case ISD::OR:
return performOrCombine(N, DCI);
case ISD::XOR:
return performXorCombine(N, DCI);
case ISD::ZERO_EXTEND:
return performZeroExtendCombine(N, DCI);
case ISD::SIGN_EXTEND_INREG:
return performSignExtendInRegCombine(N , DCI);
case AMDGPUISD::FP_CLASS:
return performClassCombine(N, DCI);
case ISD::FCANONICALIZE:
return performFCanonicalizeCombine(N, DCI);
case AMDGPUISD::RCP:
return performRcpCombine(N, DCI);
case AMDGPUISD::FRACT:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::LDEXP: {
// FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
SDValue Src = N->getOperand(0);
if (Src.isUndef())
return Src;
break;
}
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performUCharToFloatCombine(N, DCI);
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
case AMDGPUISD::CVT_F32_UBYTE2:
case AMDGPUISD::CVT_F32_UBYTE3:
return performCvtF32UByteNCombine(N, DCI);
case AMDGPUISD::FMED3:
return performFMed3Combine(N, DCI);
case AMDGPUISD::CVT_PKRTZ_F16_F32:
return performCvtPkRTZCombine(N, DCI);
case AMDGPUISD::CLAMP:
return performClampCombine(N, DCI);
case ISD::SCALAR_TO_VECTOR: {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
// v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x))
if (VT == MVT::v2i16 || VT == MVT::v2f16) {
SDLoc SL(N);
SDValue Src = N->getOperand(0);
EVT EltVT = Src.getValueType();
if (EltVT == MVT::f16)
Src = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Src);
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Src);
return DAG.getNode(ISD::BITCAST, SL, VT, Ext);
}
break;
}
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DCI);
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::LOAD: {
if (SDValue Widened = widenLoad(cast<LoadSDNode>(N), DCI))
return Widened;
LLVM_FALLTHROUGH;
}
default: {
if (!DCI.isBeforeLegalize()) {
if (MemSDNode *MemNode = dyn_cast<MemSDNode>(N))
return performMemSDNodeCombine(MemNode, DCI);
}
break;
}
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
/// Helper function for adjustWritemask
static unsigned SubIdx2Lane(unsigned Idx) {
switch (Idx) {
default: return ~0u;
case AMDGPU::sub0: return 0;
case AMDGPU::sub1: return 1;
case AMDGPU::sub2: return 2;
case AMDGPU::sub3: return 3;
case AMDGPU::sub4: return 4; // Possible with TFE/LWE
}
}
/// Adjust the writemask of MIMG instructions
SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
SelectionDAG &DAG) const {
unsigned Opcode = Node->getMachineOpcode();
// Subtract 1 because the vdata output is not a MachineSDNode operand.
int D16Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::d16) - 1;
if (D16Idx >= 0 && Node->getConstantOperandVal(D16Idx))
return Node; // not implemented for D16
SDNode *Users[5] = { nullptr };
unsigned Lane = 0;
unsigned DmaskIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) - 1;
unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
unsigned NewDmask = 0;
unsigned TFEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::tfe) - 1;
unsigned LWEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::lwe) - 1;
bool UsesTFC = (int(TFEIdx) >= 0 && Node->getConstantOperandVal(TFEIdx)) ||
Node->getConstantOperandVal(LWEIdx) != 0;
unsigned TFCLane = 0;
bool HasChain = Node->getNumValues() > 1;
if (OldDmask == 0) {
// These are folded out, but on the off chance it happens don't assert.
return Node;
}
unsigned OldBitsSet = countPopulation(OldDmask);
// Work out which is the TFE/LWE lane if that is enabled.
if (UsesTFC) {
TFCLane = OldBitsSet;
}
// Try to figure out the used register components
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
I != E; ++I) {
// Don't look at users of the chain.
if (I.getUse().getResNo() != 0)
continue;
// Abort if we can't understand the usage
if (!I->isMachineOpcode() ||
I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
return Node;
// Lane means which subreg of %vgpra_vgprb_vgprc_vgprd is used.
// Note that subregs are packed, i.e. Lane==0 is the first bit set
// in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
// set, etc.
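// Illustrative example (not from the source): with OldDmask == 0b1010, Lane 0
// (sub0) holds component Y and Lane 1 (sub1) holds component W.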
Lane = SubIdx2Lane(I->getConstantOperandVal(1));
if (Lane == ~0u)
return Node;
// Check if the use is for the TFE/LWE generated result at VGPRn+1.
if (UsesTFC && Lane == TFCLane) {
Users[Lane] = *I;
} else {
// Set which texture component corresponds to the lane.
unsigned Comp;
for (unsigned i = 0, Dmask = OldDmask; (i <= Lane) && (Dmask != 0); i++) {
Comp = countTrailingZeros(Dmask);
Dmask &= ~(1 << Comp);
}
// Abort if we have more than one user per component.
if (Users[Lane])
return Node;
Users[Lane] = *I;
NewDmask |= 1 << Comp;
}
}
// Don't allow 0 dmask, as hardware assumes one channel enabled.
bool NoChannels = !NewDmask;
if (NoChannels) {
if (!UsesTFC) {
// No uses of the result and not using TFC. Then do nothing.
return Node;
}
// If the original dmask has one channel - then nothing to do
if (OldBitsSet == 1)
return Node;
// Use an arbitrary dmask - required for the instruction to work
NewDmask = 1;
}
// Abort if there's no change
if (NewDmask == OldDmask)
return Node;
unsigned BitsSet = countPopulation(NewDmask);
// Check for TFE or LWE - increase the number of channels by one to account
// for the extra return value
// This will need adjustment for D16 if this is also included in
// adjustWriteMask (this function) but at present D16 are excluded.
unsigned NewChannels = BitsSet + UsesTFC;
int NewOpcode =
AMDGPU::getMaskedMIMGOp(Node->getMachineOpcode(), NewChannels);
assert(NewOpcode != -1 &&
NewOpcode != static_cast<int>(Node->getMachineOpcode()) &&
"failed to find equivalent MIMG op");
// Adjust the writemask in the node
SmallVector<SDValue, 12> Ops;
Ops.insert(Ops.end(), Node->op_begin(), Node->op_begin() + DmaskIdx);
Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32));
Ops.insert(Ops.end(), Node->op_begin() + DmaskIdx + 1, Node->op_end());
MVT SVT = Node->getValueType(0).getVectorElementType().getSimpleVT();
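// There is no 3- or 5-wide register tuple here, so round the result type up
// (3 -> v4, 5 -> v8). Illustrative example: two dmask bits plus TFE give
// NewChannels == 3, which is widened to a v4 result.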
MVT ResultVT = NewChannels == 1 ?
SVT : MVT::getVectorVT(SVT, NewChannels == 3 ? 4 :
NewChannels == 5 ? 8 : NewChannels);
SDVTList NewVTList = HasChain ?
DAG.getVTList(ResultVT, MVT::Other) : DAG.getVTList(ResultVT);
MachineSDNode *NewNode = DAG.getMachineNode(NewOpcode, SDLoc(Node),
NewVTList, Ops);
if (HasChain) {
// Update chain.
DAG.setNodeMemRefs(NewNode, Node->memoperands());
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), SDValue(NewNode, 1));
}
if (NewChannels == 1) {
assert(Node->hasNUsesOfValue(1, 0));
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY,
SDLoc(Node), Users[Lane]->getValueType(0),
SDValue(NewNode, 0));
DAG.ReplaceAllUsesWith(Users[Lane], Copy);
return nullptr;
}
// Update the users of the node with the new indices
for (unsigned i = 0, Idx = AMDGPU::sub0; i < 5; ++i) {
SDNode *User = Users[i];
if (!User) {
// Handle the special case of NoChannels. We set NewDmask to 1 above, but
// Users[0] is still nullptr because channel 0 doesn't really have a use.
if (i || !NoChannels)
continue;
} else {
SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
}
switch (Idx) {
default: break;
case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
case AMDGPU::sub3: Idx = AMDGPU::sub4; break;
}
}
DAG.RemoveDeadNode(Node);
return nullptr;
}
static bool isFrameIndexOp(SDValue Op) {
if (Op.getOpcode() == ISD::AssertZext)
Op = Op.getOperand(0);
return isa<FrameIndexSDNode>(Op);
}
/// Legalize target-independent instructions (e.g. INSERT_SUBREG)
/// with frame index operands.
/// LLVM assumes that inputs to these instructions are registers.
SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
SelectionDAG &DAG) const {
if (Node->getOpcode() == ISD::CopyToReg) {
RegisterSDNode *DestReg = cast<RegisterSDNode>(Node->getOperand(1));
SDValue SrcVal = Node->getOperand(2);
// Insert a copy to a VReg_1 virtual register so LowerI1Copies doesn't have
// to try understanding copies to physical registers.
if (SrcVal.getValueType() == MVT::i1 && DestReg->getReg().isPhysical()) {
SDLoc SL(Node);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue VReg = DAG.getRegister(
MRI.createVirtualRegister(&AMDGPU::VReg_1RegClass), MVT::i1);
SDNode *Glued = Node->getGluedNode();
SDValue ToVReg
= DAG.getCopyToReg(Node->getOperand(0), SL, VReg, SrcVal,
SDValue(Glued, Glued ? Glued->getNumValues() - 1 : 0));
SDValue ToResultReg
= DAG.getCopyToReg(ToVReg, SL, SDValue(DestReg, 0),
VReg, ToVReg.getValue(1));
DAG.ReplaceAllUsesWith(Node, ToResultReg.getNode());
DAG.RemoveDeadNode(Node);
return ToResultReg.getNode();
}
}
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
if (!isFrameIndexOp(Node->getOperand(i))) {
Ops.push_back(Node->getOperand(i));
continue;
}
SDLoc DL(Node);
Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,
Node->getOperand(i).getValueType(),
Node->getOperand(i)), 0));
}
return DAG.UpdateNodeOperands(Node, Ops);
}
/// Fold the instructions after selecting them.
/// Returns null if users were already updated.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
!TII->isGather4(Opcode) &&
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) != -1) {
return adjustWritemask(Node, DAG);
}
if (Opcode == AMDGPU::INSERT_SUBREG ||
Opcode == AMDGPU::REG_SEQUENCE) {
legalizeTargetIndependentNode(Node, DAG);
return Node;
}
switch (Opcode) {
case AMDGPU::V_DIV_SCALE_F32_e64:
case AMDGPU::V_DIV_SCALE_F64_e64: {
// Satisfy the operand register constraint when one of the inputs is
// undefined. Ordinarily each undef value will have its own implicit_def of
// a vreg, so force these to use a single register.
SDValue Src0 = Node->getOperand(1);
SDValue Src1 = Node->getOperand(3);
SDValue Src2 = Node->getOperand(5);
if ((Src0.isMachineOpcode() &&
Src0.getMachineOpcode() != AMDGPU::IMPLICIT_DEF) &&
(Src0 == Src1 || Src0 == Src2))
break;
MVT VT = Src0.getValueType().getSimpleVT();
const TargetRegisterClass *RC =
getRegClassFor(VT, Src0.getNode()->isDivergent());
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
SDValue ImpDef = DAG.getCopyToReg(DAG.getEntryNode(), SDLoc(Node),
UndefReg, Src0, SDValue());
// src0 must be the same register as src1 or src2, even if the value is
// undefined, so make sure we don't violate this constraint.
if (Src0.isMachineOpcode() &&
Src0.getMachineOpcode() == AMDGPU::IMPLICIT_DEF) {
if (Src1.isMachineOpcode() &&
Src1.getMachineOpcode() != AMDGPU::IMPLICIT_DEF)
Src0 = Src1;
else if (Src2.isMachineOpcode() &&
Src2.getMachineOpcode() != AMDGPU::IMPLICIT_DEF)
Src0 = Src2;
else {
assert(Src1.getMachineOpcode() == AMDGPU::IMPLICIT_DEF);
Src0 = UndefReg;
Src1 = UndefReg;
}
} else
break;
SmallVector<SDValue, 9> Ops(Node->op_begin(), Node->op_end());
Ops[1] = Src0;
Ops[3] = Src1;
Ops[5] = Src2;
Ops.push_back(ImpDef.getValue(1));
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
default:
break;
}
return Node;
}
// Any MIMG instruction that uses tfe or lwe requires an initialization of the
// result register that will be written in the case of a memory access failure.
// The required code is also added to tie this init code to the result of the
// image instruction.
void SITargetLowering::AddIMGInit(MachineInstr &MI) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
if (!TFE && !LWE) // intersect_ray
return;
unsigned TFEVal = TFE ? TFE->getImm() : 0;
unsigned LWEVal = LWE ? LWE->getImm() : 0;
unsigned D16Val = D16 ? D16->getImm() : 0;
if (!TFEVal && !LWEVal)
return;
// At least one of TFE or LWE are non-zero
// We have to insert a suitable initialization of the result value and
// tie this to the dest of the image instruction.
const DebugLoc &DL = MI.getDebugLoc();
int DstIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
// Calculate which dword we have to initialize to 0.
MachineOperand *MO_Dmask = TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
// check that dmask operand is found.
assert(MO_Dmask && "Expected dmask operand in instruction");
unsigned dmask = MO_Dmask->getImm();
// Determine the number of active lanes taking into account the
// Gather4 special case
unsigned ActiveLanes = TII->isGather4(MI) ? 4 : countPopulation(dmask);
bool Packed = !Subtarget->hasUnpackedD16VMem();
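// InitIdx counts the result dwords to initialize, including the TFE/LWE
// status dword. Illustrative example: dmask == 0b0111 gives ActiveLanes == 3,
// hence InitIdx == 4, or ((3 + 1) >> 1) + 1 == 3 if D16 data is packed two
// lanes per dword.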
unsigned InitIdx =
D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
// Abandon attempt if the dst size isn't large enough
// - this is in fact an error but this is picked up elsewhere and
// reported correctly.
uint32_t DstSize = TRI.getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
if (DstSize < InitIdx)
return;
// Create a register for the initialization value.
Register PrevDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
unsigned NewDst = 0; // Final initialized value will be in here
// If PRTStrictNull feature is enabled (the default) then initialize
// all the result registers to 0, otherwise just the error indication
// register (VGPRn+1)
unsigned SizeLeft = Subtarget->usePRTStrictNull() ? InitIdx : 1;
unsigned CurrIdx = Subtarget->usePRTStrictNull() ? 0 : (InitIdx - 1);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
for (; SizeLeft; SizeLeft--, CurrIdx++) {
NewDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
// Initialize dword
Register SubReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
.addImm(0);
// Insert into the super-reg
BuildMI(MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
.addReg(PrevDst)
.addReg(SubReg)
.addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx));
PrevDst = NewDst;
}
// Add as an implicit operand
MI.addOperand(MachineOperand::CreateReg(NewDst, false, true));
// Tie the just added implicit operand to the dst
MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
}
/// Assign the register class depending on the number of
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
if (TII->isVOP3(MI.getOpcode())) {
// Make sure constant bus requirements are respected.
TII->legalizeOperandsVOP3(MRI, MI);
// Prefer VGPRs over AGPRs in mAI instructions where possible.
// This saves a chain-copy of registers and better balances register
// use between VGPRs and AGPRs, as AGPR tuples tend to be big.
if (MI.getDesc().OpInfo) {
unsigned Opc = MI.getOpcode();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
for (auto I : { AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) }) {
if (I == -1)
break;
MachineOperand &Op = MI.getOperand(I);
if (!Op.isReg() || !Op.getReg().isVirtual())
continue;
auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
if (!TRI->hasAGPRs(RC))
continue;
auto *Src = MRI.getUniqueVRegDef(Op.getReg());
if (!Src || !Src->isCopy() ||
!TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
continue;
auto *NewRC = TRI->getEquivalentVGPRClass(RC);
// All uses of agpr64 and agpr32 can also accept vgpr except for
// v_accvgpr_read, but we do not produce agpr reads during selection,
// so no use checks are needed.
MRI.setRegClass(Op.getReg(), NewRC);
}
// Resolve the rest of AV operands to AGPRs.
if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) {
if (Src2->isReg() && Src2->getReg().isVirtual()) {
auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg());
if (TRI->isVectorSuperClass(RC)) {
auto *NewRC = TRI->getEquivalentAGPRClass(RC);
MRI.setRegClass(Src2->getReg(), NewRC);
if (Src2->isTied())
MRI.setRegClass(MI.getOperand(0).getReg(), NewRC);
}
}
}
}
return;
}
if (TII->isMIMG(MI)) {
if (!MI.mayStore())
AddIMGInit(MI);
TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::vaddr);
}
}
static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL,
uint64_t Val) {
SDValue K = DAG.getTargetConstant(Val, DL, MVT::i32);
return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
}
MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
const SDLoc &DL,
SDValue Ptr) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
// Build the half of the subregister with the constants before building the
// full 128-bit register. If we are building multiple resource descriptors,
// this will allow CSEing of the 2-component register.
const SDValue Ops0[] = {
DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
buildSMovImm32(DAG, DL, 0),
DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
MVT::v2i32, Ops0), 0);
// Combine the constants and the pointer.
const SDValue Ops1[] = {
DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32),
Ptr,
DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
SubRegHi,
DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
};
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
}
/// Return a resource descriptor with the 'Add TID' bit enabled
/// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
/// of the resource descriptor) to create an offset, which is added to
/// the resource pointer.
MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, const SDLoc &DL,
SDValue Ptr, uint32_t RsrcDword1,
uint64_t RsrcDword2And3) const {
SDValue PtrLo = DAG.getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
SDValue PtrHi = DAG.getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
if (RsrcDword1) {
PtrHi = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
DAG.getConstant(RsrcDword1, DL, MVT::i32)),
0);
}
SDValue DataLo = buildSMovImm32(DAG, DL,
RsrcDword2And3 & UINT64_C(0xFFFFFFFF));
SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
const SDValue Ops[] = {
DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32),
PtrLo,
DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
PtrHi,
DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
DataLo,
DAG.getTargetConstant(AMDGPU::sub2, DL, MVT::i32),
DataHi,
DAG.getTargetConstant(AMDGPU::sub3, DL, MVT::i32)
};
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
}
//===----------------------------------------------------------------------===//
// SI Inline Assembly Support
//===----------------------------------------------------------------------===//
std::pair<unsigned, const TargetRegisterClass *>
SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
StringRef Constraint,
MVT VT) const {
const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(TRI_);
const TargetRegisterClass *RC = nullptr;
if (Constraint.size() == 1) {
const unsigned BitWidth = VT.getSizeInBits();
switch (Constraint[0]) {
default:
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
case 's':
case 'r':
switch (BitWidth) {
case 16:
RC = &AMDGPU::SReg_32RegClass;
break;
case 64:
RC = &AMDGPU::SGPR_64RegClass;
break;
default:
RC = SIRegisterInfo::getSGPRClassForBitWidth(BitWidth);
if (!RC)
return std::make_pair(0U, nullptr);
break;
}
break;
case 'v':
switch (BitWidth) {
case 16:
RC = &AMDGPU::VGPR_32RegClass;
break;
default:
RC = TRI->getVGPRClassForBitWidth(BitWidth);
if (!RC)
return std::make_pair(0U, nullptr);
break;
}
break;
case 'a':
if (!Subtarget->hasMAIInsts())
break;
switch (BitWidth) {
case 16:
RC = &AMDGPU::AGPR_32RegClass;
break;
default:
RC = TRI->getAGPRClassForBitWidth(BitWidth);
if (!RC)
return std::make_pair(0U, nullptr);
break;
}
break;
}
// We actually support i128, i16 and f16 as inline parameters
// even if they are not reported as legal
if (RC && (isTypeLegal(VT) || VT.SimpleTy == MVT::i128 ||
VT.SimpleTy == MVT::i16 || VT.SimpleTy == MVT::f16))
return std::make_pair(0U, RC);
}
if (Constraint.startswith("{") && Constraint.endswith("}")) {
StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
if (RegName.consume_front("v")) {
RC = &AMDGPU::VGPR_32RegClass;
} else if (RegName.consume_front("s")) {
RC = &AMDGPU::SGPR_32RegClass;
} else if (RegName.consume_front("a")) {
RC = &AMDGPU::AGPR_32RegClass;
}
if (RC) {
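// Parse either a plain index such as {v5} or a range such as {v[8:11]}.
// Illustrative example: {s[0:3]} yields Width == 128 and selects the 128-bit
// SGPR tuple starting at s0.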
uint32_t Idx;
if (RegName.consume_front("[")) {
uint32_t End;
bool Failed = RegName.consumeInteger(10, Idx);
Failed |= !RegName.consume_front(":");
Failed |= RegName.consumeInteger(10, End);
Failed |= !RegName.consume_back("]");
if (!Failed) {
uint32_t Width = (End - Idx + 1) * 32;
MCRegister Reg = RC->getRegister(Idx);
if (SIRegisterInfo::isVGPRClass(RC))
RC = TRI->getVGPRClassForBitWidth(Width);
else if (SIRegisterInfo::isSGPRClass(RC))
RC = TRI->getSGPRClassForBitWidth(Width);
else if (SIRegisterInfo::isAGPRClass(RC))
RC = TRI->getAGPRClassForBitWidth(Width);
if (RC) {
Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, RC);
return std::make_pair(Reg, RC);
}
}
} else {
bool Failed = RegName.getAsInteger(10, Idx);
if (!Failed && Idx < RC->getNumRegs())
return std::make_pair(RC->getRegister(Idx), RC);
}
}
}
auto Ret = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
if (Ret.first)
Ret.second = TRI->getPhysRegClass(Ret.first);
return Ret;
}
static bool isImmConstraint(StringRef Constraint) {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 'I':
case 'J':
case 'A':
case 'B':
case 'C':
return true;
}
} else if (Constraint == "DA" ||
Constraint == "DB") {
return true;
}
return false;
}
SITargetLowering::ConstraintType
SITargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 's':
case 'v':
case 'a':
return C_RegisterClass;
}
}
if (isImmConstraint(Constraint)) {
return C_Other;
}
return TargetLowering::getConstraintType(Constraint);
}
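// Strip bits above the operand size so a negative immediate is canonical for
// the value's width, unless it is already an inlinable literal. Illustrative
// example: clearUnusedBits(0x100000005, 32) == 5, while clearUnusedBits(-1, 64)
// returns -1 unchanged because -1 is inlinable.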
static uint64_t clearUnusedBits(uint64_t Val, unsigned Size) {
if (!AMDGPU::isInlinableIntLiteral(Val)) {
Val = Val & maskTrailingOnes<uint64_t>(Size);
}
return Val;
}
void SITargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
if (isImmConstraint(Constraint)) {
uint64_t Val;
if (getAsmOperandConstVal(Op, Val) &&
checkAsmConstraintVal(Op, Constraint, Val)) {
Val = clearUnusedBits(Val, Op.getScalarValueSizeInBits());
Ops.push_back(DAG.getTargetConstant(Val, SDLoc(Op), MVT::i64));
}
} else {
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
}
bool SITargetLowering::getAsmOperandConstVal(SDValue Op, uint64_t &Val) const {
unsigned Size = Op.getScalarValueSizeInBits();
if (Size > 64)
return false;
if (Size == 16 && !Subtarget->has16BitInsts())
return false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
Val = C->getSExtValue();
return true;
}
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
Val = C->getValueAPF().bitcastToAPInt().getSExtValue();
return true;
}
if (BuildVectorSDNode *V = dyn_cast<BuildVectorSDNode>(Op)) {
if (Size != 16 || Op.getNumOperands() != 2)
return false;
if (Op.getOperand(0).isUndef() || Op.getOperand(1).isUndef())
return false;
if (ConstantSDNode *C = V->getConstantSplatNode()) {
Val = C->getSExtValue();
return true;
}
if (ConstantFPSDNode *C = V->getConstantFPSplatNode()) {
Val = C->getValueAPF().bitcastToAPInt().getSExtValue();
return true;
}
}
return false;
}
bool SITargetLowering::checkAsmConstraintVal(SDValue Op,
const std::string &Constraint,
uint64_t Val) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'I':
return AMDGPU::isInlinableIntLiteral(Val);
case 'J':
return isInt<16>(Val);
case 'A':
return checkAsmConstraintValA(Op, Val);
case 'B':
return isInt<32>(Val);
case 'C':
return isUInt<32>(clearUnusedBits(Val, Op.getScalarValueSizeInBits())) ||
AMDGPU::isInlinableIntLiteral(Val);
default:
break;
}
} else if (Constraint.size() == 2) {
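// "DA": each 32-bit half of the 64-bit value must independently satisfy the
// 'A' (inlinable) constraint. Illustrative example: 0x0000000100000002
// passes, since both 1 and 2 are inlinable literals.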
if (Constraint == "DA") {
int64_t HiBits = static_cast<int32_t>(Val >> 32);
int64_t LoBits = static_cast<int32_t>(Val);
return checkAsmConstraintValA(Op, HiBits, 32) &&
checkAsmConstraintValA(Op, LoBits, 32);
}
if (Constraint == "DB") {
return true;
}
}
llvm_unreachable("Invalid asm constraint");
}
bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
uint64_t Val,
unsigned MaxSize) const {
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
(Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
return true;
}
return false;
}
static int getAlignedAGPRClassID(unsigned UnalignedClassID) {
switch (UnalignedClassID) {
case AMDGPU::VReg_64RegClassID:
return AMDGPU::VReg_64_Align2RegClassID;
case AMDGPU::VReg_96RegClassID:
return AMDGPU::VReg_96_Align2RegClassID;
case AMDGPU::VReg_128RegClassID:
return AMDGPU::VReg_128_Align2RegClassID;
case AMDGPU::VReg_160RegClassID:
return AMDGPU::VReg_160_Align2RegClassID;
case AMDGPU::VReg_192RegClassID:
return AMDGPU::VReg_192_Align2RegClassID;
case AMDGPU::VReg_224RegClassID:
return AMDGPU::VReg_224_Align2RegClassID;
case AMDGPU::VReg_256RegClassID:
return AMDGPU::VReg_256_Align2RegClassID;
case AMDGPU::VReg_512RegClassID:
return AMDGPU::VReg_512_Align2RegClassID;
case AMDGPU::VReg_1024RegClassID:
return AMDGPU::VReg_1024_Align2RegClassID;
case AMDGPU::AReg_64RegClassID:
return AMDGPU::AReg_64_Align2RegClassID;
case AMDGPU::AReg_96RegClassID:
return AMDGPU::AReg_96_Align2RegClassID;
case AMDGPU::AReg_128RegClassID:
return AMDGPU::AReg_128_Align2RegClassID;
case AMDGPU::AReg_160RegClassID:
return AMDGPU::AReg_160_Align2RegClassID;
case AMDGPU::AReg_192RegClassID:
return AMDGPU::AReg_192_Align2RegClassID;
case AMDGPU::AReg_256RegClassID:
return AMDGPU::AReg_256_Align2RegClassID;
case AMDGPU::AReg_512RegClassID:
return AMDGPU::AReg_512_Align2RegClassID;
case AMDGPU::AReg_1024RegClassID:
return AMDGPU::AReg_1024_Align2RegClassID;
default:
return -1;
}
}
// Figure out which registers should be reserved for stack access. Only after
// the function is legalized do we know all of the non-spill stack objects or if
// calls are present.
void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
if (Info->isEntryFunction()) {
// Callable functions have fixed registers used for stack access.
reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
}
assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
Info->getStackPtrOffsetReg()));
if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
// We need to worry about replacing the default register with itself in case
// of MIR testcases missing the MFI.
if (Info->getScratchRSrcReg() != AMDGPU::PRIVATE_RSRC_REG)
MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
if (Info->getFrameOffsetReg() != AMDGPU::FP_REG)
MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
Info->limitOccupancy(MF);
if (ST.isWave32() && !MF.empty()) {
for (auto &MBB : MF) {
for (auto &MI : MBB) {
TII->fixImplicitOperands(MI);
}
}
}
// FIXME: This is a hack to fixup AGPR classes to use the properly aligned
// classes if required. Ideally the register class constraints would differ
// per-subtarget, but there's no easy way to achieve that right now. This is
// not a problem for VGPRs because the correctly aligned VGPR class is implied
// from using them as the register class for legal types.
if (ST.needsAlignedVGPRs()) {
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
const Register Reg = Register::index2VirtReg(I);
const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
if (!RC)
continue;
int NewClassID = getAlignedAGPRClassID(RC->getID());
if (NewClassID != -1)
MRI.setRegClass(Reg, TRI->getRegClass(NewClassID));
}
}
TargetLoweringBase::finalizeLowering(MF);
}
void SITargetLowering::computeKnownBitsForFrameIndex(
const int FI, KnownBits &Known, const MachineFunction &MF) const {
TargetLowering::computeKnownBitsForFrameIndex(FI, Known, MF);
// Set the high bits to zero based on the maximum allowed scratch size per
// wave. We can't use vaddr in MUBUF instructions if we don't know the address
// calculation won't overflow, so assume the sign bit is never set.
Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());
}
static void knownBitsForWorkitemID(const GCNSubtarget &ST, GISelKnownBits &KB,
KnownBits &Known, unsigned Dim) {
unsigned MaxValue =
ST.getMaxWorkitemID(KB.getMachineFunction().getFunction(), Dim);
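// E.g. a maximum workitem ID of 1023 leaves the top 22 bits of a 32-bit
// value known zero.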
Known.Zero.setHighBits(countLeadingZeros(MaxValue));
}
void SITargetLowering::computeKnownBitsForTargetInstr(
GISelKnownBits &KB, Register R, KnownBits &Known, const APInt &DemandedElts,
const MachineRegisterInfo &MRI, unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
case AMDGPU::G_INTRINSIC: {
switch (MI->getIntrinsicID()) {
case Intrinsic::amdgcn_workitem_id_x:
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 0);
break;
case Intrinsic::amdgcn_workitem_id_y:
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 1);
break;
case Intrinsic::amdgcn_workitem_id_z:
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 2);
break;
case Intrinsic::amdgcn_mbcnt_lo:
case Intrinsic::amdgcn_mbcnt_hi: {
// These return at most the wavefront size - 1.
unsigned Size = MRI.getType(R).getSizeInBits();
Known.Zero.setHighBits(Size - getSubtarget()->getWavefrontSizeLog2());
break;
}
case Intrinsic::amdgcn_groupstaticsize: {
// We can report all bits above the maximum LDS size as zero. We can't
// report based on the actual size because we don't know whether it is
// accurate at any given point.
Known.Zero.setHighBits(countLeadingZeros(getSubtarget()->getLocalMemorySize()));
break;
}
}
break;
}
case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
Known.Zero.setHighBits(24);
break;
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
Known.Zero.setHighBits(16);
break;
}
}
Align SITargetLowering::computeKnownAlignForTargetInstr(
GISelKnownBits &KB, Register R, const MachineRegisterInfo &MRI,
unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
// FIXME: Can this move to generic code? What about the case where the call
// site specifies a lower alignment?
Intrinsic::ID IID = MI->getIntrinsicID();
LLVMContext &Ctx = KB.getMachineFunction().getFunction().getContext();
AttributeList Attrs = Intrinsic::getAttributes(Ctx, IID);
if (MaybeAlign RetAlign = Attrs.getRetAlignment())
return *RetAlign;
return Align(1);
}
default:
return Align(1);
}
}
Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
const Align PrefAlign = TargetLowering::getPrefLoopAlignment(ML);
const Align CacheLineAlign = Align(64);
// Pre-GFX10 target did not benefit from loop alignment
if (!ML || DisableLoopAlignment ||
(getSubtarget()->getGeneration() < AMDGPUSubtarget::GFX10) ||
getSubtarget()->hasInstFwdPrefetchBug())
return PrefAlign;
// On GFX10 the I$ consists of 4 x 64-byte cache lines.
// By default the prefetcher keeps one cache line behind and reads two ahead.
// We can modify it with S_INST_PREFETCH for larger loops to have two lines
// behind and one ahead.
// Therefore we can benefit from aligning loop headers if the loop fits in
// 192 bytes. If the loop fits in 64 bytes it always spans no more than two
// cache lines and does not need alignment. If the loop is at most 128 bytes
// we do not need to modify the prefetch; if it is at most 192 bytes we need
// two lines behind.
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const MachineBasicBlock *Header = ML->getHeader();
if (Header->getAlignment() != PrefAlign)
return Header->getAlignment(); // Already processed.
unsigned LoopSize = 0;
for (const MachineBasicBlock *MBB : ML->blocks()) {
// If an inner loop block is aligned, assume on average half of the
// alignment size is added as nops.
if (MBB != Header)
LoopSize += MBB->getAlignment().value() / 2;
for (const MachineInstr &MI : *MBB) {
LoopSize += TII->getInstSizeInBytes(MI);
if (LoopSize > 192)
return PrefAlign;
}
}
if (LoopSize <= 64)
return PrefAlign;
if (LoopSize <= 128)
return CacheLineAlign;
// If any of the parent loops is surrounded by prefetch instructions, do not
// insert a new one for the inner loop; that would reset the parent's settings.
for (MachineLoop *P = ML->getParentLoop(); P; P = P->getParentLoop()) {
if (MachineBasicBlock *Exit = P->getExitBlock()) {
auto I = Exit->getFirstNonDebugInstr();
if (I != Exit->end() && I->getOpcode() == AMDGPU::S_INST_PREFETCH)
return CacheLineAlign;
}
}
MachineBasicBlock *Pre = ML->getLoopPreheader();
MachineBasicBlock *Exit = ML->getExitBlock();
if (Pre && Exit) {
auto PreTerm = Pre->getFirstTerminator();
if (PreTerm == Pre->begin() ||
std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH)
BuildMI(*Pre, PreTerm, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
.addImm(1); // prefetch 2 lines behind PC
auto ExitHead = Exit->getFirstNonDebugInstr();
if (ExitHead == Exit->end() ||
ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH)
BuildMI(*Exit, ExitHead, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
.addImm(2); // prefetch 1 line behind PC
}
return CacheLineAlign;
}
LLVM_ATTRIBUTE_UNUSED
static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
assert(N->getOpcode() == ISD::CopyFromReg);
do {
// Follow the chain until we find an INLINEASM node.
N = N->getOperand(0).getNode();
if (N->getOpcode() == ISD::INLINEASM ||
N->getOpcode() == ISD::INLINEASM_BR)
return true;
} while (N->getOpcode() == ISD::CopyFromReg);
return false;
}
bool SITargetLowering::isSDNodeSourceOfDivergence(
const SDNode *N, FunctionLoweringInfo *FLI,
LegacyDivergenceAnalysis *KDA) const {
switch (N->getOpcode()) {
case ISD::CopyFromReg: {
const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
Register Reg = R->getReg();
// FIXME: Why does this need to consider isLiveIn?
if (Reg.isPhysical() || MRI.isLiveIn(Reg))
return !TRI->isSGPRReg(MRI, Reg);
if (const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
return KDA->isDivergent(V);
assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
return !TRI->isSGPRReg(MRI, Reg);
}
case ISD::LOAD: {
const LoadSDNode *L = cast<LoadSDNode>(N);
unsigned AS = L->getAddressSpace();
// A flat load may access private memory.
return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
}
case ISD::CALLSEQ_END:
return true;
case ISD::INTRINSIC_WO_CHAIN:
return AMDGPU::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
case ISD::INTRINSIC_W_CHAIN:
return AMDGPU::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
case AMDGPUISD::ATOMIC_CMP_SWAP:
case AMDGPUISD::ATOMIC_INC:
case AMDGPUISD::ATOMIC_DEC:
case AMDGPUISD::ATOMIC_LOAD_FMIN:
case AMDGPUISD::ATOMIC_LOAD_FMAX:
case AMDGPUISD::BUFFER_ATOMIC_SWAP:
case AMDGPUISD::BUFFER_ATOMIC_ADD:
case AMDGPUISD::BUFFER_ATOMIC_SUB:
case AMDGPUISD::BUFFER_ATOMIC_SMIN:
case AMDGPUISD::BUFFER_ATOMIC_UMIN:
case AMDGPUISD::BUFFER_ATOMIC_SMAX:
case AMDGPUISD::BUFFER_ATOMIC_UMAX:
case AMDGPUISD::BUFFER_ATOMIC_AND:
case AMDGPUISD::BUFFER_ATOMIC_OR:
case AMDGPUISD::BUFFER_ATOMIC_XOR:
case AMDGPUISD::BUFFER_ATOMIC_INC:
case AMDGPUISD::BUFFER_ATOMIC_DEC:
case AMDGPUISD::BUFFER_ATOMIC_CMPSWAP:
case AMDGPUISD::BUFFER_ATOMIC_CSUB:
case AMDGPUISD::BUFFER_ATOMIC_FADD:
case AMDGPUISD::BUFFER_ATOMIC_FMIN:
case AMDGPUISD::BUFFER_ATOMIC_FMAX:
// Target-specific read-modify-write atomics are sources of divergence.
return true;
default:
if (auto *A = dyn_cast<AtomicSDNode>(N)) {
// Generic read-modify-write atomics are sources of divergence.
return A->readMem() && A->writeMem();
}
return false;
}
}
bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG,
EVT VT) const {
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
case MVT::f32:
return hasFP32Denormals(DAG.getMachineFunction());
case MVT::f64:
case MVT::f16:
return hasFP64FP16Denormals(DAG.getMachineFunction());
default:
return false;
}
}
bool SITargetLowering::denormalsEnabledForType(LLT Ty,
MachineFunction &MF) const {
switch (Ty.getScalarSizeInBits()) {
case 32:
return hasFP32Denormals(MF);
case 64:
case 16:
return hasFP64FP16Denormals(MF);
default:
return false;
}
}
bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
if (Op.getOpcode() == AMDGPUISD::CLAMP) {
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
if (Info->getMode().DX10Clamp)
return true; // Clamped to 0.
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG,
SNaN, Depth);
}
// Global FP atomic instructions have a hardcoded FP mode and do not support
// FP32 denormals; only v2f16 denormals are supported.
static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW) {
const fltSemantics &Flt = RMW->getType()->getScalarType()->getFltSemantics();
auto DenormMode = RMW->getParent()->getParent()->getDenormalMode(Flt);
if (&Flt == &APFloat::IEEEsingle())
return DenormMode == DenormalMode::getPreserveSign();
return DenormMode == DenormalMode::getIEEE();
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
unsigned AS = RMW->getPointerAddressSpace();
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
return AtomicExpansionKind::NotAtomic;
auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) {
OptimizationRemarkEmitter ORE(RMW->getFunction());
LLVMContext &Ctx = RMW->getFunction()->getContext();
SmallVector<StringRef> SSNs;
Ctx.getSyncScopeNames(SSNs);
auto MemScope = SSNs[RMW->getSyncScopeID()].empty()
? "system"
: SSNs[RMW->getSyncScopeID()];
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Passed", RMW)
<< "Hardware instruction generated for atomic "
<< RMW->getOperationName(RMW->getOperation())
<< " operation at memory scope " << MemScope
<< " due to an unsafe request.";
});
return Kind;
};
switch (RMW->getOperation()) {
case AtomicRMWInst::FAdd: {
Type *Ty = RMW->getType();
// We don't have a way to support 16-bit atomics now, so just leave them
// as-is.
if (Ty->isHalfTy())
return AtomicExpansionKind::None;
if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy()))
return AtomicExpansionKind::CmpXChg;
if ((AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) &&
Subtarget->hasAtomicFaddNoRtnInsts()) {
if (Subtarget->hasGFX940Insts())
return AtomicExpansionKind::None;
// The amdgpu-unsafe-fp-atomics attribute enables generation of unsafe
// floating point atomic instructions. May generate more efficient code,
// but may not respect rounding and denormal modes, and may give incorrect
// results for certain memory destinations.
if (RMW->getFunction()
->getFnAttribute("amdgpu-unsafe-fp-atomics")
.getValueAsString() != "true")
return AtomicExpansionKind::CmpXChg;
if (Subtarget->hasGFX90AInsts()) {
if (Ty->isFloatTy() && AS == AMDGPUAS::FLAT_ADDRESS)
return AtomicExpansionKind::CmpXChg;
auto SSID = RMW->getSyncScopeID();
if (SSID == SyncScope::System ||
SSID == RMW->getContext().getOrInsertSyncScopeID("one-as"))
return AtomicExpansionKind::CmpXChg;
return ReportUnsafeHWInst(AtomicExpansionKind::None);
}
if (AS == AMDGPUAS::FLAT_ADDRESS)
return AtomicExpansionKind::CmpXChg;
return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None)
: AtomicExpansionKind::CmpXChg;
}
// DS FP atomics do respect the denormal mode, but the rounding mode is
// fixed to round-to-nearest-even.
// The only exception is DS_ADD_F64 which never flushes regardless of mode.
if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) {
if (!Ty->isDoubleTy())
return AtomicExpansionKind::None;
if (fpModeMatchesGlobalFPAtomicMode(RMW))
return AtomicExpansionKind::None;
return RMW->getFunction()
->getFnAttribute("amdgpu-unsafe-fp-atomics")
.getValueAsString() == "true"
? ReportUnsafeHWInst(AtomicExpansionKind::None)
: AtomicExpansionKind::CmpXChg;
}
return AtomicExpansionKind::CmpXChg;
}
default:
break;
}
return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
? AtomicExpansionKind::NotAtomic
: AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
? AtomicExpansionKind::NotAtomic
: AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
return CmpX->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
? AtomicExpansionKind::NotAtomic
: AtomicExpansionKind::None;
}
const TargetRegisterClass *
SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
return Subtarget->getWavefrontSize() == 64 ? &AMDGPU::SReg_64RegClass
: &AMDGPU::SReg_32RegClass;
if (!TRI->isSGPRClass(RC) && !isDivergent)
return TRI->getEquivalentSGPRClass(RC);
else if (TRI->isSGPRClass(RC) && isDivergent)
return TRI->getEquivalentVGPRClass(RC);
return RC;
}
// FIXME: This is a workaround for DivergenceAnalysis not understanding always
// uniform values (as produced by the mask results of control flow intrinsics)
// used outside of divergent blocks. The phi users need to also be treated as
// always uniform.
static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited,
unsigned WaveSize) {
// FIXME: We assume we never cast the mask results of a control flow
// intrinsic.
// Early exit if the type won't be consistent as a compile time hack.
IntegerType *IT = dyn_cast<IntegerType>(V->getType());
if (!IT || IT->getBitWidth() != WaveSize)
return false;
if (!isa<Instruction>(V))
return false;
if (!Visited.insert(V).second)
return false;
bool Result = false;
for (auto U : V->users()) {
if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
if (V == U->getOperand(1)) {
switch (Intrinsic->getIntrinsicID()) {
default:
Result = false;
break;
case Intrinsic::amdgcn_if_break:
case Intrinsic::amdgcn_if:
case Intrinsic::amdgcn_else:
Result = true;
break;
}
}
if (V == U->getOperand(0)) {
switch (Intrinsic->getIntrinsicID()) {
default:
Result = false;
break;
case Intrinsic::amdgcn_end_cf:
case Intrinsic::amdgcn_loop:
Result = true;
break;
}
}
} else {
Result = hasCFUser(U, Visited, WaveSize);
}
if (Result)
break;
}
return Result;
}
bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
const Value *V) const {
if (const CallInst *CI = dyn_cast<CallInst>(V)) {
if (CI->isInlineAsm()) {
// FIXME: This cannot give a correct answer. This should only trigger in
// the case where inline asm returns mixed SGPR and VGPR results, used
// outside the defining block. We don't have a specific result to
// consider, so this assumes if any value is SGPR, the overall register
// also needs to be SGPR.
const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
MF.getDataLayout(), Subtarget->getRegisterInfo(), *CI);
for (auto &TC : TargetConstraints) {
if (TC.Type == InlineAsm::isOutput) {
ComputeConstraintToUse(TC, SDValue());
const TargetRegisterClass *RC = getRegForInlineAsmConstraint(
SIRI, TC.ConstraintCode, TC.ConstraintVT).second;
if (RC && SIRI->isSGPRClass(RC))
return true;
}
}
}
}
SmallPtrSet<const Value *, 16> Visited;
return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
}
std::pair<InstructionCost, MVT>
SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const {
std::pair<InstructionCost, MVT> Cost =
TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
auto Size = DL.getTypeSizeInBits(Ty);
// Maximum load or store can handle 8 dwords for scalar and 4 for
// vector ALU. Let's assume anything above 8 dwords is expensive
// even if legal.
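// For example, a 1024-bit type adds (1024 + 255) / 256 == 4 to the base
// legalization cost.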
if (Size <= 256)
return Cost;
Cost.first += (Size + 255) / 256;
return Cost;
}
bool SITargetLowering::hasMemSDNodeUser(SDNode *N) const {
SDNode::use_iterator I = N->use_begin(), E = N->use_end();
for (; I != E; ++I) {
if (MemSDNode *M = dyn_cast<MemSDNode>(*I)) {
if (getBasePtrIndex(M) == I.getOperandNo())
return true;
}
}
return false;
}
bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
SDValue N1) const {
if (!N0.hasOneUse())
return false;
// Profitable when N0 is already divergent or N1 is uniform, since
// reassociating then cannot make a uniform N0 divergent.
if (N0->isDivergent() || !N1->isDivergent())
return true;
// Check if we have a good chance to form the memory access pattern with the
// base and offset
return (DAG.isBaseWithConstantOffset(N0) &&
hasMemSDNodeUser(*N0->use_begin()));
}
MachineMemOperand::Flags
SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
// Propagate metadata set by AMDGPUAnnotateUniformValues to the MMO of a load.
if (I.getMetadata("amdgpu.noclobber"))
return MONoClobber;
return MachineMemOperand::MONone;
}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 3c102463ba08..cbfd2bc68f18 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1,2385 +1,2385 @@
//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "ARMTargetTransformInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "armtti"
static cl::opt<bool> EnableMaskedLoadStores(
"enable-arm-maskedldst", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of masked loads and stores"));
static cl::opt<bool> DisableLowOverheadLoops(
"disable-arm-loloops", cl::Hidden, cl::init(false),
cl::desc("Disable the generation of low-overhead loops"));
static cl::opt<bool>
AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of WLS loops"));
extern cl::opt<TailPredication::Mode> EnableTailPredication;
extern cl::opt<bool> EnableMaskedGatherScatters;
extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor;
/// Convert a vector load intrinsic into a simple llvm load instruction.
/// This is beneficial when the underlying object being addressed comes
/// from a constant, since we get constant-folding for free.
static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign,
InstCombiner::BuilderTy &Builder) {
auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
if (!IntrAlign)
return nullptr;
unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
? MemAlign
: IntrAlign->getLimitedValue();
if (!isPowerOf2_32(Alignment))
return nullptr;
auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
PointerType::get(II.getType(), 0));
return Builder.CreateAlignedLoad(II.getType(), BCastInst, Align(Alignment));
}
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
const FeatureBitset &CallerBits =
TM.getSubtargetImpl(*Caller)->getFeatureBits();
const FeatureBitset &CalleeBits =
TM.getSubtargetImpl(*Callee)->getFeatureBits();
// To inline a callee, all features not in the allowed list must match exactly.
bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
(CalleeBits & ~InlineFeaturesAllowed);
// For features in the allowed list, the callee's features must be a subset of
// the caller's.
bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
(CalleeBits & InlineFeaturesAllowed);
return MatchExact && MatchSubset;
}
TTI::AddressingModeKind
ARMTTIImpl::getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const {
if (ST->hasMVEIntegerOps())
return TTI::AMK_PostIndexed;
if (L->getHeader()->getParent()->hasOptSize())
return TTI::AMK_None;
if (ST->isMClass() && ST->isThumb2() &&
L->getNumBlocks() == 1)
return TTI::AMK_PreIndexed;
return TTI::AMK_None;
}
Optional<Instruction *>
ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
using namespace PatternMatch;
Intrinsic::ID IID = II.getIntrinsicID();
switch (IID) {
default:
break;
case Intrinsic::arm_neon_vld1: {
Align MemAlign =
getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree());
if (Value *V = simplifyNeonVld1(II, MemAlign.value(), IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
break;
}
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
Align MemAlign =
getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree());
unsigned AlignArg = II.arg_size() - 1;
Value *AlignArgOp = II.getArgOperand(AlignArg);
MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
if (Align && *Align < MemAlign) {
return IC.replaceOperand(
II, AlignArg,
ConstantInt::get(Type::getInt32Ty(II.getContext()), MemAlign.value(),
false));
}
break;
}
case Intrinsic::arm_mve_pred_i2v: {
Value *Arg = II.getArgOperand(0);
Value *ArgArg;
if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
PatternMatch::m_Value(ArgArg))) &&
II.getType() == ArgArg->getType()) {
return IC.replaceInstUsesWith(II, ArgArg);
}
Constant *XorMask;
if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
PatternMatch::m_Value(ArgArg)),
PatternMatch::m_Constant(XorMask))) &&
II.getType() == ArgArg->getType()) {
if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
if (CI->getValue().trunc(16).isAllOnes()) {
auto TrueVector = IC.Builder.CreateVectorSplat(
cast<FixedVectorType>(II.getType())->getNumElements(),
IC.Builder.getTrue());
return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
}
}
}
KnownBits ScalarKnown(32);
if (IC.SimplifyDemandedBits(&II, 0, APInt::getLowBitsSet(32, 16),
ScalarKnown, 0)) {
return &II;
}
break;
}
case Intrinsic::arm_mve_pred_v2i: {
Value *Arg = II.getArgOperand(0);
Value *ArgArg;
if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
PatternMatch::m_Value(ArgArg)))) {
return IC.replaceInstUsesWith(II, ArgArg);
}
if (!II.getMetadata(LLVMContext::MD_range)) {
Type *IntTy32 = Type::getInt32Ty(II.getContext());
Metadata *M[] = {
ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)),
ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0x10000))};
II.setMetadata(LLVMContext::MD_range, MDNode::get(II.getContext(), M));
return &II;
}
break;
}
case Intrinsic::arm_mve_vadc:
case Intrinsic::arm_mve_vadc_predicated: {
unsigned CarryOp =
(II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
"Bad type for intrinsic!");
KnownBits CarryKnown(32);
if (IC.SimplifyDemandedBits(&II, CarryOp, APInt::getOneBitSet(32, 29),
CarryKnown)) {
return &II;
}
break;
}
case Intrinsic::arm_mve_vmldava: {
Instruction *I = cast<Instruction>(&II);
if (I->hasOneUse()) {
auto *User = cast<Instruction>(*I->user_begin());
Value *OpZ;
if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) &&
match(I->getOperand(3), m_Zero())) {
Value *OpX = I->getOperand(4);
Value *OpY = I->getOperand(5);
Type *OpTy = OpX->getType();
IC.Builder.SetInsertPoint(User);
Value *V =
IC.Builder.CreateIntrinsic(Intrinsic::arm_mve_vmldava, {OpTy},
{I->getOperand(0), I->getOperand(1),
I->getOperand(2), OpZ, OpX, OpY});
IC.replaceInstUsesWith(*User, V);
return IC.eraseInstFromFunction(*User);
}
}
return None;
}
}
return None;
}
Optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const {
// Compute the demanded bits for a narrowing MVE intrinsic. TopOpc is the
// index of the operand that selects between the top/bottom forms of the
// instruction, which varies between intrinsics.
auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
unsigned NumElts = cast<FixedVectorType>(II.getType())->getNumElements();
unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();
// Only the odd or even lanes of operand 0 are demanded, depending on
// whether this is a top or bottom instruction.
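// E.g. for NumElts == 8 and IsTop == 1 the splat below yields
// DemandedElts == 0b01010101, i.e. only the even lanes of operand 0 are
// demanded.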
APInt DemandedElts =
APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
: APInt::getHighBitsSet(2, 1));
SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
// The other lanes will be defined from the inserted elements.
UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
: APInt::getHighBitsSet(2, 1));
return None;
};
switch (II.getIntrinsicID()) {
default:
break;
case Intrinsic::arm_mve_vcvt_narrow:
SimplifyNarrowInstrTopBottom(2);
break;
case Intrinsic::arm_mve_vqmovn:
SimplifyNarrowInstrTopBottom(4);
break;
case Intrinsic::arm_mve_vshrn:
SimplifyNarrowInstrTopBottom(7);
break;
}
return None;
}
InstructionCost ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned Bits = Ty->getPrimitiveSizeInBits();
if (Bits == 0 || Imm.getActiveBits() >= 64)
return 4;
int64_t SImmVal = Imm.getSExtValue();
uint64_t ZImmVal = Imm.getZExtValue();
if (!ST->isThumb()) {
if ((SImmVal >= 0 && SImmVal < 65536) ||
(ARM_AM::getSOImmVal(ZImmVal) != -1) ||
(ARM_AM::getSOImmVal(~ZImmVal) != -1))
return 1;
return ST->hasV6T2Ops() ? 2 : 3;
}
if (ST->isThumb2()) {
if ((SImmVal >= 0 && SImmVal < 65536) ||
(ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
(ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
return 1;
return ST->hasV6T2Ops() ? 2 : 3;
}
// Thumb1, any i8 imm cost 1.
if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
return 1;
if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
return 2;
// Load from constantpool.
return 3;
}
// Constants smaller than 256 fit in the immediate field of Thumb1
// instructions, so we return a cost of zero, and 1 otherwise.
InstructionCost ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) {
if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
return 0;
return 1;
}
// Checks whether Inst is part of a min(max()) or max(min()) pattern
// that will match to an SSAT instruction. Returns the instruction being
// saturated, or null if no saturation pattern was found.
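// Illustrative example: smax(smin(x, 127), -128) saturates x to 8 bits and
// matches with Imm == -128; the smin constant must equal (-Imm) - 1 == 127.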
static Value *isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
Value *LHS, *RHS;
ConstantInt *C;
SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor;
if (InstSPF == SPF_SMAX &&
PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) &&
C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
auto isSSatMin = [&](Value *MinInst) {
if (isa<SelectInst>(MinInst)) {
Value *MinLHS, *MinRHS;
ConstantInt *MinC;
SelectPatternFlavor MinSPF =
matchSelectPattern(MinInst, MinLHS, MinRHS).Flavor;
if (MinSPF == SPF_SMIN &&
PatternMatch::match(MinRHS, PatternMatch::m_ConstantInt(MinC)) &&
MinC->getValue() == ((-Imm) - 1))
return true;
}
return false;
};
if (isSSatMin(Inst->getOperand(1)))
return cast<Instruction>(Inst->getOperand(1))->getOperand(1);
if (Inst->hasNUses(2) &&
(isSSatMin(*Inst->user_begin()) || isSSatMin(*(++Inst->user_begin()))))
return Inst->getOperand(1);
}
return nullptr;
}
// Look for an FP saturation pattern, where the instruction can be simplified
// to a fptosi.sat: max(min(fptosi)). The constant in this case is always free.
static bool isFPSatMinMaxPattern(Instruction *Inst, const APInt &Imm) {
if (Imm.getBitWidth() != 64 ||
Imm != APInt::getHighBitsSet(64, 33)) // -2147483648
return false;
Value *FP = isSSATMinMaxPattern(Inst, Imm);
if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse())
FP = isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm);
if (!FP)
return false;
return isa<FPToSIInst>(FP);
}
InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind,
Instruction *Inst) {
// Division by a constant can be turned into multiplication, but only if we
// know it's constant. So it's not so much that the immediate is cheap (it's
// not), but that the alternative is worse.
// FIXME: this is probably unneeded with GlobalISel.
if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
Idx == 1)
return 0;
// Leave any gep offsets for the CodeGenPrepare, which will do a better job at
// splitting any large offsets.
if (Opcode == Instruction::GetElementPtr && Idx != 0)
return 0;
if (Opcode == Instruction::And) {
// UXTB/UXTH
if (Imm == 255 || Imm == 65535)
return 0;
// Conversion to BIC is free, and means we can use ~Imm instead.
return std::min(getIntImmCost(Imm, Ty, CostKind),
getIntImmCost(~Imm, Ty, CostKind));
}
if (Opcode == Instruction::Add)
// Conversion to SUB is free, and means we can use -Imm instead.
return std::min(getIntImmCost(Imm, Ty, CostKind),
getIntImmCost(-Imm, Ty, CostKind));
if (Opcode == Instruction::ICmp && Imm.isNegative() &&
Ty->getIntegerBitWidth() == 32) {
int64_t NegImm = -Imm.getSExtValue();
if (ST->isThumb2() && NegImm < 1<<12)
// icmp X, #-C -> cmn X, #C
return 0;
if (ST->isThumb() && NegImm < 1<<8)
// icmp X, #-C -> adds X, #C
return 0;
}
// xor a, -1 can always be folded to MVN
if (Opcode == Instruction::Xor && Imm.isAllOnes())
return 0;
// Ensure that the negative constants of min(max()) or max(min()) patterns that
// match SSAT instructions don't get hoisted
if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
Ty->getIntegerBitWidth() <= 32) {
if (isSSATMinMaxPattern(Inst, Imm) ||
(isa<ICmpInst>(Inst) && Inst->hasOneUse() &&
isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm)))
return 0;
}
if (Inst && ST->hasVFP2Base() && isFPSatMinMaxPattern(Inst, Imm))
return 0;
// We can convert <= -1 to < 0, which is generally quite cheap.
if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) {
ICmpInst::Predicate Pred = cast<ICmpInst>(Inst)->getPredicate();
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE)
return std::min(getIntImmCost(Imm, Ty, CostKind),
getIntImmCost(Imm + 1, Ty, CostKind));
}
return getIntImmCost(Imm, Ty, CostKind);
}
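// Worked examples for the rules above (illustrative): on Thumb2,
// `icmp slt i32 %x, -10` treats -10 as free since it can become
// `cmn r0, #10`; `and i32 %x, 255` is free as a UXTB; and
// `xor i32 %x, -1` is free as an MVN.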
InstructionCost ARMTTIImpl::getCFInstrCost(unsigned Opcode,
TTI::TargetCostKind CostKind,
const Instruction *I) {
if (CostKind == TTI::TCK_RecipThroughput &&
(ST->hasNEON() || ST->hasMVEIntegerOps())) {
// FIXME: The vectorizer is highly sensitive to the cost of these
// instructions, which suggests that it may be using the costs incorrectly.
// But, for now, just make them free to avoid performance regressions for
// vector targets.
return 0;
}
return BaseT::getCFInstrCost(Opcode, CostKind, I);
}
InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
// TODO: Allow non-throughput costs that aren't binary.
auto AdjustCost = [&CostKind](InstructionCost Cost) -> InstructionCost {
if (CostKind != TTI::TCK_RecipThroughput)
return Cost == 0 ? 0 : 1;
return Cost;
};
auto IsLegalFPType = [this](EVT VT) {
EVT EltVT = VT.getScalarType();
return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
(EltVT == MVT::f64 && ST->hasFP64()) ||
(EltVT == MVT::f16 && ST->hasFullFP16());
};
EVT SrcTy = TLI->getValueType(DL, Src);
EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
return AdjustCost(
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
// Extending masked loads / truncating masked stores are expensive because we
// currently don't split them. This means that we'll likely end up
// loading/storing each element individually (hence the high cost).
if ((ST->hasMVEIntegerOps() &&
(Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
Opcode == Instruction::SExt)) ||
(ST->hasMVEFloatOps() &&
(Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
if (CCH == TTI::CastContextHint::Masked && DstTy.getSizeInBits() > 128)
return 2 * DstTy.getVectorNumElements() *
ST->getMVEVectorCostFactor(CostKind);
// The extend of other kinds of load is free
if (CCH == TTI::CastContextHint::Normal ||
CCH == TTI::CastContextHint::Masked) {
static const TypeConversionCostTblEntry LoadConversionTbl[] = {
{ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0},
{ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0},
{ISD::SIGN_EXTEND, MVT::i32, MVT::i8, 0},
{ISD::ZERO_EXTEND, MVT::i32, MVT::i8, 0},
{ISD::SIGN_EXTEND, MVT::i16, MVT::i8, 0},
{ISD::ZERO_EXTEND, MVT::i16, MVT::i8, 0},
{ISD::SIGN_EXTEND, MVT::i64, MVT::i32, 1},
{ISD::ZERO_EXTEND, MVT::i64, MVT::i32, 1},
{ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 1},
{ISD::ZERO_EXTEND, MVT::i64, MVT::i16, 1},
{ISD::SIGN_EXTEND, MVT::i64, MVT::i8, 1},
{ISD::ZERO_EXTEND, MVT::i64, MVT::i8, 1},
};
if (const auto *Entry = ConvertCostTableLookup(
LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
static const TypeConversionCostTblEntry MVELoadConversionTbl[] = {
{ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0},
{ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0},
{ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 0},
{ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 0},
{ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 0},
{ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 0},
// The following extend from a legal type to an illegal type, so we need to
// split the load. This introduces an extra load operation, but the
// extend is still "free".
{ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1},
{ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1},
{ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 3},
{ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 3},
{ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1},
{ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1},
};
if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
if (const auto *Entry =
ConvertCostTableLookup(MVELoadConversionTbl, ISD,
DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
}
static const TypeConversionCostTblEntry MVEFLoadConversionTbl[] = {
// FPExtends are similar but also require the VCVT instructions.
{ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
{ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 3},
};
if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
if (const auto *Entry =
ConvertCostTableLookup(MVEFLoadConversionTbl, ISD,
DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
}
// The truncate of a store is free. This is the mirror of extends above.
static const TypeConversionCostTblEntry MVEStoreConversionTbl[] = {
{ISD::TRUNCATE, MVT::v4i32, MVT::v4i16, 0},
{ISD::TRUNCATE, MVT::v4i32, MVT::v4i8, 0},
{ISD::TRUNCATE, MVT::v8i16, MVT::v8i8, 0},
{ISD::TRUNCATE, MVT::v8i32, MVT::v8i16, 1},
{ISD::TRUNCATE, MVT::v8i32, MVT::v8i8, 1},
{ISD::TRUNCATE, MVT::v16i32, MVT::v16i8, 3},
{ISD::TRUNCATE, MVT::v16i16, MVT::v16i8, 1},
};
if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
if (const auto *Entry =
ConvertCostTableLookup(MVEStoreConversionTbl, ISD,
SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
}
static const TypeConversionCostTblEntry MVEFStoreConversionTbl[] = {
{ISD::FP_ROUND, MVT::v4f32, MVT::v4f16, 1},
{ISD::FP_ROUND, MVT::v8f32, MVT::v8f16, 3},
};
if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
if (const auto *Entry =
ConvertCostTableLookup(MVEFStoreConversionTbl, ISD,
SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
}
}
// NEON vector operations that can extend their inputs.
if ((ISD == ISD::SIGN_EXTEND || ISD == ISD::ZERO_EXTEND) &&
I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
static const TypeConversionCostTblEntry NEONDoubleWidthTbl[] = {
// vaddl
{ ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ADD, MVT::v8i16, MVT::v8i8, 0 },
// vsubl
{ ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SUB, MVT::v8i16, MVT::v8i8, 0 },
// vmull
{ ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::MUL, MVT::v8i16, MVT::v8i8, 0 },
// vshll
{ ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SHL, MVT::v8i16, MVT::v8i8, 0 },
};
auto *User = cast<Instruction>(*I->user_begin());
int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode());
if (auto *Entry = ConvertCostTableLookup(NEONDoubleWidthTbl, UserISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT())) {
return AdjustCost(Entry->Cost);
}
}
// Single to/from double precision conversions.
if (Src->isVectorTy() && ST->hasNEON() &&
((ISD == ISD::FP_ROUND && SrcTy.getScalarType() == MVT::f64 &&
DstTy.getScalarType() == MVT::f32) ||
(ISD == ISD::FP_EXTEND && SrcTy.getScalarType() == MVT::f32 &&
DstTy.getScalarType() == MVT::f64))) {
static const CostTblEntry NEONFltDblTbl[] = {
// Vector fptrunc/fpext conversions.
{ISD::FP_ROUND, MVT::v2f64, 2},
{ISD::FP_EXTEND, MVT::v2f32, 2},
{ISD::FP_EXTEND, MVT::v4f32, 4}};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
return AdjustCost(LT.first * Entry->Cost);
}
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
// The number of vmovl instructions for the extension.
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 3 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 3 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
// Operations that we legalize using splitting.
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
// Vector double <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
{ ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
{ ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
};
if (SrcTy.isVector() && ST->hasNEON()) {
if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
}
// Scalar float to integer conversions.
static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
{ ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
};
if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
}
// Scalar integer to float conversions.
static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
};
if (SrcTy.isInteger() && ST->hasNEON()) {
if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
ISD, DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
}
// MVE extend costs, taken from codegen tests. i8->i16 or i16->i32 is one
// instruction, i8->i32 is two. i64 zexts are a VAND with a constant, sexts
// are linearised so take more.
static const TypeConversionCostTblEntry MVEVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 10 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 2 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 10 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 8 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 2 },
};
if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl,
ISD, DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
}
if (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND) {
// As a general rule, fp converts that were not matched above are scalarized
// and cost 1 vcvt for each lane, so long as the instruction is available.
// If not, it will become a series of function calls.
const InstructionCost CallCost =
getCallInstrCost(nullptr, Dst, {Src}, CostKind);
int Lanes = 1;
if (SrcTy.isFixedLengthVector())
Lanes = SrcTy.getVectorNumElements();
if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
return Lanes;
else
return Lanes * CallCost;
}
if (ISD == ISD::TRUNCATE && ST->hasMVEIntegerOps() &&
SrcTy.isFixedLengthVector()) {
// Treat a truncate with a larger-than-legal source (128 bits for MVE) as
// expensive: 2 instructions per lane.
if ((SrcTy.getScalarType() == MVT::i8 ||
SrcTy.getScalarType() == MVT::i16 ||
SrcTy.getScalarType() == MVT::i32) &&
SrcTy.getSizeInBits() > 128 &&
SrcTy.getSizeInBits() > DstTy.getSizeInBits())
return SrcTy.getVectorNumElements() * 2;
}
// Scalar integer conversion costs.
static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
{ ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
// Truncates on i64 are assumed to be free.
{ ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
{ ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
{ ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
{ ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
};
if (SrcTy.isInteger()) {
if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
}
int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
? ST->getMVEVectorCostFactor(CostKind)
: 1;
return AdjustCost(
BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}
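// Example queries against the tables above (illustrative): under MVE,
// sext <8 x i8> -> <8 x i16> is a single extend, costing 1 times the MVE
// vector cost factor, while sext <2 x i32> -> <2 x i64> has no single
// instruction and is costed at 8 (times the factor) per the
// MVEVectorConversionTbl.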
InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
unsigned Index) {
// Penalize inserting into a D-subregister. We end up with a three times
// lower estimated throughput on Swift.
if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
return 3;
if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
Opcode == Instruction::ExtractElement)) {
// Cross-class copies are expensive on many microarchitectures,
// so assume they are expensive by default.
if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy())
return 3;
// Even if it's not a cross class copy, this likely leads to mixing
// of NEON and VFP code and should be therefore penalized.
if (ValTy->isVectorTy() &&
ValTy->getScalarSizeInBits() <= 32)
return std::max<InstructionCost>(
BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
}
if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
Opcode == Instruction::ExtractElement)) {
// Integer cross-lane moves are more expensive than float, which can
// sometimes just be vmovs. Integer moves involve a pass through the GPR
// registers, causing more of a delay.
std::pair<InstructionCost, MVT> LT =
getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType());
return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
}
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
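// Example (illustrative): under MVE, `extractelement <4 x i32> %v, i32 1`
// moves a lane out through a GPR and is costed at 4x the scalar
// legalization cost, while the equivalent <4 x float> extract can often be
// a plain vmov and is costed at 1x.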
InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// Thumb scalar code size cost for select.
if (CostKind == TTI::TCK_CodeSize && ISD == ISD::SELECT &&
ST->isThumb() && !ValTy->isVectorTy()) {
// Assume expensive structs.
if (TLI->getValueType(DL, ValTy, true) == MVT::Other)
return TTI::TCC_Expensive;
// Select costs can vary because they:
// - may require one or more conditional mov (including an IT),
// - can't operate directly on immediates,
// - require live flags, which we can't copy around easily.
InstructionCost Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
// Possible IT instruction for Thumb2, or more for Thumb1.
++Cost;
// i1 values may need rematerialising by using mov immediates and/or
// flag setting instructions.
if (ValTy->isIntegerTy(1))
++Cost;
return Cost;
}
// If this is a vector min/max/abs, use the cost of that intrinsic directly
// instead. Hopefully when min/max intrinsics are more prevalent this code
// will not be needed.
const Instruction *Sel = I;
if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
Sel->hasOneUse())
Sel = cast<Instruction>(Sel->user_back());
if (Sel && ValTy->isVectorTy() &&
(ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) {
const Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(Sel, LHS, RHS).Flavor;
unsigned IID = 0;
switch (SPF) {
case SPF_ABS:
IID = Intrinsic::abs;
break;
case SPF_SMIN:
IID = Intrinsic::smin;
break;
case SPF_SMAX:
IID = Intrinsic::smax;
break;
case SPF_UMIN:
IID = Intrinsic::umin;
break;
case SPF_UMAX:
IID = Intrinsic::umax;
break;
case SPF_FMINNUM:
IID = Intrinsic::minnum;
break;
case SPF_FMAXNUM:
IID = Intrinsic::maxnum;
break;
default:
break;
}
if (IID) {
// The ICmp is free, the select gets the cost of the min/max/etc
if (Sel != I)
return 0;
IntrinsicCostAttributes CostAttrs(IID, ValTy, {ValTy, ValTy});
return getIntrinsicInstrCost(CostAttrs, CostKind);
}
}
// On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
// Lowering of some vector selects is currently far from perfect.
static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
};
EVT SelCondTy = TLI->getValueType(DL, CondTy);
EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
SelCondTy.getSimpleVT(),
SelValTy.getSimpleVT()))
return Entry->Cost;
}
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
return LT.first;
}
if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
(Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
FixedVectorType *VecValTy = cast<FixedVectorType>(ValTy);
FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
if (!VecCondTy)
VecCondTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(VecValTy));
// If we don't have mve.fp, any fp operations will need to be scalarized.
if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
// One scalarization insert, one scalarization extract, and the cost of the
// fcmps.
return BaseT::getScalarizationOverhead(VecValTy, false, true) +
BaseT::getScalarizationOverhead(VecCondTy, true, false) +
VecValTy->getNumElements() *
getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
VecCondTy->getScalarType(), VecPred, CostKind,
I);
}
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
int BaseCost = ST->getMVEVectorCostFactor(CostKind);
// There are two types - the input that specifies the type of the compare
// and the output vXi1 type. Because we don't know how the output will be
// split, we may need an expensive shuffle to get two in sync. This has the
// effect of making larger than legal compares (v8i32 for example)
// expensive.
- if (LT.second.getVectorNumElements() > 2) {
+ if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
if (LT.first > 1)
return LT.first * BaseCost +
BaseT::getScalarizationOverhead(VecCondTy, true, false);
return BaseCost;
}
}
// Default to cheap (throughput/size of 1 instruction) but adjust throughput
// for "multiple beats" potentially needed by MVE instructions.
int BaseCost = 1;
if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
BaseCost = ST->getMVEVectorCostFactor(CostKind);
return BaseCost *
BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
InstructionCost ARMTTIImpl::getAddressComputationCost(Type *Ty,
ScalarEvolution *SE,
const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
if (ST->hasNEON()) {
if (Ty->isVectorTy() && SE &&
!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
// addressing mode.
return 1;
}
return BaseT::getAddressComputationCost(Ty, SE, Ptr);
}
bool ARMTTIImpl::isProfitableLSRChainElement(Instruction *I) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
// If a VCTP is part of a chain, it's already profitable and shouldn't be
// optimized, else LSR may block tail-predication.
switch (II->getIntrinsicID()) {
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
case Intrinsic::arm_mve_vctp64:
return true;
default:
break;
}
}
return false;
}
bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {
if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps())
return false;
if (auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) {
// Don't support v2i1 yet.
if (VecTy->getNumElements() == 2)
return false;
// We don't support extending fp types.
unsigned VecWidth = DataTy->getPrimitiveSizeInBits();
if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
return false;
}
unsigned EltWidth = DataTy->getScalarSizeInBits();
return (EltWidth == 32 && Alignment >= 4) ||
(EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
}
bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) {
if (!EnableMaskedGatherScatters || !ST->hasMVEIntegerOps())
return false;
unsigned EltWidth = Ty->getScalarSizeInBits();
return ((EltWidth == 32 && Alignment >= 4) ||
(EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
}
/// Given a memcpy/memset/memmove instruction, return the number of memory
/// operations performed, via querying findOptimalMemOpLowering. Returns -1 if a
/// call is used.
int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const {
MemOp MOp;
unsigned DstAddrSpace = ~0u;
unsigned SrcAddrSpace = ~0u;
const Function *F = I->getParent()->getParent();
if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());
// If 'size' is not a constant, a library call will be generated.
if (!C)
return -1;
const unsigned Size = C->getValue().getZExtValue();
const Align DstAlign = *MC->getDestAlign();
const Align SrcAlign = *MC->getSourceAlign();
MOp = MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign,
/*IsVolatile*/ false);
DstAddrSpace = MC->getDestAddressSpace();
SrcAddrSpace = MC->getSourceAddressSpace();
}
else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());
// If 'size' is not a constant, a library call will be generated.
if (!C)
return -1;
const unsigned Size = C->getValue().getZExtValue();
const Align DstAlign = *MS->getDestAlign();
MOp = MemOp::Set(Size, /*DstAlignCanChange*/ false, DstAlign,
/*IsZeroMemset*/ false, /*IsVolatile*/ false);
DstAddrSpace = MS->getDestAddressSpace();
}
else
llvm_unreachable("Expected a memcpy/move or memset!");
unsigned Limit, Factor = 2;
switch(I->getIntrinsicID()) {
case Intrinsic::memcpy:
Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
break;
case Intrinsic::memmove:
Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
break;
case Intrinsic::memset:
Limit = TLI->getMaxStoresPerMemset(F->hasMinSize());
Factor = 1;
break;
default:
llvm_unreachable("Expected a memcpy/move or memset!");
}
// MemOps will be populated with a list of data types that need to be
// loaded and stored. That's why we multiply the number of entries by Factor
// to get the cost: a memcpy/memmove needs both a load and a store per
// entry, while a memset only needs stores.
std::vector<EVT> MemOps;
if (getTLI()->findOptimalMemOpLowering(
MemOps, Limit, MOp, DstAddrSpace,
SrcAddrSpace, F->getAttributes()))
return MemOps.size() * Factor;
// If we can't find an optimal memop lowering, return the default cost
return -1;
}
InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) {
int NumOps = getNumMemOps(cast<IntrinsicInst>(I));
// To model the cost of a library call, we assume 1 for the call, and
// 3 for the argument setup.
if (NumOps == -1)
return 4;
return NumOps;
}
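// Example (illustrative): assuming findOptimalMemOpLowering splits a
// 16-byte constant-size memcpy into two i64 memops, the cost is
// 2 * Factor(2) == 4; a memcpy with a non-constant length is modelled as a
// library call, also 4 (1 for the call plus 3 for argument setup).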
InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
Kind = improveShuffleKindFromMask(Kind, Mask);
if (ST->hasNEON()) {
if (Kind == TTI::SK_Broadcast) {
static const CostTblEntry NEONDupTbl[] = {
// VDUP handles these cases.
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry =
CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
}
if (Kind == TTI::SK_Reverse) {
static const CostTblEntry NEONShuffleTbl[] = {
// Reverse shuffle costs one instruction if we are shuffling within a
// double word (vrev) or two if we shuffle a quad word (vrev, vext).
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry =
CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
}
if (Kind == TTI::SK_Select) {
static const CostTblEntry NEONSelShuffleTbl[] = {
// Select shuffle cost table for ARM. Cost is the number of instructions
// required to create the shuffled vector.
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
}
}
if (ST->hasMVEIntegerOps()) {
if (Kind == TTI::SK_Broadcast) {
static const CostTblEntry MVEDupTbl[] = {
// VDUP handles these cases.
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
LT.second))
return LT.first * Entry->Cost *
ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput);
}
if (!Mask.empty()) {
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (LT.second.isVector() &&
Mask.size() <= LT.second.getVectorNumElements() &&
(isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
isVREVMask(Mask, LT.second, 64)))
return ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) * LT.first;
}
}
int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
? ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput)
: 1;
return BaseCost * BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}
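// Example (illustrative): a <4 x i32> broadcast is a single VDUP on both
// NEON and MVE; an MVE shuffle mask recognised by isVREVMask (e.g. swapping
// adjacent i16 lanes within each 32-bit chunk) costs one VREV32-style
// instruction times the MVE vector cost factor.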
InstructionCost ARMTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
if (ST->isThumb() && CostKind == TTI::TCK_CodeSize && Ty->isIntegerTy(1)) {
// Make operations on i1 relatively expensive as this often involves
// combining predicates. AND and XOR should be easier to handle with IT
// blocks.
switch (ISDOpcode) {
default:
break;
case ISD::AND:
case ISD::XOR:
return 2;
case ISD::OR:
return 3;
}
}
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->hasNEON()) {
const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
static const CostTblEntry CostTbl[] = {
// Division.
// These costs are somewhat random. Choose a cost of 20 to indicate that
// vectorizing division (with its added function call) is going to be very expensive.
// Double registers types.
{ ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
{ ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
{ ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
{ ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
{ ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
{ ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
{ ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
{ ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
{ ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
{ ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
{ ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
// Quad register types.
{ ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
{ ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
{ ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
{ ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
{ ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
{ ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
{ ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
{ ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
{ ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
// Multiplication.
};
if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
return LT.first * Entry->Cost;
InstructionCost Cost = BaseT::getArithmeticInstrCost(
Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
// This is somewhat of a hack. The problem that we are facing is that SROA
// creates a sequence of shift, and, or instructions to construct values.
// These sequences are recognized by the ISel and have zero-cost. Not so for
// the vectorized code. Because we have support for v2i64 but not i64 those
// sequences look particularly beneficial to vectorize.
// To work around this we increase the cost of v2i64 operations to make them
// seem less beneficial.
if (LT.second == MVT::v2i64 &&
Op2Info == TargetTransformInfo::OK_UniformConstantValue)
Cost += 4;
return Cost;
}
// If this operation is a shift on arm/thumb2, it might well be folded into
// the following instruction, hence having a cost of 0.
auto LooksLikeAFreeShift = [&]() {
if (ST->isThumb1Only() || Ty->isVectorTy())
return false;
if (!CxtI || !CxtI->hasOneUse() || !CxtI->isShift())
return false;
if (Op2Info != TargetTransformInfo::OK_UniformConstantValue)
return false;
// Folded into an ADC/ADD/AND/BIC/CMP/EOR/MVN/ORR/ORN/RSB/SBC/SUB
switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::And:
case Instruction::Xor:
case Instruction::Or:
case Instruction::ICmp:
return true;
default:
return false;
}
};
if (LooksLikeAFreeShift())
return 0;
// Default to cheap (throughput/size of 1 instruction) but adjust throughput
// for "multiple beats" potentially needed by MVE instructions.
int BaseCost = 1;
if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
BaseCost = ST->getMVEVectorCostFactor(CostKind);
// The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
// without treating floats as more expensive than scalars or increasing the
// costs for custom operations. The result is also multiplied by the
// MVEVectorCostFactor where appropriate.
if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
return LT.first * BaseCost;
// Else this is expand, assume that we need to scalarize this op.
if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
unsigned Num = VTy->getNumElements();
InstructionCost Cost =
getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
SmallVector<Type *> Tys(Args.size(), Ty);
return BaseT::getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
}
return BaseCost;
}
InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return 1;
// Type legalization can't handle structs
if (TLI->getValueType(DL, Src, true) == MVT::Other)
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
if (ST->hasNEON() && Src->isVectorTy() &&
(Alignment && *Alignment != Align(16)) &&
cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
// Unaligned loads/stores are extremely inefficient.
// We need 4 uops for vst.1/vld.1 vs 1 uop for vldr/vstr.
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
return LT.first * 4;
}
// MVE can optimize a fpext(load(4xhalf)) using an extending integer load.
// Same for stores.
if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) && I &&
((Opcode == Instruction::Load && I->hasOneUse() &&
isa<FPExtInst>(*I->user_begin())) ||
(Opcode == Instruction::Store && isa<FPTruncInst>(I->getOperand(0))))) {
FixedVectorType *SrcVTy = cast<FixedVectorType>(Src);
Type *DstTy =
Opcode == Instruction::Load
? (*I->user_begin())->getType()
: cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();
if (SrcVTy->getNumElements() == 4 && SrcVTy->getScalarType()->isHalfTy() &&
DstTy->getScalarType()->isFloatTy())
return ST->getMVEVectorCostFactor(CostKind);
}
int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
? ST->getMVEVectorCostFactor(CostKind)
: 1;
return BaseCost * BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind, I);
}
InstructionCost
ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind) {
if (ST->hasMVEIntegerOps()) {
if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment))
return ST->getMVEVectorCostFactor(CostKind);
if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment))
return ST->getMVEVectorCostFactor(CostKind);
}
if (!isa<FixedVectorType>(Src))
return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
// Scalar cost, which is currently very high due to the inefficiency of the
// generated code.
return cast<FixedVectorType>(Src)->getNumElements() * 8;
}
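// Example (illustrative): a masked load of <4 x i32> with alignment 4 is
// legal for MVE and costs one MVE vector cost factor; a masked load of
// <8 x float> is rejected (fp types wider than 128 bits), so it is
// scalarized at 8 elements * 8 == 64.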
InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN doesn't support vector types with i64/f64 elements.
bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
!UseMaskForCond && !UseMaskForGaps) {
unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
auto *SubVecTy =
FixedVectorType::get(VecTy->getScalarType(), NumElts / Factor);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
// Accesses having vector types that are a multiple of 128 bits can be
// matched to more than one vldN/vstN instruction.
int BaseCost =
ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor(CostKind) : 1;
if (NumElts % Factor == 0 &&
TLI->isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL);
// Some smaller than legal interleaved patterns are cheap as we can make
// use of the vmovn or vrev patterns to interleave a standard load. This is
// true for v4i8, v8i8 and v4i16 at least (but not for v4f16 as it is
// promoted differently). The cost of 2 here is then a load and vrev or
// vmovn.
if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
VecTy->isIntOrIntVectorTy() &&
DL.getTypeSizeInBits(SubVecTy).getFixedSize() <= 64)
return 2 * BaseCost;
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
}
InstructionCost ARMTTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
using namespace PatternMatch;
if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters)
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
auto *VTy = cast<FixedVectorType>(DataTy);
// TODO: Splitting, once we do that.
unsigned NumElems = VTy->getNumElements();
unsigned EltSize = VTy->getScalarSizeInBits();
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
// For now, it is assumed that for the MVE gather instructions the loads are
// all effectively serialised. This means the cost is the scalar cost
// multiplied by the number of elements being loaded. This is possibly very
// conservative, but even so we still end up vectorising loops because the
// cost per iteration for many loops is lower than for scalar loops.
InstructionCost VectorCost =
NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
// The scalarization cost should be a lot higher. We use the number of vector
// elements plus the scalarization overhead.
InstructionCost ScalarCost =
NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, true, false) +
BaseT::getScalarizationOverhead(VTy, false, true);
if (EltSize < 8 || Alignment < EltSize / 8)
return ScalarCost;
unsigned ExtSize = EltSize;
// Check whether there's a single user that asks for an extended type
if (I != nullptr) {
// Depending on the caller of this function, a gather instruction will
// either have opcode Instruction::Load or be a call to the masked_gather
// intrinsic.
if ((I->getOpcode() == Instruction::Load ||
match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
I->hasOneUse()) {
const User *Us = *I->users().begin();
if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
// Only allow valid type combinations.
unsigned TypeSize =
cast<Instruction>(Us)->getType()->getScalarSizeInBits();
if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
(TypeSize == 16 && EltSize == 8)) &&
TypeSize * NumElems == 128) {
ExtSize = TypeSize;
}
}
}
// Check whether the input data needs to be truncated
TruncInst *T;
if ((I->getOpcode() == Instruction::Store ||
match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
(T = dyn_cast<TruncInst>(I->getOperand(0)))) {
// Only allow valid type combinations
unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
if (((EltSize == 16 && TypeSize == 32) ||
(EltSize == 8 && (TypeSize == 32 || TypeSize == 16))) &&
TypeSize * NumElems == 128)
ExtSize = TypeSize;
}
}
if (ExtSize * NumElems != 128 || NumElems < 4)
return ScalarCost;
// Any (aligned) i32 gather will not need to be scalarised.
if (ExtSize == 32)
return VectorCost;
// For smaller types, we need to ensure that the gep's inputs are correctly
// extended from a small enough value. Other sizes (including i64) are
// scalarized for now.
if (ExtSize != 8 && ExtSize != 16)
return ScalarCost;
if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
Ptr = BC->getOperand(0);
if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
if (GEP->getNumOperands() != 2)
return ScalarCost;
unsigned Scale = DL.getTypeAllocSize(GEP->getResultElementType());
// Scale needs to be correct (which is only relevant for i16s).
if (Scale != 1 && Scale * 8 != ExtSize)
return ScalarCost;
// And we need to zext (not sext) the indexes from a small enough type.
if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
return VectorCost;
}
return ScalarCost;
}
return ScalarCost;
}
InstructionCost
ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
if (TTI::requiresOrderedReduction(FMF))
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
EVT ValVT = TLI->getValueType(DL, ValTy);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD)
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
static const CostTblEntry CostTblAdd[]{
{ISD::ADD, MVT::v16i8, 1},
{ISD::ADD, MVT::v8i16, 1},
{ISD::ADD, MVT::v4i32, 1},
};
if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
}
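// Example (illustrative): vecreduce.add on <4 x i32> maps to a single
// VADDV (cost 1 times the MVE vector cost factor); <8 x i32> legalizes to
// two v4i32 halves (LT.first == 2) and costs twice that.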
InstructionCost
ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
Type *ResTy, VectorType *ValTy,
TTI::TargetCostKind CostKind) {
EVT ValVT = TLI->getValueType(DL, ValTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
// The legal cases are:
// VADDV u/s 8/16/32
// VMLAV u/s 8/16/32
// VADDLV u/s 32
// VMLALV u/s 16/32
// Codegen currently cannot always handle larger than legal vectors very
// well, especially for predicated reductions where the mask needs to be
// split, so restrict to 128-bit or smaller input types.
unsigned RevVTSize = ResVT.getSizeInBits();
if (ValVT.getSizeInBits() <= 128 &&
((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
(LT.second == MVT::v8i16 && RevVTSize <= (IsMLA ? 64u : 32u)) ||
(LT.second == MVT::v4i32 && RevVTSize <= 64)))
return ST->getMVEVectorCostFactor(CostKind) * LT.first;
}
return BaseT::getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, ValTy,
CostKind);
}
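// Example (illustrative): an extending reduction such as
// vecreduce.add(zext <8 x i16> %v to <8 x i32>) with an i32 result fits the
// v8i16 case above and is costed as one VADDV/VMLAV-style operation rather
// than a separate extend plus reduction.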
InstructionCost
ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
switch (ICA.getID()) {
case Intrinsic::get_active_lane_mask:
// Currently we make a somewhat optimistic assumption that
// active_lane_masks are always free. In reality one may be freely folded
// into a tail predicated loop, expanded into a VCTP or expanded into a lot
// of add/icmp code. We may need to improve this in the future, but being
// able to detect if it is free or not involves looking at a lot of other
// code. We currently assume that the vectorizer inserted these, and knew
// what it was doing in adding one.
if (ST->hasMVEIntegerOps())
return 0;
break;
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat: {
if (!ST->hasMVEIntegerOps())
break;
Type *VT = ICA.getReturnType();
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
LT.second == MVT::v16i8) {
// This is a base cost of 1 for the vqadd, plus 3 extra shifts if we
// need to extend the type, as it uses shr(qadd(shl, shl)).
unsigned Instrs =
LT.second.getScalarSizeInBits() == VT->getScalarSizeInBits() ? 1 : 4;
return LT.first * ST->getMVEVectorCostFactor(CostKind) * Instrs;
}
break;
}
case Intrinsic::abs:
case Intrinsic::smin:
case Intrinsic::smax:
case Intrinsic::umin:
case Intrinsic::umax: {
if (!ST->hasMVEIntegerOps())
break;
Type *VT = ICA.getReturnType();
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
LT.second == MVT::v16i8)
return LT.first * ST->getMVEVectorCostFactor(CostKind);
break;
}
case Intrinsic::minnum:
case Intrinsic::maxnum: {
if (!ST->hasMVEFloatOps())
break;
Type *VT = ICA.getReturnType();
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
return LT.first * ST->getMVEVectorCostFactor(CostKind);
break;
}
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat: {
if (ICA.getArgTypes().empty())
break;
bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
EVT MTy = TLI->getValueType(DL, ICA.getReturnType());
// Check for the legal types, with the correct subtarget features.
if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
(ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
(ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
return LT.first;
// Equally for MVE vector types
if (ST->hasMVEFloatOps() &&
(LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits())
return LT.first * ST->getMVEVectorCostFactor(CostKind);
// Otherwise we use a legal convert followed by a min+max
if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
(ST->hasFP64() && LT.second == MVT::f64) ||
(ST->hasFullFP16() && LT.second == MVT::f16) ||
(ST->hasMVEFloatOps() &&
(LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) {
Type *LegalTy = Type::getIntNTy(ICA.getReturnType()->getContext(),
LT.second.getScalarSizeInBits());
InstructionCost Cost =
LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1;
IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin
: Intrinsic::umin,
LegalTy, {LegalTy, LegalTy});
Cost += getIntrinsicInstrCost(Attrs1, CostKind);
IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax
: Intrinsic::umax,
LegalTy, {LegalTy, LegalTy});
Cost += getIntrinsicInstrCost(Attrs2, CostKind);
return LT.first * Cost;
}
break;
}
}
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
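// Examples (illustrative): llvm.sadd.sat on <4 x i32> is a single VQADD and
// costs one MVE vector cost factor; on <4 x i16> (promoted to v4i32) it
// needs the shr(qadd(shl, shl)) sequence, costing 4 instructions.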
bool ARMTTIImpl::isLoweredToCall(const Function *F) {
if (!F->isIntrinsic())
return BaseT::isLoweredToCall(F);
// Assume all Arm-specific intrinsics map to an instruction.
if (F->getName().startswith("llvm.arm"))
return false;
switch (F->getIntrinsicID()) {
default: break;
case Intrinsic::powi:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::pow:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::exp:
case Intrinsic::exp2:
return true;
case Intrinsic::sqrt:
case Intrinsic::fabs:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::canonicalize:
case Intrinsic::lround:
case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint:
if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
return true;
if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
return true;
// Some operations can be handled by vector instructions, and we assume
// unsupported vectors will be expanded into supported scalar ones.
// TODO Handle scalar operations properly.
return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
case Intrinsic::masked_store:
case Intrinsic::masked_load:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter:
return !ST->hasMVEIntegerOps();
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::sadd_sat:
case Intrinsic::uadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::usub_sat:
return false;
}
return BaseT::isLoweredToCall(F);
}
bool ARMTTIImpl::maybeLoweredToCall(Instruction &I) {
unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
EVT VT = TLI->getValueType(DL, I.getType(), true);
if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall)
return true;
// Check if an intrinsic will be lowered to a call and assume that any
// other CallInst will generate a bl.
if (auto *Call = dyn_cast<CallInst>(&I)) {
if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
switch(II->getIntrinsicID()) {
case Intrinsic::memcpy:
case Intrinsic::memset:
case Intrinsic::memmove:
return getNumMemOps(II) == -1;
default:
if (const Function *F = Call->getCalledFunction())
return isLoweredToCall(F);
}
}
return true;
}
// FPv5 provides conversions between integer, double-precision,
// single-precision, and half-precision formats.
switch (I.getOpcode()) {
default:
break;
case Instruction::FPToSI:
case Instruction::FPToUI:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::FPTrunc:
case Instruction::FPExt:
return !ST->hasFPARMv8Base();
}
// FIXME: Unfortunately the approach of checking the Operation Action does
// not catch all cases of Legalization that use library calls. Our
// Legalization step categorizes some transformations into library calls as
// Custom, Expand or even Legal when doing type legalization. So for now
// we have to special-case, for instance, the SDIV of 64-bit integers and
// the use of floating point emulation.
if (VT.isInteger() && VT.getSizeInBits() >= 64) {
switch (ISD) {
default:
break;
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
case ISD::SDIVREM:
case ISD::UDIVREM:
return true;
}
}
// Assume all other non-float operations are supported.
if (!VT.isFloatingPoint())
return false;
// We'll need a library call to handle most floats when using soft float.
if (TLI->useSoftFloat()) {
switch (I.getOpcode()) {
default:
return true;
case Instruction::Alloca:
case Instruction::Load:
case Instruction::Store:
case Instruction::Select:
case Instruction::PHI:
return false;
}
}
// We'll need a libcall to perform double precision operations on a single
// precision only FPU.
if (I.getType()->isDoubleTy() && !ST->hasFP64())
return true;
// Likewise for half precision arithmetic.
if (I.getType()->isHalfTy() && !ST->hasFullFP16())
return true;
return false;
}
bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) {
// Low-overhead branches are only supported in the 'low-overhead branch'
// extension of v8.1-m.
if (!ST->hasLOB() || DisableLowOverheadLoops) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Disabled\n");
return false;
}
if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: No BETC\n");
return false;
}
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Uncomputable BETC\n");
return false;
}
const SCEV *TripCountSCEV =
SE.getAddExpr(BackedgeTakenCount,
SE.getOne(BackedgeTakenCount->getType()));
// We need to store the trip count in LR, a 32-bit register.
if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
return false;
}
// Making a call will trash LR and clear LO_BRANCH_INFO, so there's little
// point in generating a hardware loop if that's going to happen.
auto IsHardwareLoopIntrinsic = [](Instruction &I) {
if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
switch (Call->getIntrinsicID()) {
default:
break;
case Intrinsic::start_loop_iterations:
case Intrinsic::test_start_loop_iterations:
case Intrinsic::loop_decrement:
case Intrinsic::loop_decrement_reg:
return true;
}
}
return false;
};
// Scan the instructions to see if there's any that we know will turn into a
// call or if this loop is already a low-overhead loop or will become a tail
// predicated loop.
bool IsTailPredLoop = false;
auto ScanLoop = [&](Loop *L) {
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
if (maybeLoweredToCall(I) || IsHardwareLoopIntrinsic(I) ||
isa<InlineAsm>(I)) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n");
return false;
}
if (auto *II = dyn_cast<IntrinsicInst>(&I))
IsTailPredLoop |=
II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
}
}
return true;
};
// Visit inner loops.
for (auto Inner : *L)
if (!ScanLoop(Inner))
return false;
if (!ScanLoop(L))
return false;
// TODO: Check whether the trip count calculation is expensive. If L is the
// inner loop but we know it has a low trip count, calculating that trip
// count (in the parent loop) may be detrimental.
LLVMContext &C = L->getHeader()->getContext();
HWLoopInfo.CounterInReg = true;
HWLoopInfo.IsNestingLegal = false;
HWLoopInfo.PerformEntryTest = AllowWLSLoops && !IsTailPredLoop;
HWLoopInfo.CountType = Type::getInt32Ty(C);
HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
return true;
}
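// A rough sketch of the hardware-loop form that isHardwareLoopProfitable
// enables (illustrative IR, not emitted by this function itself):
//
//   %start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
//   ...
//   %rem = call i32 @llvm.loop.decrement.reg.i32(i32 %iv, i32 1)
//   %cmp = icmp ne i32 %rem, 0
//   br i1 %cmp, label %loop, label %exit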
static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) {
// We don't allow icmp's, and because we only look at single block loops,
// we simply count the icmps, i.e. there should only be 1 for the backedge.
if (isa<ICmpInst>(&I) && ++ICmpCount > 1)
return false;
  // FIXME: This is a workaround for poor cost modelling. Min/Max intrinsics are
  // not currently canonical, but soon will be. Code without them uses icmp, and
  // so is not tail predicated as per the condition above. In order to get the
  // same performance we treat min and max the same as an icmp for tailpred
  // purposes for the moment (we often rely on non-tailpred and higher VFs to
  // pick more optimal instructions like VQDMULH. They need to be recognized
  // directly by the vectorizer).
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if ((II->getIntrinsicID() == Intrinsic::smin ||
II->getIntrinsicID() == Intrinsic::smax ||
II->getIntrinsicID() == Intrinsic::umin ||
II->getIntrinsicID() == Intrinsic::umax) &&
++ICmpCount > 1)
return false;
if (isa<FCmpInst>(&I))
return false;
// We could allow extending/narrowing FP loads/stores, but codegen is
// too inefficient so reject this for now.
if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I))
return false;
// Extends have to be extending-loads
  if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
return false;
// Truncs have to be narrowing-stores
  if (isa<TruncInst>(&I))
if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
return false;
return true;
}
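// For example (illustrative), canTailPredicateInstruction only accepts a
// sign/zero-extend when it is the sole user of a load:
//
//   %w = load i16, i16* %p
//   %e = sext i16 %w to i32    ; allowed: this is an extending load
//
// whereas extending a value that has other users, or one that is not
// loaded, is rejected.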
// To set up a tail-predicated loop, we need to know the total number of
// elements processed by that loop. Thus, we need to determine the element
// size and:
// 1) it should be uniform for all operations in the vector loop, so we
// e.g. don't want any widening/narrowing operations.
// 2) it should be smaller than i64s because we don't have vector operations
// that work on i64s.
// 3) we don't want elements to be reversed or shuffled, to make sure the
// tail-predication masks/predicates the right lanes.
//
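// As an illustrative example, a loop such as
//
//   C: for (i = 0; i < N; i++) c[i] = a[i] + b[i];   // all i32
//
// has a uniform 32-bit element size and unit-stride accesses, so it is a
// candidate, whereas a loop mixing i32 loads with i64 arithmetic, or one
// reading an array in reverse, is not.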
static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
const DataLayout &DL,
const LoopAccessInfo *LAI) {
LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
// If there are live-out values, it is probably a reduction. We can predicate
// most reduction operations freely under MVE using a combination of
// prefer-predicated-reduction-select and inloop reductions. We limit this to
// floating point and integer reductions, but don't check for operators
// specifically here. If the value ends up not being a reduction (and so the
// vectorizer cannot tailfold the loop), we should fall back to standard
// vectorization automatically.
  SmallVector<Instruction *, 8> LiveOuts = llvm::findDefsUsedOutsideOfLoop(L);
bool ReductionsDisabled =
EnableTailPredication == TailPredication::EnabledNoReductions ||
EnableTailPredication == TailPredication::ForceEnabledNoReductions;
for (auto *I : LiveOuts) {
if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
!I->getType()->isHalfTy()) {
LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
"live-out value\n");
return false;
}
if (ReductionsDisabled) {
LLVM_DEBUG(dbgs() << "Reductions not enabled\n");
return false;
}
}
// Next, check that all instructions can be tail-predicated.
PredicatedScalarEvolution PSE = LAI->getPSE();
SmallVector<Instruction *, 16> LoadStores;
int ICmpCount = 0;
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : BB->instructionsWithoutDebug()) {
if (isa<PHINode>(&I))
continue;
if (!canTailPredicateInstruction(I, ICmpCount)) {
LLVM_DEBUG(dbgs() << "Instruction not allowed: "; I.dump());
return false;
}
Type *T = I.getType();
if (T->getScalarSizeInBits() > 32) {
LLVM_DEBUG(dbgs() << "Unsupported Type: "; T->dump());
return false;
}
if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
Value *Ptr = getLoadStorePointerOperand(&I);
Type *AccessTy = getLoadStoreType(&I);
int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L);
if (NextStride == 1) {
        // TODO: for now only allow consecutive strides of 1. We could support
        // other strides as long as they are uniform, but let's keep it simple
        // for now.
continue;
} else if (NextStride == -1 ||
(NextStride == 2 && MVEMaxSupportedInterleaveFactor >= 2) ||
(NextStride == 4 && MVEMaxSupportedInterleaveFactor >= 4)) {
        LLVM_DEBUG(dbgs()
                   << "Consecutive stride of -1, 2 or 4 found, vld2/vst2 can't "
                      "be tail-predicated.\n");
return false;
// TODO: don't tail predicate if there is a reversed load?
} else if (EnableMaskedGatherScatters) {
// Gather/scatters do allow loading from arbitrary strides, at
// least if they are loop invariant.
// TODO: Loop variant strides should in theory work, too, but
// this requires further testing.
const SCEV *PtrScev = PSE.getSE()->getSCEV(Ptr);
if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
if (PSE.getSE()->isLoopInvariant(Step, L))
continue;
}
}
LLVM_DEBUG(dbgs() << "Bad stride found, can't "
"tail-predicate\n.");
return false;
}
}
}
LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
return true;
}
bool ARMTTIImpl::preferPredicateOverEpilogue(
Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL) {
if (!EnableTailPredication) {
LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");
return false;
}
// Creating a predicated vector loop is the first step for generating a
// tail-predicated hardware loop, for which we need the MVE masked
// load/stores instructions:
if (!ST->hasMVEIntegerOps())
return false;
// For now, restrict this to single block loops.
if (L->getNumBlocks() > 1) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
"loop.\n");
return false;
}
assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
HardwareLoopInfo HWLoopInfo(L);
if (!HWLoopInfo.canAnalyze(*LI)) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
"analyzable.\n");
return false;
}
// This checks if we have the low-overhead branch architecture
// extension, and if we will create a hardware-loop:
if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
"profitable.\n");
return false;
}
if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
"a candidate.\n");
return false;
}
return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI());
}
PredicationStyle ARMTTIImpl::emitGetActiveLaneMask() const {
if (!ST->hasMVEIntegerOps() || !EnableTailPredication)
return PredicationStyle::None;
// Intrinsic @llvm.get.active.lane.mask is supported.
// It is used in the MVETailPredication pass, which requires the number of
// elements processed by this vector loop to setup the tail-predicated
// loop.
return PredicationStyle::Data;
}
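// The intrinsic computes the mask of still-active lanes from the element
// index and the trip count, roughly (illustrative; the lane count and
// operand types depend on the vector loop):
//
//   IR: %m = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %idx, i32 %n)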
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) {
  // Enable upper bound unrolling universally, not dependent upon the conditions
  // below.
UP.UpperBound = true;
// Only currently enable these preferences for M-Class cores.
if (!ST->isMClass())
return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
// Disable loop unrolling for Oz and Os.
UP.OptSizeThreshold = 0;
UP.PartialOptSizeThreshold = 0;
if (L->getHeader()->getParent()->hasOptSize())
return;
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
LLVM_DEBUG(dbgs() << "Loop has:\n"
<< "Blocks: " << L->getNumBlocks() << "\n"
<< "Exit blocks: " << ExitingBlocks.size() << "\n");
  // Allow at most one exit other than the latch. This acts as an early exit
  // as it mirrors the profitability calculation of the runtime unroller.
if (ExitingBlocks.size() > 2)
return;
// Limit the CFG of the loop body for targets with a branch predictor.
// Allowing 4 blocks permits if-then-else diamonds in the body.
if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
return;
// Don't unroll vectorized loops, including the remainder loop
if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
return;
// Scan the loop: don't unroll loops with calls as this could prevent
// inlining.
InstructionCost Cost = 0;
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
      // Don't unroll vectorised loops. MVE does not benefit from unrolling as
      // much as scalar code does.
if (I.getType()->isVectorTy())
return;
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
if (!isLoweredToCall(F))
continue;
}
return;
}
SmallVector<const Value*, 4> Operands(I.operand_values());
Cost +=
getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
}
}
  // On v6m cores, there are very few registers available. We can easily end up
  // spilling and reloading more registers in an unrolled loop. Look at the
  // number of LCSSA phis as a rough measure of how many registers will need to
  // be live out of the loop, reducing the default unroll count if more than 1
  // value is needed. In the long run, all of this should be learned by a
  // machine.
unsigned UnrollCount = 4;
if (ST->isThumb1Only()) {
unsigned ExitingValues = 0;
SmallVector<BasicBlock *, 4> ExitBlocks;
L->getExitBlocks(ExitBlocks);
for (auto *Exit : ExitBlocks) {
      // Count the number of LCSSA phis. Exclude values coming from GEPs, as
      // only the last is expected to be needed for address operands.
unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
return PH.getNumOperands() != 1 ||
!isa<GetElementPtrInst>(PH.getOperand(0));
});
ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
}
if (ExitingValues)
UnrollCount /= ExitingValues;
if (UnrollCount <= 1)
return;
}
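  // E.g. (illustrative): with 4 exiting values the count becomes 4 / 4 == 1
  // and we give up, while with 2 it becomes 2 and we still partially unroll.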
LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
LLVM_DEBUG(dbgs() << "Default Runtime Unroll Count: " << UnrollCount << "\n");
UP.Partial = true;
UP.Runtime = true;
UP.UnrollRemainder = true;
UP.DefaultUnrollRuntimeCount = UnrollCount;
UP.UnrollAndJam = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
  // Force-unrolling small loops can be very useful because of the branch-taken
  // cost of the backedge.
if (Cost < 12)
UP.Force = true;
}
void ARMTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
bool ARMTTIImpl::preferInLoopReduction(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const {
if (!ST->hasMVEIntegerOps())
return false;
unsigned ScalarBits = Ty->getScalarSizeInBits();
switch (Opcode) {
case Instruction::Add:
return ScalarBits <= 64;
default:
return false;
}
}
bool ARMTTIImpl::preferPredicatedReductionSelect(
unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const {
if (!ST->hasMVEIntegerOps())
return false;
return true;
}
diff --git a/llvm/lib/Target/Sparc/SparcCallingConv.td b/llvm/lib/Target/Sparc/SparcCallingConv.td
index e6d23f741ea5..8afd0a7fc09a 100644
--- a/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -1,143 +1,147 @@
//===-- SparcCallingConv.td - Calling Conventions Sparc ----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the Sparc architectures.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SPARC v8 32-bit.
//===----------------------------------------------------------------------===//
def CC_Sparc32 : CallingConv<[
// Custom assign SRet to [sp+64].
CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
// i32 f32 arguments get passed in integer registers if there is space.
CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
// f64 arguments are split and passed through registers or through stack.
CCIfType<[f64], CCCustom<"CC_Sparc_Assign_Split_64">>,
// As are v2i32 arguments (this would be the default behavior for
// v2i32 if it wasn't allocated to the IntPair register-class)
CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Split_64">>,
// Alternatively, they are assigned to the stack in 4-byte aligned units.
CCAssignToStack<4, 4>
]>;
def RetCC_Sparc32 : CallingConv<[
CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1]>>,
CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Ret_Split_64">>
]>;
//===----------------------------------------------------------------------===//
// SPARC v9 64-bit.
//===----------------------------------------------------------------------===//
//
// The 64-bit ABI conceptually assigns all function arguments to a parameter
// array starting at [%fp+BIAS+128] in the callee's stack frame. All arguments
// occupy a multiple of 8 bytes in the array. Integer arguments are extended to
// 64 bits by the caller. Floats are right-aligned in their 8-byte slot; the
// first 4 bytes in the slot are undefined.
//
// The integer registers %i0 to %i5 shadow the first 48 bytes of the parameter
// array at fixed offsets. Integer arguments are promoted to registers when
// possible.
//
// The floating point registers %f0 to %f31 shadow the first 128 bytes of the
// parameter array at fixed offsets. Float and double parameters are promoted
// to these registers when possible.
//
// Structs up to 16 bytes in size are passed by value. They are right-aligned
// in one or two 8-byte slots in the parameter array. Struct members are
// promoted to both floating point and integer registers when possible. A
// struct containing two floats would thus be passed in %f0 and %f1, while two
// float function arguments would occupy 8 bytes each, and be passed in %f1 and
// %f3.
//
// When a struct { int, float } is passed by value, the int goes in the high
// bits of an integer register while the float goes in a floating point
// register.
//
// The difference is encoded in LLVM IR using the inreg attribute on function
// arguments:
//
// C: void f(float, float);
// IR: declare void f(float %f1, float %f3)
//
// C: void f(struct { float f0, f1; });
// IR: declare void f(float inreg %f0, float inreg %f1)
//
// C: void f(int, float);
// IR: declare void f(int signext %i0, float %f3)
//
// C: void f(struct { int i0high; float f1; });
// IR: declare void f(i32 inreg %i0high, float inreg %f1)
//
// Two ints in a struct are simply coerced to i64:
//
// C: void f(struct { int i0high, i0low; });
// IR: declare void f(i64 %i0.coerced)
//
// The frontend and backend divide the task of producing ABI compliant code for
// C functions. The C frontend will:
//
// - Annotate integer arguments with zeroext or signext attributes.
//
// - Split structs into one or two 64-bit sized chunks, or 32-bit chunks with
// inreg attributes.
//
// - Pass structs larger than 16 bytes indirectly with an explicit pointer
// argument. The byval attribute is not used.
//
// The backend will:
//
// - Assign all arguments to 64-bit aligned stack slots, 32-bits for inreg.
//
// - Promote to integer or floating point registers depending on type.
//
// Function return values are passed exactly like function arguments, except a
// struct up to 32 bytes in size can be returned in registers.
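//
// An illustrative example of the indirect case (hypothetical types):
//
// C: void f(struct { char buf[24]; });
// IR: declare void f(%struct.anon* %indirect) ; pointer argument, no byval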
// Function arguments AND most return values.
def CC_Sparc64 : CallingConv<[
// The frontend uses the inreg flag to indicate i32 and float arguments from
// structs. These arguments are not promoted to 64 bits, but they can still
// be assigned to integer and float registers.
CCIfInReg<CCIfType<[i32, f32], CCCustom<"CC_Sparc64_Half">>>,
// All integers are promoted to i64 by the caller.
CCIfType<[i32], CCPromoteToType<i64>>,
// Custom assignment is required because stack space is reserved for all
// arguments whether they are passed in registers or not.
CCCustom<"CC_Sparc64_Full">
]>;
def RetCC_Sparc64 : CallingConv<[
// A single f32 return value always goes in %f0. The ABI doesn't specify what
// happens to multiple f32 return values outside a struct.
- CCIfType<[f32], CCCustom<"CC_Sparc64_Half">>,
+ CCIfType<[f32], CCCustom<"RetCC_Sparc64_Half">>,
- // Otherwise, return values are passed exactly like arguments.
- CCDelegateTo<CC_Sparc64>
+ // Otherwise, return values are passed exactly like arguments, except that
+ // return values too big to fit into the registers are passed as an sret
+ // instead.
+ CCIfInReg<CCIfType<[i32, f32], CCCustom<"RetCC_Sparc64_Half">>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCCustom<"RetCC_Sparc64_Full">
]>;
// Callee-saved registers are handled by the register window mechanism.
def CSR : CalleeSavedRegs<(add)> {
let OtherPreserved = (add (sequence "I%u", 0, 7),
(sequence "L%u", 0, 7), O6);
}
// Callee-saved registers for calls with ReturnsTwice attribute.
def RTCSR : CalleeSavedRegs<(add)> {
let OtherPreserved = (add I6, I7);
}
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 2cb74e7709c7..f55675089102 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1,3513 +1,3562 @@
//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that Sparc uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "SparcISelLowering.h"
#include "MCTargetDesc/SparcMCExpr.h"
#include "SparcMachineFunctionInfo.h"
#include "SparcRegisterInfo.h"
#include "SparcTargetMachine.h"
#include "SparcTargetObjectFile.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
  assert(ArgFlags.isSRet());
// Assign SRet argument.
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
0,
LocVT, LocInfo));
return true;
}
static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
static const MCPhysReg RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
// Try to get first reg.
if (Register Reg = State.AllocateReg(RegList)) {
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
} else {
    // Assign the whole thing to the stack.
State.addLoc(CCValAssign::getCustomMem(
ValNo, ValVT, State.AllocateStack(8, Align(4)), LocVT, LocInfo));
return true;
}
// Try to get second reg.
if (Register Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
State.addLoc(CCValAssign::getCustomMem(
ValNo, ValVT, State.AllocateStack(4, Align(4)), LocVT, LocInfo));
return true;
}
static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
static const MCPhysReg RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
// Try to get first reg.
if (Register Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
return false;
// Try to get second reg.
if (Register Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
return false;
return true;
}
// Allocate a full-sized argument for the 64-bit ABI.
-static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT, CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+static bool Analyze_CC_Sparc64_Full(bool IsReturn, unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
assert((LocVT == MVT::f32 || LocVT == MVT::f128
|| LocVT.getSizeInBits() == 64) &&
"Can't handle non-64 bits locations");
// Stack space is allocated for all arguments starting from [%fp+BIAS+128].
unsigned size = (LocVT == MVT::f128) ? 16 : 8;
Align alignment = (LocVT == MVT::f128) ? Align(16) : Align(8);
unsigned Offset = State.AllocateStack(size, alignment);
unsigned Reg = 0;
if (LocVT == MVT::i64 && Offset < 6*8)
// Promote integers to %i0-%i5.
Reg = SP::I0 + Offset/8;
else if (LocVT == MVT::f64 && Offset < 16*8)
// Promote doubles to %d0-%d30. (Which LLVM calls D0-D15).
Reg = SP::D0 + Offset/8;
else if (LocVT == MVT::f32 && Offset < 16*8)
// Promote floats to %f1, %f3, ...
Reg = SP::F1 + Offset/4;
else if (LocVT == MVT::f128 && Offset < 16*8)
// Promote long doubles to %q0-%q28. (Which LLVM calls Q0-Q7).
Reg = SP::Q0 + Offset/16;
// Promote to register when possible, otherwise use the stack slot.
if (Reg) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return true;
}
+ // Bail out if this is a return CC and we run out of registers to place
+ // values into.
+ if (IsReturn)
+ return false;
+
// This argument goes on the stack in an 8-byte slot.
// When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to
// the right-aligned float. The first 4 bytes of the stack slot are undefined.
if (LocVT == MVT::f32)
Offset += 4;
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
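// Worked example (illustrative): the third 8-byte slot has Offset == 16, so
// an i64 there is assigned %i2 (I0 + 16/8), an f64 %d2 (D0 + 16/8), and an
// f32 %f5 (F1 + 16/4).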
// Allocate a half-sized argument for the 64-bit ABI.
//
// This is used when passing { float, int } structs by value in registers.
-static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT, CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+static bool Analyze_CC_Sparc64_Half(bool IsReturn, unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations");
unsigned Offset = State.AllocateStack(4, Align(4));
if (LocVT == MVT::f32 && Offset < 16*8) {
// Promote floats to %f0-%f31.
State.addLoc(CCValAssign::getReg(ValNo, ValVT, SP::F0 + Offset/4,
LocVT, LocInfo));
return true;
}
if (LocVT == MVT::i32 && Offset < 6*8) {
// Promote integers to %i0-%i5, using half the register.
unsigned Reg = SP::I0 + Offset/8;
LocVT = MVT::i64;
LocInfo = CCValAssign::AExt;
// Set the Custom bit if this i32 goes in the high bits of a register.
if (Offset % 8 == 0)
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg,
LocVT, LocInfo));
else
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return true;
}
+ // Bail out if this is a return CC and we run out of registers to place
+ // values into.
+ if (IsReturn)
+ return false;
+
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
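// Worked example (illustrative): for struct { int i; float f; } the int
// occupies the first 4-byte slot (Offset == 0, a multiple of 8), so it is
// marked Custom and placed in the high half of %i0, while the float at
// Offset == 4 is promoted to %f1 (F0 + 4/4).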
+static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ return Analyze_CC_Sparc64_Full(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ return Analyze_CC_Sparc64_Half(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+static bool RetCC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ return Analyze_CC_Sparc64_Full(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+static bool RetCC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ return Analyze_CC_Sparc64_Half(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
#include "SparcGenCallingConv.inc"
// The calling conventions in SparcCallingConv.td are described in terms of the
// callee's register window. This function translates registers to the
// corresponding caller window %o register.
static unsigned toCallerWindow(unsigned Reg) {
static_assert(SP::I0 + 7 == SP::I7 && SP::O0 + 7 == SP::O7,
"Unexpected enum");
if (Reg >= SP::I0 && Reg <= SP::I7)
return Reg - SP::I0 + SP::O0;
return Reg;
}
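// E.g. toCallerWindow(SP::I2) == SP::O2: the callee's %i2 is the same
// physical register that the caller wrote as %o2 before SAVE rotated the
// register window.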
+bool SparcTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, Subtarget->is64Bit() ? RetCC_Sparc64
+ : RetCC_Sparc32);
+}
+
SDValue
SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
if (Subtarget->is64Bit())
return LowerReturn_64(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
return LowerReturn_32(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
}
SDValue
SparcTargetLowering::LowerReturn_32(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Make room for the return address offset.
RetOps.push_back(SDValue());
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
if (VA.needsCustom()) {
assert(VA.getLocVT() == MVT::v2i32);
// Legalize ret v2i32 -> ret 2 x i32 (Basically: do what would
// happen by default if this wasn't a legal type)
SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Arg,
DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Arg,
DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout())));
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1,
Flag);
} else
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
unsigned RetAddrOffset = 8; // Call Inst + Delay Slot
// If the function returns a struct, copy the SRetReturnReg to I0
if (MF.getFunction().hasStructRetAttr()) {
SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
Register Reg = SFI->getSRetReturnReg();
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, PtrVT);
Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(SP::I0, PtrVT));
RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
}
RetOps[0] = Chain; // Update chain.
RetOps[1] = DAG.getConstant(RetAddrOffset, DL, MVT::i32);
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, RetOps);
}
// Lower return values for the 64-bit ABI.
// Return values are passed exactly the same way as function arguments.
SDValue
SparcTargetLowering::LowerReturn_64(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc64);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
// The second operand on the return instruction is the return address offset.
// The return address is always %i7+8 with the 64-bit ABI.
RetOps.push_back(DAG.getConstant(8, DL, MVT::i32));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue OutVal = OutVals[i];
// Integer return values must be sign or zero extended by the callee.
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
case CCValAssign::SExt:
OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
break;
case CCValAssign::ZExt:
OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
break;
case CCValAssign::AExt:
OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
break;
default:
llvm_unreachable("Unknown loc info!");
}
// The custom bit on an i32 return value indicates that it should be passed
// in the high bits of the register.
if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal,
DAG.getConstant(32, DL, MVT::i32));
// The next value may go in the low bits of the same register.
// Handle both at once.
if (i+1 < RVLocs.size() && RVLocs[i+1].getLocReg() == VA.getLocReg()) {
SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i+1]);
OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV);
// Skip the next value, it's already done.
++i;
}
}
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, RetOps);
}
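// Illustrative example of the custom i32 case in LowerReturn_64: two inreg
// i32 return values sharing one register come back as (hi << 32) | zext(lo),
// so the pair { 0x1111, 0x2222 } is returned as %i0 == 0x0000111100002222.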
SDValue SparcTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
if (Subtarget->is64Bit())
return LowerFormalArguments_64(Chain, CallConv, IsVarArg, Ins,
DL, DAG, InVals);
return LowerFormalArguments_32(Chain, CallConv, IsVarArg, Ins,
DL, DAG, InVals);
}
/// LowerFormalArguments_32 - V8 uses a very simple ABI, where all values are
/// passed in either one or two GPRs, including FP values. TODO: we should
/// pass FP values in FP registers for fastcc functions.
SDValue SparcTargetLowering::LowerFormalArguments_32(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
const unsigned StackOffset = 92;
bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
unsigned InIdx = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++InIdx) {
CCValAssign &VA = ArgLocs[i];
if (Ins[InIdx].Flags.isSRet()) {
if (InIdx != 0)
report_fatal_error("sparc only supports sret on the first parameter");
// Get SRet from [%fp+64].
int FrameIdx = MF.getFrameInfo().CreateFixedObject(4, 64, true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Arg =
DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo());
InVals.push_back(Arg);
continue;
}
if (VA.isRegLoc()) {
if (VA.needsCustom()) {
assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
Register VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
assert(i+1 < e);
CCValAssign &NextVA = ArgLocs[++i];
SDValue LoVal;
if (NextVA.isMemLoc()) {
int FrameIdx = MF.getFrameInfo().
CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo());
} else {
Register loReg = MF.addLiveIn(NextVA.getLocReg(),
&SP::IntRegsRegClass);
LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
}
if (IsLittleEndian)
std::swap(LoVal, HiVal);
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
Register VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
if (VA.getLocVT() == MVT::f32)
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Arg);
else if (VA.getLocVT() != MVT::i32) {
Arg = DAG.getNode(ISD::AssertSext, dl, MVT::i32, Arg,
DAG.getValueType(VA.getLocVT()));
Arg = DAG.getNode(ISD::TRUNCATE, dl, VA.getLocVT(), Arg);
}
InVals.push_back(Arg);
continue;
}
assert(VA.isMemLoc());
unsigned Offset = VA.getLocMemOffset()+StackOffset;
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::v2i32);
// If it is double-word aligned, just load.
if (Offset % 8 == 0) {
int FI = MF.getFrameInfo().CreateFixedObject(8,
Offset,
true);
SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
SDValue Load =
DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo());
InVals.push_back(Load);
continue;
}
int FI = MF.getFrameInfo().CreateFixedObject(4,
Offset,
true);
SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
SDValue HiVal =
DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo());
int FI2 = MF.getFrameInfo().CreateFixedObject(4,
Offset+4,
true);
SDValue FIPtr2 = DAG.getFrameIndex(FI2, PtrVT);
SDValue LoVal =
DAG.getLoad(MVT::i32, dl, Chain, FIPtr2, MachinePointerInfo());
if (IsLittleEndian)
std::swap(LoVal, HiVal);
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
int FI = MF.getFrameInfo().CreateFixedObject(4,
Offset,
true);
SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
    SDValue Load;
if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo());
} else if (VA.getValVT() == MVT::f128) {
report_fatal_error("SPARCv8 does not handle f128 in calls; "
"pass indirectly");
} else {
// We shouldn't see any other value types here.
llvm_unreachable("Unexpected ValVT encountered in frame lowering.");
}
InVals.push_back(Load);
}
if (MF.getFunction().hasStructRetAttr()) {
// Copy the SRet Argument to SRetReturnReg.
SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
Register Reg = SFI->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass);
SFI->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
// Store remaining ArgRegs to the stack if this is a varargs function.
if (isVarArg) {
static const MCPhysReg ArgRegs[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs);
const MCPhysReg *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
unsigned ArgOffset = CCInfo.getNextStackOffset();
if (NumAllocated == 6)
ArgOffset += StackOffset;
else {
assert(!ArgOffset);
ArgOffset = 68+4*NumAllocated;
}
// Remember the vararg offset for the va_start implementation.
FuncInfo->setVarArgsFrameOffset(ArgOffset);
std::vector<SDValue> OutChains;
for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
Register VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
int FrameIdx = MF.getFrameInfo().CreateFixedObject(4, ArgOffset,
true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
OutChains.push_back(
DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, MachinePointerInfo()));
ArgOffset += 4;
}
if (!OutChains.empty()) {
OutChains.push_back(Chain);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
}
return Chain;
}
// Lower formal arguments for the 64 bit ABI.
SDValue SparcTargetLowering::LowerFormalArguments_64(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
// Analyze arguments according to CC_Sparc64.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
// The argument array begins at %fp+BIAS+128, after the register save area.
const unsigned ArgArea = 128;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc()) {
// This argument is passed in a register.
// All integer register arguments are promoted by the caller to i64.
// Create a virtual register for the promoted live-in value.
Register VReg = MF.addLiveIn(VA.getLocReg(),
getRegClassFor(VA.getLocVT()));
SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
// Get the high bits for i32 struct elements.
if (VA.getValVT() == MVT::i32 && VA.needsCustom())
Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, MVT::i32));
// The caller promoted the argument, so insert an Assert?ext SDNode so we
// won't promote the value again in this function.
switch (VA.getLocInfo()) {
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
DAG.getValueType(VA.getValVT()));
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
DAG.getValueType(VA.getValVT()));
break;
default:
break;
}
// Truncate the register down to the argument type.
if (VA.isExtInLoc())
Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
InVals.push_back(Arg);
continue;
}
// The registers are exhausted. This argument was passed on the stack.
assert(VA.isMemLoc());
// The CC_Sparc64_Full/Half functions compute stack offsets relative to the
// beginning of the arguments area at %fp+BIAS+128.
unsigned Offset = VA.getLocMemOffset() + ArgArea;
unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
// Adjust offset for extended arguments, SPARC is big-endian.
// The caller will have written the full slot with extended bytes, but we
// prefer our own extending loads.
if (VA.isExtInLoc())
Offset += 8 - ValSize;
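    // E.g. an i32 extended to fill an 8-byte slot has ValSize == 4, so the
    // load reads from Offset + 4, where the significant bytes sit on
    // big-endian SPARC.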
int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
InVals.push_back(
DAG.getLoad(VA.getValVT(), DL, Chain,
DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
MachinePointerInfo::getFixedStack(MF, FI)));
}
if (!IsVarArg)
return Chain;
// This function takes variable arguments, some of which may have been passed
// in registers %i0-%i5. Variable floating point arguments are never passed
// in floating point registers. They go on %i0-%i5 or on the stack like
// integer arguments.
//
// The va_start intrinsic needs to know the offset to the first variable
// argument.
unsigned ArgOffset = CCInfo.getNextStackOffset();
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
// Skip the 128 bytes of register save area.
FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgArea +
Subtarget->getStackPointerBias());
// Save the variable arguments that were passed in registers.
// The caller is required to reserve stack space for 6 arguments regardless
// of how many arguments were actually passed.
SmallVector<SDValue, 8> OutChains;
for (; ArgOffset < 6*8; ArgOffset += 8) {
Register VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass);
SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
int FI = MF.getFrameInfo().CreateFixedObject(8, ArgOffset + ArgArea, true);
auto PtrVT = getPointerTy(MF.getDataLayout());
OutChains.push_back(
DAG.getStore(Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT),
MachinePointerInfo::getFixedStack(MF, FI)));
}
if (!OutChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
return Chain;
}
SDValue
SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
if (Subtarget->is64Bit())
return LowerCall_64(CLI, InVals);
return LowerCall_32(CLI, InVals);
}
static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee,
const CallBase *Call) {
if (Call)
return Call->hasFnAttr(Attribute::ReturnsTwice);
const Function *CalleeFn = nullptr;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
CalleeFn = dyn_cast<Function>(G->getGlobal());
} else if (ExternalSymbolSDNode *E =
dyn_cast<ExternalSymbolSDNode>(Callee)) {
const Function &Fn = DAG.getMachineFunction().getFunction();
const Module *M = Fn.getParent();
const char *CalleeName = E->getSymbol();
CalleeFn = M->getFunction(CalleeName);
}
if (!CalleeFn)
return false;
return CalleeFn->hasFnAttribute(Attribute::ReturnsTwice);
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
bool SparcTargetLowering::IsEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF) const {
auto &Outs = CLI.Outs;
auto &Caller = MF.getFunction();
// Do not tail call opt functions with "disable-tail-calls" attribute.
if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
return false;
// Do not tail call opt if the stack is used to pass parameters.
if (CCInfo.getNextStackOffset() != 0)
return false;
// Do not tail call opt if either the callee or caller returns
// a struct and the other does not.
if (!Outs.empty() && Caller.hasStructRetAttr() != Outs[0].Flags.isSRet())
return false;
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call.
for (auto &Arg : Outs)
if (Arg.Flags.isByVal())
return false;
return true;
}
// Lower a call for the 32-bit ABI.
SDValue
SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
isTailCall = isTailCall && IsEligibleForTailCallOptimization(
CCInfo, CLI, DAG.getMachineFunction());
// Get the size of the outgoing arguments stack space requirement.
unsigned ArgsSize = CCInfo.getNextStackOffset();
// Keep stack frames 8-byte aligned.
ArgsSize = (ArgsSize+7) & ~7;
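  // E.g. an ArgsSize of 13 becomes (13 + 7) & ~7 == 16.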
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// Create local copies for byval args.
SmallVector<SDValue, 8> ByValArgs;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (!Flags.isByVal())
continue;
SDValue Arg = OutVals[i];
unsigned Size = Flags.getByValSize();
Align Alignment = Flags.getNonZeroByValAlign();
if (Size > 0U) {
int FI = MFI.CreateStackObject(Size, Alignment, false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, dl, MVT::i32);
Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Alignment,
false, // isVolatile,
(Size <= 32), // AlwaysInline if size <= 32,
false, // isTailCall
MachinePointerInfo(), MachinePointerInfo());
ByValArgs.push_back(FIPtr);
    } else {
SDValue nullVal;
ByValArgs.push_back(nullVal);
}
}
assert(!isTailCall || ArgsSize == 0);
if (!isTailCall)
Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
const unsigned StackOffset = 92;
bool hasStructRetAttr = false;
unsigned SRetArgSize = 0;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
// Use local copy if it is a byval arg.
if (Flags.isByVal()) {
Arg = ByValArgs[byvalArgIdx++];
if (!Arg) {
continue;
}
}
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
if (Flags.isSRet()) {
assert(VA.needsCustom());
if (isTailCall)
continue;
      // Store the SRet argument at %sp+64.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(64, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
hasStructRetAttr = true;
// sret only allowed on first argument
assert(Outs[realArgIdx].OrigArgIndex == 0);
SRetArgSize =
DAG.getDataLayout().getTypeAllocSize(CLI.getArgs()[0].IndirectType);
continue;
}
if (VA.needsCustom()) {
assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
if (VA.isMemLoc()) {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
// if it is double-word aligned, just store.
if (Offset % 8 == 0) {
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
continue;
}
}
if (VA.getLocVT() == MVT::f64) {
        // Move the float value from the float registers into the
        // integer registers.
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg))
Arg = bitcastConstantFPToInt(C, dl, DAG);
else
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
}
SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
Arg,
DAG.getConstant(0, dl, getVectorIdxTy(DAG.getDataLayout())));
SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
Arg,
DAG.getConstant(1, dl, getVectorIdxTy(DAG.getDataLayout())));
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Part0));
assert(i+1 != e);
CCValAssign &NextVA = ArgLocs[++i];
if (NextVA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Part1));
} else {
// Store the second part in stack.
unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Part1, PtrOff, MachinePointerInfo()));
}
} else {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
// Store the first part.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Part0, PtrOff, MachinePointerInfo()));
// Store the second part.
PtrOff = DAG.getIntPtrConstant(Offset + 4, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Part1, PtrOff, MachinePointerInfo()));
}
continue;
}
    // Arguments that can be passed in registers must be kept in the
    // RegsToPass vector.
if (VA.isRegLoc()) {
if (VA.getLocVT() != MVT::f32) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
continue;
}
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
continue;
}
assert(VA.isMemLoc());
// Create a store off the stack pointer for this argument.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + StackOffset,
dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
}
  // Emit all stores, make sure they occur before any copies into physregs.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The InFlag is necessary since all emitted instructions must be
  // stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Register Reg = RegsToPass[i].first;
if (!isTailCall)
Reg = toCallerWindow(Reg);
Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CB);
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
unsigned TF = isPositionIndependent() ? SparcMCExpr::VK_Sparc_WPLT30
: SparcMCExpr::VK_Sparc_WDISP30;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32, 0, TF);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32, TF);
// Returns a chain & a flag for retval copy to use
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
if (hasStructRetAttr)
Ops.push_back(DAG.getTargetConstant(SRetArgSize, dl, MVT::i32));
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Register Reg = RegsToPass[i].first;
if (!isTailCall)
Reg = toCallerWindow(Reg);
Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
}
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask =
((hasReturnsTwice)
? TRI->getRTCallPreservedMask(CallConv)
: TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
if (isTailCall) {
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
return DAG.getNode(SPISD::TAIL_CALL, dl, MVT::Other, Ops);
}
Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, dl, true),
DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ assert(RVLocs[i].isRegLoc() && "Can only return in registers!");
if (RVLocs[i].getLocVT() == MVT::v2i32) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2i32);
SDValue Lo = DAG.getCopyFromReg(
Chain, dl, toCallerWindow(RVLocs[i++].getLocReg()), MVT::i32, InFlag);
Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2i32, Vec, Lo,
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getCopyFromReg(
Chain, dl, toCallerWindow(RVLocs[i].getLocReg()), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2i32, Vec, Hi,
DAG.getConstant(1, dl, MVT::i32));
InVals.push_back(Vec);
} else {
Chain =
DAG.getCopyFromReg(Chain, dl, toCallerWindow(RVLocs[i].getLocReg()),
RVLocs[i].getValVT(), InFlag)
.getValue(1);
InFlag = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
}
}
return Chain;
}
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register SparcTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<Register>(RegName)
.Case("i0", SP::I0).Case("i1", SP::I1).Case("i2", SP::I2).Case("i3", SP::I3)
.Case("i4", SP::I4).Case("i5", SP::I5).Case("i6", SP::I6).Case("i7", SP::I7)
.Case("o0", SP::O0).Case("o1", SP::O1).Case("o2", SP::O2).Case("o3", SP::O3)
.Case("o4", SP::O4).Case("o5", SP::O5).Case("o6", SP::O6).Case("o7", SP::O7)
.Case("l0", SP::L0).Case("l1", SP::L1).Case("l2", SP::L2).Case("l3", SP::L3)
.Case("l4", SP::L4).Case("l5", SP::L5).Case("l6", SP::L6).Case("l7", SP::L7)
.Case("g0", SP::G0).Case("g1", SP::G1).Case("g2", SP::G2).Case("g3", SP::G3)
.Case("g4", SP::G4).Case("g5", SP::G5).Case("g6", SP::G6).Case("g7", SP::G7)
.Default(0);
if (Reg)
return Reg;
report_fatal_error("Invalid register name global variable");
}
// Fixup floating point arguments in the ... part of a varargs call.
//
// The SPARC v9 ABI requires that floating point arguments are treated the same
// as integers when calling a varargs function. This does not apply to the
// fixed arguments that are part of the function's prototype.
//
// This function post-processes a CCValAssign array created by
// AnalyzeCallOperands().
static void fixupVariableFloatArgs(SmallVectorImpl<CCValAssign> &ArgLocs,
ArrayRef<ISD::OutputArg> Outs) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
const CCValAssign &VA = ArgLocs[i];
MVT ValTy = VA.getLocVT();
// FIXME: What about f32 arguments? C promotes them to f64 when calling
// varargs functions.
if (!VA.isRegLoc() || (ValTy != MVT::f64 && ValTy != MVT::f128))
continue;
// The fixed arguments to a varargs function still go in FP registers.
if (Outs[VA.getValNo()].IsFixed)
continue;
// This floating point argument should be reassigned.
CCValAssign NewVA;
// Determine the offset into the argument array.
Register firstReg = (ValTy == MVT::f64) ? SP::D0 : SP::Q0;
unsigned argSize = (ValTy == MVT::f64) ? 8 : 16;
unsigned Offset = argSize * (VA.getLocReg() - firstReg);
assert(Offset < 16*8 && "Offset out of range, bad register enum?");
if (Offset < 6*8) {
// This argument should go in %i0-%i5.
unsigned IReg = SP::I0 + Offset/8;
if (ValTy == MVT::f64)
// Full register, just bitconvert into i64.
NewVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(),
IReg, MVT::i64, CCValAssign::BCvt);
else {
assert(ValTy == MVT::f128 && "Unexpected type!");
// Full register, just bitconvert into i128 -- We will lower this into
// two i64s in LowerCall_64.
NewVA = CCValAssign::getCustomReg(VA.getValNo(), VA.getValVT(),
IReg, MVT::i128, CCValAssign::BCvt);
}
} else {
// This needs to go to memory; we're out of integer registers.
NewVA = CCValAssign::getMem(VA.getValNo(), VA.getValVT(),
Offset, VA.getLocVT(), VA.getLocInfo());
}
ArgLocs[i] = NewVA;
}
}
// Lower a call for the 64-bit ABI.
SDValue
SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc DL = CLI.DL;
SDValue Chain = CLI.Chain;
auto PtrVT = getPointerTy(DAG.getDataLayout());
// Sparc target does not yet support tail call optimization.
CLI.IsTailCall = false;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
// Get the size of the outgoing arguments stack space requirement.
// The stack offset computed by CC_Sparc64 includes all arguments.
// Called functions expect 6 argument words to exist in the stack frame, used
// or not.
unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset());
// Keep stack frames 16-byte aligned.
ArgsSize = alignTo(ArgsSize, 16);
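// E.g. a call passing two i64 arguments still reserves the full
// max(48, 16) = 48 bytes, while one passing eight i64s reserves
// alignTo(64, 16) = 64 bytes.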
// Varargs calls require special treatment.
if (CLI.IsVarArg)
fixupVariableFloatArgs(ArgLocs, CLI.Outs);
// Adjust the stack pointer to make room for the arguments.
// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
// with more than 6 arguments.
Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
// Collect the set of registers to pass to the function and their values.
// This will be emitted as a sequence of CopyToReg nodes glued to the call
// instruction.
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
// Collect chains from all the memory operations that copy arguments to the
// stack. They must follow the stack pointer adjustment above and precede the
// call instruction itself.
SmallVector<SDValue, 8> MemOpChains;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
const CCValAssign &VA = ArgLocs[i];
SDValue Arg = CLI.OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown location info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::BCvt:
// fixupVariableFloatArgs() may create bitcasts from f128 to i128. But
// SPARC does not support i128 natively. Lower it into two i64, see below.
if (!VA.needsCustom() || VA.getValVT() != MVT::f128
|| VA.getLocVT() != MVT::i128)
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
}
if (VA.isRegLoc()) {
if (VA.needsCustom() && VA.getValVT() == MVT::f128
&& VA.getLocVT() == MVT::i128) {
// Store and reload into the integer register reg and reg+1.
unsigned Offset = 8 * (VA.getLocReg() - SP::I0);
unsigned StackOffset = Offset + Subtarget->getStackPointerBias() + 128;
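// E.g. an f128 bound to %i2/%i3 has Offset = 16, so it is spilled to
// %sp+BIAS+144 and its two i64 halves are reloaded into the caller-window
// registers %o2 and %o3 below.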
SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT);
SDValue HiPtrOff = DAG.getIntPtrConstant(StackOffset, DL);
HiPtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, HiPtrOff);
SDValue LoPtrOff = DAG.getIntPtrConstant(StackOffset + 8, DL);
LoPtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, LoPtrOff);
// Store to %sp+BIAS+128+Offset
SDValue Store =
DAG.getStore(Chain, DL, Arg, HiPtrOff, MachinePointerInfo());
// Load into Reg and Reg+1
SDValue Hi64 =
DAG.getLoad(MVT::i64, DL, Store, HiPtrOff, MachinePointerInfo());
SDValue Lo64 =
DAG.getLoad(MVT::i64, DL, Store, LoPtrOff, MachinePointerInfo());
RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()),
Hi64));
RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()+1),
Lo64));
continue;
}
// The custom bit on an i32 argument indicates that it should be
// passed in the high bits of the register.
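// E.g. for 'inreg {i32, i32}', the first i32 is shifted into bits 63..32
// and the second is zero-extended and OR'ed into bits 31..0 of the same
// 64-bit register.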
if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg,
DAG.getConstant(32, DL, MVT::i32));
// The next value may go in the low bits of the same register.
// Handle both at once.
if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() &&
ArgLocs[i+1].getLocReg() == VA.getLocReg()) {
SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
CLI.OutVals[i+1]);
Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV);
// Skip the next value, it's already done.
++i;
}
}
RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg));
continue;
}
assert(VA.isMemLoc());
// Create a store off the stack pointer for this argument.
SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT);
// The argument area starts at %fp+BIAS+128 in the callee frame,
// %sp+BIAS+128 in ours.
SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() +
Subtarget->getStackPointerBias() +
128, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
}
// Emit all stores, make sure they occur before the call.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Build a sequence of CopyToReg nodes glued together with token chain and
// glue operands which copy the outgoing args into registers. The InGlue is
// necessary since all emitted instructions must be stuck together in order
// to pass the live physical registers.
SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, DL,
RegsToPass[i].first, RegsToPass[i].second, InGlue);
InGlue = Chain.getValue(1);
}
// If the callee is a GlobalAddress node (quite common, every direct call is),
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
SDValue Callee = CLI.Callee;
bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CB);
unsigned TF = isPositionIndependent() ? SparcMCExpr::VK_Sparc_WPLT30
: SparcMCExpr::VK_Sparc_WDISP30;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT, 0, TF);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, TF);
// Build the operands for the call instruction itself.
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask =
((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv)
: TRI->getCallPreservedMask(DAG.getMachineFunction(),
CLI.CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
// Make sure the CopyToReg nodes are glued to the call instruction which
// consumes the registers.
if (InGlue.getNode())
Ops.push_back(InGlue);
// Now the call itself.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, Ops);
InGlue = Chain.getValue(1);
// Revert the stack pointer immediately after the call.
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
InGlue = Chain.getValue(1);
// Now extract the return values. This is more or less the same as
// LowerFormalArguments_64.
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Set inreg flag manually for codegen generated library calls that
// return float.
if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
CLI.Ins[0].Flags.setInReg();
RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_Sparc64);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
unsigned Reg = toCallerWindow(VA.getLocReg());
// When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
// reside in the same register in the high and low bits. Reuse the
// CopyFromReg previous node to avoid duplicate copies.
SDValue RV;
if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
RV = Chain.getValue(0);
// But usually we'll create a new CopyFromReg for a different register.
if (!RV.getNode()) {
RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
Chain = RV.getValue(1);
InGlue = Chain.getValue(2);
}
// Get the high bits for i32 struct elements.
if (VA.getValVT() == MVT::i32 && VA.needsCustom())
RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
DAG.getConstant(32, DL, MVT::i32));
// The callee promoted the return value, so insert an Assert?ext SDNode so
// we won't promote the value again in this function.
switch (VA.getLocInfo()) {
case CCValAssign::SExt:
RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
DAG.getValueType(VA.getValVT()));
break;
case CCValAssign::ZExt:
RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
DAG.getValueType(VA.getValVT()));
break;
default:
break;
}
// Truncate the register down to the return value type.
if (VA.isExtInLoc())
RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
InVals.push_back(RV);
}
return Chain;
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
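// A 32-bit atomicrmw xchg can be selected directly to SPARC's swap
// instruction; every other read-modify-write operation (and any other
// width) is expanded to a compare-and-swap loop.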
TargetLowering::AtomicExpansionKind
SparcTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (AI->getOperation() == AtomicRMWInst::Xchg &&
AI->getType()->getPrimitiveSizeInBits() == 32)
return AtomicExpansionKind::None; // Uses the swap instruction
return AtomicExpansionKind::CmpXChg;
}
/// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC
/// condition.
static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Unknown integer condition code!");
case ISD::SETEQ: return SPCC::ICC_E;
case ISD::SETNE: return SPCC::ICC_NE;
case ISD::SETLT: return SPCC::ICC_L;
case ISD::SETGT: return SPCC::ICC_G;
case ISD::SETLE: return SPCC::ICC_LE;
case ISD::SETGE: return SPCC::ICC_GE;
case ISD::SETULT: return SPCC::ICC_CS;
case ISD::SETULE: return SPCC::ICC_LEU;
case ISD::SETUGT: return SPCC::ICC_GU;
case ISD::SETUGE: return SPCC::ICC_CC;
}
}
/// FPCondCCodeToFCC - Convert a DAG floating point condition code to a SPARC
/// FCC condition.
static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Unknown fp condition code!");
case ISD::SETEQ:
case ISD::SETOEQ: return SPCC::FCC_E;
case ISD::SETNE:
case ISD::SETUNE: return SPCC::FCC_NE;
case ISD::SETLT:
case ISD::SETOLT: return SPCC::FCC_L;
case ISD::SETGT:
case ISD::SETOGT: return SPCC::FCC_G;
case ISD::SETLE:
case ISD::SETOLE: return SPCC::FCC_LE;
case ISD::SETGE:
case ISD::SETOGE: return SPCC::FCC_GE;
case ISD::SETULT: return SPCC::FCC_UL;
case ISD::SETULE: return SPCC::FCC_ULE;
case ISD::SETUGT: return SPCC::FCC_UG;
case ISD::SETUGE: return SPCC::FCC_UGE;
case ISD::SETUO: return SPCC::FCC_U;
case ISD::SETO: return SPCC::FCC_O;
case ISD::SETONE: return SPCC::FCC_LG;
case ISD::SETUEQ: return SPCC::FCC_UE;
}
}
SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
const SparcSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
// Instructions which use registers as conditionals examine all the
// bits (as does the pseudo SELECT_CC expansion). I don't think it
// matters much whether it's ZeroOrOneBooleanContent, or
// ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
// former.
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent);
// Set up the register classes.
addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
if (!Subtarget->useSoftFloat()) {
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
addRegisterClass(MVT::f128, &SP::QFPRegsRegClass);
}
if (Subtarget->is64Bit()) {
addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
} else {
// On 32-bit sparc, we also define a register class for pairs of 32-bit
// registers. This is modeled in LLVM as a 2-vector of i32.
addRegisterClass(MVT::v2i32, &SP::IntPairRegClass);
// ...but almost all operations must be expanded, so set that as
// the default.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
setOperationAction(Op, MVT::v2i32, Expand);
}
// Truncating/extending stores/loads are also not supported.
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand);
setTruncStoreAction(VT, MVT::v2i32, Expand);
setTruncStoreAction(MVT::v2i32, VT, Expand);
}
// However, load and store *are* legal.
setOperationAction(ISD::LOAD, MVT::v2i32, Legal);
setOperationAction(ISD::STORE, MVT::v2i32, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal);
// And we need to promote i64 loads/stores into vector load/store
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
// Sadly, this doesn't work:
// AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
// AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
}
// Turn FP extload into load/fpextend
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
}
// Sparc doesn't have i1 sign extending load
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
// Custom legalize GlobalAddress nodes into LO/HI parts.
setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
setOperationAction(ISD::ConstantPool, PtrVT, Custom);
setOperationAction(ISD::BlockAddress, PtrVT, Custom);
// Sparc doesn't have sext_inreg, replace them with shl/sra
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
// Sparc has no REM or DIVREM operations.
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
// ... nor does SparcV9.
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UREM, MVT::i64, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
}
// Custom expand fp<->sint
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
// Custom Expand fp<->uint
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
// Lower f16 conversion operations into library calls
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
// Sparc has no select or setcc: expand to SELECT_CC.
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Expand);
// Sparc doesn't have BRCOND either, it has BR_CC.
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ADDC, MVT::i64, Custom);
setOperationAction(ISD::ADDE, MVT::i64, Custom);
setOperationAction(ISD::SUBC, MVT::i64, Custom);
setOperationAction(ISD::SUBE, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Expand);
setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SETCC, MVT::i64, Expand);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i64,
Subtarget->usePopc() ? Legal : Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
setOperationAction(ISD::ROTL , MVT::i64, Expand);
setOperationAction(ISD::ROTR , MVT::i64, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
}
// ATOMICs.
// Atomics are supported on SparcV9. 32-bit atomics are also
// supported by some Leon SparcV8 variants. Otherwise, atomics
// are unsupported.
if (Subtarget->isV9())
setMaxAtomicSizeInBitsSupported(64);
else if (Subtarget->hasLeonCasa())
setMaxAtomicSizeInBitsSupported(32);
else
setMaxAtomicSizeInBitsSupported(0);
setMinCmpXchgSizeInBits(32);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Legal);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Legal);
// Custom Lower Atomic LOAD/STORE
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Legal);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Legal);
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom);
}
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::MULO_I64, nullptr);
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
}
setLibcallName(RTLIB::MULO_I128, nullptr);
if (!Subtarget->isV9()) {
// SparcV8 does not have FNEGD and FABSD.
setOperationAction(ISD::FNEG, MVT::f64, Custom);
setOperationAction(ISD::FABS, MVT::f64, Custom);
}
setOperationAction(ISD::FSIN , MVT::f128, Expand);
setOperationAction(ISD::FCOS , MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FREM , MVT::f128, Expand);
setOperationAction(ISD::FMA , MVT::f128, Expand);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f128, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
// Expands to [SU]MUL_LOHI.
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::MUL, MVT::i32, Expand);
if (Subtarget->useSoftMulDiv()) {
// .umul works for both signed and unsigned
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setLibcallName(RTLIB::MUL_I32, ".umul");
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setLibcallName(RTLIB::SDIV_I32, ".div");
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setLibcallName(RTLIB::UDIV_I32, ".udiv");
setLibcallName(RTLIB::SREM_I32, ".rem");
setLibcallName(RTLIB::UREM_I32, ".urem");
}
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i64, Expand);
setOperationAction(ISD::MULHS, MVT::i64, Expand);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
}
// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
setOperationAction(ISD::VASTART , MVT::Other, Custom);
// VAARG needs to be lowered to not do unaligned accesses for doubles.
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::TRAP , MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP , MVT::Other, Legal);
// Use the default implementation.
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
setStackPointerRegisterToSaveRestore(SP::O6);
setOperationAction(ISD::CTPOP, MVT::i32,
Subtarget->usePopc() ? Legal : Expand);
if (Subtarget->isV9() && Subtarget->hasHardQuad()) {
setOperationAction(ISD::LOAD, MVT::f128, Legal);
setOperationAction(ISD::STORE, MVT::f128, Legal);
} else {
setOperationAction(ISD::LOAD, MVT::f128, Custom);
setOperationAction(ISD::STORE, MVT::f128, Custom);
}
if (Subtarget->hasHardQuad()) {
setOperationAction(ISD::FADD, MVT::f128, Legal);
setOperationAction(ISD::FSUB, MVT::f128, Legal);
setOperationAction(ISD::FMUL, MVT::f128, Legal);
setOperationAction(ISD::FDIV, MVT::f128, Legal);
setOperationAction(ISD::FSQRT, MVT::f128, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
if (Subtarget->isV9()) {
setOperationAction(ISD::FNEG, MVT::f128, Legal);
setOperationAction(ISD::FABS, MVT::f128, Legal);
} else {
setOperationAction(ISD::FNEG, MVT::f128, Custom);
setOperationAction(ISD::FABS, MVT::f128, Custom);
}
if (!Subtarget->is64Bit()) {
setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Q_qtoll");
setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Q_qtoull");
setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Q_lltoq");
setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Q_ulltoq");
}
} else {
// Custom legalize f128 operations.
setOperationAction(ISD::FADD, MVT::f128, Custom);
setOperationAction(ISD::FSUB, MVT::f128, Custom);
setOperationAction(ISD::FMUL, MVT::f128, Custom);
setOperationAction(ISD::FDIV, MVT::f128, Custom);
setOperationAction(ISD::FSQRT, MVT::f128, Custom);
setOperationAction(ISD::FNEG, MVT::f128, Custom);
setOperationAction(ISD::FABS, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
// Set up runtime library names.
if (Subtarget->is64Bit() && !Subtarget->useSoftFloat()) {
setLibcallName(RTLIB::ADD_F128, "_Qp_add");
setLibcallName(RTLIB::SUB_F128, "_Qp_sub");
setLibcallName(RTLIB::MUL_F128, "_Qp_mul");
setLibcallName(RTLIB::DIV_F128, "_Qp_div");
setLibcallName(RTLIB::SQRT_F128, "_Qp_sqrt");
setLibcallName(RTLIB::FPTOSINT_F128_I32, "_Qp_qtoi");
setLibcallName(RTLIB::FPTOUINT_F128_I32, "_Qp_qtoui");
setLibcallName(RTLIB::SINTTOFP_I32_F128, "_Qp_itoq");
setLibcallName(RTLIB::UINTTOFP_I32_F128, "_Qp_uitoq");
setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Qp_qtox");
setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Qp_qtoux");
setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Qp_xtoq");
setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Qp_uxtoq");
setLibcallName(RTLIB::FPEXT_F32_F128, "_Qp_stoq");
setLibcallName(RTLIB::FPEXT_F64_F128, "_Qp_dtoq");
setLibcallName(RTLIB::FPROUND_F128_F32, "_Qp_qtos");
setLibcallName(RTLIB::FPROUND_F128_F64, "_Qp_qtod");
} else if (!Subtarget->useSoftFloat()) {
setLibcallName(RTLIB::ADD_F128, "_Q_add");
setLibcallName(RTLIB::SUB_F128, "_Q_sub");
setLibcallName(RTLIB::MUL_F128, "_Q_mul");
setLibcallName(RTLIB::DIV_F128, "_Q_div");
setLibcallName(RTLIB::SQRT_F128, "_Q_sqrt");
setLibcallName(RTLIB::FPTOSINT_F128_I32, "_Q_qtoi");
setLibcallName(RTLIB::FPTOUINT_F128_I32, "_Q_qtou");
setLibcallName(RTLIB::SINTTOFP_I32_F128, "_Q_itoq");
setLibcallName(RTLIB::UINTTOFP_I32_F128, "_Q_utoq");
setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Q_qtoll");
setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Q_qtoull");
setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Q_lltoq");
setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Q_ulltoq");
setLibcallName(RTLIB::FPEXT_F32_F128, "_Q_stoq");
setLibcallName(RTLIB::FPEXT_F64_F128, "_Q_dtoq");
setLibcallName(RTLIB::FPROUND_F128_F32, "_Q_qtos");
setLibcallName(RTLIB::FPROUND_F128_F64, "_Q_qtod");
}
}
if (Subtarget->fixAllFDIVSQRT()) {
// Promote FDIVS and FSQRTS to FDIVD and FSQRTD instructions instead, as
// the former instructions trigger errata on LEON processors.
setOperationAction(ISD::FDIV, MVT::f32, Promote);
setOperationAction(ISD::FSQRT, MVT::f32, Promote);
}
if (Subtarget->hasNoFMULS()) {
setOperationAction(ISD::FMUL, MVT::f32, Promote);
}
// Custom combine bitcast between f64 and v2i32
if (!Subtarget->is64Bit())
setTargetDAGCombine(ISD::BITCAST);
if (Subtarget->hasLeonCycleCounter())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setMinFunctionAlignment(Align(4));
computeRegisterProperties(Subtarget->getRegisterInfo());
}
bool SparcTargetLowering::useSoftFloat() const {
return Subtarget->useSoftFloat();
}
const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((SPISD::NodeType)Opcode) {
case SPISD::FIRST_NUMBER: break;
case SPISD::CMPICC: return "SPISD::CMPICC";
case SPISD::CMPFCC: return "SPISD::CMPFCC";
case SPISD::BRICC: return "SPISD::BRICC";
case SPISD::BRXCC: return "SPISD::BRXCC";
case SPISD::BRFCC: return "SPISD::BRFCC";
case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
case SPISD::Hi: return "SPISD::Hi";
case SPISD::Lo: return "SPISD::Lo";
case SPISD::FTOI: return "SPISD::FTOI";
case SPISD::ITOF: return "SPISD::ITOF";
case SPISD::FTOX: return "SPISD::FTOX";
case SPISD::XTOF: return "SPISD::XTOF";
case SPISD::CALL: return "SPISD::CALL";
case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
case SPISD::FLUSHW: return "SPISD::FLUSHW";
case SPISD::TLS_ADD: return "SPISD::TLS_ADD";
case SPISD::TLS_LD: return "SPISD::TLS_LD";
case SPISD::TLS_CALL: return "SPISD::TLS_CALL";
case SPISD::TAIL_CALL: return "SPISD::TAIL_CALL";
case SPISD::LOAD_GDOP: return "SPISD::LOAD_GDOP";
}
return nullptr;
}
EVT SparcTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
/// computeKnownBitsForTargetNode - Determine which bits of Op are known to
/// be zero or one. Op is expected to be a target specific node. Used by DAG
/// combiner.
void SparcTargetLowering::computeKnownBitsForTargetNode
(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
KnownBits Known2;
Known.resetAll();
switch (Op.getOpcode()) {
default: break;
case SPISD::SELECT_ICC:
case SPISD::SELECT_XCC:
case SPISD::SELECT_FCC:
Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
}
}
// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so,
// set LHS/RHS to the LHS/RHS of the setcc and SPCC to the condition.
static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
ISD::CondCode CC, unsigned &SPCC) {
if (isNullConstant(RHS) &&
CC == ISD::SETNE &&
(((LHS.getOpcode() == SPISD::SELECT_ICC ||
LHS.getOpcode() == SPISD::SELECT_XCC) &&
LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
(LHS.getOpcode() == SPISD::SELECT_FCC &&
LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
isOneConstant(LHS.getOperand(0)) &&
isNullConstant(LHS.getOperand(1))) {
SDValue CMPCC = LHS.getOperand(3);
SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
LHS = CMPCC.getOperand(0);
RHS = CMPCC.getOperand(1);
}
}
// Convert to a target node and set target flags.
SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF,
SelectionDAG &DAG) const {
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
return DAG.getTargetGlobalAddress(GA->getGlobal(),
SDLoc(GA),
GA->getValueType(0),
GA->getOffset(), TF);
if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
CP->getAlign(), CP->getOffset(), TF);
if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
return DAG.getTargetBlockAddress(BA->getBlockAddress(),
Op.getValueType(),
0,
TF);
if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
return DAG.getTargetExternalSymbol(ES->getSymbol(),
ES->getValueType(0), TF);
llvm_unreachable("Unhandled address SDNode");
}
// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
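// For the common %hi/%lo pair this is the classic SPARC address idiom,
// roughly:
//   sethi %hi(sym), %reg
//   or    %reg, %lo(sym), %reg
// expressed here as SPISD::Hi, SPISD::Lo and a generic ADD.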
SDValue SparcTargetLowering::makeHiLoPair(SDValue Op,
unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Hi = DAG.getNode(SPISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
SDValue Lo = DAG.getNode(SPISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}
// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = getPointerTy(DAG.getDataLayout());
// Handle PIC mode first. SPARC needs a GOT load for every variable!
if (isPositionIndependent()) {
const Module *M = DAG.getMachineFunction().getFunction().getParent();
PICLevel::Level picLevel = M->getPICLevel();
SDValue Idx;
if (picLevel == PICLevel::SmallPIC) {
// This is the pic13 code model, the GOT is known to be smaller than 8KiB.
Idx = DAG.getNode(SPISD::Lo, DL, Op.getValueType(),
withTargetFlags(Op, SparcMCExpr::VK_Sparc_GOT13, DAG));
} else {
// This is the pic32 code model, the GOT is known to be smaller than 4GB.
Idx = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_GOT22,
SparcMCExpr::VK_Sparc_GOT10, DAG);
}
SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT);
SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Idx);
// GLOBAL_BASE_REG is codegen'ed with a call. Inform MFI that this
// function has calls.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setHasCalls(true);
return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
// This is one of the absolute code models.
switch(getTargetMachine().getCodeModel()) {
default:
llvm_unreachable("Unsupported absolute code model");
case CodeModel::Small:
// abs32.
return makeHiLoPair(Op, SparcMCExpr::VK_Sparc_HI,
SparcMCExpr::VK_Sparc_LO, DAG);
case CodeModel::Medium: {
// abs44.
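// The three relocations select bits [43:22] (%h44), [21:12] (%m44) and
// [11:0] (%l44) of the symbol's 44-bit address; the shift by 12 moves
// the sethi/or pair into its final position.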
SDValue H44 = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_H44,
SparcMCExpr::VK_Sparc_M44, DAG);
H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, DL, MVT::i32));
SDValue L44 = withTargetFlags(Op, SparcMCExpr::VK_Sparc_L44, DAG);
L44 = DAG.getNode(SPISD::Lo, DL, VT, L44);
return DAG.getNode(ISD::ADD, DL, VT, H44, L44);
}
case CodeModel::Large: {
// abs64.
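// %hh/%hm form bits [63:32] of the address (shifted into place below),
// and an ordinary %hi/%lo pair supplies bits [31:0].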
SDValue Hi = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_HH,
SparcMCExpr::VK_Sparc_HM, DAG);
Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, DL, MVT::i32));
SDValue Lo = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_HI,
SparcMCExpr::VK_Sparc_LO, DAG);
return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}
}
}
SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
SDValue SparcTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(GA, DAG);
SDLoc DL(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
TLSModel::Model model = getTargetMachine().getTLSModel(GV);
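// For the dynamic models we build the GOT-relative argument from a
// %tgd_hi22/%tgd_lo10 (or %tldm_*) pair and emit a call to
// __tls_get_addr carrying the matching %tgd_call/%tldm_call annotation.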
if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) {
unsigned HiTF = ((model == TLSModel::GeneralDynamic)
? SparcMCExpr::VK_Sparc_TLS_GD_HI22
: SparcMCExpr::VK_Sparc_TLS_LDM_HI22);
unsigned LoTF = ((model == TLSModel::GeneralDynamic)
? SparcMCExpr::VK_Sparc_TLS_GD_LO10
: SparcMCExpr::VK_Sparc_TLS_LDM_LO10);
unsigned addTF = ((model == TLSModel::GeneralDynamic)
? SparcMCExpr::VK_Sparc_TLS_GD_ADD
: SparcMCExpr::VK_Sparc_TLS_LDM_ADD);
unsigned callTF = ((model == TLSModel::GeneralDynamic)
? SparcMCExpr::VK_Sparc_TLS_GD_CALL
: SparcMCExpr::VK_Sparc_TLS_LDM_CALL);
SDValue HiLo = makeHiLoPair(Op, HiTF, LoTF, DAG);
SDValue Base = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, PtrVT);
SDValue Argument = DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, Base, HiLo,
withTargetFlags(Op, addTF, DAG));
SDValue Chain = DAG.getEntryNode();
SDValue InFlag;
Chain = DAG.getCALLSEQ_START(Chain, 1, 0, DL);
Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InFlag);
InFlag = Chain.getValue(1);
SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT);
SDValue Symbol = withTargetFlags(Op, callTF, DAG);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
DAG.getMachineFunction(), CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
SDValue Ops[] = {Chain,
Callee,
Symbol,
DAG.getRegister(SP::O0, PtrVT),
DAG.getRegisterMask(Mask),
InFlag};
Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(1, DL, true),
DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InFlag);
if (model != TLSModel::LocalDynamic)
return Ret;
SDValue Hi = DAG.getNode(SPISD::Hi, DL, PtrVT,
withTargetFlags(Op, SparcMCExpr::VK_Sparc_TLS_LDO_HIX22, DAG));
SDValue Lo = DAG.getNode(SPISD::Lo, DL, PtrVT,
withTargetFlags(Op, SparcMCExpr::VK_Sparc_TLS_LDO_LOX10, DAG));
HiLo = DAG.getNode(ISD::XOR, DL, PtrVT, Hi, Lo);
return DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, Ret, HiLo,
withTargetFlags(Op, SparcMCExpr::VK_Sparc_TLS_LDO_ADD, DAG));
}
if (model == TLSModel::InitialExec) {
unsigned ldTF = ((PtrVT == MVT::i64)? SparcMCExpr::VK_Sparc_TLS_IE_LDX
: SparcMCExpr::VK_Sparc_TLS_IE_LD);
SDValue Base = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, PtrVT);
// GLOBAL_BASE_REG is codegen'ed with a call. Inform MFI that this
// function has calls.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setHasCalls(true);
SDValue TGA = makeHiLoPair(Op,
SparcMCExpr::VK_Sparc_TLS_IE_HI22,
SparcMCExpr::VK_Sparc_TLS_IE_LO10, DAG);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base, TGA);
SDValue Offset = DAG.getNode(SPISD::TLS_LD,
DL, PtrVT, Ptr,
withTargetFlags(Op, ldTF, DAG));
return DAG.getNode(SPISD::TLS_ADD, DL, PtrVT,
DAG.getRegister(SP::G7, PtrVT), Offset,
withTargetFlags(Op,
SparcMCExpr::VK_Sparc_TLS_IE_ADD, DAG));
}
assert(model == TLSModel::LocalExec);
SDValue Hi = DAG.getNode(SPISD::Hi, DL, PtrVT,
withTargetFlags(Op, SparcMCExpr::VK_Sparc_TLS_LE_HIX22, DAG));
SDValue Lo = DAG.getNode(SPISD::Lo, DL, PtrVT,
withTargetFlags(Op, SparcMCExpr::VK_Sparc_TLS_LE_LOX10, DAG));
SDValue Offset = DAG.getNode(ISD::XOR, DL, PtrVT, Hi, Lo);
return DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getRegister(SP::G7, PtrVT), Offset);
}
SDValue SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain,
ArgListTy &Args, SDValue Arg,
const SDLoc &DL,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
if (ArgTy->isFP128Ty()) {
// Create a stack object and pass the pointer to the library function.
int FI = MFI.CreateStackObject(16, Align(8), false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
Chain = DAG.getStore(Chain, DL, Entry.Node, FIPtr, MachinePointerInfo(),
Align(8));
Entry.Node = FIPtr;
Entry.Ty = PointerType::getUnqual(ArgTy);
}
Args.push_back(Entry);
return Chain;
}
SDValue
SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
const char *LibFuncName,
unsigned numArgs) const {
ArgListTy Args;
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getExternalSymbol(LibFuncName, PtrVT);
Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
Type *RetTyABI = RetTy;
SDValue Chain = DAG.getEntryNode();
SDValue RetPtr;
if (RetTy->isFP128Ty()) {
// Create a Stack Object to receive the return value of type f128.
ArgListEntry Entry;
int RetFI = MFI.CreateStackObject(16, Align(8), false);
RetPtr = DAG.getFrameIndex(RetFI, PtrVT);
Entry.Node = RetPtr;
Entry.Ty = PointerType::getUnqual(RetTy);
if (!Subtarget->is64Bit()) {
Entry.IsSRet = true;
Entry.IndirectType = RetTy;
}
Entry.IsReturned = false;
Args.push_back(Entry);
RetTyABI = Type::getVoidTy(*DAG.getContext());
}
assert(Op->getNumOperands() >= numArgs && "Not enough operands!");
for (unsigned i = 0, e = numArgs; i != e; ++i) {
Chain = LowerF128_LibCallArg(Chain, Args, Op.getOperand(i), SDLoc(Op), DAG);
}
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Op)).setChain(Chain)
.setCallee(CallingConv::C, RetTyABI, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
// chain is in second result.
if (RetTyABI == RetTy)
return CallInfo.first;
assert(RetTy->isFP128Ty() && "Unexpected return type!");
Chain = CallInfo.second;
// Load RetPtr to get the return value.
return DAG.getLoad(Op.getValueType(), SDLoc(Op), Chain, RetPtr,
MachinePointerInfo(), Align(8));
}
SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
unsigned &SPCC, const SDLoc &DL,
SelectionDAG &DAG) const {
const char *LibCall = nullptr;
bool is64Bit = Subtarget->is64Bit();
switch(SPCC) {
default: llvm_unreachable("Unhandled conditional code!");
case SPCC::FCC_E : LibCall = is64Bit? "_Qp_feq" : "_Q_feq"; break;
case SPCC::FCC_NE : LibCall = is64Bit? "_Qp_fne" : "_Q_fne"; break;
case SPCC::FCC_L : LibCall = is64Bit? "_Qp_flt" : "_Q_flt"; break;
case SPCC::FCC_G : LibCall = is64Bit? "_Qp_fgt" : "_Q_fgt"; break;
case SPCC::FCC_LE : LibCall = is64Bit? "_Qp_fle" : "_Q_fle"; break;
case SPCC::FCC_GE : LibCall = is64Bit? "_Qp_fge" : "_Q_fge"; break;
case SPCC::FCC_UL :
case SPCC::FCC_ULE:
case SPCC::FCC_UG :
case SPCC::FCC_UGE:
case SPCC::FCC_U :
case SPCC::FCC_O :
case SPCC::FCC_LG :
case SPCC::FCC_UE : LibCall = is64Bit? "_Qp_cmp" : "_Q_cmp"; break;
}
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getExternalSymbol(LibCall, PtrVT);
Type *RetTy = Type::getInt32Ty(*DAG.getContext());
ArgListTy Args;
SDValue Chain = DAG.getEntryNode();
Chain = LowerF128_LibCallArg(Chain, Args, LHS, DL, DAG);
Chain = LowerF128_LibCallArg(Chain, Args, RHS, DL, DAG);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL).setChain(Chain)
.setCallee(CallingConv::C, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
// result is in first, and chain is in second result.
SDValue Result = CallInfo.first;
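// The comparison libcall returns a small integer; judging from the tests
// below, the encoding is 0 = equal, 1 = less, 2 = greater, 3 = unordered,
// and each unordered-aware condition is re-derived from it with an
// integer compare.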
switch(SPCC) {
default: {
SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UL : {
SDValue Mask = DAG.getConstant(1, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_ULE: {
SDValue RHS = DAG.getConstant(2, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UG : {
SDValue RHS = DAG.getConstant(1, DL, Result.getValueType());
SPCC = SPCC::ICC_G;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UGE: {
SDValue RHS = DAG.getConstant(1, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_U : {
SDValue RHS = DAG.getConstant(3, DL, Result.getValueType());
SPCC = SPCC::ICC_E;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_O : {
SDValue RHS = DAG.getConstant(3, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_LG : {
SDValue Mask = DAG.getConstant(3, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UE : {
SDValue Mask = DAG.getConstant(3, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_E;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
}
}
static SDValue
LowerF128_FPEXTEND(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI) {
if (Op.getOperand(0).getValueType() == MVT::f64)
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(RTLIB::FPEXT_F64_F128), 1);
if (Op.getOperand(0).getValueType() == MVT::f32)
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(RTLIB::FPEXT_F32_F128), 1);
llvm_unreachable("fpextend with non-float operand!");
return SDValue();
}
static SDValue
LowerF128_FPROUND(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI) {
// FP_ROUND on f64 and f32 are legal.
if (Op.getOperand(0).getValueType() != MVT::f128)
return Op;
if (Op.getValueType() == MVT::f64)
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(RTLIB::FPROUND_F128_F64), 1);
if (Op.getValueType() == MVT::f32)
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(RTLIB::FPROUND_F128_F32), 1);
llvm_unreachable("fpround to non-float!");
return SDValue();
}
static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDLoc dl(Op);
EVT VT = Op.getValueType();
assert(VT == MVT::i32 || VT == MVT::i64);
// Expand f128 operations to fp128 ABI calls.
if (Op.getOperand(0).getValueType() == MVT::f128
&& (!hasHardQuad || !TLI.isTypeLegal(VT))) {
const char *libName = TLI.getLibcallName(VT == MVT::i32
? RTLIB::FPTOSINT_F128_I32
: RTLIB::FPTOSINT_F128_I64);
return TLI.LowerF128Op(Op, DAG, libName, 1);
}
// Expand if the resulting type is illegal.
if (!TLI.isTypeLegal(VT))
return SDValue();
// Otherwise, convert the fp value to an integer held in an FP register.
if (VT == MVT::i32)
Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
else
Op = DAG.getNode(SPISD::FTOX, dl, MVT::f64, Op.getOperand(0));
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
assert(OpVT == MVT::i32 || OpVT == MVT::i64);
EVT floatVT = (OpVT == MVT::i32) ? MVT::f32 : MVT::f64;
// Expand f128 operations to fp128 ABI calls.
if (Op.getValueType() == MVT::f128
&& (!hasHardQuad || !TLI.isTypeLegal(OpVT))) {
const char *libName = TLI.getLibcallName(OpVT == MVT::i32
? RTLIB::SINTTOFP_I32_F128
: RTLIB::SINTTOFP_I64_F128);
return TLI.LowerF128Op(Op, DAG, libName, 1);
}
// Expand if the operand type is illegal.
if (!TLI.isTypeLegal(OpVT))
return SDValue();
// Otherwise, convert the int value to FP in an FP register.
SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, floatVT, Op.getOperand(0));
unsigned opcode = (OpVT == MVT::i32)? SPISD::ITOF : SPISD::XTOF;
return DAG.getNode(opcode, dl, Op.getValueType(), Tmp);
}
static SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDLoc dl(Op);
EVT VT = Op.getValueType();
// Expand if it does not involve f128 or the target has support for
// quad floating point instructions and the resulting type is legal.
if (Op.getOperand(0).getValueType() != MVT::f128 ||
(hasHardQuad && TLI.isTypeLegal(VT)))
return SDValue();
assert(VT == MVT::i32 || VT == MVT::i64);
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(VT == MVT::i32
? RTLIB::FPTOUINT_F128_I32
: RTLIB::FPTOUINT_F128_I64),
1);
}
static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
assert(OpVT == MVT::i32 || OpVT == MVT::i64);
// Expand if it does not involve f128 or the target has support for
// quad floating point instructions and the operand type is legal.
if (Op.getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(OpVT)))
return SDValue();
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(OpVT == MVT::i32
? RTLIB::UINTTOFP_I32_F128
: RTLIB::UINTTOFP_I64_F128),
1);
}
static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
unsigned Opc, SPCC = ~0U;
// If this is a br_cc of a "setcc", and if the setcc got lowered into
// an CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
LookThroughSetCC(LHS, RHS, CC, SPCC);
// Get the condition flag.
SDValue CompareFlag;
if (LHS.getValueType().isInteger()) {
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS);
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
// 32-bit compares use the icc flags, 64-bit uses the xcc flags.
Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC;
} else {
if (!hasHardQuad && LHS.getValueType() == MVT::f128) {
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG);
Opc = SPISD::BRICC;
} else {
CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
Opc = SPISD::BRFCC;
}
}
return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
DAG.getConstant(SPCC, dl, MVT::i32), CompareFlag);
}
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
SDLoc dl(Op);
unsigned Opc, SPCC = ~0U;
// If this is a select_cc of a "setcc", and if the setcc got lowered into
// an CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
LookThroughSetCC(LHS, RHS, CC, SPCC);
SDValue CompareFlag;
if (LHS.getValueType().isInteger()) {
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS);
Opc = LHS.getValueType() == MVT::i32 ?
SPISD::SELECT_ICC : SPISD::SELECT_XCC;
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
} else {
if (!hasHardQuad && LHS.getValueType() == MVT::f128) {
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG);
Opc = SPISD::SELECT_ICC;
} else {
CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
Opc = SPISD::SELECT_FCC;
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
}
}
return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
DAG.getConstant(SPCC, dl, MVT::i32), CompareFlag);
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI) {
MachineFunction &MF = DAG.getMachineFunction();
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
// Need frame address to find the address of VarArgsFrameIndex.
MF.getFrameInfo().setFrameAddressIsTaken(true);
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDLoc DL(Op);
SDValue Offset =
DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(SP::I6, PtrVT),
DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
MachinePointerInfo(SV));
}
static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
EVT VT = Node->getValueType(0);
SDValue InChain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
EVT PtrVT = VAListPtr.getValueType();
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
SDLoc DL(Node);
SDValue VAList =
DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
// Increment the pointer, VAList, to the next vaarg.
SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getIntPtrConstant(VT.getSizeInBits()/8,
DL));
// Store the incremented VAList to the legalized pointer.
InChain = DAG.getStore(VAList.getValue(1), DL, NextPtr, VAListPtr,
MachinePointerInfo(SV));
// Load the actual argument out of the pointer VAList.
// We can't count on greater alignment than the word size.
return DAG.getLoad(
VT, DL, InChain, VAList, MachinePointerInfo(),
std::min(PtrVT.getFixedSizeInBits(), VT.getFixedSizeInBits()) / 8);
}
static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget) {
SDValue Chain = Op.getOperand(0); // Legalize the chain.
SDValue Size = Op.getOperand(1); // Legalize the size.
MaybeAlign Alignment =
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
Align StackAlign = Subtarget->getFrameLowering()->getStackAlign();
EVT VT = Size->getValueType(0);
SDLoc dl(Op);
// TODO: implement over-aligned alloca. (Note: this also implies
// supporting over-aligned function frames + dynamic allocations,
// which currently isn't supported at all.)
if (Alignment && *Alignment > StackAlign) {
const MachineFunction &MF = DAG.getMachineFunction();
report_fatal_error("Function \"" + Twine(MF.getName()) + "\": "
"over-aligned dynamic alloca not supported.");
}
// The resultant pointer needs to be above the register spill area
// at the bottom of the stack.
unsigned regSpillArea;
if (Subtarget->is64Bit()) {
regSpillArea = 128;
} else {
// On Sparc32, the size of the spill area is 92. Unfortunately,
// that's only 4-byte aligned, not 8-byte aligned (the stack
// pointer is 8-byte aligned). So, if the user asked for an 8-byte
// aligned dynamic allocation, we actually need to add 96 to the
// bottom of the stack, instead of 92, to ensure 8-byte alignment.
// That also means adding 4 to the size of the allocation --
// before applying the 8-byte rounding. Unfortunately, the value we
// get here has already had rounding applied. So, we need to add 8
// instead, wasting a bit more memory.
// Further, this only actually needs to be done if the required
// alignment is > 4, but, we've lost that info by this point, too,
// so we always apply it.
// (An alternative approach would be to always reserve 96 bytes
// instead of the required 92, but then we'd waste 4 extra bytes
// in every frame, not just those with dynamic stack allocations)
// TODO: modify code in SelectionDAGBuilder to make this less sad.
Size = DAG.getNode(ISD::ADD, dl, VT, Size,
DAG.getConstant(8, dl, VT));
regSpillArea = 96;
}
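// E.g. on 32-bit, an 8-byte alloca becomes: %sp -= (8 + 8); result =
// new %sp + 96, leaving the (rounded-up) register spill area below the
// returned pointer.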
unsigned SPReg = SP::O6;
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
regSpillArea += Subtarget->getStackPointerBias();
SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
DAG.getConstant(regSpillArea, dl, VT));
SDValue Ops[2] = { NewVal, Chain };
return DAG.getMergeValues(Ops, dl);
}
static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
SDValue Chain = DAG.getNode(SPISD::FLUSHW,
dl, MVT::Other, DAG.getEntryNode());
return Chain;
}
static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget,
bool AlwaysFlush = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned FrameReg = SP::I6;
unsigned stackBias = Subtarget->getStackPointerBias();
SDValue FrameAddr;
SDValue Chain;
// Flush first to make sure the windowed registers' values are on the stack.
Chain = (depth || AlwaysFlush) ? getFLUSHW(Op, DAG) : DAG.getEntryNode();
FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
unsigned Offset = (Subtarget->is64Bit()) ? (stackBias + 112) : 56;
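// %fp of the next frame up is the saved %i6, which is register 14 of the
// window save area: 14 * 4 = 56 bytes on 32-bit, 14 * 8 = 112 (plus the
// stack bias) on 64-bit.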
while (depth--) {
SDValue Ptr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr,
DAG.getIntPtrConstant(Offset, dl));
FrameAddr = DAG.getLoad(VT, dl, Chain, Ptr, MachinePointerInfo());
}
if (Subtarget->is64Bit())
FrameAddr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr,
DAG.getIntPtrConstant(stackBias, dl));
return FrameAddr;
}
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget) {
uint64_t depth = Op.getConstantOperandVal(0);
return getFRAMEADDR(depth, Op, DAG, Subtarget);
}
static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
const SparcSubtarget *Subtarget) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
EVT VT = Op.getValueType();
SDLoc dl(Op);
uint64_t depth = Op.getConstantOperandVal(0);
SDValue RetAddr;
if (depth == 0) {
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
Register RetReg = MF.addLiveIn(SP::I7, TLI.getRegClassFor(PtrVT));
RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
return RetAddr;
}
// Need frame address to find return address of the caller.
SDValue FrameAddr = getFRAMEADDR(depth - 1, Op, DAG, Subtarget, true);
unsigned Offset = (Subtarget->is64Bit()) ? 120 : 60;
SDValue Ptr = DAG.getNode(ISD::ADD,
dl, VT,
FrameAddr,
DAG.getIntPtrConstant(Offset, dl));
RetAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), Ptr, MachinePointerInfo());
return RetAddr;
}
static SDValue LowerF64Op(SDValue SrcReg64, const SDLoc &dl, SelectionDAG &DAG,
unsigned opcode) {
assert(SrcReg64.getValueType() == MVT::f64 && "LowerF64Op called on non-double!");
assert(opcode == ISD::FNEG || opcode == ISD::FABS);
// Lower fneg/fabs on f64 to fneg/fabs on f32.
// fneg f64 => fneg f32:sub_even, fmov f32:sub_odd.
// fabs f64 => fabs f32:sub_even, fmov f32:sub_odd.
// Note: in little-endian, the two halves of the floating-point
// value are stored in the registers in the opposite order, so the
// subreg with the sign bit is the highest-numbered (odd), rather
// than the lowest-numbered (even).
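// For illustration: with %d0 aliasing the pair {%f0, %f1}, a
// big-endian target applies the operation to %f0 (sub_even) and
// copies %f1 unchanged, while a little-endian target applies it to
// %f1 (sub_odd) instead.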
SDValue Hi32 = DAG.getTargetExtractSubreg(SP::sub_even, dl, MVT::f32,
SrcReg64);
SDValue Lo32 = DAG.getTargetExtractSubreg(SP::sub_odd, dl, MVT::f32,
SrcReg64);
if (DAG.getDataLayout().isLittleEndian())
Lo32 = DAG.getNode(opcode, dl, MVT::f32, Lo32);
else
Hi32 = DAG.getNode(opcode, dl, MVT::f32, Hi32);
SDValue DstReg64 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, MVT::f64), 0);
DstReg64 = DAG.getTargetInsertSubreg(SP::sub_even, dl, MVT::f64,
DstReg64, Hi32);
DstReg64 = DAG.getTargetInsertSubreg(SP::sub_odd, dl, MVT::f64,
DstReg64, Lo32);
return DstReg64;
}
// Lower an f128 load into two f64 loads.
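// For example, a 16-byte load from %ptr becomes an f64 load of the
// high half at %ptr (sub_even64) and an f64 load of the low half at
// %ptr + 8 (sub_odd64); the halves are reassembled with
// INSERT_SUBREG and the two load chains merged with a TokenFactor.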
static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
{
SDLoc dl(Op);
LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
assert(LdNode->getOffset().isUndef() && "Unexpected node type");
Align Alignment = commonAlignment(LdNode->getOriginalAlign(), 8);
SDValue Hi64 =
DAG.getLoad(MVT::f64, dl, LdNode->getChain(), LdNode->getBasePtr(),
LdNode->getPointerInfo(), Alignment);
EVT addrVT = LdNode->getBasePtr().getValueType();
SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
LdNode->getBasePtr(),
DAG.getConstant(8, dl, addrVT));
SDValue Lo64 = DAG.getLoad(MVT::f64, dl, LdNode->getChain(), LoPtr,
LdNode->getPointerInfo().getWithOffset(8),
Alignment);
SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, dl, MVT::i32);
SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, dl, MVT::i32);
SDNode *InFP128 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, MVT::f128);
InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
MVT::f128,
SDValue(InFP128, 0),
Hi64,
SubRegEven);
InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
MVT::f128,
SDValue(InFP128, 0),
Lo64,
SubRegOdd);
SDValue OutChains[2] = { SDValue(Hi64.getNode(), 1),
SDValue(Lo64.getNode(), 1) };
SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
SDValue Ops[2] = {SDValue(InFP128,0), OutChain};
return DAG.getMergeValues(Ops, dl);
}
static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG)
{
LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
EVT MemVT = LdNode->getMemoryVT();
if (MemVT == MVT::f128)
return LowerF128Load(Op, DAG);
return Op;
}
// Lower an f128 store into two f64 stores.
static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
assert(StNode->getOffset().isUndef() && "Unexpected node type");
SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, dl, MVT::i32);
SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, dl, MVT::i32);
SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl,
MVT::f64,
StNode->getValue(),
SubRegEven);
SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl,
MVT::f64,
StNode->getValue(),
SubRegOdd);
Align Alignment = commonAlignment(StNode->getOriginalAlign(), 8);
SDValue OutChains[2];
OutChains[0] =
DAG.getStore(StNode->getChain(), dl, SDValue(Hi64, 0),
StNode->getBasePtr(), StNode->getPointerInfo(),
Alignment);
EVT addrVT = StNode->getBasePtr().getValueType();
SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
StNode->getBasePtr(),
DAG.getConstant(8, dl, addrVT));
OutChains[1] = DAG.getStore(StNode->getChain(), dl, SDValue(Lo64, 0), LoPtr,
StNode->getPointerInfo().getWithOffset(8),
Alignment);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG)
{
SDLoc dl(Op);
StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
EVT MemVT = St->getMemoryVT();
if (MemVT == MVT::f128)
return LowerF128Store(Op, DAG);
if (MemVT == MVT::i64) {
// Custom handling for i64 stores: turn the store into a bitcast and
// a v2i32 store.
SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue());
SDValue Chain = DAG.getStore(
St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(),
St->getOriginalAlign(), St->getMemOperand()->getFlags(),
St->getAAInfo());
return Chain;
}
return SDValue();
}
static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) {
assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS)
&& "invalid opcode");
SDLoc dl(Op);
if (Op.getValueType() == MVT::f64)
return LowerF64Op(Op.getOperand(0), dl, DAG, Op.getOpcode());
if (Op.getValueType() != MVT::f128)
return Op;
// Lower fabs/fneg on f128 to fabs/fneg on f64
// fabs/fneg f128 => fabs/fneg f64:sub_even64, fmov f64:sub_odd64
// (As with LowerF64Op, on little-endian, we need to negate the odd
// subreg)
SDValue SrcReg128 = Op.getOperand(0);
SDValue Hi64 = DAG.getTargetExtractSubreg(SP::sub_even64, dl, MVT::f64,
SrcReg128);
SDValue Lo64 = DAG.getTargetExtractSubreg(SP::sub_odd64, dl, MVT::f64,
SrcReg128);
if (DAG.getDataLayout().isLittleEndian()) {
if (isV9)
Lo64 = DAG.getNode(Op.getOpcode(), dl, MVT::f64, Lo64);
else
Lo64 = LowerF64Op(Lo64, dl, DAG, Op.getOpcode());
} else {
if (isV9)
Hi64 = DAG.getNode(Op.getOpcode(), dl, MVT::f64, Hi64);
else
Hi64 = LowerF64Op(Hi64, dl, DAG, Op.getOpcode());
}
SDValue DstReg128 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, MVT::f128), 0);
DstReg128 = DAG.getTargetInsertSubreg(SP::sub_even64, dl, MVT::f128,
DstReg128, Hi64);
DstReg128 = DAG.getTargetInsertSubreg(SP::sub_odd64, dl, MVT::f128,
DstReg128, Lo64);
return DstReg128;
}
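// Lower 64-bit carrying arithmetic to 32-bit halves chained through the
// carry flag. Sketching the intent for an i64 ADDC of a and b: the low
// words are combined first (Lo = ADDC(a.lo, b.lo)), the resulting carry
// feeds the high words (Hi = ADDE(a.hi, b.hi, carry)), and the i64
// result is rebuilt as (zext(Hi) << 32) | zext(Lo), with Hi's carry
// produced as the second result.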
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
if (Op.getValueType() != MVT::i64)
return Op;
SDLoc dl(Op);
SDValue Src1 = Op.getOperand(0);
SDValue Src1Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src1);
SDValue Src1Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Src1,
DAG.getConstant(32, dl, MVT::i64));
Src1Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src1Hi);
SDValue Src2 = Op.getOperand(1);
SDValue Src2Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src2);
SDValue Src2Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Src2,
DAG.getConstant(32, dl, MVT::i64));
Src2Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src2Hi);
bool hasChain = false;
unsigned hiOpc = Op.getOpcode();
switch (Op.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
case ISD::ADDC: hiOpc = ISD::ADDE; break;
case ISD::ADDE: hasChain = true; break;
case ISD::SUBC: hiOpc = ISD::SUBE; break;
case ISD::SUBE: hasChain = true; break;
}
SDValue Lo;
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Glue);
if (hasChain) {
Lo = DAG.getNode(Op.getOpcode(), dl, VTs, Src1Lo, Src2Lo,
Op.getOperand(2));
} else {
Lo = DAG.getNode(Op.getOpcode(), dl, VTs, Src1Lo, Src2Lo);
}
SDValue Hi = DAG.getNode(hiOpc, dl, VTs, Src1Hi, Src2Hi, Lo.getValue(1));
SDValue Carry = Hi.getValue(1);
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Lo);
Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Hi);
Hi = DAG.getNode(ISD::SHL, dl, MVT::i64, Hi,
DAG.getConstant(32, dl, MVT::i64));
SDValue Dst = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, Lo);
SDValue Ops[2] = { Dst, Carry };
return DAG.getMergeValues(Ops, dl);
}
// Custom lower UMULO/SMULO for SPARC. This code is similar to ExpandNode()
// in LegalizeDAG.cpp, except for the order of arguments to the library
// function.
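// The product is computed with a 128-bit multiply libcall whose i128
// operands are passed high half first (SPARC being big-endian), hence
// the (HiLHS, LHS, HiRHS, RHS) argument list below. Overflow is then
// derived from the top half of the product: for UMULO it must be zero,
// and for SMULO it must equal the sign-extension of the bottom half
// (BottomHalf >> 63).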
static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI)
{
unsigned opcode = Op.getOpcode();
assert((opcode == ISD::UMULO || opcode == ISD::SMULO) && "Invalid Opcode.");
bool isSigned = (opcode == ISD::SMULO);
EVT VT = MVT::i64;
EVT WideVT = MVT::i128;
SDLoc dl(Op);
SDValue LHS = Op.getOperand(0);
if (LHS.getValueType() != VT)
return Op;
SDValue ShiftAmt = DAG.getConstant(63, dl, VT);
SDValue RHS = Op.getOperand(1);
SDValue HiLHS, HiRHS;
if (isSigned) {
HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, ShiftAmt);
HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt);
} else {
HiLHS = DAG.getConstant(0, dl, VT);
HiRHS = DAG.getConstant(0, dl, MVT::i64);
}
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(isSigned);
SDValue MulResult = TLI.makeLibCall(DAG,
RTLIB::MUL_I128, WideVT,
Args, CallOptions, dl).first;
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
MulResult, DAG.getIntPtrConstant(0, dl));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
MulResult, DAG.getIntPtrConstant(1, dl));
if (isSigned) {
SDValue Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, Tmp1, ISD::SETNE);
} else {
TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, DAG.getConstant(0, dl, VT),
ISD::SETNE);
}
// MulResult is a node with an illegal type. Because such things are not
// generally permitted during this phase of legalization, ensure that
// nothing is left using the node. The above EXTRACT_ELEMENT nodes should have
// been folded.
assert(MulResult->use_empty() && "Illegally typed node still in use!");
SDValue Ops[2] = { BottomHalf, TopHalf } ;
return DAG.getMergeValues(Ops, dl);
}
static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) {
if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering())) {
// Expand with a fence.
return SDValue();
}
// Monotonic load/stores are legal.
return Op;
}
SDValue SparcTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
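// In the SPARC ABI, %g7 is reserved as the thread pointer.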
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getRegister(SP::G7, PtrVT);
}
}
}
SDValue SparcTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
bool hasHardQuad = Subtarget->hasHardQuad();
bool isV9 = Subtarget->isV9();
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG, *this,
Subtarget);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG,
Subtarget);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG, *this,
hasHardQuad);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG, *this,
hasHardQuad);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG, *this,
hasHardQuad);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG, *this,
hasHardQuad);
case ISD::BR_CC: return LowerBR_CC(Op, DAG, *this,
hasHardQuad);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this,
hasHardQuad);
case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
Subtarget);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FADD: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::ADD_F128), 2);
case ISD::FSUB: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::SUB_F128), 2);
case ISD::FMUL: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::MUL_F128), 2);
case ISD::FDIV: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::DIV_F128), 2);
case ISD::FSQRT: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::SQRT_F128),1);
case ISD::FABS:
case ISD::FNEG: return LowerFNEGorFABS(Op, DAG, isV9);
case ISD::FP_EXTEND: return LowerF128_FPEXTEND(Op, DAG, *this);
case ISD::FP_ROUND: return LowerF128_FPROUND(Op, DAG, *this);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::UMULO:
case ISD::SMULO: return LowerUMULO_SMULO(Op, DAG, *this);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerATOMIC_LOAD_STORE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
}
}
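// Materialize an f64 constant as a v2i32 build_vector of its 32-bit
// halves. As a quick illustration: double 1.0 bitcasts to
// 0x3FF0000000000000, giving Hi = 0x3FF00000 and Lo = 0x00000000; the
// halves are swapped on little-endian targets.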
SDValue SparcTargetLowering::bitcastConstantFPToInt(ConstantFPSDNode *C,
const SDLoc &DL,
SelectionDAG &DAG) const {
APInt V = C->getValueAPF().bitcastToAPInt();
SDValue Lo = DAG.getConstant(V.zextOrTrunc(32), DL, MVT::i32);
SDValue Hi = DAG.getConstant(V.lshr(32).zextOrTrunc(32), DL, MVT::i32);
if (DAG.getDataLayout().isLittleEndian())
std::swap(Lo, Hi);
return DAG.getBuildVector(MVT::v2i32, DL, {Hi, Lo});
}
SDValue SparcTargetLowering::PerformBITCASTCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SDLoc dl(N);
SDValue Src = N->getOperand(0);
if (isa<ConstantFPSDNode>(Src) && N->getSimpleValueType(0) == MVT::v2i32 &&
Src.getSimpleValueType() == MVT::f64)
return bitcastConstantFPToInt(cast<ConstantFPSDNode>(Src), dl, DCI.DAG);
return SDValue();
}
SDValue SparcTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default:
break;
case ISD::BITCAST:
return PerformBITCASTCombine(N, DCI);
}
return SDValue();
}
MachineBasicBlock *
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
default: llvm_unreachable("Unknown SELECT_CC!");
case SP::SELECT_CC_Int_ICC:
case SP::SELECT_CC_FP_ICC:
case SP::SELECT_CC_DFP_ICC:
case SP::SELECT_CC_QFP_ICC:
return expandSelectCC(MI, BB, SP::BCOND);
case SP::SELECT_CC_Int_XCC:
case SP::SELECT_CC_FP_XCC:
case SP::SELECT_CC_DFP_XCC:
case SP::SELECT_CC_QFP_XCC:
return expandSelectCC(MI, BB, SP::BPXCC);
case SP::SELECT_CC_Int_FCC:
case SP::SELECT_CC_FP_FCC:
case SP::SELECT_CC_DFP_FCC:
case SP::SELECT_CC_QFP_FCC:
return expandSelectCC(MI, BB, SP::FBCOND);
}
}
MachineBasicBlock *
SparcTargetLowering::expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
unsigned BROpcode) const {
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
unsigned CC = (SPCC::CondCodes)MI.getOperand(3).getImm();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// triangle control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and the condition code for the branch.
//
// We produce the following control flow:
// ThisMBB
// | \
// | IfFalseMBB
// | /
// SinkMBB
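// ThisMBB ends with a conditional branch (BROpcode) straight to SinkMBB
// when the condition holds, so the phi below selects TrueValue from
// ThisMBB and FalseValue from the IfFalseMBB fall-through path.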
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
MachineBasicBlock *ThisMBB = BB;
MachineFunction *F = BB->getParent();
MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, IfFalseMBB);
F->insert(It, SinkMBB);
// Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
SinkMBB->splice(SinkMBB->begin(), ThisMBB,
std::next(MachineBasicBlock::iterator(MI)), ThisMBB->end());
SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
// Set the new successors for ThisMBB.
ThisMBB->addSuccessor(IfFalseMBB);
ThisMBB->addSuccessor(SinkMBB);
BuildMI(ThisMBB, dl, TII.get(BROpcode))
.addMBB(SinkMBB)
.addImm(CC);
// IfFalseMBB just falls through to SinkMBB.
IfFalseMBB->addSuccessor(SinkMBB);
// %Result = phi [ %TrueValue, ThisMBB ], [ %FalseValue, IfFalseMBB ]
BuildMI(*SinkMBB, SinkMBB->begin(), dl, TII.get(SP::PHI),
MI.getOperand(0).getReg())
.addReg(MI.getOperand(1).getReg())
.addMBB(ThisMBB)
.addReg(MI.getOperand(2).getReg())
.addMBB(IfFalseMBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return SinkMBB;
}
//===----------------------------------------------------------------------===//
// Sparc Inline Assembly Support
//===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SparcTargetLowering::ConstraintType
SparcTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 'r':
case 'f':
case 'e':
return C_RegisterClass;
case 'I': // SIMM13
return C_Immediate;
}
}
return TargetLowering::getConstraintType(Constraint);
}
TargetLowering::ConstraintWeight SparcTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'I': // SIMM13
if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
if (isInt<13>(C->getSExtValue()))
weight = CW_Constant;
}
break;
}
return weight;
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void SparcTargetLowering::
LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Only support length 1 constraints for now.
if (Constraint.length() > 1)
return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'I':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (isInt<13>(C->getSExtValue())) {
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
return;
}
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
std::pair<unsigned, const TargetRegisterClass *>
SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
if (Constraint.empty())
return std::make_pair(0U, nullptr);
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
if (VT == MVT::v2i32)
return std::make_pair(0U, &SP::IntPairRegClass);
else if (Subtarget->is64Bit())
return std::make_pair(0U, &SP::I64RegsRegClass);
else
return std::make_pair(0U, &SP::IntRegsRegClass);
case 'f':
if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &SP::FPRegsRegClass);
else if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &SP::LowDFPRegsRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SP::LowQFPRegsRegClass);
// This will generate an error message
return std::make_pair(0U, nullptr);
case 'e':
if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &SP::FPRegsRegClass);
else if (VT == MVT::f64 || VT == MVT::i64 )
return std::make_pair(0U, &SP::DFPRegsRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SP::QFPRegsRegClass);
// This will generate an error message
return std::make_pair(0U, nullptr);
}
}
if (Constraint.front() != '{')
return std::make_pair(0U, nullptr);
assert(Constraint.back() == '}' && "Not a brace enclosed constraint?");
StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
if (RegName.empty())
return std::make_pair(0U, nullptr);
unsigned long long RegNo;
// Handle numbered register aliases.
if (RegName[0] == 'r' &&
getAsUnsignedInteger(RegName.begin() + 1, 10, RegNo)) {
// r0-r7 -> g0-g7
// r8-r15 -> o0-o7
// r16-r23 -> l0-l7
// r24-r31 -> i0-i7
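// e.g. the constraint {r10} selects %o2.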
if (RegNo > 31)
return std::make_pair(0U, nullptr);
const char RegTypes[] = {'g', 'o', 'l', 'i'};
char RegType = RegTypes[RegNo / 8];
char RegIndex = '0' + (RegNo % 8);
char Tmp[] = {'{', RegType, RegIndex, '}', 0};
return getRegForInlineAsmConstraint(TRI, Tmp, VT);
}
// Rewrite the fN constraint according to the value type if needed.
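// e.g. an {f2} constraint used with an f64 value is rewritten to {d1},
// and {f4} with an f128 value to {q1}; register numbers that are not
// suitably aligned (odd for f64, not a multiple of 4 for f128) are
// rejected.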
if (VT != MVT::f32 && VT != MVT::Other && RegName[0] == 'f' &&
getAsUnsignedInteger(RegName.begin() + 1, 10, RegNo)) {
if (VT == MVT::f64 && (RegNo % 2 == 0)) {
return getRegForInlineAsmConstraint(
TRI, StringRef("{d" + utostr(RegNo / 2) + "}"), VT);
} else if (VT == MVT::f128 && (RegNo % 4 == 0)) {
return getRegForInlineAsmConstraint(
TRI, StringRef("{q" + utostr(RegNo / 4) + "}"), VT);
} else {
return std::make_pair(0U, nullptr);
}
}
auto ResultPair =
TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
if (!ResultPair.second)
return std::make_pair(0U, nullptr);
// Force the use of I64Regs over IntRegs for 64-bit values.
if (Subtarget->is64Bit() && VT == MVT::i64) {
assert(ResultPair.second == &SP::IntRegsRegClass &&
"Unexpected register class");
return std::make_pair(ResultPair.first, &SP::I64RegsRegClass);
}
return ResultPair;
}
bool
SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Sparc target isn't yet aware of offsets.
return false;
}
void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>& Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
RTLIB::Libcall libCall = RTLIB::UNKNOWN_LIBCALL;
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
// Custom lower only if it involves f128 or i64.
if (N->getOperand(0).getValueType() != MVT::f128
|| N->getValueType(0) != MVT::i64)
return;
libCall = ((N->getOpcode() == ISD::FP_TO_SINT)
? RTLIB::FPTOSINT_F128_I64
: RTLIB::FPTOUINT_F128_I64);
Results.push_back(LowerF128Op(SDValue(N, 0),
DAG,
getLibcallName(libCall),
1));
return;
case ISD::READCYCLECOUNTER: {
assert(Subtarget->hasLeonCycleCounter());
SDValue Lo = DAG.getCopyFromReg(N->getOperand(0), dl, SP::ASR23, MVT::i32);
SDValue Hi = DAG.getCopyFromReg(Lo, dl, SP::G0, MVT::i32);
SDValue Ops[] = { Lo, Hi };
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(N->getOperand(0));
return;
}
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
// Custom lower only if it involves f128 or i64.
if (N->getValueType(0) != MVT::f128
|| N->getOperand(0).getValueType() != MVT::i64)
return;
libCall = ((N->getOpcode() == ISD::SINT_TO_FP)
? RTLIB::SINTTOFP_I64_F128
: RTLIB::UINTTOFP_I64_F128);
Results.push_back(LowerF128Op(SDValue(N, 0),
DAG,
getLibcallName(libCall),
1));
return;
case ISD::LOAD: {
LoadSDNode *Ld = cast<LoadSDNode>(N);
// Custom handling only for i64: turn an i64 load into a v2i32 load
// and a bitcast.
if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64)
return;
SDLoc dl(N);
SDValue LoadRes = DAG.getExtLoad(
Ld->getExtensionType(), dl, MVT::v2i32, Ld->getChain(),
Ld->getBasePtr(), Ld->getPointerInfo(), MVT::v2i32,
Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags(),
Ld->getAAInfo());
SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes);
Results.push_back(Res);
Results.push_back(LoadRes.getValue(1));
return;
}
}
}
// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool SparcTargetLowering::useLoadStackGuardNode() const {
if (!Subtarget->isTargetLinux())
return TargetLowering::useLoadStackGuardNode();
return true;
}
// Override to avoid declaring the stack-guard global variable on Linux,
// where the guard is loaded via LOAD_STACK_GUARD instead.
void SparcTargetLowering::insertSSPDeclarations(Module &M) const {
if (!Subtarget->isTargetLinux())
return TargetLowering::insertSSPDeclarations(M);
}
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 2768bb20566a..16e4f2687054 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -1,217 +1,222 @@
//===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that Sparc uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
#define LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
#include "Sparc.h"
#include "llvm/CodeGen/TargetLowering.h"
namespace llvm {
class SparcSubtarget;
namespace SPISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
CMPICC, // Compare two GPR operands, set icc+xcc.
CMPFCC, // Compare two FP operands, set fcc.
BRICC, // Branch to dest on icc condition
BRXCC, // Branch to dest on xcc condition (64-bit only).
BRFCC, // Branch to dest on fcc condition
SELECT_ICC, // Select between two values using the current ICC flags.
SELECT_XCC, // Select between two values using the current XCC flags.
SELECT_FCC, // Select between two values using the current FCC flags.
Hi, Lo, // Hi/Lo operations, typically on a global address.
FTOI, // FP to Int within a FP register.
ITOF, // Int to FP within a FP register.
FTOX, // FP to Int64 within a FP register.
XTOF, // Int64 to FP within a FP register.
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
GLOBAL_BASE_REG, // Global base reg for PIC.
FLUSHW, // FLUSH register windows to stack.
TAIL_CALL, // Tail call
TLS_ADD, // For Thread Local Storage (TLS).
TLS_LD,
TLS_CALL,
LOAD_GDOP, // Load operation w/ gdop relocation.
};
}
class SparcTargetLowering : public TargetLowering {
const SparcSubtarget *Subtarget;
public:
SparcTargetLowering(const TargetMachine &TM, const SparcSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
bool useSoftFloat() const override;
/// computeKnownBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const override;
void LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
return MVT::i32;
}
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
return SP::I0;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
return SP::I1;
}
/// Override to support customized stack guard loading.
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerFormalArguments_32(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerFormalArguments_64(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const override;
SDValue LowerReturn_32(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerReturn_64(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const;
SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args, SDValue Arg,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerF128Op(SDValue Op, SelectionDAG &DAG,
const char *LibFuncName,
unsigned numArgs) const;
SDValue LowerF128Compare(SDValue LHS, SDValue RHS, unsigned &SPCC,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue PerformBITCASTCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue bitcastConstantFPToInt(ConstantFPSDNode *C, const SDLoc &DL,
SelectionDAG &DAG) const;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
bool IsEligibleForTailCallOptimization(CCState &CCInfo,
CallLoweringInfo &CLI,
MachineFunction &MF) const;
bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
return VT != MVT::f128;
}
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
// FIXME: We insert fences for each atomic operation and generate
// sub-optimal code for PSO/TSO. (Approximately nobody uses any
// mode but TSO, which makes this even more silly.)
return true;
}
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
void ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>& Results,
SelectionDAG &DAG) const override;
MachineBasicBlock *expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 6df0409256bb..6fc7b29c5b78 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1,2619 +1,2619 @@
//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken. When this is obviously the case, it marks read/write globals as
// constant, deletes variables that are only stored to, etc.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "globalopt"
STATISTIC(NumMarked , "Number of globals marked constant");
STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted , "Number of globals deleted");
STATISTIC(NumGlobUses , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
STATISTIC(NumInternalFunc, "Number of internal functions");
STATISTIC(NumColdCC, "Number of functions marked coldcc");
static cl::opt<bool>
EnableColdCCStressTest("enable-coldcc-stress-test",
cl::desc("Enable stress test of coldcc by adding "
"calling conv to all internal functions."),
cl::init(false), cl::Hidden);
static cl::opt<int> ColdCCRelFreq(
"coldcc-rel-freq", cl::Hidden, cl::init(2),
cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
"entry frequency, for a call site to be considered cold for enabling"
"coldcc"));
/// Is this global variable possibly used by a leak checker as a root? If so,
/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
// A global variable is a root if it is a pointer, or could plausibly contain
// a pointer. There are two challenges; one is that we could have a struct
// that has an inner member which is a pointer. We recurse through the type to
// detect these (up to a point). The other is that we may actually be a union
// of a pointer and another type, and so our LLVM type is an integer which
// gets converted into a pointer, or our type is an [i8 x #] with a pointer
// potentially contained here.
if (GV->hasPrivateLinkage())
return false;
SmallVector<Type *, 4> Types;
Types.push_back(GV->getValueType());
unsigned Limit = 20;
do {
Type *Ty = Types.pop_back_val();
switch (Ty->getTypeID()) {
default: break;
case Type::PointerTyID:
return true;
case Type::FixedVectorTyID:
case Type::ScalableVectorTyID:
if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
return true;
break;
case Type::ArrayTyID:
Types.push_back(cast<ArrayType>(Ty)->getElementType());
break;
case Type::StructTyID: {
StructType *STy = cast<StructType>(Ty);
if (STy->isOpaque()) return true;
for (StructType::element_iterator I = STy->element_begin(),
E = STy->element_end(); I != E; ++I) {
Type *InnerTy = *I;
if (isa<PointerType>(InnerTy)) return true;
if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
isa<VectorType>(InnerTy))
Types.push_back(InnerTy);
}
break;
}
}
if (--Limit == 0) return true;
} while (!Types.empty());
return false;
}
/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
static bool IsSafeComputationToRemove(
Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
do {
if (isa<Constant>(V))
return true;
if (!V->hasOneUse())
return false;
if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
isa<GlobalValue>(V))
return false;
if (isAllocationFn(V, GetTLI))
return true;
Instruction *I = cast<Instruction>(V);
if (I->mayHaveSideEffects())
return false;
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
if (!GEP->hasAllConstantIndices())
return false;
} else if (I->getNumOperands() != 1) {
return false;
}
V = I->getOperand(0);
} while (true);
}
/// This GV is a pointer root. Loop over all users of the global and clean up
/// any that obviously don't assign the global a dynamically allocated value.
static bool
CleanupPointerRootUsers(GlobalVariable *GV,
function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// A brief explanation of leak checkers. The goal is to find bugs where
// pointers are forgotten, causing an accumulating growth in memory
// usage over time. The common strategy for leak checkers is to explicitly
// allow the memory pointed to by globals at exit. This is popular because it
// also solves another problem where the main thread of a C++ program may shut
// down before other threads that are still expecting to use those globals. To
// handle that case, we expect the program may create a singleton and never
// destroy it.
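// For example, if a store into the global is the only use of a malloc
// result, both the store and the chain of computation producing the
// stored value (including the allocation itself) can be removed.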
bool Changed = false;
// If Dead[n].first is the only use of a malloc result, we can delete its
// chain of computation and the store to the global in Dead[n].second.
SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
// Constants can't be pointers to dynamically allocated memory.
for (User *U : llvm::make_early_inc_range(GV->users())) {
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
Value *V = SI->getValueOperand();
if (isa<Constant>(V)) {
Changed = true;
SI->eraseFromParent();
} else if (Instruction *I = dyn_cast<Instruction>(V)) {
if (I->hasOneUse())
Dead.push_back(std::make_pair(I, SI));
}
} else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
if (isa<Constant>(MSI->getValue())) {
Changed = true;
MSI->eraseFromParent();
} else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
if (I->hasOneUse())
Dead.push_back(std::make_pair(I, MSI));
}
} else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
if (MemSrc && MemSrc->isConstant()) {
Changed = true;
MTI->eraseFromParent();
} else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
if (I->hasOneUse())
Dead.push_back(std::make_pair(I, MTI));
}
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
if (CE->use_empty()) {
CE->destroyConstant();
Changed = true;
}
} else if (Constant *C = dyn_cast<Constant>(U)) {
if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
Dead.clear();
CleanupPointerRootUsers(GV, GetTLI);
return true;
}
}
}
for (int i = 0, e = Dead.size(); i != e; ++i) {
if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
Dead[i].second->eraseFromParent();
Instruction *I = Dead[i].first;
do {
if (isAllocationFn(I, GetTLI))
break;
Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
if (!J)
break;
I->eraseFromParent();
I = J;
} while (true);
I->eraseFromParent();
Changed = true;
}
}
return Changed;
}
/// We just marked GV constant. Loop over all users of the global, cleaning up
/// the obvious ones. This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft. This returns true if it made a change.
static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
const DataLayout &DL) {
Constant *Init = GV->getInitializer();
SmallVector<User *, 8> WorkList(GV->users());
SmallPtrSet<User *, 8> Visited;
bool Changed = false;
SmallVector<WeakTrackingVH> MaybeDeadInsts;
auto EraseFromParent = [&](Instruction *I) {
for (Value *Op : I->operands())
if (auto *OpI = dyn_cast<Instruction>(Op))
MaybeDeadInsts.push_back(OpI);
I->eraseFromParent();
Changed = true;
};
while (!WorkList.empty()) {
User *U = WorkList.pop_back_val();
if (!Visited.insert(U).second)
continue;
if (auto *BO = dyn_cast<BitCastOperator>(U))
append_range(WorkList, BO->users());
if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U))
append_range(WorkList, ASC->users());
else if (auto *GEP = dyn_cast<GEPOperator>(U))
append_range(WorkList, GEP->users());
else if (auto *LI = dyn_cast<LoadInst>(U)) {
// A load from a uniform value is always the same, regardless of any
// applied offset.
Type *Ty = LI->getType();
if (Constant *Res = ConstantFoldLoadFromUniformValue(Init, Ty)) {
LI->replaceAllUsesWith(Res);
EraseFromParent(LI);
continue;
}
Value *PtrOp = LI->getPointerOperand();
APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true);
if (PtrOp == GV) {
if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) {
LI->replaceAllUsesWith(Value);
EraseFromParent(LI);
}
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// Store must be unreachable or storing Init into the global.
EraseFromParent(SI);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
if (getUnderlyingObject(MI->getRawDest()) == GV)
EraseFromParent(MI);
}
}
Changed |=
RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
GV->removeDeadConstantUsers();
return Changed;
}
/// Look at all uses of the global and determine which (offset, type) pairs it
/// can be split into.
static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
const DataLayout &DL) {
SmallVector<Use *, 16> Worklist;
SmallPtrSet<Use *, 16> Visited;
auto AppendUses = [&](Value *V) {
for (Use &U : V->uses())
if (Visited.insert(&U).second)
Worklist.push_back(&U);
};
AppendUses(GV);
while (!Worklist.empty()) {
Use *U = Worklist.pop_back_val();
User *V = U->getUser();
auto *GEP = dyn_cast<GEPOperator>(V);
if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
(GEP && GEP->hasAllConstantIndices())) {
AppendUses(V);
continue;
}
if (Value *Ptr = getLoadStorePointerOperand(V)) {
// This is storing the global address into somewhere, not storing into
// the global.
if (isa<StoreInst>(V) && U->getOperandNo() == 0)
return false;
APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
/* AllowNonInbounds */ true);
if (Ptr != GV || Offset.getActiveBits() >= 64)
return false;
// TODO: We currently require that all accesses at a given offset must
// use the same type. This could be relaxed.
Type *Ty = getLoadStoreType(V);
auto It = Types.try_emplace(Offset.getZExtValue(), Ty).first;
if (Ty != It->second)
return false;
continue;
}
// Ignore dead constant users.
if (auto *C = dyn_cast<Constant>(V)) {
if (!isSafeToDestroyConstant(C))
return false;
continue;
}
// Unknown user.
return false;
}
return true;
}
/// Copy over the debug info for a variable to its SRA replacements.
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
uint64_t FragmentOffsetInBits,
uint64_t FragmentSizeInBits,
uint64_t VarSize) {
SmallVector<DIGlobalVariableExpression *, 1> GVs;
GV->getDebugInfo(GVs);
for (auto *GVE : GVs) {
DIVariable *Var = GVE->getVariable();
DIExpression *Expr = GVE->getExpression();
int64_t CurVarOffsetInBytes = 0;
uint64_t CurVarOffsetInBits = 0;
// Calculate the offset in bytes; continue if it is unknown.
if (!Expr->extractIfOffset(CurVarOffsetInBytes))
continue;
// Ignore negative offset.
if (CurVarOffsetInBytes < 0)
continue;
// Convert offset to bits.
CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes;
// Current var starts after the fragment, ignore.
if (CurVarOffsetInBits >= (FragmentOffsetInBits + FragmentSizeInBits))
continue;
uint64_t CurVarSize = Var->getType()->getSizeInBits();
// Current variable ends before start of fragment, ignore.
if (CurVarSize != 0 &&
(CurVarOffsetInBits + CurVarSize) <= FragmentOffsetInBits)
continue;
// Current variable fits in the fragment.
if (CurVarOffsetInBits == FragmentOffsetInBits &&
CurVarSize == FragmentSizeInBits)
Expr = DIExpression::get(Expr->getContext(), {});
// If the FragmentSize is smaller than the variable,
// emit a fragment expression.
else if (FragmentSizeInBits < VarSize) {
if (auto E = DIExpression::createFragmentExpression(
Expr, FragmentOffsetInBits, FragmentSizeInBits))
Expr = *E;
else
return;
}
auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
NGV->addDebugInfo(NGVE);
}
}
/// Perform scalar replacement of aggregates on the specified global variable.
/// This opens the door for other optimizations by exposing the behavior of the
/// program in a more fine-grained way. We have determined that this
/// transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
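/// As an illustrative case: an internal global of type { i32, i64 } whose
/// uses only ever load/store the i32 at offset 0 and the i64 at offset 8
/// is split into two globals, @g.0 (i32) and @g.1 (i64), each taking the
/// matching slice of the original initializer.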
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
assert(GV->hasLocalLinkage());
// Collect types to split into.
DenseMap<uint64_t, Type *> Types;
if (!collectSRATypes(Types, GV, DL) || Types.empty())
return nullptr;
// Make sure we don't SRA back to the same type.
if (Types.size() == 1 && Types.begin()->second == GV->getValueType())
return nullptr;
// Don't perform SRA if we would have to split into many globals.
if (Types.size() > 16)
return nullptr;
// Sort by offset.
SmallVector<std::pair<uint64_t, Type *>, 16> TypesVector;
append_range(TypesVector, Types);
sort(TypesVector, llvm::less_first());
// Check that the types are non-overlapping.
uint64_t Offset = 0;
for (const auto &Pair : TypesVector) {
// Overlaps with previous type.
if (Pair.first < Offset)
return nullptr;
Offset = Pair.first + DL.getTypeAllocSize(Pair.second);
}
// Some accesses go beyond the end of the global; don't bother.
if (Offset > DL.getTypeAllocSize(GV->getValueType()))
return nullptr;
// Collect initializers for new globals.
Constant *OrigInit = GV->getInitializer();
DenseMap<uint64_t, Constant *> Initializers;
for (const auto &Pair : Types) {
Constant *NewInit = ConstantFoldLoadFromConst(OrigInit, Pair.second,
APInt(64, Pair.first), DL);
if (!NewInit) {
LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
<< *GV << " with type " << *Pair.second << " at offset "
<< Pair.first << "\n");
return nullptr;
}
Initializers.insert({Pair.first, NewInit});
}
LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
// Get the alignment of the global, either explicit or target-specific.
Align StartAlignment =
DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType());
// Create replacement globals.
DenseMap<uint64_t, GlobalVariable *> NewGlobals;
unsigned NameSuffix = 0;
for (auto &Pair : TypesVector) {
uint64_t Offset = Pair.first;
Type *Ty = Pair.second;
GlobalVariable *NGV = new GlobalVariable(
*GV->getParent(), Ty, false, GlobalVariable::InternalLinkage,
Initializers[Offset], GV->getName() + "." + Twine(NameSuffix++), GV,
GV->getThreadLocalMode(), GV->getAddressSpace());
NGV->copyAttributesFrom(GV);
NewGlobals.insert({Offset, NGV});
// Calculate the known alignment of the field. If the original aggregate
// had 256-byte alignment, for example, something might depend on that:
// propagate info to each field.
Align NewAlign = commonAlignment(StartAlignment, Offset);
if (NewAlign > DL.getABITypeAlign(Ty))
NGV->setAlignment(NewAlign);
// Copy over the debug info for the variable.
transferSRADebugInfo(GV, NGV, Offset * 8, DL.getTypeAllocSizeInBits(Ty),
VarSize);
}
// Replace uses of the original global with uses of the new global.
SmallVector<Value *, 16> Worklist;
SmallPtrSet<Value *, 16> Visited;
SmallVector<WeakTrackingVH, 16> DeadInsts;
auto AppendUsers = [&](Value *V) {
for (User *U : V->users())
if (Visited.insert(U).second)
Worklist.push_back(U);
};
AppendUsers(GV);
while (!Worklist.empty()) {
Value *V = Worklist.pop_back_val();
if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
isa<GEPOperator>(V)) {
AppendUsers(V);
if (isa<Instruction>(V))
DeadInsts.push_back(V);
continue;
}
if (Value *Ptr = getLoadStorePointerOperand(V)) {
APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
/* AllowNonInbounds */ true);
assert(Ptr == GV && "Load/store must be from/to global");
GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()];
assert(NGV && "Must have replacement global for this offset");
// Update the pointer operand and recalculate alignment.
Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V));
Align NewAlign =
getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V));
if (auto *LI = dyn_cast<LoadInst>(V)) {
LI->setOperand(0, NGV);
LI->setAlignment(NewAlign);
} else {
auto *SI = cast<StoreInst>(V);
SI->setOperand(1, NGV);
SI->setAlignment(NewAlign);
}
continue;
}
assert(isa<Constant>(V) && isSafeToDestroyConstant(cast<Constant>(V)) &&
"Other users can only be dead constants");
}
// Delete old instructions and global.
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
GV->removeDeadConstantUsers();
GV->eraseFromParent();
++NumSRA;
assert(NewGlobals.size() > 0);
return NewGlobals.begin()->second;
}
/// Return true if all users of the specified value will trap if the value is
/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
/// reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
SmallPtrSetImpl<const PHINode*> &PHIs) {
for (const User *U : V->users()) {
if (const Instruction *I = dyn_cast<Instruction>(U)) {
// If null pointer is considered valid, then all uses are non-trapping.
// Non address-space 0 globals have already been pruned by the caller.
if (NullPointerIsDefined(I->getFunction()))
return false;
}
if (isa<LoadInst>(U)) {
// Will trap.
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == V) {
return false; // Storing the value.
}
} else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
if (CI->getCalledOperand() != V) {
return false; // Not calling the ptr
}
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
if (II->getCalledOperand() != V) {
return false; // Not calling the ptr
}
} else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
} else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
} else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
// If we've already seen this phi node, ignore it; it has already been
// checked.
if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
return false;
} else if (isa<ICmpInst>(U) &&
!ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) &&
isa<LoadInst>(U->getOperand(0)) &&
isa<ConstantPointerNull>(U->getOperand(1))) {
assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0))
->getPointerOperand()
->stripPointerCasts()) &&
"Should be GlobalVariable");
// This and only this kind of non-signed ICmpInst is to be replaced with
// the comparing of the value of the created global init bool later in
// optimizeGlobalAddressOfAllocation for the global variable.
} else {
return false;
}
}
return true;
}
/// Return true if all uses of any loads from GV will trap if the loaded value
/// is null. Note that this also permits comparisons of the loaded value
/// against null, as a special case.
static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
SmallVector<const Value *, 4> Worklist;
Worklist.push_back(GV);
while (!Worklist.empty()) {
const Value *P = Worklist.pop_back_val();
for (auto *U : P->users()) {
if (auto *LI = dyn_cast<LoadInst>(U)) {
SmallPtrSet<const PHINode *, 8> PHIs;
if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
return false;
} else if (auto *SI = dyn_cast<StoreInst>(U)) {
// Ignore stores to the global.
if (SI->getPointerOperand() != P)
return false;
} else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
if (CE->stripPointerCasts() != GV)
return false;
// Check further the ConstantExpr.
Worklist.push_back(CE);
} else {
// We don't know or understand this user, bail out.
return false;
}
}
}
return true;
}
/// Get all the load/store uses of global variable \p GV.
static void allUsesOfLoadAndStores(GlobalVariable *GV,
SmallVector<Value *, 4> &Uses) {
SmallVector<Value *, 4> Worklist;
Worklist.push_back(GV);
while (!Worklist.empty()) {
auto *P = Worklist.pop_back_val();
for (auto *U : P->users()) {
if (auto *CE = dyn_cast<ConstantExpr>(U)) {
Worklist.push_back(CE);
continue;
}
assert((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
"Expect only load or store instructions");
Uses.push_back(U);
}
}
}
static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
bool Changed = false;
for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
Instruction *I = cast<Instruction>(*UI++);
// Uses are non-trapping if null pointer is considered valid.
// Non address-space 0 globals are already pruned by the caller.
if (NullPointerIsDefined(I->getFunction()))
return false;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setOperand(0, NewV);
Changed = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (SI->getOperand(1) == V) {
SI->setOperand(1, NewV);
Changed = true;
}
} else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
CallBase *CB = cast<CallBase>(I);
if (CB->getCalledOperand() == V) {
// Calling through the pointer! Turn into a direct call, but be careful
// that the pointer is not also being passed as an argument.
CB->setCalledOperand(NewV);
Changed = true;
bool PassedAsArg = false;
for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
if (CB->getArgOperand(i) == V) {
PassedAsArg = true;
CB->setArgOperand(i, NewV);
}
if (PassedAsArg) {
// Being passed as an argument also. Be careful to not invalidate UI!
UI = V->user_begin();
}
}
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
Changed |= OptimizeAwayTrappingUsesOfValue(CI,
ConstantExpr::getCast(CI->getOpcode(),
NewV, CI->getType()));
if (CI->use_empty()) {
Changed = true;
CI->eraseFromParent();
}
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
// Should handle GEP here.
SmallVector<Constant*, 8> Idxs;
Idxs.reserve(GEPI->getNumOperands()-1);
for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
i != e; ++i)
if (Constant *C = dyn_cast<Constant>(*i))
Idxs.push_back(C);
else
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
Changed |= OptimizeAwayTrappingUsesOfValue(
GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
NewV, Idxs));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
}
}
}
return Changed;
}
/// The specified global has only one non-null value stored into it. If there
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// value, so we can optimize away the load.
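///
/// A minimal sketch on hypothetical IR: given
///   @g = internal global i32* null
/// whose only non-null store is "store i32* @buf, i32** @g", a trapping
/// user such as
///   %p = load i32*, i32** @g
///   %v = load i32, i32* %p
/// can be rewritten to use @buf directly, leaving the load of @g dead.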
static bool OptimizeAwayTrappingUsesOfLoads(
GlobalVariable *GV, Constant *LV, const DataLayout &DL,
function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
bool Changed = false;
// Keep track of whether we are able to remove all the uses of the global
// other than the store that defines it.
bool AllNonStoreUsesGone = true;
// Replace all uses of loads with uses of uses of the stored value.
for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
// If we were able to delete all uses of the load
if (LI->use_empty()) {
LI->eraseFromParent();
Changed = true;
} else {
AllNonStoreUsesGone = false;
}
} else if (isa<StoreInst>(GlobalUser)) {
// Ignore the store that stores "LV" to the global.
assert(GlobalUser->getOperand(1) == GV &&
"Must be storing *to* the global");
} else {
AllNonStoreUsesGone = false;
// If we get here we could have other crazy uses that are transitively
// loaded.
assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
isa<BitCastInst>(GlobalUser) ||
isa<GetElementPtrInst>(GlobalUser)) &&
"Only expect load and stores!");
}
}
if (Changed) {
LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
<< "\n");
++NumGlobUses;
}
// If we nuked all of the loads, then none of the stores are needed either,
// nor is the global.
if (AllNonStoreUsesGone) {
if (isLeakCheckerRoot(GV)) {
Changed |= CleanupPointerRootUsers(GV, GetTLI);
} else {
Changed = true;
CleanupConstantGlobalUsers(GV, DL);
}
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
Changed = true;
GV->eraseFromParent();
++NumDeleted;
}
}
return Changed;
}
/// Walk the use list of V, constant folding all of the instructions that are
/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
TargetLibraryInfo *TLI) {
for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
I->replaceAllUsesWith(NewC);
// Advance UI to the next non-I use to avoid invalidating it!
// Instructions could use V multiple times.
while (UI != E && *UI == I)
++UI;
if (isInstructionTriviallyDead(I, TLI))
I->eraseFromParent();
}
}
/// This function takes the specified global variable, and transforms the
/// program as if it always contained the result of the specified malloc.
/// Because it is always the result of the specified malloc, there is no reason
/// to actually DO the malloc. Instead, turn the malloc into a global, and
/// rewrite any loads of GV into uses of the new global.
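///
/// A sketch of the null-check handling on hypothetical IR (names invented):
/// a user such as
///   %p = load i8*, i8** @g
///   %c = icmp ne i8* %p, null
/// is rewritten to read a companion flag instead,
///   %c = load i1, i1* @g.init
/// where @g.init is kept only if such comparisons actually exist.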
static GlobalVariable *
OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
uint64_t AllocSize, Constant *InitVal,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI
<< '\n');
// Create global of type [AllocSize x i8].
Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()),
AllocSize);
// Create the new global variable. The contents of the allocated memory are
// undefined initially, so initialize with an undef value.
GlobalVariable *NewGV = new GlobalVariable(
*GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
GV->getThreadLocalMode());
// Initialize the global at the point of the original call. Note that this
// is a different point from the initialization referred to below for the
// nullability handling. Subtlety: we have not proven that the original
// global was only initialized once. As such, we cannot fold this into the
// initializer of the new global, as we may need to re-init the storage
// multiple times.
if (!isa<UndefValue>(InitVal)) {
IRBuilder<> Builder(CI->getNextNode());
// TODO: Use alignment above if align!=1
Builder.CreateMemSet(NewGV, InitVal, AllocSize, None);
}
// Update users of the allocation to use the new global instead.
BitCastInst *TheBC = nullptr;
while (!CI->use_empty()) {
Instruction *User = cast<Instruction>(CI->user_back());
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
if (BCI->getType() == NewGV->getType()) {
BCI->replaceAllUsesWith(NewGV);
BCI->eraseFromParent();
} else {
BCI->setOperand(0, NewGV);
}
} else {
if (!TheBC)
TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
User->replaceUsesOfWith(CI, TheBC);
}
}
SmallSetVector<Constant *, 1> RepValues;
RepValues.insert(NewGV);
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
GlobalVariable *InitBool =
new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
GV->getName()+".init", GV->getThreadLocalMode());
bool InitBoolUsed = false;
// Loop over all instruction uses of GV, processing them in turn.
SmallVector<Value *, 4> Guses;
allUsesOfLoadAndStores(GV, Guses);
for (auto *U : Guses) {
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// The global is initialized when the store to it occurs. If the stored
// value is the null value, the global bool is set to false; otherwise, true.
new StoreInst(ConstantInt::getBool(
GV->getContext(),
!isa<ConstantPointerNull>(SI->getValueOperand())),
InitBool, false, Align(1), SI->getOrdering(),
SI->getSyncScopeID(), SI);
SI->eraseFromParent();
continue;
}
LoadInst *LI = cast<LoadInst>(U);
while (!LI->use_empty()) {
Use &LoadUse = *LI->use_begin();
ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
if (!ICI) {
auto *CE = ConstantExpr::getBitCast(NewGV, LI->getType());
RepValues.insert(CE);
LoadUse.set(CE);
continue;
}
// Replace the cmp X, 0 with a use of the bool value.
Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
InitBool->getName() + ".val", false, Align(1),
LI->getOrdering(), LI->getSyncScopeID(), LI);
InitBoolUsed = true;
switch (ICI->getPredicate()) {
default: llvm_unreachable("Unknown ICmp Predicate!");
case ICmpInst::ICMP_ULT: // X < null -> always false
LV = ConstantInt::getFalse(GV->getContext());
break;
case ICmpInst::ICMP_UGE: // X >= null -> always true
LV = ConstantInt::getTrue(GV->getContext());
break;
case ICmpInst::ICMP_ULE:
case ICmpInst::ICMP_EQ:
LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
break; // no change.
}
ICI->replaceAllUsesWith(LV);
ICI->eraseFromParent();
}
LI->eraseFromParent();
}
// If the initialization boolean was used, insert it, otherwise delete it.
if (!InitBoolUsed) {
while (!InitBool->use_empty()) // Delete initializations
cast<StoreInst>(InitBool->user_back())->eraseFromParent();
delete InitBool;
} else
GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
// Now that GV is dead, nuke it and the allocation.
GV->eraseFromParent();
CI->eraseFromParent();
// To enable further optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
for (auto *CE : RepValues)
ConstantPropUsersOf(CE, DL, TLI);
return NewGV;
}
/// Scan the use-list of the allocation, checking that there are no complex
/// uses of the returned pointer. We permit simple things like dereferencing
/// the pointer, but not storing through the address, unless it is to the
/// specified global.
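///
/// For example (hypothetical IR): "store i8* %m, i8** @g" is acceptable,
/// while "store i8* %m, i8** @other" disqualifies the allocation, since the
/// pointer would escape into a second global.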
static bool
valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
const GlobalVariable *GV) {
SmallPtrSet<const Value *, 4> Visited;
SmallVector<const Value *, 4> Worklist;
Worklist.push_back(CI);
while (!Worklist.empty()) {
const Value *V = Worklist.pop_back_val();
if (!Visited.insert(V).second)
continue;
for (const Use &VUse : V->uses()) {
const User *U = VUse.getUser();
if (isa<LoadInst>(U) || isa<CmpInst>(U))
continue; // Fine, ignore.
if (auto *SI = dyn_cast<StoreInst>(U)) {
if (SI->getValueOperand() == V &&
SI->getPointerOperand()->stripPointerCasts() != GV)
return false; // Storing the pointer not into GV... bad.
continue; // Otherwise, storing through it, or storing into GV... fine.
}
if (auto *BCI = dyn_cast<BitCastInst>(U)) {
Worklist.push_back(BCI);
continue;
}
if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
Worklist.push_back(GEPI);
continue;
}
return false;
}
}
return true;
}
/// If we have a global that is only initialized with a fixed-size allocation,
/// try to transform the program to use global memory instead of
/// heap-allocated memory. This eliminates dynamic allocation, avoids an
/// indirection when accessing the data, and exposes the resultant global to
/// further GlobalOpt.
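///
/// For illustration (hypothetical IR), a typical candidate looks like
///   @g = internal global i8* null
///   %m = call i8* @malloc(i64 24)
///   store i8* %m, i8** @g
/// and is rewritten to use an internal global array as the storage instead.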
static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
CallInst *CI,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
if (!isRemovableAlloc(CI, TLI))
// Must be able to remove the call when we're done.
return false;
Type *Int8Ty = Type::getInt8Ty(CI->getFunction()->getContext());
Constant *InitVal = getInitialValueOfAllocation(CI, TLI, Int8Ty);
if (!InitVal)
// Must be able to emit a memset for initialization
return false;
uint64_t AllocSize;
if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts()))
return false;
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (AllocSize >= 2048)
return false;
// We can't optimize this global unless all uses of it are *known* to be
// of the malloc value, not of the null initializer value (consider a use
// that compares the global's value against zero to see if the malloc has
// been reached). To do this, we check to see if all uses of the global
// would trap if the global were null: this proves that they must all
// happen after the malloc.
if (!allUsesOfLoadedValueWillTrapIfNull(GV))
return false;
// We can't optimize this if the malloc itself is used in a complex way,
// for example, being stored into multiple globals. This check permits the
// malloc to be stored into the specified global, loaded, GEP'd, and icmp'd.
// These are all uses we can rewrite in terms of the new global.
if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
return false;
OptimizeGlobalAddressOfAllocation(GV, CI, AllocSize, InitVal, DL, TLI);
return true;
}
// Try to optimize globals based on the knowledge that only one value (besides
// its initializer) is ever stored to the global.
static bool
optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
const DataLayout &DL,
function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
// If we are dealing with a pointer global that is initialized to null and
// only has one (non-null) value stored into it, then we can optimize any
// users of the loaded value (often calls and loads) that would trap if the
// value was null.
if (GV->getInitializer()->getType()->isPointerTy() &&
GV->getInitializer()->isNullValue() &&
StoredOnceVal->getType()->isPointerTy() &&
!NullPointerIsDefined(
nullptr /* F */,
GV->getInitializer()->getType()->getPointerAddressSpace())) {
if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
if (GV->getInitializer()->getType() != SOVC->getType())
SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
return true;
} else if (isAllocationFn(StoredOnceVal, GetTLI)) {
if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) {
auto *TLI = &GetTLI(*CI->getFunction());
if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI))
return true;
}
}
}
return false;
}
/// At this point, we have learned that the only two values ever stored into GV
/// are its initializer and OtherVal. See if we can shrink the global into a
/// boolean and select between the two values whenever it is used. This exposes
/// the values to other scalar optimizations.
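///
/// Illustrative sketch (hypothetical IR): if @g is initialized to 0 and the
/// only other stored value is 42, each load becomes
///   %b = load i1, i1* @g.b
///   %v = select i1 %b, i32 42, i32 0
/// (a zext of %b suffices when the two values are exactly 0 and 1).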
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
Type *GVElType = GV->getValueType();
// If GVElType is already i1, it is already shrunk. If the type of the GV is
// an FP value, pointer or vector, don't do this optimization because a select
// between them is very expensive and unlikely to lead to later
// simplification. In these cases, we typically end up with "cond ? v1 : v2"
// where v1 and v2 both require constant pool loads, a big loss.
if (GVElType == Type::getInt1Ty(GV->getContext()) ||
GVElType->isFloatingPointTy() ||
GVElType->isPointerTy() || GVElType->isVectorTy())
return false;
// Walk the use list of the global, checking that all the uses are loads or
// stores. If there is anything else, bail out.
for (User *U : GV->users()) {
if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
return false;
if (getLoadStoreType(U) != GVElType)
return false;
}
LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");
// Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
false,
GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
NewGV->copyAttributesFrom(GV);
GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
Constant *InitVal = GV->getInitializer();
assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
"No reason to shrink to bool!");
SmallVector<DIGlobalVariableExpression *, 1> GVs;
GV->getDebugInfo(GVs);
// If initialized to zero and storing one into the global, we can use a cast
// instead of a select to synthesize the desired value.
bool IsOneZero = false;
bool EmitOneOrZero = true;
auto *CI = dyn_cast<ConstantInt>(OtherVal);
if (CI && CI->getValue().getActiveBits() <= 64) {
IsOneZero = InitVal->isNullValue() && CI->isOne();
auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
uint64_t ValInit = CIInit->getZExtValue();
uint64_t ValOther = CI->getZExtValue();
uint64_t ValMinus = ValOther - ValInit;
for(auto *GVe : GVs){
DIGlobalVariable *DGV = GVe->getVariable();
DIExpression *E = GVe->getExpression();
const DataLayout &DL = GV->getParent()->getDataLayout();
unsigned SizeInOctets =
DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8;
// It is expected that the address of the optimized global variable is
// on top of the DWARF expression stack. After optimization, the value
// of that variable will be either 0 (the initial value) or 1 (the
// other value). The following expression computes a constant integer
// from the value at the global object's address:
// val * (ValOther - ValInit) + ValInit:
// DW_OP_deref DW_OP_constu <ValMinus>
// DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
SmallVector<uint64_t, 12> Ops = {
dwarf::DW_OP_deref_size, SizeInOctets,
dwarf::DW_OP_constu, ValMinus,
dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
dwarf::DW_OP_plus};
bool WithStackValue = true;
E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
DIGlobalVariableExpression *DGVE =
DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
NewGV->addDebugInfo(DGVE);
}
EmitOneOrZero = false;
}
}
if (EmitOneOrZero) {
// FIXME: This will only emit the variable's address for the debugger;
// the location will only ever hold 0 or 1.
for(auto *GV : GVs)
NewGV->addDebugInfo(GV);
}
while (!GV->use_empty()) {
Instruction *UI = cast<Instruction>(GV->user_back());
if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
// Change the store into a boolean store.
bool StoringOther = SI->getOperand(0) == OtherVal;
// Only do this if we weren't storing a loaded value.
Value *StoreVal;
if (StoringOther || SI->getOperand(0) == InitVal) {
StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
StoringOther);
} else {
// Otherwise, we are storing a previously loaded copy. To do this,
// change the copy from copying the original value to just copying the
// bool.
Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
// If we've already replaced the input, StoredVal will be a cast or
// select instruction. If not, it will be a load of the original
// global.
if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
LI->getName() + ".b", false, Align(1),
LI->getOrdering(), LI->getSyncScopeID(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
"This is not a form that we understand!");
StoreVal = StoredVal->getOperand(0);
assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
}
}
StoreInst *NSI =
new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
SI->getSyncScopeID(), SI);
NSI->setDebugLoc(SI->getDebugLoc());
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
LI->getName() + ".b", false, Align(1),
LI->getOrdering(), LI->getSyncScopeID(), LI);
Instruction *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
else
NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
NSI->takeName(LI);
// Since LI is split into two instructions, NLI and NSI both inherit the
// same DebugLoc.
NLI->setDebugLoc(LI->getDebugLoc());
NSI->setDebugLoc(LI->getDebugLoc());
LI->replaceAllUsesWith(NSI);
}
UI->eraseFromParent();
}
// Retain the name of the old global variable. People who are debugging their
// programs may expect these variables to be named the same.
NewGV->takeName(GV);
GV->eraseFromParent();
return true;
}
static bool
deleteIfDead(GlobalValue &GV,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
function_ref<void(Function &)> DeleteFnCallback = nullptr) {
GV.removeDeadConstantUsers();
if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
return false;
if (const Comdat *C = GV.getComdat())
if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
return false;
bool Dead;
if (auto *F = dyn_cast<Function>(&GV))
Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
else
Dead = GV.use_empty();
if (!Dead)
return false;
LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
if (auto *F = dyn_cast<Function>(&GV)) {
if (DeleteFnCallback)
DeleteFnCallback(*F);
}
GV.eraseFromParent();
++NumDeleted;
return true;
}
static bool isPointerValueDeadOnEntryToFunction(
const Function *F, GlobalValue *GV,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
// Find all uses of GV. We expect them all to be in F, and if we can't
// identify all of the uses, we bail out.
//
// On each of these uses, identify if the memory that GV points to is
// used/required/live at the start of the function. If it is not, for example
// if the first thing the function does is store to the GV, the GV can
// possibly be demoted.
//
// We don't do an exhaustive search for memory operations - simply look
// through bitcasts as they're quite common and benign.
const DataLayout &DL = GV->getParent()->getDataLayout();
SmallVector<LoadInst *, 4> Loads;
SmallVector<StoreInst *, 4> Stores;
for (auto *U : GV->users()) {
if (Operator::getOpcode(U) == Instruction::BitCast) {
for (auto *UU : U->users()) {
if (auto *LI = dyn_cast<LoadInst>(UU))
Loads.push_back(LI);
else if (auto *SI = dyn_cast<StoreInst>(UU))
Stores.push_back(SI);
else
return false;
}
continue;
}
Instruction *I = dyn_cast<Instruction>(U);
if (!I)
return false;
assert(I->getParent()->getParent() == F);
if (auto *LI = dyn_cast<LoadInst>(I))
Loads.push_back(LI);
else if (auto *SI = dyn_cast<StoreInst>(I))
Stores.push_back(SI);
else
return false;
}
// We have identified all uses of GV into loads and stores. Now check if all
// of them are known not to depend on the value of the global at the function
// entry point. We do this by ensuring that every load is dominated by at
// least one store.
auto &DT = LookupDomTree(*const_cast<Function *>(F));
// The check below is quadratic; make sure we're not going to do too many
// tests.
// FIXME: Even though this will always have worst-case quadratic time, we
// could put effort into minimizing the average time by putting stores that
// have been shown to dominate at least one load at the beginning of the
// Stores array, making subsequent dominance checks more likely to succeed
// early.
//
// The threshold here is fairly large because global->local demotion is a
// very powerful optimization should it fire.
const unsigned Threshold = 100;
if (Loads.size() * Stores.size() > Threshold)
return false;
for (auto *L : Loads) {
auto *LTy = L->getType();
if (none_of(Stores, [&](const StoreInst *S) {
auto *STy = S->getValueOperand()->getType();
// The load is only dominated by the store if DomTree says so
// and the number of bits loaded in L is less than or equal to
// the number of bits stored in S.
return DT.dominates(S, L) &&
DL.getTypeStoreSize(LTy).getFixedSize() <=
DL.getTypeStoreSize(STy).getFixedSize();
}))
return false;
}
// All loads have known dependences inside F, so the global can be localized.
return true;
}
/// C may have non-instruction users. Can all of those users be turned into
/// instructions?
static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
// We don't do this exhaustively. The most common pattern that we really need
// to care about is a constant GEP or constant bitcast - so we just look
// through a single ConstantExpr.
//
// The set of constants that this function returns true for must be ones that
// makeAllConstantUsesInstructions can handle.
for (auto *U : C->users()) {
if (isa<Instruction>(U))
continue;
if (!isa<ConstantExpr>(U))
// Non-instruction, non-ConstantExpr user; cannot convert this.
return false;
for (auto *UU : U->users())
if (!isa<Instruction>(UU))
// A ConstantExpr used by another constant. We don't try to recurse any
// further; just bail out at this point.
return false;
}
return true;
}
/// C may have non-instruction users, and
/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
/// non-instruction users to instructions.
static void makeAllConstantUsesInstructions(Constant *C) {
SmallVector<ConstantExpr*,4> Users;
for (auto *U : C->users()) {
if (isa<ConstantExpr>(U))
Users.push_back(cast<ConstantExpr>(U));
else
// We should never get here; allNonInstructionUsersCanBeMadeInstructions
// should not have returned true for C.
assert(
isa<Instruction>(U) &&
"Can't transform non-constantexpr non-instruction to instruction!");
}
SmallVector<Value*,4> UUsers;
for (auto *U : Users) {
UUsers.clear();
append_range(UUsers, U->users());
for (auto *UU : UUsers) {
Instruction *UI = cast<Instruction>(UU);
Instruction *NewU = U->getAsInstruction(UI);
UI->replaceUsesOfWith(U, NewU);
}
// We've replaced all the uses, so destroy the constant. (destroyConstant
// will update value handles and metadata.)
U->destroyConstant();
}
}
// For a global variable with one store, if the store dominates any loads,
// those loads will always load the stored value (as opposed to the
// initializer), even in the presence of recursion.
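// Sketch on hypothetical IR: if the only store is "store i32 7, i32* @g" in
// @f, then any simple "load i32, i32* @g" in @f dominated by that store can
// be replaced by the constant 7.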
static bool forwardStoredOnceStore(
GlobalVariable *GV, const StoreInst *StoredOnceStore,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
// We can do this optimization for non-constants in nosync + norecurse
// functions, but globals used in exactly one norecurse function are already
// promoted to an alloca.
if (!isa<Constant>(StoredOnceValue))
return false;
const Function *F = StoredOnceStore->getFunction();
SmallVector<LoadInst *> Loads;
for (User *U : GV->users()) {
if (auto *LI = dyn_cast<LoadInst>(U)) {
if (LI->getFunction() == F &&
LI->getType() == StoredOnceValue->getType() && LI->isSimple())
Loads.push_back(LI);
}
}
// Only compute DT if we have any loads to examine.
bool MadeChange = false;
if (!Loads.empty()) {
auto &DT = LookupDomTree(*const_cast<Function *>(F));
for (auto *LI : Loads) {
if (DT.dominates(StoredOnceStore, LI)) {
LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
LI->eraseFromParent();
MadeChange = true;
}
}
}
return MadeChange;
}
/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
static bool
processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
auto &DL = GV->getParent()->getDataLayout();
// If this is a first class global and has only one accessing function and
// this function is non-recursive, we replace the global with a local alloca
// in this function.
//
// NOTE: It doesn't make sense to promote non-single-value types since we
// are just replacing static memory with stack memory.
//
// If the global is in a different address space, don't bring it onto the
// stack.
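//
// Sketch (hypothetical IR): @g = internal global i32 0, accessed only from
// a non-recursive function @f, becomes "%g = alloca i32" plus a store of
// the initializer at the top of @f's entry block.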
if (!GS.HasMultipleAccessingFunctions &&
GS.AccessingFunction &&
GV->getValueType()->isSingleValueType() &&
GV->getType()->getAddressSpace() == 0 &&
!GV->isExternallyInitialized() &&
allNonInstructionUsersCanBeMadeInstructions(GV) &&
GS.AccessingFunction->doesNotRecurse() &&
isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
LookupDomTree)) {
const DataLayout &DL = GV->getParent()->getDataLayout();
LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
Type *ElemTy = GV->getValueType();
// FIXME: Pass Global's alignment when globals have alignment
AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
makeAllConstantUsesInstructions(GV);
GV->replaceAllUsesWith(Alloca);
GV->eraseFromParent();
++NumLocalized;
return true;
}
bool Changed = false;
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.IsLoaded) {
LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
if (isLeakCheckerRoot(GV)) {
// Delete any constant stores to the global.
Changed = CleanupPointerRootUsers(GV, GetTLI);
} else {
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
Changed = CleanupConstantGlobalUsers(GV, DL);
}
// If the global is dead now, delete it.
if (GV->use_empty()) {
GV->eraseFromParent();
++NumDeleted;
Changed = true;
}
return Changed;
}
if (GS.StoredType <= GlobalStatus::InitializerStored) {
LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
// Don't actually mark a global constant if it's atomic because atomic loads
// are implemented by a trivial cmpxchg in some edge-cases and that usually
// requires write access to the variable even if it's not actually changed.
if (GS.Ordering == AtomicOrdering::NotAtomic) {
assert(!GV->isConstant() && "Expected a non-constant global");
GV->setConstant(true);
Changed = true;
}
// Clean up any obviously simplifiable users now.
Changed |= CleanupConstantGlobalUsers(GV, DL);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
<< "all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
return true;
}
// Fall through to the next check; see if we can optimize further.
++NumMarked;
}
if (!GV->getInitializer()->getType()->isSingleValueType()) {
const DataLayout &DL = GV->getParent()->getDataLayout();
if (SRAGlobal(GV, DL))
return true;
}
Value *StoredOnceValue = GS.getStoredOnceValue();
if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
Function &StoreFn =
const_cast<Function &>(*GS.StoredOnceStore->getFunction());
bool CanHaveNonUndefGlobalInitializer =
GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
GV->getType()->getAddressSpace());
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
// global. This allows us to mark it constant.
// This is restricted to address spaces that allow globals to have
// initializers. NVPTX, for example, does not support initializers for
// shared memory (AS 3).
auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
if (SOVConstant && isa<UndefValue>(GV->getInitializer()) &&
DL.getTypeAllocSize(SOVConstant->getType()) ==
DL.getTypeAllocSize(GV->getValueType()) &&
CanHaveNonUndefGlobalInitializer) {
if (SOVConstant->getType() == GV->getValueType()) {
// Change the initializer in place.
GV->setInitializer(SOVConstant);
} else {
// Create a new global with adjusted type.
auto *NGV = new GlobalVariable(
*GV->getParent(), SOVConstant->getType(), GV->isConstant(),
GV->getLinkage(), SOVConstant, "", GV, GV->getThreadLocalMode(),
GV->getAddressSpace());
NGV->takeName(GV);
NGV->copyAttributesFrom(GV);
GV->replaceAllUsesWith(ConstantExpr::getBitCast(NGV, GV->getType()));
GV->eraseFromParent();
GV = NGV;
}
// Clean up any obviously simplifiable users now.
CleanupConstantGlobalUsers(GV, DL);
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
<< "simplify all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
}
++NumSubstitute;
return true;
}
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
return true;
// Try to forward the store to any loads. If we have more than one store, we
// may have a store of the initializer between StoredOnceStore and a load.
if (GS.NumStores == 1)
if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean. Skip this optimization for address spaces that don't allow an
// initializer.
if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
(!isa<UndefValue>(GV->getInitializer()) ||
CanHaveNonUndefGlobalInitializer)) {
if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
++NumShrunkToBool;
return true;
}
}
}
return Changed;
}
/// Analyze the specified global variable and optimize it if possible. If we
/// make a change, return true.
static bool
processGlobal(GlobalValue &GV,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
if (GV.getName().startswith("llvm."))
return false;
GlobalStatus GS;
if (GlobalStatus::analyzeGlobal(&GV, GS))
return false;
bool Changed = false;
if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
: GlobalValue::UnnamedAddr::Local;
if (NewUnnamedAddr != GV.getUnnamedAddr()) {
GV.setUnnamedAddr(NewUnnamedAddr);
NumUnnamed++;
Changed = true;
}
}
// Do more involved optimizations if the global is internal.
if (!GV.hasLocalLinkage())
return Changed;
auto *GVar = dyn_cast<GlobalVariable>(&GV);
if (!GVar)
return Changed;
if (GVar->isConstant() || !GVar->hasInitializer())
return Changed;
return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
Changed;
}
/// Walk all of the direct calls of the specified function, changing them to
/// FastCC.
static void ChangeCalleesToFastCall(Function *F) {
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
}
}
static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
Attribute::AttrKind A) {
unsigned AttrIndex;
if (Attrs.hasAttrSomewhere(A, &AttrIndex))
return Attrs.removeAttributeAtIndex(C, AttrIndex, A);
return Attrs;
}
static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
CallBase *CB = cast<CallBase>(U);
CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
}
}
/// Return true if this is a calling convention that we'd like to change. The
/// idea here is that we don't want to mess with the convention if the user
/// explicitly requested something with performance implications like coldcc,
/// GHC, or anyregcc.
static bool hasChangeableCC(Function *F) {
CallingConv::ID CC = F->getCallingConv();
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
return false;
// FIXME: Change CC for the whole chain of musttail calls when possible.
//
// Can't change the CC of a function that either has musttail calls or is
// a musttail callee itself.
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
CallInst* CI = dyn_cast<CallInst>(U);
if (!CI)
continue;
if (CI->isMustTailCall())
return false;
}
for (BasicBlock &BB : *F)
if (BB.getTerminatingMustTailCall())
return false;
return true;
}
/// Return true if the block containing the call site has a BlockFrequency of
/// less than ColdCCRelFreq% of the entry block.
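/// For example, if ColdCCRelFreq were 2, a call site would qualify only when
/// its block frequency is below 2% of the caller's entry block frequency.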
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
const BranchProbability ColdProb(ColdCCRelFreq, 100);
auto *CallSiteBB = CB.getParent();
auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
auto CallerEntryFreq =
CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
return CallSiteFreq < CallerEntryFreq * ColdProb;
}
// This function checks if the input function F is cold at all call sites. It
// also looks at each call site's containing function, returning false if the
// caller contains other non-cold calls. The input vector AllCallsCold
// contains a list of functions that only have call sites in cold blocks.
static bool
isValidCandidateForColdCC(Function &F,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
const std::vector<Function *> &AllCallsCold) {
if (F.user_empty())
return false;
for (User *U : F.users()) {
if (isa<BlockAddress>(U))
continue;
CallBase &CB = cast<CallBase>(*U);
Function *CallerFunc = CB.getParent()->getParent();
BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
if (!isColdCallSite(CB, CallerBFI))
return false;
if (!llvm::is_contained(AllCallsCold, CallerFunc))
return false;
}
return true;
}
static void changeCallSitesToColdCC(Function *F) {
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
}
}
// This function iterates over all the call instructions in the input Function
// and checks that all call sites are in cold blocks and are allowed to use the
// coldcc calling convention.
static bool
hasOnlyColdCalls(Function &F,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
// Skip over inline asm instructions since they aren't function calls.
if (CI->isInlineAsm())
continue;
Function *CalledFn = CI->getCalledFunction();
if (!CalledFn)
return false;
if (!CalledFn->hasLocalLinkage())
return false;
// Skip over intrinsics since they won't remain as function calls.
if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
continue;
// Check if it's valid to use coldcc calling convention.
if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
CalledFn->hasAddressTaken())
return false;
BlockFrequencyInfo &CallerBFI = GetBFI(F);
if (!isColdCallSite(*CI, CallerBFI))
return false;
}
}
}
return true;
}
static bool hasMustTailCallers(Function *F) {
for (User *U : F->users()) {
CallBase *CB = dyn_cast<CallBase>(U);
if (!CB) {
assert(isa<BlockAddress>(U) &&
"Expected either CallBase or BlockAddress");
continue;
}
if (CB->isMustTailCall())
return true;
}
return false;
}
static bool hasInvokeCallers(Function *F) {
for (User *U : F->users())
if (isa<InvokeInst>(U))
return true;
return false;
}
static void RemovePreallocated(Function *F) {
RemoveAttribute(F, Attribute::Preallocated);
auto *M = F->getParent();
IRBuilder<> Builder(M->getContext());
// Cannot modify users() while iterating over it, so make a copy.
SmallVector<User *, 4> PreallocatedCalls(F->users());
for (User *U : PreallocatedCalls) {
CallBase *CB = dyn_cast<CallBase>(U);
if (!CB)
continue;
assert(
!CB->isMustTailCall() &&
"Shouldn't call RemotePreallocated() on a musttail preallocated call");
// Create copy of call without "preallocated" operand bundle.
SmallVector<OperandBundleDef, 1> OpBundles;
CB->getOperandBundlesAsDefs(OpBundles);
CallBase *PreallocatedSetup = nullptr;
for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
if (It->getTag() == "preallocated") {
PreallocatedSetup = cast<CallBase>(*It->input_begin());
OpBundles.erase(It);
break;
}
}
assert(PreallocatedSetup && "Did not find preallocated bundle");
uint64_t ArgCount =
cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
"Unknown indirect call type");
CallBase *NewCB = CallBase::Create(CB, OpBundles, CB);
CB->replaceAllUsesWith(NewCB);
NewCB->takeName(CB);
CB->eraseFromParent();
Builder.SetInsertPoint(PreallocatedSetup);
auto *StackSave =
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave));
Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
StackSave);
// Replace @llvm.call.preallocated.arg() with alloca.
// Cannot modify users() while iterating over it, so make a copy.
// @llvm.call.preallocated.arg() can be called with the same index multiple
// times. So for each @llvm.call.preallocated.arg(), we see if we have
// already created a Value* for the index, and if not, create an alloca and
// bitcast right after the @llvm.call.preallocated.setup() so that it
// dominates all uses.
SmallVector<Value *, 2> ArgAllocas(ArgCount);
SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
for (auto *User : PreallocatedArgs) {
auto *UseCall = cast<CallBase>(User);
assert(UseCall->getCalledFunction()->getIntrinsicID() ==
Intrinsic::call_preallocated_arg &&
"preallocated token use was not a llvm.call.preallocated.arg");
uint64_t AllocArgIndex =
cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
if (!AllocaReplacement) {
auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
auto *ArgType =
UseCall->getFnAttr(Attribute::Preallocated).getValueAsType();
auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
Builder.SetInsertPoint(InsertBefore);
auto *Alloca =
Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
auto *BitCast = Builder.CreateBitCast(
Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName());
ArgAllocas[AllocArgIndex] = BitCast;
AllocaReplacement = BitCast;
}
UseCall->replaceAllUsesWith(AllocaReplacement);
UseCall->eraseFromParent();
}
// Remove @llvm.call.preallocated.setup().
cast<Instruction>(PreallocatedSetup)->eraseFromParent();
}
}
static bool
OptimizeFunctions(Module &M,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
function_ref<void(Function &F)> ChangedCFGCallback,
function_ref<void(Function &F)> DeleteFnCallback) {
bool Changed = false;
std::vector<Function *> AllCallsCold;
for (Function &F : llvm::make_early_inc_range(M))
if (hasOnlyColdCalls(F, GetBFI))
AllCallsCold.push_back(&F);
// Optimize functions.
for (Function &F : llvm::make_early_inc_range(M)) {
// Don't perform global opt pass on naked functions; we don't want fast
// calling conventions for naked functions.
if (F.hasFnAttribute(Attribute::Naked))
continue;
// Functions without names cannot be referenced outside this module.
if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
F.setLinkage(GlobalValue::InternalLinkage);
if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) {
Changed = true;
continue;
}
// LLVM's definition of dominance allows instructions that are cyclic
// in unreachable blocks, e.g.:
// %pat = select i1 %condition, @global, i16* %pat
// because any instruction dominates an instruction in a block that's
// not reachable from entry.
// So, remove unreachable blocks from the function, because a) there's
// no point in analyzing them and b) GlobalOpt should otherwise grow
// some more complicated logic to break these cycles.
// Notify the analysis manager that we've modified the function's CFG.
if (!F.isDeclaration()) {
if (removeUnreachableBlocks(F)) {
Changed = true;
ChangedCFGCallback(F);
}
}
Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree);
if (!F.hasLocalLinkage())
continue;
// If we have an inalloca parameter that we can safely remove the
// inalloca attribute from, do so. This unlocks optimizations that
// wouldn't be safe in the presence of inalloca.
// FIXME: We should also hoist allocas affected by this to the entry
// block if possible.
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
- !F.hasAddressTaken() && !hasMustTailCallers(&F)) {
+ !F.hasAddressTaken() && !hasMustTailCallers(&F) && !F.isVarArg()) {
RemoveAttribute(&F, Attribute::InAlloca);
Changed = true;
}
// FIXME: handle invokes
// FIXME: handle musttail
if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
if (!F.hasAddressTaken() && !hasMustTailCallers(&F) &&
!hasInvokeCallers(&F)) {
RemovePreallocated(&F);
Changed = true;
}
continue;
}
if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
NumInternalFunc++;
TargetTransformInfo &TTI = GetTTI(F);
// Change the calling convention to coldcc if either stress testing is
// enabled or the target would like to use coldcc on functions which are
// cold at all call sites and the callers contain no other non coldcc
// calls.
if (EnableColdCCStressTest ||
(TTI.useColdCCForColdCall(F) &&
isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
F.setCallingConv(CallingConv::Cold);
changeCallSitesToColdCC(&F);
Changed = true;
NumColdCC++;
}
}
if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
// If this function has a calling convention worth changing, is not a
// varargs function, and is only called directly, promote it to use the
// Fast calling convention.
F.setCallingConv(CallingConv::Fast);
ChangeCalleesToFastCall(&F);
++NumFastCallFns;
Changed = true;
}
if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
!F.hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
RemoveAttribute(&F, Attribute::Nest);
++NumNestRemoved;
Changed = true;
}
}
return Changed;
}
static bool
OptimizeGlobalVars(Module &M,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// Global variables without names cannot be referenced outside this module.
if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage())
GV.setLinkage(GlobalValue::InternalLinkage);
// Simplify the initializer.
if (GV.hasInitializer())
if (auto *C = dyn_cast<Constant>(GV.getInitializer())) {
auto &DL = M.getDataLayout();
// TLI is not used in the case of a Constant, so use default nullptr
// for that optional parameter, since we don't have a Function to
// provide GetTLI anyway.
Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
if (New != C)
GV.setInitializer(New);
}
if (deleteIfDead(GV, NotDiscardableComdats)) {
Changed = true;
continue;
}
Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
}
return Changed;
}
/// Evaluate static constructors in the function, if we can. Return true if we
/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
TargetLibraryInfo *TLI) {
// Skip external functions.
if (F->isDeclaration())
return false;
// Call the function.
Evaluator Eval(DL, TLI);
Constant *RetValDummy;
bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
SmallVector<Constant*, 0>());
if (EvalSuccess) {
++NumCtorsEvaluated;
// We succeeded at evaluation: commit the result.
auto NewInitializers = Eval.getMutatedInitializers();
LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
<< F->getName() << "' to " << NewInitializers.size()
<< " stores.\n");
for (const auto &Pair : NewInitializers)
Pair.first->setInitializer(Pair.second);
for (GlobalVariable *GV : Eval.getInvariants())
GV->setConstant(true);
}
return EvalSuccess;
}
static int compareNames(Constant *const *A, Constant *const *B) {
Value *AStripped = (*A)->stripPointerCasts();
Value *BStripped = (*B)->stripPointerCasts();
return AStripped->getName().compare(BStripped->getName());
}
static void setUsedInitializer(GlobalVariable &V,
const SmallPtrSetImpl<GlobalValue *> &Init) {
if (Init.empty()) {
V.eraseFromParent();
return;
}
// Type of pointer to the array of pointers.
PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
SmallVector<Constant *, 8> UsedArray;
for (GlobalValue *GV : Init) {
Constant *Cast
= ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
UsedArray.push_back(Cast);
}
// Sort to get deterministic order.
array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
Module *M = V.getParent();
V.removeFromParent();
GlobalVariable *NV =
new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
ConstantArray::get(ATy, UsedArray), "");
NV->takeName(&V);
NV->setSection("llvm.metadata");
delete &V;
}
namespace {
/// An easy to access representation of llvm.used and llvm.compiler.used.
class LLVMUsed {
SmallPtrSet<GlobalValue *, 4> Used;
SmallPtrSet<GlobalValue *, 4> CompilerUsed;
GlobalVariable *UsedV;
GlobalVariable *CompilerUsedV;
public:
LLVMUsed(Module &M) {
SmallVector<GlobalValue *, 4> Vec;
UsedV = collectUsedGlobalVariables(M, Vec, false);
Used = {Vec.begin(), Vec.end()};
Vec.clear();
CompilerUsedV = collectUsedGlobalVariables(M, Vec, true);
CompilerUsed = {Vec.begin(), Vec.end()};
}
using iterator = SmallPtrSet<GlobalValue *, 4>::iterator;
using used_iterator_range = iterator_range<iterator>;
iterator usedBegin() { return Used.begin(); }
iterator usedEnd() { return Used.end(); }
used_iterator_range used() {
return used_iterator_range(usedBegin(), usedEnd());
}
iterator compilerUsedBegin() { return CompilerUsed.begin(); }
iterator compilerUsedEnd() { return CompilerUsed.end(); }
used_iterator_range compilerUsed() {
return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
}
bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
bool compilerUsedCount(GlobalValue *GV) const {
return CompilerUsed.count(GV);
}
bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
bool compilerUsedInsert(GlobalValue *GV) {
return CompilerUsed.insert(GV).second;
}
void syncVariablesAndSets() {
if (UsedV)
setUsedInitializer(*UsedV, Used);
if (CompilerUsedV)
setUsedInitializer(*CompilerUsedV, CompilerUsed);
}
};
} // end anonymous namespace
static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
if (GA.use_empty()) // No use at all.
return false;
assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
"We should have removed the duplicated "
"element from llvm.compiler.used");
if (!GA.hasOneUse())
// Strictly more than one use. So at least one use is in neither llvm.used
// nor llvm.compiler.used.
return true;
// Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
}
static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
const LLVMUsed &U) {
unsigned N = 2;
assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
"We should have removed the duplicated "
"element from llvm.compiler.used");
if (U.usedCount(&V) || U.compilerUsedCount(&V))
++N;
return V.hasNUsesOrMore(N);
}
static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
if (!GA.hasLocalLinkage())
return true;
return U.usedCount(&GA) || U.compilerUsedCount(&GA);
}
static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
bool &RenameTarget) {
RenameTarget = false;
bool Ret = false;
if (hasUseOtherThanLLVMUsed(GA, U))
Ret = true;
// If the alias is externally visible, we may still be able to simplify it.
if (!mayHaveOtherReferences(GA, U))
return Ret;
// If the aliasee has internal linkage, give it the name and linkage
// of the alias, and delete the alias. This turns:
// define internal ... @f(...)
// @a = alias ... @f
// into:
// define ... @a(...)
Constant *Aliasee = GA.getAliasee();
GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
if (!Target->hasLocalLinkage())
return Ret;
// Do not perform the transform if multiple aliases potentially target the
// aliasee. This check also ensures that it is safe to replace the section
// and other attributes of the aliasee with those of the alias.
if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
return Ret;
RenameTarget = true;
return true;
}
static bool
OptimizeGlobalAliases(Module &M,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
LLVMUsed Used(M);
for (GlobalValue *GV : Used.used())
Used.compilerUsedErase(GV);
// Return whether GV is explicitly or implicitly dso_local and not replaceable
// by another definition in the current linkage unit.
auto IsModuleLocal = [](GlobalValue &GV) {
return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
(GV.isDSOLocal() || GV.isImplicitDSOLocal());
};
for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
// Aliases without names cannot be referenced outside this module.
if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
J.setLinkage(GlobalValue::InternalLinkage);
if (deleteIfDead(J, NotDiscardableComdats)) {
Changed = true;
continue;
}
// If the alias can change at link time, nothing can be done - bail out.
if (!IsModuleLocal(J))
continue;
Constant *Aliasee = J.getAliasee();
GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
// We can't trivially replace the alias with the aliasee if the aliasee is
// non-trivial in some way. We also can't replace the alias with the aliasee
// if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
// alias can be used to access the definition as if preemption did not
// happen.
// TODO: Try to handle non-zero GEPs of local aliasees.
if (!Target || !IsModuleLocal(*Target))
continue;
Target->removeDeadConstantUsers();
// Make all users of the alias use the aliasee instead.
bool RenameTarget;
if (!hasUsesToReplace(J, Used, RenameTarget))
continue;
J.replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J.getType()));
++NumAliasesResolved;
Changed = true;
if (RenameTarget) {
// Give the aliasee the name, linkage and other attributes of the alias.
Target->takeName(&J);
Target->setLinkage(J.getLinkage());
Target->setDSOLocal(J.isDSOLocal());
Target->setVisibility(J.getVisibility());
Target->setDLLStorageClass(J.getDLLStorageClass());
if (Used.usedErase(&J))
Used.usedInsert(Target);
if (Used.compilerUsedErase(&J))
Used.compilerUsedInsert(Target);
} else if (mayHaveOtherReferences(J, Used))
continue;
// Delete the alias.
M.getAliasList().erase(&J);
++NumAliasesRemoved;
Changed = true;
}
Used.syncVariablesAndSets();
return Changed;
}
static Function *
FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// Hack to get a default TLI before we have an actual Function.
auto FuncIter = M.begin();
if (FuncIter == M.end())
return nullptr;
auto *TLI = &GetTLI(*FuncIter);
LibFunc F = LibFunc_cxa_atexit;
if (!TLI->has(F))
return nullptr;
Function *Fn = M.getFunction(TLI->getName(F));
if (!Fn)
return nullptr;
// Now get the actual TLI for Fn.
TLI = &GetTLI(*Fn);
// Make sure that the function has the correct prototype.
if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
return nullptr;
return Fn;
}
/// Returns whether the given function is an empty C++ destructor and can
/// therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code, so we simply check for 'ret'.
static bool cxxDtorIsEmpty(const Function &Fn) {
// FIXME: We could eliminate C++ destructors if they're readonly/readnone and
// nounwind, but that doesn't seem worth doing.
if (Fn.isDeclaration())
return false;
for (auto &I : Fn.getEntryBlock()) {
if (I.isDebugOrPseudoInst())
continue;
if (isa<ReturnInst>(I))
return true;
break;
}
return false;
}
static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
/// Itanium C++ ABI p3.3.5:
///
/// After constructing a global (or local static) object, that will require
/// destruction on exit, a termination function is registered as follows:
///
/// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
///
/// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
/// call f(p) when DSO d is unloaded, before all such termination calls
/// registered before this one. It returns zero if registration is
/// successful, nonzero on failure.
// This pass will look for calls to __cxa_atexit where the function is trivial
// and remove them.
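// For example (hypothetical IR), a registration such as
//   %r = call i32 @__cxa_atexit(void (i8*)* @empty_dtor, i8* @obj,
//                               i8* @__dso_handle)
// where @empty_dtor immediately returns is erased, and %r is folded to 0.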
bool Changed = false;
for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
// We're only interested in calls. Theoretically, we could handle invoke
// instructions as well, but neither llvm-gcc nor clang generate invokes
// to __cxa_atexit.
CallInst *CI = dyn_cast<CallInst>(U);
if (!CI)
continue;
Function *DtorFn =
dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
continue;
// Just remove the call.
CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
CI->eraseFromParent();
++NumCXXDtorsRemoved;
Changed |= true;
}
return Changed;
}
static bool
optimizeGlobalsInModule(Module &M, const DataLayout &DL,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
function_ref<void(Function &F)> ChangedCFGCallback,
function_ref<void(Function &F)> DeleteFnCallback) {
SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
bool Changed = false;
bool LocalChange = true;
Optional<uint32_t> FirstNotFullyEvaluatedPriority;
while (LocalChange) {
LocalChange = false;
NotDiscardableComdats.clear();
for (const GlobalVariable &GV : M.globals())
if (const Comdat *C = GV.getComdat())
if (!GV.isDiscardableIfUnused() || !GV.use_empty())
NotDiscardableComdats.insert(C);
for (Function &F : M)
if (const Comdat *C = F.getComdat())
if (!F.isDefTriviallyDead())
NotDiscardableComdats.insert(C);
for (GlobalAlias &GA : M.aliases())
if (const Comdat *C = GA.getComdat())
if (!GA.isDiscardableIfUnused() || !GA.use_empty())
NotDiscardableComdats.insert(C);
// Delete functions that are trivially dead, ccc -> fastcc
LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
NotDiscardableComdats, ChangedCFGCallback,
DeleteFnCallback);
// Optimize global_ctors list.
LocalChange |=
optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) {
if (FirstNotFullyEvaluatedPriority &&
*FirstNotFullyEvaluatedPriority != Priority)
return false;
bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F));
if (!Evaluated)
FirstNotFullyEvaluatedPriority = Priority;
return Evaluated;
});
// Optimize non-address-taken globals.
LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
NotDiscardableComdats);
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
// Try to remove trivial global destructors if they are not removed
// already.
Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
if (CXAAtExitFn)
LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
Changed |= LocalChange;
}
// TODO: Move all global ctors functions to the end of the module for code
// layout.
return Changed;
}
PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &DL = M.getDataLayout();
auto &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
return FAM.getResult<DominatorTreeAnalysis>(F);
};
auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
return FAM.getResult<TargetLibraryAnalysis>(F);
};
auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
return FAM.getResult<BlockFrequencyAnalysis>(F);
};
auto ChangedCFGCallback = [&FAM](Function &F) {
FAM.invalidate(F, PreservedAnalyses::none());
};
auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); };
if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
ChangedCFGCallback, DeleteFnCallback))
return PreservedAnalyses::all();
PreservedAnalyses PA = PreservedAnalyses::none();
// We made sure to clear analyses for deleted functions.
PA.preserve<FunctionAnalysisManagerModuleProxy>();
// The only place we modify the CFG is when calling
// removeUnreachableBlocks(), but there we make sure to invalidate analyses
// for modified functions.
PA.preserveSet<CFGAnalyses>();
return PA;
}
namespace {
struct GlobalOptLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
GlobalOptLegacyPass() : ModulePass(ID) {
initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
auto &DL = M.getDataLayout();
auto LookupDomTree = [this](Function &F) -> DominatorTree & {
return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
};
auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
};
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
};
auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
auto ChangedCFGCallback = [&LookupDomTree](Function &F) {
auto &DT = LookupDomTree(F);
DT.recalculate(F);
};
return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
ChangedCFGCallback, nullptr);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
}
};
} // end anonymous namespace
char GlobalOptLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
"Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
"Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() {
return new GlobalOptLegacyPass();
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bc01d2ef7fe2..52596b30494f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1,3632 +1,3636 @@
//===- InstCombineCalls.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
using namespace llvm;
using namespace PatternMatch;
STATISTIC(NumSimplified, "Number of library calls simplified");
static cl::opt<unsigned> GuardWideningWindow(
"instcombine-guard-widening-window",
cl::init(3),
cl::desc("How wide an instruction window to bypass looking for "
"another guard"));
namespace llvm {
/// Enable preservation of attributes in assumes, e.g.:
/// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
extern cl::opt<bool> EnableKnowledgeRetention;
} // namespace llvm
/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
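/// Integer types narrower than 32 bits (e.g. i8, i16) are promoted to i32;
/// everything else is returned unchanged.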
static Type *getPromotedType(Type *Ty) {
if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
if (ITy->getBitWidth() < 32)
return Type::getInt32Ty(Ty->getContext());
}
return Ty;
}
/// Recognize a memcpy/memmove from an otherwise trivially unused alloca.
/// TODO: This should probably be integrated with visitAllocSites, but that
/// requires a deeper change to allow either unread or unwritten objects.
static bool hasUndefSource(AnyMemTransferInst *MI) {
auto *Src = MI->getRawSource();
while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
if (!Src->hasOneUse())
return false;
Src = cast<Instruction>(Src)->getOperand(0);
}
return isa<AllocaInst>(Src) && Src->hasOneUse();
}
Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
MaybeAlign CopyDstAlign = MI->getDestAlign();
if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
MI->setDestAlignment(DstAlign);
return MI;
}
Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
MaybeAlign CopySrcAlign = MI->getSourceAlign();
if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
MI->setSourceAlignment(SrcAlign);
return MI;
}
// If we have a store to a location which is known constant, we can conclude
// that the store must be storing the constant value (else the memory
// wouldn't be constant), and this must be a noop.
if (AA->pointsToConstantMemory(MI->getDest())) {
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
return MI;
}
// If the source is provably undef, the memcpy/memmove doesn't do anything
// (unless the transfer is volatile).
if (hasUndefSource(MI) && !MI->isVolatile()) {
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
return MI;
}
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
if (!MemOpLength) return nullptr;
// Source and destination pointer types are always "i8*" for the intrinsic. See
// if the size is something we can handle with a single primitive load/store.
// A single load+store correctly handles overlapping memory in the memmove
// case.
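// For example (illustrative), a 4-byte copy becomes:
//   %v = load i32, i32* %src
//   store i32 %v, i32* %dst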
uint64_t Size = MemOpLength->getLimitedValue();
assert(Size && "0-sized memory transfers should be removed already.");
if (Size > 8 || (Size&(Size-1)))
return nullptr; // If not 1/2/4/8 bytes, exit.
// If it is an atomic transfer and the alignment is less than the size, we
// would introduce an unaligned memory access, which CodeGen would later
// turn into a libcall. That is no evident performance gain, so disable the
// transform for now.
if (isa<AtomicMemTransferInst>(MI))
if (*CopyDstAlign < Size || *CopySrcAlign < Size)
return nullptr;
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
unsigned DstAddrSp =
cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
// If the memcpy has metadata describing the members, see if we can get the
// TBAA tag describing our copy.
MDNode *CopyMD = nullptr;
if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
CopyMD = M;
} else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
if (M->getNumOperands() == 3 && M->getOperand(0) &&
mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
M->getOperand(1) &&
mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
Size &&
M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
CopyMD = cast<MDNode>(M->getOperand(2));
}
Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
LoadInst *L = Builder.CreateLoad(IntType, Src);
// Alignment from the mem intrinsic will be better, so use it.
L->setAlignment(*CopySrcAlign);
if (CopyMD)
L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
MDNode *LoopMemParallelMD =
MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
if (LoopMemParallelMD)
L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
if (AccessGroupMD)
L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
StoreInst *S = Builder.CreateStore(L, Dest);
// Alignment from the mem intrinsic will be better, so use it.
S->setAlignment(*CopyDstAlign);
if (CopyMD)
S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
if (LoopMemParallelMD)
S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
if (AccessGroupMD)
S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
// non-atomics can be volatile
L->setVolatile(MT->isVolatile());
S->setVolatile(MT->isVolatile());
}
if (isa<AtomicMemTransferInst>(MI)) {
// atomics have to be unordered
L->setOrdering(AtomicOrdering::Unordered);
S->setOrdering(AtomicOrdering::Unordered);
}
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(MemOpLength->getType()));
return MI;
}
Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
const Align KnownAlignment =
getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
MaybeAlign MemSetAlign = MI->getDestAlign();
if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
MI->setDestAlignment(KnownAlignment);
return MI;
}
// If we have a store to a location which is known constant, we can conclude
// that the store must be storing the constant value (else the memory
// wouldn't be constant), and this must be a noop.
if (AA->pointsToConstantMemory(MI->getDest())) {
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
return MI;
}
// Remove memset with an undef value.
// FIXME: This is technically incorrect because it might overwrite a poison
// value. Change to PoisonValue once #52930 is resolved.
if (isa<UndefValue>(MI->getValue())) {
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
return MI;
}
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
return nullptr;
const uint64_t Len = LenC->getLimitedValue();
assert(Len && "0-sized memsets should be removed already.");
const Align Alignment = MI->getDestAlign().valueOrOne();
// If it is an atomic memset and the alignment is less than the size, we
// would introduce an unaligned memory access, which CodeGen would later
// turn into a libcall. That is no evident performance gain, so disable the
// transform for now.
if (isa<AtomicMemSetInst>(MI))
if (Alignment < Len)
return nullptr;
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
Value *Dest = MI->getDest();
unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
// Extract the fill value and store.
uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
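// Multiplying by 0x0101010101010101 replicates the byte into every byte
// lane; e.g. a fill byte of 0xAB with Len == 4 stores the i32 0xABABABAB.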
StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
MI->isVolatile());
S->setAlignment(Alignment);
if (isa<AtomicMemSetInst>(MI))
S->setOrdering(AtomicOrdering::Unordered);
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(LenC->getType()));
return MI;
}
return nullptr;
}
// TODO: Obvious missing transforms:
// * Narrow width by halves excluding zero/undef lanes
Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
Value *LoadPtr = II.getArgOperand(0);
const Align Alignment =
cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
// If the mask is all ones or undefs, this is a plain vector load of the 1st
// argument.
if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
L->copyMetadata(II);
return L;
}
// If we can unconditionally load from this address, replace with a
// load/select idiom. TODO: use DT for a context-sensitive query.
if (isDereferenceablePointer(LoadPtr, II.getType(),
II.getModule()->getDataLayout(), &II, nullptr)) {
LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
LI->copyMetadata(II);
return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
}
return nullptr;
}
// TODO: Obvious missing transforms:
// * Single constant active lane -> store
// * Narrow width by halves excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, this instruction does nothing.
if (ConstMask->isNullValue())
return eraseInstFromFunction(II);
// If the mask is all ones, this is a plain vector store of the 1st argument.
if (ConstMask->isAllOnesValue()) {
Value *StorePtr = II.getArgOperand(1);
Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
StoreInst *S =
new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
S->copyMetadata(II);
return S;
}
if (isa<ScalableVectorType>(ConstMask->getType()))
return nullptr;
// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
APInt UndefElts(DemandedElts.getBitWidth(), 0);
if (Value *V =
SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
return replaceOperand(II, 0, V);
return nullptr;
}
// TODO: Obvious missing transforms:
// * Single constant active lane load -> load
// * Dereferenceable address & few lanes -> scalarize speculative load/selects
// * Adjacent vector addresses -> masked.load
// * Narrow width by halves excluding zero/undef lanes
// * Vector incrementing address -> vector masked load
Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
if (!ConstMask)
return nullptr;
// Vector splat address w/known mask -> scalar load
// Fold the gather to a load of the first lane of the source vector,
// because every lane reloads the same value.
if (ConstMask->isAllOnesValue())
if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
auto *VecTy = cast<VectorType>(II.getType());
const Align Alignment =
cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
Alignment, "load.scalar");
Value *Shuf =
Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
return replaceInstUsesWith(II, cast<Instruction>(Shuf));
}
return nullptr;
}
// TODO: Obvious missing transforms:
// * Single constant active lane -> store
// * Adjacent vector addresses -> masked.store
// * Narrow store width by halves excluding zero/undef lanes
// * Vector incrementing address -> vector masked store
Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, a scatter does nothing.
if (ConstMask->isNullValue())
return eraseInstFromFunction(II);
// Vector splat address -> scalar store
if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
// scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
StoreInst *S =
new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment);
S->copyMetadata(II);
return S;
}
// scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
// lastlane), ptr
if (ConstMask->isAllOnesValue()) {
Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
ElementCount VF = WideLoadTy->getElementCount();
Constant *EC =
ConstantInt::get(Builder.getInt32Ty(), VF.getKnownMinValue());
Value *RunTimeVF = VF.isScalable() ? Builder.CreateVScale(EC) : EC;
Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
Value *Extract =
Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
StoreInst *S =
new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
S->copyMetadata(II);
return S;
}
}
if (isa<ScalableVectorType>(ConstMask->getType()))
return nullptr;
// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
APInt UndefElts(DemandedElts.getBitWidth(), 0);
if (Value *V =
SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
return replaceOperand(II, 0, V);
if (Value *V =
SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
return replaceOperand(II, 1, V);
return nullptr;
}
/// This function transforms launder.invariant.group and strip.invariant.group
/// like:
/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
/// launder(strip(%x)) -> launder(%x)
/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
/// strip(launder(%x)) -> strip(%x)
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
InstCombinerImpl &IC) {
auto *Arg = II.getArgOperand(0);
auto *StrippedArg = Arg->stripPointerCasts();
auto *StrippedInvariantGroupsArg = StrippedArg;
while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
break;
StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
}
if (StrippedArg == StrippedInvariantGroupsArg)
return nullptr; // No launders/strips to remove.
Value *Result = nullptr;
if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
else
llvm_unreachable(
"simplifyInvariantGroupIntrinsic only handles launder and strip");
if (Result->getType()->getPointerAddressSpace() !=
II.getType()->getPointerAddressSpace())
Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
if (Result->getType() != II.getType())
Result = IC.Builder.CreateBitCast(Result, II.getType());
return cast<Instruction>(Result);
}
static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
assert((II.getIntrinsicID() == Intrinsic::cttz ||
II.getIntrinsicID() == Intrinsic::ctlz) &&
"Expected cttz or ctlz intrinsic");
bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
Value *Op0 = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
Value *X;
// ctlz(bitreverse(x)) -> cttz(x)
// cttz(bitreverse(x)) -> ctlz(x)
if (match(Op0, m_BitReverse(m_Value(X)))) {
Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
return CallInst::Create(F, {X, II.getArgOperand(1)});
}
if (II.getType()->isIntOrIntVectorTy(1)) {
// ctlz/cttz i1 Op0 --> not Op0
if (match(Op1, m_Zero()))
return BinaryOperator::CreateNot(Op0);
// If zero is poison, then the input can be assumed to be "true", so the
// instruction simplifies to "false".
assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
}
// If the operand is a select with constant arm(s), try to hoist ctlz/cttz.
if (auto *Sel = dyn_cast<SelectInst>(Op0))
if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
return R;
if (IsTZ) {
// cttz(-x) -> cttz(x)
if (match(Op0, m_Neg(m_Value(X))))
return IC.replaceOperand(II, 0, X);
// cttz(sext(x)) -> cttz(zext(x))
if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
auto *Zext = IC.Builder.CreateZExt(X, II.getType());
auto *CttzZext =
IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
return IC.replaceInstUsesWith(II, CttzZext);
}
// Zext doesn't change the number of trailing zeros, so narrow:
// cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
IC.Builder.getTrue());
auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
return IC.replaceInstUsesWith(II, ZextCttz);
}
// cttz(abs(x)) -> cttz(x)
// cttz(nabs(x)) -> cttz(x)
Value *Y;
SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
if (SPF == SPF_ABS || SPF == SPF_NABS)
return IC.replaceOperand(II, 0, X);
if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
return IC.replaceOperand(II, 0, X);
}
KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
: Known.countMaxLeadingZeros();
unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
: Known.countMinLeadingZeros();
// If all bits above (ctlz) or below (cttz) the first known one are known
// zero, this value is constant.
// FIXME: This should be in InstSimplify because we're replacing an
// instruction with a constant.
if (PossibleZeros == DefiniteZeros) {
auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
return IC.replaceInstUsesWith(II, C);
}
// If the input to cttz/ctlz is known to be non-zero,
// then change the 'ZeroIsPoison' parameter to 'true'
// because we know the zero behavior can't affect the result.
if (!Known.One.isZero() ||
isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
&IC.getDominatorTree())) {
if (!match(II.getArgOperand(1), m_One()))
return IC.replaceOperand(II, 1, IC.Builder.getTrue());
}
// Add range metadata since known bits can't completely reflect what we know.
// TODO: Handle splat vectors.
auto *IT = dyn_cast<IntegerType>(Op0->getType());
if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
Metadata *LowAndHigh[] = {
ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
II.setMetadata(LLVMContext::MD_range,
MDNode::get(II.getContext(), LowAndHigh));
return &II;
}
return nullptr;
}
static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
assert(II.getIntrinsicID() == Intrinsic::ctpop &&
"Expected ctpop intrinsic");
Type *Ty = II.getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
Value *Op0 = II.getArgOperand(0);
Value *X, *Y;
// ctpop(bitreverse(x)) -> ctpop(x)
// ctpop(bswap(x)) -> ctpop(x)
if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
return IC.replaceOperand(II, 0, X);
// ctpop(rot(x)) -> ctpop(x)
if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
X == Y)
return IC.replaceOperand(II, 0, X);
// ctpop(x | -x) -> bitwidth - cttz(x, false)
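// (x | -x keeps the lowest set bit of x and sets every bit above it; e.g.
// for i8 x = 0b00010100, x | -x == 0b11111100 and ctpop == 6 == 8 - cttz(x).)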
if (Op0->hasOneUse() &&
match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
Function *F =
Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
}
// ctpop(~x & (x - 1)) -> cttz(x, false)
if (match(Op0,
m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
Function *F =
Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
return CallInst::Create(F, {X, IC.Builder.getFalse()});
}
// Zext doesn't change the number of set bits, so narrow:
// ctpop (zext X) --> zext (ctpop X)
if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
}
// If the operand is a select with constant arm(s), try to hoist ctpop.
if (auto *Sel = dyn_cast<SelectInst>(Op0))
if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
return R;
KnownBits Known(BitWidth);
IC.computeKnownBits(Op0, Known, 0, &II);
// If all bits are zero except for exactly one fixed bit, then the result
// must be 0 or 1, and we can get that answer by shifting to LSB:
// ctpop (X & 32) --> (X & 32) >> 5
if ((~Known.Zero).isPowerOf2())
return BinaryOperator::CreateLShr(
Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
// FIXME: Try to simplify vectors of integers.
auto *IT = dyn_cast<IntegerType>(Ty);
if (!IT)
return nullptr;
// Add range metadata since known bits can't completely reflect what we know.
unsigned MinCount = Known.countMinPopulation();
unsigned MaxCount = Known.countMaxPopulation();
if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
Metadata *LowAndHigh[] = {
ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
II.setMetadata(LLVMContext::MD_range,
MDNode::get(II.getContext(), LowAndHigh));
return &II;
}
return nullptr;
}
/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in which case
/// we could lower the shufflevector with a rev64 instruction, as it is
/// actually a byte reverse.
static Value *simplifyNeonTbl1(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
// Bail out if the mask is not a constant.
auto *C = dyn_cast<Constant>(II.getArgOperand(1));
if (!C)
return nullptr;
auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
// Only perform this transformation for <8 x i8> vector types.
if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
return nullptr;
int Indexes[8];
for (unsigned I = 0; I < NumElts; ++I) {
Constant *COp = C->getAggregateElement(I);
if (!COp || !isa<ConstantInt>(COp))
return nullptr;
Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
// Make sure the mask indices are in range.
if ((unsigned)Indexes[I] >= NumElts)
return nullptr;
}
auto *V1 = II.getArgOperand(0);
auto *V2 = Constant::getNullValue(V1->getType());
return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes));
}
// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
unsigned NumOperands) {
assert(I.arg_size() >= NumOperands && "Not enough operands");
assert(E.arg_size() >= NumOperands && "Not enough operands");
for (unsigned i = 0; i < NumOperands; i++)
if (I.getArgOperand(i) != E.getArgOperand(i))
return false;
return true;
}
// Remove trivially empty start/end intrinsic ranges, i.e. a start
// immediately followed by an end (ignoring debuginfo or other
// start/end intrinsics in between). As this handles only the most trivial
// cases, tracking the nesting level is not needed:
//
// call @llvm.foo.start(i1 0)
// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
// call @llvm.foo.end(i1 0)
// call @llvm.foo.end(i1 0) ; &I
static bool
removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
std::function<bool(const IntrinsicInst &)> IsStart) {
// We start from the end intrinsic and scan backwards, so that InstCombine
// has already processed (and potentially removed) all the instructions
// before the end intrinsic.
BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
for (; BI != BE; ++BI) {
if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
if (I->isDebugOrPseudoInst() ||
I->getIntrinsicID() == EndI.getIntrinsicID())
continue;
if (IsStart(*I)) {
if (haveSameOperands(EndI, *I, EndI.arg_size())) {
IC.eraseInstFromFunction(*I);
IC.eraseInstFromFunction(EndI);
return true;
}
// Skip start intrinsics that don't pair with this end intrinsic.
continue;
}
}
break;
}
return false;
}
Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
return I.getIntrinsicID() == Intrinsic::vastart ||
I.getIntrinsicID() == Intrinsic::vacopy;
});
return nullptr;
}
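/// If the first argument of a call is a constant and the second is not, swap
/// them so that the constant canonically sits in the second position.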
static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
Call.setArgOperand(0, Arg1);
Call.setArgOperand(1, Arg0);
return &Call;
}
return nullptr;
}
/// Creates a result tuple for an overflow intrinsic \p II with a given
/// \p Result and a constant \p Overflow value.
static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
Constant *Overflow) {
Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
StructType *ST = cast<StructType>(II->getType());
Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Result, 0);
}
Instruction *
InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
WithOverflowInst *WO = cast<WithOverflowInst>(II);
Value *OperationResult = nullptr;
Constant *OverflowResult = nullptr;
if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
WO->getRHS(), *WO, OperationResult, OverflowResult))
return createOverflowTuple(WO, OperationResult, OverflowResult);
return nullptr;
}
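/// Determine the sign of \p Op if possible: returns true if it is known
/// negative, false if known non-negative, and None if unknown.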
static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
const DataLayout &DL, AssumptionCache *AC,
DominatorTree *DT) {
KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
if (Known.isNonNegative())
return false;
if (Known.isNegative())
return true;
Value *X, *Y;
if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, CxtI, DL);
return isImpliedByDomCondition(
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}
/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
/// can trigger other combines.
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
InstCombiner::BuilderTy &Builder) {
Intrinsic::ID MinMaxID = II->getIntrinsicID();
assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
"Expected a min or max intrinsic");
// TODO: Match vectors with undef elements, but undef may not propagate.
Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
Value *X;
const APInt *C0, *C1;
if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
!match(Op1, m_APInt(C1)))
return nullptr;
// Check for necessary no-wrap and overflow constraints.
bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
auto *Add = cast<BinaryOperator>(Op0);
if ((IsSigned && !Add->hasNoSignedWrap()) ||
(!IsSigned && !Add->hasNoUnsignedWrap()))
return nullptr;
// If the constant difference overflows, then instsimplify should reduce the
// min/max to the add or C1.
bool Overflow;
APInt CDiff =
IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
assert(!Overflow && "Expected simplify of min/max");
// min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
// Note: the "mismatched" no-overflow setting does not propagate.
Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
: BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
}
/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
Type *Ty = MinMax1.getType();
// We are looking for a tree of:
// max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
// Where the min and max could be reversed
Instruction *MinMax2;
BinaryOperator *AddSub;
const APInt *MinValue, *MaxValue;
if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
return nullptr;
} else if (match(&MinMax1,
m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
return nullptr;
} else
return nullptr;
// Check that the constants clamp to a saturating range, and that the new
// type would be sensible to convert to.
if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
return nullptr;
// In what bitwidth can this be treated as saturating arithmetic?
unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
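// E.g. a clamp to [-128, 127] has MaxValue + 1 == 128, so NewBitWidth ==
// log2(128) + 1 == 8 and the operation saturates as an i8.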
// FIXME: This isn't quite right for vectors, but using the scalar type is a
// good first approximation for what should be done there.
if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
return nullptr;
// Also make sure that the inner min/max and the add/sub have one use.
if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
return nullptr;
// Create the new type (which can be a vector type)
Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
Intrinsic::ID IntrinsicID;
if (AddSub->getOpcode() == Instruction::Add)
IntrinsicID = Intrinsic::sadd_sat;
else if (AddSub->getOpcode() == Instruction::Sub)
IntrinsicID = Intrinsic::ssub_sat;
else
return nullptr;
// The two operands of the add/sub must be nsw-truncatable to the NewTy. This
// is usually achieved via a sext from a smaller type.
if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
NewBitWidth ||
ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
return nullptr;
// Finally create and return the sat intrinsic, truncated to the new type
Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
Value *Sat = Builder.CreateCall(F, {AT, BT});
return CastInst::Create(Instruction::SExt, Sat, Ty);
}
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
InstCombiner::BuilderTy &Builder) {
Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
Value *X;
const APInt *C0, *C1;
if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
return nullptr;
CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
switch (II->getIntrinsicID()) {
case Intrinsic::smax:
if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
Pred = ICmpInst::ICMP_SGT;
break;
case Intrinsic::smin:
if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
Pred = ICmpInst::ICMP_SLT;
break;
case Intrinsic::umax:
if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
Pred = ICmpInst::ICMP_UGT;
break;
case Intrinsic::umin:
if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
Pred = ICmpInst::ICMP_ULT;
break;
default:
llvm_unreachable("Expected min/max intrinsic");
}
if (Pred == CmpInst::BAD_ICMP_PREDICATE)
return nullptr;
// max (min X, 42), 41 --> X > 41 ? 42 : 41
// min (max X, 42), 43 --> X < 43 ? 42 : 43
Value *Cmp = Builder.CreateICmp(Pred, X, I1);
return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
}
/// If this min/max has a constant operand and an operand that is a matching
/// min/max with a constant operand, constant-fold the 2 constant operands.
static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) {
Intrinsic::ID MinMaxID = II->getIntrinsicID();
auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
if (!LHS || LHS->getIntrinsicID() != MinMaxID)
return nullptr;
Constant *C0, *C1;
if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
!match(II->getArgOperand(1), m_ImmConstant(C1)))
return nullptr;
// max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
Constant *CondC = ConstantExpr::getICmp(Pred, C0, C1);
Constant *NewC = ConstantExpr::getSelect(CondC, C0, C1);
Module *Mod = II->getModule();
Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
return CallInst::Create(MinMax, {LHS->getArgOperand(0), NewC});
}
/// If this min/max has a matching min/max operand with a constant, try to push
/// the constant operand into this instruction. This can enable more folds.
static Instruction *
reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
InstCombiner::BuilderTy &Builder) {
// Match and capture a min/max operand candidate.
Value *X, *Y;
Constant *C;
Instruction *Inner;
if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
m_Instruction(Inner),
m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
m_Value(Y))))
return nullptr;
// The inner op must match. Check for constants to avoid infinite loops.
Intrinsic::ID MinMaxID = II->getIntrinsicID();
auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
return nullptr;
// max (max X, C), Y --> max (max X, Y), C
Function *MinMax =
Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
NewInner->takeName(Inner);
return CallInst::Create(MinMax, {NewInner, C});
}
/// Reduce a sequence of min/max intrinsics with a common operand.
static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
// Match 3 of the same min/max ops. Example: umin(umin(), umin()).
auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
Intrinsic::ID MinMaxID = II->getIntrinsicID();
if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
RHS->getIntrinsicID() != MinMaxID ||
(!LHS->hasOneUse() && !RHS->hasOneUse()))
return nullptr;
Value *A = LHS->getArgOperand(0);
Value *B = LHS->getArgOperand(1);
Value *C = RHS->getArgOperand(0);
Value *D = RHS->getArgOperand(1);
// Look for a common operand.
Value *MinMaxOp = nullptr;
Value *ThirdOp = nullptr;
if (LHS->hasOneUse()) {
// If the LHS is only used in this chain and the RHS is used outside of it,
// reuse the RHS min/max because that will eliminate the LHS.
if (D == A || C == A) {
// min(min(a, b), min(c, a)) --> min(min(c, a), b)
// min(min(a, b), min(a, d)) --> min(min(a, d), b)
MinMaxOp = RHS;
ThirdOp = B;
} else if (D == B || C == B) {
// min(min(a, b), min(c, b)) --> min(min(c, b), a)
// min(min(a, b), min(b, d)) --> min(min(b, d), a)
MinMaxOp = RHS;
ThirdOp = A;
}
} else {
assert(RHS->hasOneUse() && "Expected one-use operand");
// Reuse the LHS. This will eliminate the RHS.
if (D == A || D == B) {
// min(min(a, b), min(c, a)) --> min(min(a, b), c)
// min(min(a, b), min(c, b)) --> min(min(a, b), c)
MinMaxOp = LHS;
ThirdOp = C;
} else if (C == A || C == B) {
// min(min(a, b), min(b, d)) --> min(min(a, b), d)
// min(min(a, b), min(a, d)) --> min(min(a, b), d)
MinMaxOp = LHS;
ThirdOp = D;
}
}
if (!MinMaxOp || !ThirdOp)
return nullptr;
Module *Mod = II->getModule();
Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
}
/// If all arguments of the intrinsic are unary shuffles with the same mask,
/// try to shuffle after the intrinsic.
static Instruction *
foldShuffledIntrinsicOperands(IntrinsicInst *II,
InstCombiner::BuilderTy &Builder) {
// TODO: This should be extended to handle other intrinsics like fshl, ctpop,
// etc. Use llvm::isTriviallyVectorizable() and related to determine
// which intrinsics are safe to shuffle?
switch (II->getIntrinsicID()) {
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
case Intrinsic::fma:
case Intrinsic::fshl:
case Intrinsic::fshr:
break;
default:
return nullptr;
}
Value *X;
ArrayRef<int> Mask;
if (!match(II->getArgOperand(0),
m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
return nullptr;
// At least 1 operand must have 1 use because we are creating 2 instructions.
if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
return nullptr;
// See if all arguments are shuffled with the same mask.
SmallVector<Value *, 4> NewArgs(II->arg_size());
NewArgs[0] = X;
Type *SrcTy = X->getType();
for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
if (!match(II->getArgOperand(i),
m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
X->getType() != SrcTy)
return nullptr;
NewArgs[i] = X;
}
// intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
Value *NewIntrinsic =
Builder.CreateIntrinsic(II->getIntrinsicID(), SrcTy, NewArgs, FPI);
return new ShuffleVectorInst(NewIntrinsic, Mask);
}
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Don't try to simplify calls without uses. Doing so would not accomplish
// anything useful, but would cause the folds below to be skipped.
if (!CI.use_empty())
if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI)))
return replaceInstUsesWith(CI, V);
if (Value *FreedOp = getFreedOperand(&CI, &TLI))
return visitFree(CI, FreedOp);
// If the caller function (i.e. us, the function that contains this CallInst)
// is nounwind, mark the call as nounwind, even if the callee isn't.
if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
CI.setDoesNotThrow();
return &CI;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
if (!II) return visitCallBase(CI);
// For atomic unordered mem intrinsics, if the length is negative or not a
// multiple of the element size, then the behavior is undefined.
if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(II))
if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(AMI->getLength()))
if (NumBytes->getSExtValue() < 0 ||
(NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) {
CreateNonTerminatorUnreachable(AMI);
assert(AMI->getType()->isVoidTy() &&
"non void atomic unordered mem intrinsic");
return eraseInstFromFunction(*AMI);
}
// Intrinsics cannot occur in an invoke or a callbr, so handle them here
// instead of in visitCallBase.
if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
bool Changed = false;
// memmove/cpy/set of zero bytes is a noop.
if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
if (NumBytes->isNullValue())
return eraseInstFromFunction(CI);
}
// No other transformations apply to volatile transfers.
if (auto *M = dyn_cast<MemIntrinsic>(MI))
if (M->isVolatile())
return nullptr;
// If we have a memmove and the source operation is a constant global,
// then the source and dest pointers can't alias, so we can change this
// into a call to memcpy.
if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
if (GVSrc->isConstant()) {
Module *M = CI.getModule();
Intrinsic::ID MemCpyID =
isa<AtomicMemMoveInst>(MMI)
? Intrinsic::memcpy_element_unordered_atomic
: Intrinsic::memcpy;
Type *Tys[3] = { CI.getArgOperand(0)->getType(),
CI.getArgOperand(1)->getType(),
CI.getArgOperand(2)->getType() };
CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
Changed = true;
}
}
if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
// memmove(x,x,size) -> noop.
if (MTI->getSource() == MTI->getDest())
return eraseInstFromFunction(CI);
}
// If we can determine a pointer alignment that is bigger than currently
// set, update the alignment.
if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
if (Instruction *I = SimplifyAnyMemTransfer(MTI))
return I;
} else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
if (Instruction *I = SimplifyAnyMemSet(MSI))
return I;
}
if (Changed) return II;
}
// For fixed width vector result intrinsics, use the generic demanded vector
// support.
if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
auto VWidth = IIFVTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
if (V != II)
return replaceInstUsesWith(*II, V);
return II;
}
}
if (II->isCommutative()) {
if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
return NewCall;
}
// Unused constrained FP intrinsic calls may have a declared side effect,
// which prevents them from being removed. In some cases, however, the side
// effect is actually absent. To detect this case, call
// simplifyConstrainedFPCall. If it returns a replacement, the call may be
// removed.
if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
return eraseInstFromFunction(CI);
}
Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
case Intrinsic::objectsize:
if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false))
return replaceInstUsesWith(CI, V);
return nullptr;
case Intrinsic::abs: {
Value *IIOperand = II->getArgOperand(0);
bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
// abs(-x) -> abs(x)
// TODO: Copy nsw if it was present on the neg?
Value *X;
if (match(IIOperand, m_Neg(m_Value(X))))
return replaceOperand(*II, 0, X);
if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
return replaceOperand(*II, 0, X);
if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
return replaceOperand(*II, 0, X);
if (Optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) {
// abs(x) -> x if x >= 0
if (!*Sign)
return replaceInstUsesWith(*II, IIOperand);
// abs(x) -> -x if x < 0
if (IntMinIsPoison)
return BinaryOperator::CreateNSWNeg(IIOperand);
return BinaryOperator::CreateNeg(IIOperand);
}
// abs (sext X) --> zext (abs X*)
// Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
Value *NarrowAbs =
Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
}
// Match a complicated way to check if a number is odd/even:
// abs (srem X, 2) --> and X, 1
const APInt *C;
if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
break;
}
case Intrinsic::umin: {
Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
// umin(x, 1) == zext(x != 0)
if (match(I1, m_One())) {
Value *Zero = Constant::getNullValue(I0->getType());
Value *Cmp = Builder.CreateICmpNE(I0, Zero);
return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
}
LLVM_FALLTHROUGH;
}
case Intrinsic::umax: {
Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
Value *X, *Y;
if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
(I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
}
Constant *C;
if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
I0->hasOneUse()) {
Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
if (ConstantExpr::getZExt(NarrowC, II->getType()) == C) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
}
}
// If both operands of unsigned min/max are sign-extended, it is still ok
// to narrow the operation.
LLVM_FALLTHROUGH;
}
case Intrinsic::smax:
case Intrinsic::smin: {
Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
Value *X, *Y;
if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
(I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
}
Constant *C;
if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
I0->hasOneUse()) {
Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
if (ConstantExpr::getSExt(NarrowC, II->getType()) == C) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
}
}
if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
// smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
// smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
// TODO: Canonicalize neg after min/max if I1 is constant.
if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
(I0->hasOneUse() || I1->hasOneUse())) {
Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
return BinaryOperator::CreateNSWNeg(InvMaxMin);
}
}
// If we can eliminate ~A and Y is free to invert:
// max ~A, Y --> ~(min A, ~Y)
//
// Examples:
// max ~A, ~Y --> ~(min A, Y)
// max ~A, C --> ~(min A, ~C)
// max ~A, (max ~Y, ~Z) --> ~(min A, (min Y, Z))
auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
Value *A;
if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
!isFreeToInvert(A, A->hasOneUse()) &&
isFreeToInvert(Y, Y->hasOneUse())) {
Value *NotY = Builder.CreateNot(Y);
Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
return BinaryOperator::CreateNot(InvMaxMin);
}
return nullptr;
};
if (Instruction *I = moveNotAfterMinMax(I0, I1))
return I;
if (Instruction *I = moveNotAfterMinMax(I1, I0))
return I;
if (Instruction *I = moveAddAfterMinMax(II, Builder))
return I;
// smax(X, -X) --> abs(X)
// smin(X, -X) --> -abs(X)
// umax(X, -X) --> -abs(X)
// umin(X, -X) --> abs(X)
if (isKnownNegation(I0, I1)) {
// We can choose either operand as the input to abs(), but if we can
// eliminate the only use of a value, that's better for subsequent
// transforms/analysis.
if (I0->hasOneUse() && !I1->hasOneUse())
std::swap(I0, I1);
// This is some variant of abs(). See if we can propagate 'nsw' to the abs
// operation and potentially its negation.
bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
Value *Abs = Builder.CreateBinaryIntrinsic(
Intrinsic::abs, I0,
ConstantInt::getBool(II->getContext(), IntMinIsPoison));
// We don't have a "nabs" intrinsic, so negate if needed based on the
// max/min operation.
if (IID == Intrinsic::smin || IID == Intrinsic::umax)
Abs = Builder.CreateNeg(Abs, "nabs", /* NUW */ false, IntMinIsPoison);
return replaceInstUsesWith(CI, Abs);
}
if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
return Sel;
if (Instruction *SAdd = matchSAddSubSat(*II))
return SAdd;
if (match(I1, m_ImmConstant()))
if (auto *Sel = dyn_cast<SelectInst>(I0))
if (Instruction *R = FoldOpIntoSelect(*II, Sel))
return R;
if (Instruction *NewMinMax = reassociateMinMaxWithConstants(II))
return NewMinMax;
if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
return R;
if (Instruction *NewMinMax = factorizeMinMaxTree(II))
return NewMinMax;
break;
}
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
// Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
// inverse-shift-of-bswap:
// bswap (shl X, Y) --> lshr (bswap X), Y
// bswap (lshr X, Y) --> shl (bswap X), Y
Value *X, *Y;
if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
// The transform allows undef vector elements, so try a constant match
// first. If knownbits can handle that case, that clause could be removed.
unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
const APInt *C;
if ((match(Y, m_APIntAllowUndef(C)) && (*C & 7) == 0) ||
MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
BinaryOperator::BinaryOps InverseShift =
cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
? Instruction::LShr
: Instruction::Shl;
return BinaryOperator::Create(InverseShift, NewSwap, Y);
}
}
KnownBits Known = computeKnownBits(IIOperand, 0, II);
uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
unsigned BW = Known.getBitWidth();
// bswap(x) -> shift(x) if x has exactly one "active byte"
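// E.g. for i32 x with only bits 8..15 possibly set (LZ == 16, TZ == 8),
// bswap(x) == shl(x, 8).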
if (BW - LZ - TZ == 8) {
assert(LZ != TZ && "active byte cannot be in the middle");
if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
return BinaryOperator::CreateNUWShl(
IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
// -> lshr(x) if the "active byte" is in the high part of x
return BinaryOperator::CreateExactLShr(
IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
}
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
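// e.g. (illustrative, i32 -> i16): for %x = 0xAABBCCDD,
// trunc(bswap %x) = trunc 0xDDCCBBAA = 0xBBAA, so the outer bswap yields
// 0xAABB, matching trunc(lshr(%x, 16)) with c = 32 - 16 = 16.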
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
unsigned C = X->getType()->getScalarSizeInBits() - BW;
Value *CV = ConstantInt::get(X->getType(), C);
Value *V = Builder.CreateLShr(X, CV);
return new TruncInst(V, IIOperand->getType());
}
break;
}
case Intrinsic::masked_load:
if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
return replaceInstUsesWith(CI, SimplifiedMaskedOp);
break;
case Intrinsic::masked_store:
return simplifyMaskedStore(*II);
case Intrinsic::masked_gather:
return simplifyMaskedGather(*II);
case Intrinsic::masked_scatter:
return simplifyMaskedScatter(*II);
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
return replaceInstUsesWith(*II, SkippedBarrier);
break;
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// 0 and 1 are handled in instsimplify
// powi(x, -1) -> 1/x
if (Power->isMinusOne())
return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
II->getArgOperand(0), II);
// powi(x, 2) -> x*x
if (Power->equalsInt(2))
return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
II->getArgOperand(0), II);
if (!Power->getValue()[0]) {
Value *X;
// If power is even:
// powi(-x, p) -> powi(x, p)
// powi(fabs(x), p) -> powi(x, p)
// powi(copysign(x, y), p) -> powi(x, p)
if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
match(II->getArgOperand(0),
m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
return replaceOperand(*II, 0, X);
}
}
break;
case Intrinsic::cttz:
case Intrinsic::ctlz:
if (auto *I = foldCttzCtlz(*II, *this))
return I;
break;
case Intrinsic::ctpop:
if (auto *I = foldCtpop(*II, *this))
return I;
break;
case Intrinsic::fshl:
case Intrinsic::fshr: {
Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
Type *Ty = II->getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
Constant *ShAmtC;
if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
// Canonicalize a shift amount constant operand to modulo the bit-width.
Constant *WidthC = ConstantInt::get(Ty, BitWidth);
Constant *ModuloC =
ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
if (!ModuloC)
return nullptr;
if (ModuloC != ShAmtC)
return replaceOperand(*II, 2, ModuloC);
assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
"Shift amount expected to be modulo bitwidth");
// Canonicalize funnel shift right by constant to funnel shift left. This
// is not entirely arbitrary. For historical reasons, the backend may
// recognize rotate left patterns but miss rotate right patterns.
if (IID == Intrinsic::fshr) {
// fshr X, Y, C --> fshl X, Y, (BitWidth - C)
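// e.g. (illustrative, i8): fshr %x, %y, 3 --> fshl %x, %y, 5, since both
// compute (%x << 5) | (%y >> 3).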
Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
Module *Mod = II->getModule();
Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
}
assert(IID == Intrinsic::fshl &&
"All funnel shifts by simple constants should go left");
// fshl(X, 0, C) --> shl X, C
// fshl(X, undef, C) --> shl X, C
if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
return BinaryOperator::CreateShl(Op0, ShAmtC);
// fshl(0, X, C) --> lshr X, (BW-C)
// fshl(undef, X, C) --> lshr X, (BW-C)
if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
return BinaryOperator::CreateLShr(Op1,
ConstantExpr::getSub(WidthC, ShAmtC));
// fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
Module *Mod = II->getModule();
Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
return CallInst::Create(Bswap, { Op0 });
}
}
// Left or right might be masked.
if (SimplifyDemandedInstructionBits(*II))
return &CI;
// The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
// so only the low bits of the shift amount are demanded if the bitwidth is
// a power-of-2.
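// e.g. (illustrative): for i32 the amount is taken modulo 32, so only the
// low Log2(32) = 5 bits of operand 2 are demanded.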
if (!isPowerOf2_32(BitWidth))
break;
APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
KnownBits Op2Known(BitWidth);
if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
return &CI;
break;
}
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow: {
if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
return I;
// Given 2 constant operands whose sum does not overflow:
// uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
// saddo (X +nsw C0), C1 -> saddo X, C0 + C1
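// e.g. (illustrative): uaddo (%x +nuw 3), 5 --> uaddo %x, 8,
// valid because 3 + 5 does not overflow.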
Value *X;
const APInt *C0, *C1;
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
bool IsSigned = IID == Intrinsic::sadd_with_overflow;
bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
: match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
if (HasNWAdd && match(Arg1, m_APInt(C1))) {
bool Overflow;
APInt NewC =
IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
if (!Overflow)
return replaceInstUsesWith(
*II, Builder.CreateBinaryIntrinsic(
IID, X, ConstantInt::get(Arg1->getType(), NewC)));
}
break;
}
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
case Intrinsic::usub_with_overflow:
if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
return I;
break;
case Intrinsic::ssub_with_overflow: {
if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
return I;
Constant *C;
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
// Given a constant C that is not the minimum signed value
// for an integer of a given bit width:
//
// ssubo X, C -> saddo X, -C
if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
Value *NegVal = ConstantExpr::getNeg(C);
// Build a saddo call that is equivalent to the discovered
// ssubo call.
return replaceInstUsesWith(
*II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
Arg0, NegVal));
}
break;
}
case Intrinsic::uadd_sat:
case Intrinsic::sadd_sat:
case Intrinsic::usub_sat:
case Intrinsic::ssub_sat: {
SaturatingInst *SI = cast<SaturatingInst>(II);
Type *Ty = SI->getType();
Value *Arg0 = SI->getLHS();
Value *Arg1 = SI->getRHS();
// Make use of known overflow information.
OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
Arg0, Arg1, SI);
switch (OR) {
case OverflowResult::MayOverflow:
break;
case OverflowResult::NeverOverflows:
if (SI->isSigned())
return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
else
return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
case OverflowResult::AlwaysOverflowsLow: {
unsigned BitWidth = Ty->getScalarSizeInBits();
APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
}
case OverflowResult::AlwaysOverflowsHigh: {
unsigned BitWidth = Ty->getScalarSizeInBits();
APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
}
}
// ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
Constant *C;
if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
C->isNotMinSignedValue()) {
Value *NegVal = ConstantExpr::getNeg(C);
return replaceInstUsesWith(
*II, Builder.CreateBinaryIntrinsic(
Intrinsic::sadd_sat, Arg0, NegVal));
}
// sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
// sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
// if Val and Val2 have the same sign
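// e.g. (illustrative): uadd.sat(uadd.sat(%x, 10), 20) --> uadd.sat(%x, 30);
// for the signed case the two constants must not overflow when summed.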
if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
Value *X;
const APInt *Val, *Val2;
APInt NewVal;
bool IsUnsigned =
IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
if (Other->getIntrinsicID() == IID &&
match(Arg1, m_APInt(Val)) &&
match(Other->getArgOperand(0), m_Value(X)) &&
match(Other->getArgOperand(1), m_APInt(Val2))) {
if (IsUnsigned)
NewVal = Val->uadd_sat(*Val2);
else if (Val->isNonNegative() == Val2->isNonNegative()) {
bool Overflow;
NewVal = Val->sadd_ov(*Val2, Overflow);
if (Overflow) {
// Both adds together may add more than SignedMaxValue
// without saturating the final result.
break;
}
} else {
// Cannot fold saturated addition with different signs.
break;
}
return replaceInstUsesWith(
*II, Builder.CreateBinaryIntrinsic(
IID, X, ConstantInt::get(II->getType(), NewVal)));
}
}
break;
}
case Intrinsic::minnum:
case Intrinsic::maxnum:
case Intrinsic::minimum:
case Intrinsic::maximum: {
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
Value *X, *Y;
if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
(Arg0->hasOneUse() || Arg1->hasOneUse())) {
// If both operands are negated, invert the call and negate the result:
// min(-X, -Y) --> -(max(X, Y))
// max(-X, -Y) --> -(min(X, Y))
Intrinsic::ID NewIID;
switch (IID) {
case Intrinsic::maxnum:
NewIID = Intrinsic::minnum;
break;
case Intrinsic::minnum:
NewIID = Intrinsic::maxnum;
break;
case Intrinsic::maximum:
NewIID = Intrinsic::minimum;
break;
case Intrinsic::minimum:
NewIID = Intrinsic::maximum;
break;
default:
llvm_unreachable("unexpected intrinsic ID");
}
Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
FNeg->copyIRFlags(II);
return FNeg;
}
// m(m(X, C2), C1) -> m(X, C)
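// e.g. (illustrative): maxnum(maxnum(%x, 2.0), 1.0) --> maxnum(%x, 2.0),
// folding the constants with maxnum(2.0, 1.0) = 2.0.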
const APFloat *C1, *C2;
if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
((match(M->getArgOperand(0), m_Value(X)) &&
match(M->getArgOperand(1), m_APFloat(C2))) ||
(match(M->getArgOperand(1), m_Value(X)) &&
match(M->getArgOperand(0), m_APFloat(C2))))) {
APFloat Res(0.0);
switch (IID) {
case Intrinsic::maxnum:
Res = maxnum(*C1, *C2);
break;
case Intrinsic::minnum:
Res = minnum(*C1, *C2);
break;
case Intrinsic::maximum:
Res = maximum(*C1, *C2);
break;
case Intrinsic::minimum:
Res = minimum(*C1, *C2);
break;
default:
llvm_unreachable("unexpected intrinsic ID");
}
Instruction *NewCall = Builder.CreateBinaryIntrinsic(
IID, X, ConstantFP::get(Arg0->getType(), Res), II);
// TODO: Conservatively intersecting FMF. If Res == C2, the transform
// was a simplification (so Arg0 and its original flags could
// propagate?)
NewCall->andIRFlags(M);
return replaceInstUsesWith(*II, NewCall);
}
}
// m((fpext X), (fpext Y)) -> fpext (m(X, Y))
if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
X->getType() == Y->getType()) {
Value *NewCall =
Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
return new FPExtInst(NewCall, II->getType());
}
// max X, -X --> fabs X
// min X, -X --> -(fabs X)
// TODO: Remove one-use limitation? That is obviously better for max.
// It would be an extra instruction for min (fnabs), but that is
// still likely better for analysis and codegen.
if ((match(Arg0, m_OneUse(m_FNeg(m_Value(X)))) && Arg1 == X) ||
(match(Arg1, m_OneUse(m_FNeg(m_Value(X)))) && Arg0 == X)) {
Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
R = Builder.CreateFNegFMF(R, II);
return replaceInstUsesWith(*II, R);
}
break;
}
case Intrinsic::fmuladd: {
// Canonicalize fast fmuladd to the separate fmul + fadd.
if (II->isFast()) {
BuilderTy::FastMathFlagGuard Guard(Builder);
Builder.setFastMathFlags(II->getFastMathFlags());
Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
II->getArgOperand(1));
Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
Add->takeName(II);
return replaceInstUsesWith(*II, Add);
}
// Try to simplify the underlying FMul.
if (Value *V = simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
II->getFastMathFlags(),
SQ.getWithInstruction(II))) {
auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
FAdd->copyFastMathFlags(II);
return FAdd;
}
LLVM_FALLTHROUGH;
}
case Intrinsic::fma: {
// fma fneg(x), fneg(y), z -> fma x, y, z
Value *Src0 = II->getArgOperand(0);
Value *Src1 = II->getArgOperand(1);
Value *X, *Y;
if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
replaceOperand(*II, 0, X);
replaceOperand(*II, 1, Y);
return II;
}
// fma fabs(x), fabs(x), z -> fma x, x, z
if (match(Src0, m_FAbs(m_Value(X))) &&
match(Src1, m_FAbs(m_Specific(X)))) {
replaceOperand(*II, 0, X);
replaceOperand(*II, 1, X);
return II;
}
// Try to simplify the underlying FMul. We can only apply simplifications
// that do not require rounding.
if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
II->getFastMathFlags(),
SQ.getWithInstruction(II))) {
auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
FAdd->copyFastMathFlags(II);
return FAdd;
}
// fma x, y, 0 -> fmul x, y
// This is always valid for -0.0, but requires nsz for +0.0 as
// -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
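// e.g. (illustrative): with %x = -0.0, %y = 1.0, fma(%x, %y, +0.0) rounds
// -0.0 + 0.0 to +0.0, while the bare fmul yields -0.0; with a -0.0 addend
// the results always agree, since v + -0.0 == v for every v.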
if (match(II->getArgOperand(2), m_NegZeroFP()) ||
(match(II->getArgOperand(2), m_PosZeroFP()) &&
II->getFastMathFlags().noSignedZeros()))
return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
break;
}
case Intrinsic::copysign: {
Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
if (SignBitMustBeZero(Sign, &TLI)) {
// If we know that the sign argument is positive, reduce to FABS:
// copysign Mag, +Sign --> fabs Mag
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
return replaceInstUsesWith(*II, Fabs);
}
// TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
const APFloat *C;
if (match(Sign, m_APFloat(C)) && C->isNegative()) {
// If we know that the sign argument is negative, reduce to FNABS:
// copysign Mag, -Sign --> fneg (fabs Mag)
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
}
// Propagate sign argument through nested calls:
// copysign Mag, (copysign ?, X) --> copysign Mag, X
Value *X;
if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
return replaceOperand(*II, 1, X);
// Peek through changes of magnitude's sign-bit. This call rewrites those:
// copysign (fabs X), Sign --> copysign X, Sign
// copysign (fneg X), Sign --> copysign X, Sign
if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
return replaceOperand(*II, 0, X);
break;
}
case Intrinsic::fabs: {
Value *Cond, *TVal, *FVal;
if (match(II->getArgOperand(0),
m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
// fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
if (isa<Constant>(TVal) && isa<Constant>(FVal)) {
CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
return SelectInst::Create(Cond, AbsT, AbsF);
}
// fabs (select Cond, -FVal, FVal) --> fabs FVal
if (match(TVal, m_FNeg(m_Specific(FVal))))
return replaceOperand(*II, 0, FVal);
// fabs (select Cond, TVal, -TVal) --> fabs TVal
if (match(FVal, m_FNeg(m_Specific(TVal))))
return replaceOperand(*II, 0, TVal);
}
LLVM_FALLTHROUGH;
}
case Intrinsic::ceil:
case Intrinsic::floor:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::nearbyint:
case Intrinsic::rint:
case Intrinsic::trunc: {
Value *ExtSrc;
if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
// Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
return new FPExtInst(NarrowII, II->getType());
}
break;
}
case Intrinsic::cos:
case Intrinsic::amdgcn_cos: {
Value *X;
Value *Src = II->getArgOperand(0);
if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
// cos(-x) -> cos(x)
// cos(fabs(x)) -> cos(x)
return replaceOperand(*II, 0, X);
}
break;
}
case Intrinsic::sin: {
Value *X;
if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
// sin(-x) --> -sin(x)
Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
Instruction *FNeg = UnaryOperator::CreateFNeg(NewSin);
FNeg->copyFastMathFlags(II);
return FNeg;
}
break;
}
case Intrinsic::arm_neon_vtbl1:
case Intrinsic::aarch64_neon_tbl1:
if (Value *V = simplifyNeonTbl1(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu:
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull: {
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
// Handle mul by zero first:
if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
}
// Check for constant LHS & RHS - in this case we just simplify.
bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
IID == Intrinsic::aarch64_neon_umull);
VectorType *NewVT = cast<VectorType>(II->getType());
if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
}
// Couldn't simplify - canonicalize constant to the RHS.
std::swap(Arg0, Arg1);
}
// Handle mul by one:
if (Constant *CV1 = dyn_cast<Constant>(Arg1))
if (ConstantInt *Splat =
dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
if (Splat->isOne())
return CastInst::CreateIntegerCast(Arg0, II->getType(),
/*isSigned=*/!Zext);
break;
}
case Intrinsic::arm_neon_aesd:
case Intrinsic::arm_neon_aese:
case Intrinsic::aarch64_crypto_aesd:
case Intrinsic::aarch64_crypto_aese: {
Value *DataArg = II->getArgOperand(0);
Value *KeyArg = II->getArgOperand(1);
// Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
Value *Data, *Key;
if (match(KeyArg, m_ZeroInt()) &&
match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
replaceOperand(*II, 0, Data);
replaceOperand(*II, 1, Key);
return II;
}
break;
}
case Intrinsic::hexagon_V6_vandvrt:
case Intrinsic::hexagon_V6_vandvrt_128B: {
// Simplify Q -> V -> Q conversion.
if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
Intrinsic::ID ID0 = Op0->getIntrinsicID();
if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
break;
Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
uint64_t Bytes1 = computeKnownBits(Bytes, 0, Op0).One.getZExtValue();
uint64_t Mask1 = computeKnownBits(Mask, 0, II).One.getZExtValue();
// Check if every byte has common bits in Bytes and Mask.
uint64_t C = Bytes1 & Mask1;
if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
return replaceInstUsesWith(*II, Op0->getArgOperand(0));
}
break;
}
case Intrinsic::stackrestore: {
enum class ClassifyResult {
None,
Alloca,
StackRestore,
CallWithSideEffects,
};
auto Classify = [](const Instruction *I) {
if (isa<AllocaInst>(I))
return ClassifyResult::Alloca;
if (auto *CI = dyn_cast<CallInst>(I)) {
if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
if (II->getIntrinsicID() == Intrinsic::stackrestore)
return ClassifyResult::StackRestore;
if (II->mayHaveSideEffects())
return ClassifyResult::CallWithSideEffects;
} else {
// Consider all non-intrinsic calls to be side effects
return ClassifyResult::CallWithSideEffects;
}
}
return ClassifyResult::None;
};
// If the stacksave and the stackrestore are in the same BB, and there is
// no intervening call, alloca, or stackrestore of a different stacksave,
// remove the restore. This can happen when variable allocas are DCE'd.
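// e.g. (illustrative IR, assumed shape):
// %sp = call i8* @llvm.stacksave()
// ; a variable-length alloca that used to live here was removed
// call void @llvm.stackrestore(i8* %sp) ; now removable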
if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
if (SS->getIntrinsicID() == Intrinsic::stacksave &&
SS->getParent() == II->getParent()) {
BasicBlock::iterator BI(SS);
bool CannotRemove = false;
for (++BI; &*BI != II; ++BI) {
switch (Classify(&*BI)) {
case ClassifyResult::None:
// So far so good, look at next instructions.
break;
case ClassifyResult::StackRestore:
// If we found an intervening stackrestore for a different
// stacksave, we can't remove the stackrestore. Otherwise, continue.
if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
CannotRemove = true;
break;
case ClassifyResult::Alloca:
case ClassifyResult::CallWithSideEffects:
// If we found an alloca, a non-intrinsic call, or an intrinsic
// call with side effects, we can't remove the stackrestore.
CannotRemove = true;
break;
}
if (CannotRemove)
break;
}
if (!CannotRemove)
return eraseInstFromFunction(CI);
}
}
// Scan down this block to see if there is another stack restore in the
// same block without an intervening call/alloca.
BasicBlock::iterator BI(II);
Instruction *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
switch (Classify(&*BI)) {
case ClassifyResult::None:
// So far so good, look at next instructions.
break;
case ClassifyResult::StackRestore:
// If there is a stackrestore below this one, remove this one.
return eraseInstFromFunction(CI);
case ClassifyResult::Alloca:
case ClassifyResult::CallWithSideEffects:
// If we found an alloca, a non-intrinsic call, or an intrinsic call
// with side effects (such as llvm.stacksave and llvm.read_register),
// we can't remove the stack restore.
CannotRemove = true;
break;
}
if (CannotRemove)
break;
}
// If the stack restore is in a return, resume, or unwind block and if there
// are no allocas or calls between the restore and the return, nuke the
// restore.
if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
return eraseInstFromFunction(CI);
break;
}
case Intrinsic::lifetime_end:
// Asan needs to poison memory to detect invalid access, which is possible
// even for an empty lifetime range.
if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
break;
if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
return I.getIntrinsicID() == Intrinsic::lifetime_start;
}))
return nullptr;
break;
case Intrinsic::assume: {
Value *IIOperand = II->getArgOperand(0);
SmallVector<OperandBundleDef, 4> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
/// This will remove the boolean Condition from the assume given as
/// argument and remove the assume if it becomes useless.
/// It always returns nullptr so it can be used as a return value.
auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
assert(isa<AssumeInst>(Assume));
if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
return eraseInstFromFunction(CI);
replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
return nullptr;
};
// Remove an assume if it is followed by an identical assume.
// TODO: Do we need this? Unless there are conflicting assumptions, the
// computeKnownBits(IIOperand) below here eliminates redundant assumes.
Instruction *Next = II->getNextNonDebugInstruction();
if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
return RemoveConditionFromAssume(Next);
// Canonicalize assume(a && b) -> assume(a); assume(b);
// Note: New assumption intrinsics created here are registered by
// the InstCombineIRInserter object.
FunctionType *AssumeIntrinsicTy = II->getFunctionType();
Value *AssumeIntrinsic = II->getCalledOperand();
Value *A, *B;
if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
II->getName());
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
return eraseInstFromFunction(*II);
}
// assume(!(a || b)) -> assume(!a); assume(!b);
if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
Builder.CreateNot(A), OpBundles, II->getName());
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
Builder.CreateNot(B), II->getName());
return eraseInstFromFunction(*II);
}
// assume( (load addr) != null ) -> add 'nonnull' metadata to load
// (if assume is valid at the load)
CmpInst::Predicate Pred;
Instruction *LHS;
if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
LHS->getType()->isPointerTy() &&
isValidAssumeForContext(II, LHS, &DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LHS->setMetadata(LLVMContext::MD_nonnull, MD);
return RemoveConditionFromAssume(II);
// TODO: apply nonnull return attributes to calls and invokes
// TODO: apply range metadata for range check patterns?
}
// Convert nonnull assume like:
// %A = icmp ne i32* %PTR, null
// call void @llvm.assume(i1 %A)
// into
// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
if (EnableKnowledgeRetention &&
match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
if (auto *Replacement = buildAssumeFromKnowledge(
{RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
Replacement->insertBefore(Next);
AC.registerAssumption(Replacement);
return RemoveConditionFromAssume(II);
}
}
// Convert alignment assume like:
// %B = ptrtoint i32* %A to i64
// %C = and i64 %B, Constant
// %D = icmp eq i64 %C, 0
// call void @llvm.assume(i1 %D)
// into
// call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
uint64_t AlignMask;
if (EnableKnowledgeRetention &&
match(IIOperand,
m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
m_Zero())) &&
Pred == CmpInst::ICMP_EQ) {
if (isPowerOf2_64(AlignMask + 1)) {
uint64_t Offset = 0;
match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
if (match(A, m_PtrToInt(m_Value(A)))) {
/// Note: this doesn't preserve the offset information but merges
/// offset and alignment.
/// TODO: we can generate a GEP instead of merging the alignment with
/// the offset.
RetainedKnowledge RK{Attribute::Alignment,
(unsigned)MinAlign(Offset, AlignMask + 1), A};
if (auto *Replacement =
buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {
Replacement->insertAfter(II);
AC.registerAssumption(Replacement);
}
return RemoveConditionFromAssume(II);
}
}
}
/// Canonicalize Knowledge in operand bundles.
if (EnableKnowledgeRetention && II->hasOperandBundles()) {
for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
auto &BOI = II->bundle_op_info_begin()[Idx];
RetainedKnowledge RK =
llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
if (BOI.End - BOI.Begin > 2)
continue; // Prevent reducing knowledge in an align with offset since
// extracting a RetainedKnowledge from them loses offset
// information
RetainedKnowledge CanonRK =
llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
&getAssumptionCache(),
&getDominatorTree());
if (CanonRK == RK)
continue;
if (!CanonRK) {
if (BOI.End - BOI.Begin > 0) {
Worklist.pushValue(II->op_begin()[BOI.Begin]);
Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
}
continue;
}
assert(RK.AttrKind == CanonRK.AttrKind);
if (BOI.End - BOI.Begin > 0)
II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
if (BOI.End - BOI.Begin > 1)
II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
if (RK.WasOn)
Worklist.pushValue(RK.WasOn);
return II;
}
}
// If there is a dominating assume with the same condition as this one,
// then this one is redundant, and should be removed.
KnownBits Known(1);
computeKnownBits(IIOperand, Known, 0, II);
if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
return eraseInstFromFunction(*II);
// Update the cache of affected values for this assumption (we might be
// here because we just simplified the condition).
AC.updateAffectedValues(cast<AssumeInst>(II));
break;
}
case Intrinsic::experimental_guard: {
// Is this guard followed by another guard? We scan forward over a small
// fixed window of instructions to handle common cases with conditions
// computed between guards.
Instruction *NextInst = II->getNextNonDebugInstruction();
for (unsigned i = 0; i < GuardWideningWindow; i++) {
// Note: Using context-free form to avoid compile time blow up
if (!isSafeToSpeculativelyExecute(NextInst))
break;
NextInst = NextInst->getNextNonDebugInstruction();
}
Value *NextCond = nullptr;
if (match(NextInst,
m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
Value *CurrCond = II->getArgOperand(0);
// Remove a guard that is immediately preceded by an identical guard.
// Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
if (CurrCond != NextCond) {
Instruction *MoveI = II->getNextNonDebugInstruction();
while (MoveI != NextInst) {
auto *Temp = MoveI;
MoveI = MoveI->getNextNonDebugInstruction();
Temp->moveBefore(II);
}
replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
}
eraseInstFromFunction(*NextInst);
return II;
}
break;
}
case Intrinsic::vector_insert: {
Value *Vec = II->getArgOperand(0);
Value *SubVec = II->getArgOperand(1);
Value *Idx = II->getArgOperand(2);
auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
// Only canonicalize if the destination vector, Vec, and SubVec are all
// fixed vectors.
if (DstTy && VecTy && SubVecTy) {
unsigned DstNumElts = DstTy->getNumElements();
unsigned VecNumElts = VecTy->getNumElements();
unsigned SubVecNumElts = SubVecTy->getNumElements();
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
// An insert that entirely overwrites Vec with SubVec is a nop.
if (VecNumElts == SubVecNumElts)
return replaceInstUsesWith(CI, SubVec);
// Widen SubVec into a vector of the same width as Vec, since
// shufflevector requires the two input vectors to be the same width.
// Elements beyond the bounds of SubVec within the widened vector are
// undefined.
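// e.g. (illustrative): inserting <2 x i32> %sub into <4 x i32> %vec at
// index 2 widens %sub with mask <0, 1, undef, undef> and then blends the
// two vectors with mask <0, 1, 4, 5>.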
SmallVector<int, 8> WidenMask;
unsigned i;
for (i = 0; i != SubVecNumElts; ++i)
WidenMask.push_back(i);
for (; i != VecNumElts; ++i)
WidenMask.push_back(UndefMaskElem);
Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != IdxN; ++i)
Mask.push_back(i);
for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
Mask.push_back(i);
for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
Mask.push_back(i);
Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
return replaceInstUsesWith(CI, Shuffle);
}
break;
}
case Intrinsic::vector_extract: {
Value *Vec = II->getArgOperand(0);
Value *Idx = II->getArgOperand(1);
auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
// Only canonicalize if the destination vector and Vec are fixed
// vectors.
if (DstTy && VecTy) {
unsigned DstNumElts = DstTy->getNumElements();
unsigned VecNumElts = VecTy->getNumElements();
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
// Extracting the entirety of Vec is a nop.
if (VecNumElts == DstNumElts) {
replaceInstUsesWith(CI, Vec);
return eraseInstFromFunction(CI);
}
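// e.g. (illustrative): extracting <2 x i32> at index 2 from <4 x i32>
// becomes a shufflevector of Vec with mask <2, 3>.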
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != DstNumElts; ++i)
Mask.push_back(IdxN + i);
Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
return replaceInstUsesWith(CI, Shuffle);
}
break;
}
case Intrinsic::experimental_vector_reverse: {
Value *BO0, *BO1, *X, *Y;
Value *Vec = II->getArgOperand(0);
if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
auto *OldBinOp = cast<BinaryOperator>(Vec);
if (match(BO0, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
m_Value(X)))) {
// rev(binop rev(X), rev(Y)) --> binop X, Y
if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
m_Value(Y))))
return replaceInstUsesWith(CI,
BinaryOperator::CreateWithCopiedFlags(
OldBinOp->getOpcode(), X, Y, OldBinOp,
OldBinOp->getName(), II));
// rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
if (isSplatValue(BO1))
return replaceInstUsesWith(CI,
BinaryOperator::CreateWithCopiedFlags(
OldBinOp->getOpcode(), X, BO1,
OldBinOp, OldBinOp->getName(), II));
}
// rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
m_Value(Y))) &&
isSplatValue(BO0))
return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
OldBinOp->getOpcode(), BO0, Y,
OldBinOp, OldBinOp->getName(), II));
}
// rev(unop rev(X)) --> unop X
if (match(Vec, m_OneUse(m_UnOp(
m_Intrinsic<Intrinsic::experimental_vector_reverse>(
m_Value(X)))))) {
auto *OldUnOp = cast<UnaryOperator>(Vec);
auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), II);
return replaceInstUsesWith(CI, NewUnOp);
}
break;
}
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_and: {
// Canonicalize logical or/and reductions:
// Or reduction for i1 is represented as:
// %val = bitcast <ReduxWidth x i1> to iReduxWidth
// %res = cmp ne iReduxWidth %val, 0
// And reduction for i1 is represented as:
// %val = bitcast <ReduxWidth x i1> to iReduxWidth
// %res = cmp eq iReduxWidth %val, -1 (all ones)
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Value *Res = Builder.CreateBitCast(
Vect, Builder.getIntNTy(FTy->getNumElements()));
if (IID == Intrinsic::vector_reduce_and) {
Res = Builder.CreateICmpEQ(
Res, ConstantInt::getAllOnesValue(Res->getType()));
} else {
assert(IID == Intrinsic::vector_reduce_or &&
"Expected or reduction.");
Res = Builder.CreateIsNotNull(Res);
}
if (Arg != Vect)
Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
II->getType());
return replaceInstUsesWith(CI, Res);
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_add: {
if (IID == Intrinsic::vector_reduce_add) {
// Convert vector_reduce_add(ZExt(<n x i1>)) to
// ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
// Convert vector_reduce_add(SExt(<n x i1>)) to
// -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
// Convert vector_reduce_add(<n x i1>) to
// Trunc(ctpop(bitcast <n x i1> to in)).
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Value *V = Builder.CreateBitCast(
Vect, Builder.getIntNTy(FTy->getNumElements()));
Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
if (Res->getType() != II->getType())
Res = Builder.CreateZExtOrTrunc(Res, II->getType());
if (Arg != Vect &&
cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
Res = Builder.CreateNeg(Res);
return replaceInstUsesWith(CI, Res);
}
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_xor: {
if (IID == Intrinsic::vector_reduce_xor) {
// Exclusive disjunction reduction over the vector with
// (potentially-extended) i1 element type is actually a
// (potentially-extended) arithmetic `add` reduction over the original
// non-extended value:
// vector_reduce_xor(?ext(<n x i1>))
// -->
// ?ext(vector_reduce_add(<n x i1>))
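// (This holds because xor of i1 values is addition modulo 2, and i1
// addition already wraps modulo 2.)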
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Value *Res = Builder.CreateAddReduce(Vect);
if (Arg != Vect)
Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
II->getType());
return replaceInstUsesWith(CI, Res);
}
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_mul: {
if (IID == Intrinsic::vector_reduce_mul) {
// Multiplicative reduction over the vector with (potentially-extended)
// i1 element type is actually a (potentially zero-extended)
// logical `and` reduction over the original non-extended value:
// vector_reduce_mul(?ext(<n x i1>))
// -->
// zext(vector_reduce_and(<n x i1>))
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Value *Res = Builder.CreateAndReduce(Vect);
if (Res->getType() != II->getType())
Res = Builder.CreateZExt(Res, II->getType());
return replaceInstUsesWith(CI, Res);
}
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_umax: {
if (IID == Intrinsic::vector_reduce_umin ||
IID == Intrinsic::vector_reduce_umax) {
// UMin/UMax reduction over the vector with (potentially-extended)
// i1 element type is actually a (potentially-extended)
// logical `and`/`or` reduction over the original non-extended value:
// vector_reduce_u{min,max}(?ext(<n x i1>))
// -->
// ?ext(vector_reduce_{and,or}(<n x i1>))
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Value *Res = IID == Intrinsic::vector_reduce_umin
? Builder.CreateAndReduce(Vect)
: Builder.CreateOrReduce(Vect);
if (Arg != Vect)
Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
II->getType());
return replaceInstUsesWith(CI, Res);
}
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_smax: {
if (IID == Intrinsic::vector_reduce_smin ||
IID == Intrinsic::vector_reduce_smax) {
// SMin/SMax reduction over the vector with (potentially-extended)
// i1 element type is actually a (potentially-extended)
// logical `and`/`or` reduction over the original non-extended value:
// vector_reduce_s{min,max}(<n x i1>)
// -->
// vector_reduce_{or,and}(<n x i1>)
// and
// vector_reduce_s{min,max}(sext(<n x i1>))
// -->
// sext(vector_reduce_{or,and}(<n x i1>))
// and
// vector_reduce_s{min,max}(zext(<n x i1>))
// -->
// zext(vector_reduce_{and,or}(<n x i1>))
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
if (Arg != Vect)
ExtOpc = cast<CastInst>(Arg)->getOpcode();
Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
(ExtOpc == Instruction::CastOps::ZExt))
? Builder.CreateAndReduce(Vect)
: Builder.CreateOrReduce(Vect);
if (Arg != Vect)
Res = Builder.CreateCast(ExtOpc, Res, II->getType());
return replaceInstUsesWith(CI, Res);
}
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul: {
bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
IID != Intrinsic::vector_reduce_fmul) ||
II->hasAllowReassoc();
const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
IID == Intrinsic::vector_reduce_fmul)
? 1
: 0;
Value *Arg = II->getArgOperand(ArgIdx);
Value *V;
ArrayRef<int> Mask;
if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
!match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
!cast<ShuffleVectorInst>(Arg)->isSingleSource())
break;
int Sz = Mask.size();
SmallBitVector UsedIndices(Sz);
for (int Idx : Mask) {
if (Idx == UndefMaskElem || UsedIndices.test(Idx))
break;
UsedIndices.set(Idx);
}
// The shuffle can be removed iff it only reorders elements, with no
// repeats, undefs, or other changes.
if (UsedIndices.all()) {
replaceUse(II->getOperandUse(ArgIdx), V);
return nullptr;
}
break;
}
default: {
// Handle target specific intrinsics
Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
if (V)
return V.value();
break;
}
}
if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
return Shuf;
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
// context, so it is handled in visitCallBase and we should trigger it.
return visitCallBase(*II);
}
// Fence instruction simplification
Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
// This check is solely here to handle arbitrary target-dependent syncscopes.
// TODO: Can remove if it does not matter in practice.
if (NFI && FI.isIdenticalTo(NFI))
return eraseInstFromFunction(FI);
// Returns true if FI1 is an identical or stronger fence than FI2.
auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
auto FI1SyncScope = FI1->getSyncScopeID();
// Consider same scope, where scope is global or single-thread.
if (FI1SyncScope != FI2->getSyncScopeID() ||
(FI1SyncScope != SyncScope::System &&
FI1SyncScope != SyncScope::SingleThread))
return false;
return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
};
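// e.g. (illustrative): a fence acquire immediately followed by a
// fence seq_cst in the same scope makes the first fence removable.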
if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
return eraseInstFromFunction(FI);
if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
if (isIdenticalOrStrongerFence(PFI, &FI))
return eraseInstFromFunction(FI);
return nullptr;
}
// InvokeInst simplification
Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
return visitCallBase(II);
}
// CallBrInst simplification
Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
return visitCallBase(CBI);
}
/// If this cast does not affect the value passed through the varargs area, we
/// can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallBase &Call,
const DataLayout &DL,
const CastInst *const CI,
const int ix) {
if (!CI->isLosslessCast())
return false;
// If this is a GC intrinsic, avoid munging types. We need types for
// statepoint reconstruction in SelectionDAG.
// TODO: This is probably something which should be expanded to all
// intrinsics since the entire point of intrinsics is that
// they are understandable by the optimizer.
if (isa<GCStatepointInst>(Call) || isa<GCRelocateInst>(Call) ||
isa<GCResultInst>(Call))
return false;
// Opaque pointers are compatible with any byval types.
PointerType *SrcTy = cast<PointerType>(CI->getOperand(0)->getType());
if (SrcTy->isOpaque())
return true;
// The size of ByVal or InAlloca arguments is derived from the type, so we
// can't change to a type with a different size. If the size were
// passed explicitly we could avoid this check.
if (!Call.isPassPointeeByValueArgument(ix))
return true;
// The transform currently only handles type replacement for byval, not other
// type-carrying attributes.
if (!Call.isByValArgument(ix))
return false;
Type *SrcElemTy = SrcTy->getNonOpaquePointerElementType();
Type *DstElemTy = Call.getParamByValType(ix);
if (!SrcElemTy->isSized() || !DstElemTy->isSized())
return false;
if (DL.getTypeAllocSize(SrcElemTy) != DL.getTypeAllocSize(DstElemTy))
return false;
return true;
}
Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
// Skip optimizing notail and musttail calls so
// LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
// LibCallSimplifier::optimizeCall should try to preserve tail calls though.
if (CI->isMustTailCall() || CI->isNoTailCall())
return nullptr;
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
replaceInstUsesWith(*From, With);
};
auto InstCombineErase = [this](Instruction *I) {
eraseInstFromFunction(*I);
};
LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
InstCombineErase);
if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
++NumSimplified;
return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
}
return nullptr;
}
static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
// Strip off at most one level of pointer casts, looking for an alloca. This
// is good enough in practice and simpler than handling any number of casts.
Value *Underlying = TrampMem->stripPointerCasts();
if (Underlying != TrampMem &&
(!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
return nullptr;
if (!isa<AllocaInst>(Underlying))
return nullptr;
IntrinsicInst *InitTrampoline = nullptr;
for (User *U : TrampMem->users()) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
if (!II)
return nullptr;
if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
if (InitTrampoline)
// More than one init_trampoline writes to this value. Give up.
return nullptr;
InitTrampoline = II;
continue;
}
if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
// Allow any number of calls to adjust.trampoline.
continue;
return nullptr;
}
// No call to init.trampoline found.
if (!InitTrampoline)
return nullptr;
// Check that the alloca is being used in the expected way.
if (InitTrampoline->getOperand(0) != TrampMem)
return nullptr;
return InitTrampoline;
}
static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
Value *TrampMem) {
// Visit all the previous instructions in the basic block, and try to find an
// init.trampoline which has a direct path to the adjust.trampoline.
for (BasicBlock::iterator I = AdjustTramp->getIterator(),
E = AdjustTramp->getParent()->begin();
I != E;) {
Instruction *Inst = &*--I;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
II->getOperand(0) == TrampMem)
return II;
if (Inst->mayWriteToMemory())
return nullptr;
}
return nullptr;
}
// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function. Otherwise return NULL.
static IntrinsicInst *findInitTrampoline(Value *Callee) {
Callee = Callee->stripPointerCasts();
IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
if (!AdjustTramp ||
AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
return nullptr;
Value *TrampMem = AdjustTramp->getOperand(0);
if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
return IT;
if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
return IT;
return nullptr;
}
bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
const TargetLibraryInfo *TLI) {
// Note: We only handle cases which can't be driven from generic attributes
// here. So, for example, nonnull and noalias (which are common properties
// of some allocation functions) are expected to be handled via annotation
// of the respective allocator declaration with generic attributes.
bool Changed = false;
if (!Call.getType()->isPointerTy())
return Changed;
Optional<APInt> Size = getAllocSize(&Call, TLI);
if (Size && *Size != 0) {
// TODO: We really should just emit deref_or_null here and then
// let the generic inference code combine that with nonnull.
if (Call.hasRetAttr(Attribute::NonNull)) {
Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
Call.addRetAttr(Attribute::getWithDereferenceableBytes(
Call.getContext(), Size->getLimitedValue()));
} else {
Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
Call.getContext(), Size->getLimitedValue()));
}
}
// Add alignment attribute if alignment is a power of two constant.
Value *Alignment = getAllocAlignment(&Call, TLI);
if (!Alignment)
return Changed;
ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
uint64_t AlignmentVal = AlignOpC->getZExtValue();
if (llvm::isPowerOf2_64(AlignmentVal)) {
Align ExistingAlign = Call.getRetAlign().valueOrOne();
Align NewAlign = Align(AlignmentVal);
if (NewAlign > ExistingAlign) {
Call.addRetAttr(
Attribute::getWithAlignment(Call.getContext(), NewAlign));
Changed = true;
}
}
}
return Changed;
}
/// Improvements for call, callbr and invoke instructions.
Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
bool Changed = annotateAnyAllocSite(Call, &TLI);
// Mark any parameters that are known to be non-null with the nonnull
// attribute. This is helpful for inlining calls to functions with null
// checks on their arguments.
SmallVector<unsigned, 4> ArgNos;
unsigned ArgNo = 0;
for (Value *V : Call.args()) {
if (V->getType()->isPointerTy() &&
!Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
ArgNos.push_back(ArgNo);
ArgNo++;
}
assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
if (!ArgNos.empty()) {
AttributeList AS = Call.getAttributes();
LLVMContext &Ctx = Call.getContext();
AS = AS.addParamAttribute(Ctx, ArgNos,
Attribute::get(Ctx, Attribute::NonNull));
Call.setAttributes(AS);
Changed = true;
}
// If the callee is a pointer to a function, attempt to move any casts to the
// arguments of the call/callbr/invoke.
Value *Callee = Call.getCalledOperand();
Function *CalleeF = dyn_cast<Function>(Callee);
if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
transformConstExprCastCall(Call))
return nullptr;
if (CalleeF) {
// Remove the convergent attr on calls when the callee is not convergent.
if (Call.isConvergent() && !CalleeF->isConvergent() &&
!CalleeF->isIntrinsic()) {
LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
<< "\n");
Call.setNotConvergent();
return &Call;
}
// If the call and callee calling conventions don't match, and neither one
// of the calling conventions is compatible with C calling convention
// this call must be unreachable, as the call is undefined.
if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
!(CalleeF->getCallingConv() == llvm::CallingConv::C &&
TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
!(Call.getCallingConv() == llvm::CallingConv::C &&
TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
// Only do this for calls to a function with a body. A prototype may
// not actually end up matching the implementation's calling conv for a
// variety of reasons (e.g. it may be written in assembly).
!CalleeF->isDeclaration()) {
Instruction *OldCall = &Call;
CreateNonTerminatorUnreachable(OldCall);
// If OldCall does not return void then replaceInstUsesWith poison.
// This allows ValueHandlers and custom metadata to adjust themselves.
if (!OldCall->getType()->isVoidTy())
replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
if (isa<CallInst>(OldCall))
return eraseInstFromFunction(*OldCall);
// We cannot remove an invoke or a callbr, because it would change the
// CFG; just change the callee to a null pointer.
cast<CallBase>(OldCall)->setCalledFunction(
CalleeF->getFunctionType(),
Constant::getNullValue(CalleeF->getType()));
return nullptr;
}
}
// Calling a null function pointer is undefined if a null address isn't
// dereferenceable.
if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(Call.getFunction())) ||
isa<UndefValue>(Callee)) {
// If Call does not return void then replaceInstUsesWith poison.
// This allows ValueHandlers and custom metadata to adjust themselves.
if (!Call.getType()->isVoidTy())
replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));
if (Call.isTerminator()) {
// Can't remove an invoke or callbr because we cannot change the CFG.
return nullptr;
}
// This instruction is not reachable, just remove it.
CreateNonTerminatorUnreachable(&Call);
return eraseInstFromFunction(Call);
}
if (IntrinsicInst *II = findInitTrampoline(Callee))
return transformCallThroughTrampoline(Call, *II);
// TODO: Drop this transform once opaque pointer transition is done.
FunctionType *FTy = Call.getFunctionType();
if (FTy->isVarArg()) {
int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
// the call.
for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end();
I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
replaceUse(*I, CI->getOperand(0));
// Update the byval type to match the pointer type.
// Not necessary for opaque pointers.
PointerType *NewTy = cast<PointerType>(CI->getOperand(0)->getType());
if (!NewTy->isOpaque() && Call.isByValArgument(ix)) {
Call.removeParamAttr(ix, Attribute::ByVal);
Call.addParamAttr(ix, Attribute::getWithByValType(
Call.getContext(),
NewTy->getNonOpaquePointerElementType()));
}
Changed = true;
}
}
}
if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
InlineAsm *IA = cast<InlineAsm>(Callee);
if (!IA->canThrow()) {
// Normal inline asm calls cannot throw - mark them
// 'nounwind'.
Call.setDoesNotThrow();
Changed = true;
}
}
// Try to optimize the call if possible, we require DataLayout for most of
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
Instruction *I = tryOptimizeCall(CI);
// If we changed something, return the result; otherwise let the
// fallthrough checks run.
if (I) return eraseInstFromFunction(*I);
}
if (!Call.use_empty() && !Call.isMustTailCall())
if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
Type *CallTy = Call.getType();
Type *RetArgTy = ReturnedArg->getType();
if (RetArgTy->canLosslesslyBitCastTo(CallTy))
return replaceInstUsesWith(
Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
}
if (isRemovableAlloc(&Call, &TLI))
return visitAllocSite(Call);
// Handle intrinsics which can be used in both call and invoke context.
switch (Call.getIntrinsicID()) {
case Intrinsic::experimental_gc_statepoint: {
GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
SmallPtrSet<Value *, 32> LiveGcValues;
for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
// Remove the relocation if unused.
if (GCR.use_empty()) {
eraseInstFromFunction(GCR);
continue;
}
Value *DerivedPtr = GCR.getDerivedPtr();
Value *BasePtr = GCR.getBasePtr();
// Undef is undef, even after relocation.
if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
eraseInstFromFunction(GCR);
continue;
}
if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
// The relocation of null will be null for most any collector.
// TODO: provide a hook for this in GCStrategy. There might be some
// weird collector this property does not hold for.
if (isa<ConstantPointerNull>(DerivedPtr)) {
// Use null-pointer of gc_relocate's type to replace it.
replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
eraseInstFromFunction(GCR);
continue;
}
// isKnownNonNull -> nonnull attribute
if (!GCR.hasRetAttr(Attribute::NonNull) &&
isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
GCR.addRetAttr(Attribute::NonNull);
// We discovered a new fact; re-check users.
Worklist.pushUsersToWorkList(GCR);
}
}
// If we have two copies of the same pointer in the statepoint argument
// list, canonicalize to one. This may let us common gc.relocates.
if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
auto *OpIntTy = GCR.getOperand(2)->getType();
GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
}
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
// Canonicalize on the type from the uses to the defs
// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
LiveGcValues.insert(BasePtr);
LiveGcValues.insert(DerivedPtr);
}
Optional<OperandBundleUse> Bundle =
GCSP.getOperandBundle(LLVMContext::OB_gc_live);
unsigned NumOfGCLives = LiveGcValues.size();
if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
break;
// We can reduce the size of gc live bundle.
DenseMap<Value *, unsigned> Val2Idx;
std::vector<Value *> NewLiveGc;
for (unsigned I = 0, E = Bundle->Inputs.size(); I < E; ++I) {
Value *V = Bundle->Inputs[I];
if (Val2Idx.count(V))
continue;
if (LiveGcValues.count(V)) {
Val2Idx[V] = NewLiveGc.size();
NewLiveGc.push_back(V);
} else
Val2Idx[V] = NumOfGCLives;
}
// Update all gc.relocates
for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
Value *BasePtr = GCR.getBasePtr();
assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
"Missed live gc for base pointer");
auto *OpIntTy1 = GCR.getOperand(1)->getType();
GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
Value *DerivedPtr = GCR.getDerivedPtr();
assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
"Missed live gc for derived pointer");
auto *OpIntTy2 = GCR.getOperand(2)->getType();
GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
}
// Create new statepoint instruction.
OperandBundleDef NewBundle("gc-live", NewLiveGc);
return CallBase::Create(&Call, NewBundle);
}
default: { break; }
}
return Changed ? &Call : nullptr;
}
/// If the callee is a constexpr cast of a function, attempt to move the cast to
/// the arguments of the call/callbr/invoke.
bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
auto *Callee =
dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
if (!Callee)
return false;
// If this is a call to a thunk function, don't remove the cast. Thunks are
// used to transparently forward all incoming parameters and outgoing return
// values, so it's important to leave the cast in place.
if (Callee->hasFnAttribute("thunk"))
return false;
// If this is a musttail call, the callee's prototype must match the caller's
// prototype with the exception of pointee types. The code below doesn't
// implement that, so we can't do this transform.
// TODO: Do the transform if it only requires adding pointer casts.
if (Call.isMustTailCall())
return false;
Instruction *Caller = &Call;
const AttributeList &CallerPAL = Call.getAttributes();
// Okay, this is a cast from a function to a different type. Unless doing so
// would cause a type conversion of one of our arguments, change this call to
// be a direct call with arguments casted to the appropriate types.
FunctionType *FT = Callee->getFunctionType();
Type *OldRetTy = Caller->getType();
Type *NewRetTy = FT->getReturnType();
// Check to see if we are changing the return type...
if (OldRetTy != NewRetTy) {
if (NewRetTy->isStructTy())
return false; // TODO: Handle multiple return values.
if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
if (Callee->isDeclaration())
return false; // Cannot transform this return value.
if (!Caller->use_empty() &&
// void -> non-void is handled specially
!NewRetTy->isVoidTy())
return false; // Cannot transform this return value.
}
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
// If the callbase is an invoke/callbr instruction, and the return value is
// used by a PHI node in a successor, we cannot change the return type of
// the call because there is no place to put the cast instruction (without
// breaking the critical edge). Bail out in this case.
if (!Caller->use_empty()) {
BasicBlock *PhisNotSupportedBlock = nullptr;
if (auto *II = dyn_cast<InvokeInst>(Caller))
PhisNotSupportedBlock = II->getNormalDest();
if (auto *CB = dyn_cast<CallBrInst>(Caller))
PhisNotSupportedBlock = CB->getDefaultDest();
if (PhisNotSupportedBlock)
for (User *U : Caller->users())
if (PHINode *PN = dyn_cast<PHINode>(U))
if (PN->getParent() == PhisNotSupportedBlock)
return false;
}
}
unsigned NumActualArgs = Call.arg_size();
unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
// Prevent us turning:
// declare void @takes_i32_inalloca(i32* inalloca)
// call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
//
// into:
// call void @takes_i32_inalloca(i32* null)
//
// Similarly, avoid folding away bitcasts of byval calls.
if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
return false;
auto AI = Call.arg_begin();
for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
Type *ActTy = (*AI)->getType();
if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
return false; // Cannot transform this parameter value.
// Check if there are any incompatible attributes we cannot drop safely.
if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
.overlaps(AttributeFuncs::typeIncompatible(
ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
return false; // Attribute not compatible with transformed value.
if (Call.isInAllocaArgument(i) ||
CallerPAL.hasParamAttr(i, Attribute::Preallocated))
return false; // Cannot transform to and from inalloca/preallocated.
if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
return false;
+ if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
+ Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
+ return false; // Cannot transform to or from byval.
+
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (!ParamPTy)
return false;
if (!ParamPTy->isOpaque()) {
Type *ParamElTy = ParamPTy->getNonOpaquePointerElementType();
if (!ParamElTy->isSized())
return false;
Type *CurElTy = Call.getParamByValType(i);
if (DL.getTypeAllocSize(CurElTy) != DL.getTypeAllocSize(ParamElTy))
return false;
}
}
}
if (Callee->isDeclaration()) {
// Do not delete arguments unless we have a function body.
if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
return false;
// If the callee is just a declaration, don't change the varargsness of the
// call. We don't want to introduce a varargs call where one doesn't
// already exist.
if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
return false;
// If both the callee and the cast type are varargs, we still have to make
// sure the number of fixed parameters is the same, or we have the same
// ABI issues as if we introduce a varargs call.
if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
FT->getNumParams() != Call.getFunctionType()->getNumParams())
return false;
}
if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
!CallerPAL.isEmpty()) {
// In this case we have more arguments than the new function type, but we
// won't be dropping them. Check that these extra arguments have attributes
// that are compatible with being a vararg call argument.
unsigned SRetIdx;
if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
return false;
}
// Okay, we decided that this is a safe thing to do: go ahead and start
// inserting cast instructions as necessary.
SmallVector<Value *, 8> Args;
SmallVector<AttributeSet, 8> ArgAttrs;
Args.reserve(NumActualArgs);
ArgAttrs.reserve(NumActualArgs);
// Get any return attributes.
AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
LLVMContext &Ctx = Call.getContext();
AI = Call.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
Value *NewArg = *AI;
if ((*AI)->getType() != ParamTy)
NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
Args.push_back(NewArg);
// Add any parameter attributes except the ones incompatible with the new
// type. Note that we made sure all incompatible ones are safe to drop.
AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
if (CallerPAL.hasParamAttr(i, Attribute::ByVal) &&
!ParamTy->isOpaquePointerTy()) {
AttrBuilder AB(Ctx, CallerPAL.getParamAttrs(i).removeAttributes(
Ctx, IncompatibleAttrs));
AB.addByValAttr(ParamTy->getNonOpaquePointerElementType());
ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
} else {
ArgAttrs.push_back(
CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
}
}
// If the function takes more arguments than the call was taking, add them
// now.
for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
Args.push_back(Constant::getNullValue(FT->getParamType(i)));
ArgAttrs.push_back(AttributeSet());
}
// If we are removing arguments to the function, emit an obnoxious warning.
if (FT->getNumParams() < NumActualArgs) {
// TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
if (FT->isVarArg()) {
// Add all of the arguments in their promoted form to the arg list.
for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
Type *PTy = getPromotedType((*AI)->getType());
Value *NewArg = *AI;
if (PTy != (*AI)->getType()) {
// Must promote to pass through va_arg area!
Instruction::CastOps opcode =
CastInst::getCastOpcode(*AI, false, PTy, false);
NewArg = Builder.CreateCast(opcode, *AI, PTy);
}
Args.push_back(NewArg);
// Add any parameter attributes.
ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
}
}
}
AttributeSet FnAttrs = CallerPAL.getFnAttrs();
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
"missing argument attributes");
AttributeList NewCallerPAL = AttributeList::get(
Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
Call.getOperandBundlesAsDefs(OpBundles);
CallBase *NewCall;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
II->getUnwindDest(), Args, OpBundles);
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
CBI->getIndirectDests(), Args, OpBundles);
} else {
NewCall = Builder.CreateCall(Callee, Args, OpBundles);
cast<CallInst>(NewCall)->setTailCallKind(
cast<CallInst>(Caller)->getTailCallKind());
}
NewCall->takeName(Caller);
NewCall->setCallingConv(Call.getCallingConv());
NewCall->setAttributes(NewCallerPAL);
// Preserve prof metadata if any.
NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
// Insert a cast of the return type as necessary.
Instruction *NC = NewCall;
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (!NV->getType()->isVoidTy()) {
NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
NC->setDebugLoc(Caller->getDebugLoc());
// If this is an invoke/callbr instruction, we should insert it after the
// first non-phi instruction in the normal successor block.
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
InsertNewInstBefore(NC, *I);
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
BasicBlock::iterator I = CBI->getDefaultDest()->getFirstInsertionPt();
InsertNewInstBefore(NC, *I);
} else {
// Otherwise, it's a call, just insert cast right after the call.
InsertNewInstBefore(NC, *Caller);
}
Worklist.pushUsersToWorkList(*Caller);
} else {
NV = UndefValue::get(Caller->getType());
}
}
if (!Caller->use_empty())
replaceInstUsesWith(*Caller, NV);
else if (Caller->hasValueHandle()) {
if (OldRetTy == NV->getType())
ValueHandleBase::ValueIsRAUWd(Caller, NV);
else
// We cannot call ValueIsRAUWd with a different type, and the
// actual tracked value will disappear.
ValueHandleBase::ValueIsDeleted(Caller);
}
eraseInstFromFunction(*Caller);
return true;
}
/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
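/// For example (illustrative): after
///   call void @llvm.init.trampoline(i8* %tramp, i8* bitcast (i32 (i8*, i32)* @f to i8*), i8* %nval)
/// a call made through the adjusted trampoline pointer becomes a direct call
/// to @f with %nval spliced in as the 'nest' argument.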
Instruction *
InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
IntrinsicInst &Tramp) {
Value *Callee = Call.getCalledOperand();
Type *CalleeTy = Callee->getType();
FunctionType *FTy = Call.getFunctionType();
AttributeList Attrs = Call.getAttributes();
// If the call already has the 'nest' attribute somewhere then give up -
// otherwise 'nest' would occur twice after splicing in the chain.
if (Attrs.hasAttrSomewhere(Attribute::Nest))
return nullptr;
Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
FunctionType *NestFTy = NestF->getFunctionType();
AttributeList NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
unsigned NestArgNo = 0;
Type *NestTy = nullptr;
AttributeSet NestAttr;
// Look for a parameter marked with the 'nest' attribute.
for (FunctionType::param_iterator I = NestFTy->param_begin(),
E = NestFTy->param_end();
I != E; ++NestArgNo, ++I) {
AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
if (AS.hasAttribute(Attribute::Nest)) {
// Record the parameter type and any other attributes.
NestTy = *I;
NestAttr = AS;
break;
}
}
if (NestTy) {
std::vector<Value*> NewArgs;
std::vector<AttributeSet> NewArgAttrs;
NewArgs.reserve(Call.arg_size() + 1);
NewArgAttrs.reserve(Call.arg_size());
// Insert the nest argument into the call argument list, which may
// mean appending it. Likewise for attributes.
{
unsigned ArgNo = 0;
auto I = Call.arg_begin(), E = Call.arg_end();
do {
if (ArgNo == NestArgNo) {
// Add the chain argument and attributes.
Value *NestVal = Tramp.getArgOperand(2);
if (NestVal->getType() != NestTy)
NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
NewArgs.push_back(NestVal);
NewArgAttrs.push_back(NestAttr);
}
if (I == E)
break;
// Add the original argument and attributes.
NewArgs.push_back(*I);
NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
++ArgNo;
++I;
} while (true);
}
// The trampoline may have been bitcast to a bogus type (FTy).
// Handle this by synthesizing a new function type, equal to FTy
// with the chain parameter inserted.
std::vector<Type*> NewTypes;
NewTypes.reserve(FTy->getNumParams()+1);
// Insert the chain's type into the list of parameter types, which may
// mean appending it.
{
unsigned ArgNo = 0;
FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end();
do {
if (ArgNo == NestArgNo)
// Add the chain's type.
NewTypes.push_back(NestTy);
if (I == E)
break;
// Add the original type.
NewTypes.push_back(*I);
++ArgNo;
++I;
} while (true);
}
// Replace the trampoline call with a direct call. Let the generic
// code sort out any function type mismatches.
FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
FTy->isVarArg());
Constant *NewCallee =
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
AttributeList NewPAL =
AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
Attrs.getRetAttrs(), NewArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
Call.getOperandBundlesAsDefs(OpBundles);
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
NewCaller = InvokeInst::Create(NewFTy, NewCallee,
II->getNormalDest(), II->getUnwindDest(),
NewArgs, OpBundles);
cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
NewCaller =
CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
CBI->getIndirectDests(), NewArgs, OpBundles);
cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
} else {
NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
cast<CallInst>(NewCaller)->setTailCallKind(
cast<CallInst>(Call).getTailCallKind());
cast<CallInst>(NewCaller)->setCallingConv(
cast<CallInst>(Call).getCallingConv());
cast<CallInst>(NewCaller)->setAttributes(NewPAL);
}
NewCaller->setDebugLoc(Call.getDebugLoc());
return NewCaller;
}
}
// Replace the trampoline call with a direct call. Since there is no 'nest'
// parameter, there is no need to adjust the argument list. Let the generic
// code sort out any function type mismatches.
Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
Call.setCalledFunction(FTy, NewCallee);
return &Call;
}
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 143a035749c7..644c5c82e58e 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1,4815 +1,4820 @@
//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This transformation implements the well known scalar replacement of
/// aggregates transformation. It tries to identify promotable elements of an
/// aggregate alloca, and promote them to registers. It will also try to
/// convert uses of an element (or set of elements) of an alloca into a vector
/// or bitfield-style integer scalar if appropriate.
///
/// It works to do this with minimal slicing of the alloca so that regions
/// which are merely transferred in and out of external memory remain unchanged
/// and are not decomposed to scalar code.
///
/// Because this also performs alloca promotion, it can be thought of as also
/// serving the purpose of SSA formation. The algorithm iterates on the
/// function until all opportunities for promotion have been realized.
///
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantFolder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::sroa;
#define DEBUG_TYPE "sroa"
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
/// Hidden option to experiment with completely strict handling of inbounds
/// GEPs.
static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
cl::Hidden);
namespace {
/// A custom IRBuilder inserter which prefixes all names, but only in
/// Assert builds.
class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
std::string Prefix;
Twine getNameWithPrefix(const Twine &Name) const {
return Name.isTriviallyEmpty() ? Name : Prefix + Name;
}
public:
void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
BasicBlock::iterator InsertPt) const override {
IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name), BB,
InsertPt);
}
};
/// Provide a type for IRBuilder that drops names in release builds.
using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
/// A used slice of an alloca.
///
/// This structure represents a slice of an alloca used by some instruction. It
/// stores both the begin and end offsets of this use, a pointer to the use
/// itself, and a flag indicating whether we can classify the use as splittable
/// or not when forming partitions of the alloca.
class Slice {
/// The beginning offset of the range.
uint64_t BeginOffset = 0;
/// The ending offset, not included in the range.
uint64_t EndOffset = 0;
/// Storage for both the use of this slice and whether it can be
/// split.
PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
public:
Slice() = default;
Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
: BeginOffset(BeginOffset), EndOffset(EndOffset),
UseAndIsSplittable(U, IsSplittable) {}
uint64_t beginOffset() const { return BeginOffset; }
uint64_t endOffset() const { return EndOffset; }
bool isSplittable() const { return UseAndIsSplittable.getInt(); }
void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
Use *getUse() const { return UseAndIsSplittable.getPointer(); }
bool isDead() const { return getUse() == nullptr; }
void kill() { UseAndIsSplittable.setPointer(nullptr); }
/// Support for ordering ranges.
///
/// This provides an ordering over ranges such that start offsets are
/// always increasing, and within equal start offsets, the end offsets are
/// decreasing. Thus the spanning range comes first in a cluster with the
/// same start position.
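/// For example (illustrative), slices beginning at the same offset sort
/// with unsplittable slices first and then by decreasing end offset:
///   [0, 8) unsplittable < [0, 16) splittable < [0, 8) splittable < [4, 8)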
bool operator<(const Slice &RHS) const {
if (beginOffset() < RHS.beginOffset())
return true;
if (beginOffset() > RHS.beginOffset())
return false;
if (isSplittable() != RHS.isSplittable())
return !isSplittable();
if (endOffset() > RHS.endOffset())
return true;
return false;
}
/// Support comparison with a single offset to allow binary searches.
friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
uint64_t RHSOffset) {
return LHS.beginOffset() < RHSOffset;
}
friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
const Slice &RHS) {
return LHSOffset < RHS.beginOffset();
}
bool operator==(const Slice &RHS) const {
return isSplittable() == RHS.isSplittable() &&
beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
}
bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
};
} // end anonymous namespace
/// Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
/// various uses. If a pointer escapes, we can't fully build a representation
/// for the slices used and we reflect that in this structure. The uses are
/// stored, sorted by increasing beginning offset and with unsplittable slices
/// starting at a particular offset before splittable slices.
class llvm::sroa::AllocaSlices {
public:
/// Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
/// Test whether a pointer to the allocation escapes our analysis.
///
/// If this is true, the slices are never fully built and should be
/// ignored.
bool isEscaped() const { return PointerEscapingInstr; }
/// Support for iterating over the slices.
/// @{
using iterator = SmallVectorImpl<Slice>::iterator;
using range = iterator_range<iterator>;
iterator begin() { return Slices.begin(); }
iterator end() { return Slices.end(); }
using const_iterator = SmallVectorImpl<Slice>::const_iterator;
using const_range = iterator_range<const_iterator>;
const_iterator begin() const { return Slices.begin(); }
const_iterator end() const { return Slices.end(); }
/// @}
/// Erase a range of slices.
void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
/// Insert new slices for this alloca.
///
/// This moves the slices into the alloca's slices collection, and re-sorts
/// everything so that the usual ordering properties of the alloca's slices
/// hold.
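/// Sorting only the appended slices and then merging keeps this cheaper
/// than re-sorting the whole vector when few slices are inserted.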
void insert(ArrayRef<Slice> NewSlices) {
int OldSize = Slices.size();
Slices.append(NewSlices.begin(), NewSlices.end());
auto SliceI = Slices.begin() + OldSize;
llvm::sort(SliceI, Slices.end());
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
}
// Forward declare the iterator and range accessor for walking the
// partitions.
class partition_iterator;
iterator_range<partition_iterator> partitions();
/// Access the dead users for this alloca.
ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
/// Access Uses that should be dropped if the alloca is promotable.
ArrayRef<Use *> getDeadUsesIfPromotable() const {
return DeadUseIfPromotable;
}
/// Access the dead operands referring to this alloca.
///
/// These are operands which cannot actually be used to refer to the
/// alloca as they are outside its range and the user doesn't correct for
/// that. These mostly consist of PHI node inputs and the like which we just
/// need to replace with undef.
ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
void printSlice(raw_ostream &OS, const_iterator I,
StringRef Indent = " ") const;
void printUse(raw_ostream &OS, const_iterator I,
StringRef Indent = " ") const;
void print(raw_ostream &OS) const;
void dump(const_iterator I) const;
void dump() const;
#endif
private:
template <typename DerivedT, typename RetT = void> class BuilderBase;
class SliceBuilder;
friend class AllocaSlices::SliceBuilder;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
#endif
/// The instruction responsible for this alloca not having a known set
/// of slices.
///
/// When an instruction (potentially) escapes the pointer to the alloca, we
/// store a pointer to that here and abort trying to form slices of the
/// alloca. This will be null if the alloca slices are analyzed successfully.
Instruction *PointerEscapingInstr;
/// The slices of the alloca.
///
/// We store a vector of the slices formed by uses of the alloca here. This
/// vector is sorted by increasing begin offset, and then the unsplittable
/// slices before the splittable ones. See the Slice inner class for more
/// details.
SmallVector<Slice, 8> Slices;
/// Instructions which will become dead if we rewrite the alloca.
///
/// Note that these are not separated by slice. This is because we expect an
/// alloca to be completely rewritten or not rewritten at all. If rewritten,
/// all these instructions can simply be removed and replaced with poison as
/// they come from outside of the allocated space.
SmallVector<Instruction *, 8> DeadUsers;
/// Uses which will become dead if we can promote the alloca.
SmallVector<Use *, 8> DeadUseIfPromotable;
/// Operands which will become dead if we rewrite the alloca.
///
/// These are operands that in their particular use can be replaced with
/// poison when we rewrite the alloca. These show up in out-of-bounds inputs
/// to PHI nodes and the like. They aren't entirely dead (there might be
/// a GEP back into the bounds using it elsewhere), and neither is the PHI, but we
/// want to swap this particular input for poison to simplify the use lists of
/// the alloca.
SmallVector<Use *, 8> DeadOperands;
};
/// A partition of the slices.
///
/// An ephemeral representation for a range of slices which can be viewed as
/// a partition of the alloca. This range represents a span of the alloca's
/// memory which cannot be split, and provides access to all of the slices
/// overlapping some part of the partition.
///
/// Objects of this type are produced by traversing the alloca's slices, but
/// are only ephemeral and not persistent.
class llvm::sroa::Partition {
private:
friend class AllocaSlices;
friend class AllocaSlices::partition_iterator;
using iterator = AllocaSlices::iterator;
/// The beginning and ending offsets of the alloca for this
/// partition.
uint64_t BeginOffset = 0, EndOffset = 0;
/// The start and end iterators of this partition.
iterator SI, SJ;
/// A collection of split slice tails overlapping the partition.
SmallVector<Slice *, 4> SplitTails;
/// Raw constructor builds an empty partition starting and ending at
/// the given iterator.
Partition(iterator SI) : SI(SI), SJ(SI) {}
public:
/// The start offset of this partition.
///
/// All of the contained slices start at or after this offset.
uint64_t beginOffset() const { return BeginOffset; }
/// The end offset of this partition.
///
/// All of the contained slices end at or before this offset.
uint64_t endOffset() const { return EndOffset; }
/// The size of the partition.
///
/// Note that this can never be zero.
uint64_t size() const {
assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
return EndOffset - BeginOffset;
}
/// Test whether this partition contains no slices, and merely spans
/// a region occupied by split slices.
bool empty() const { return SI == SJ; }
/// \name Iterate slices that start within the partition.
/// These may be splittable or unsplittable. They have a begin offset >= the
/// partition begin offset.
/// @{
// FIXME: We should probably define a "concat_iterator" helper and use that
// to stitch together pointee_iterators over the split tails and the
// contiguous iterators of the partition. That would give a much nicer
// interface here. We could then additionally expose filtered iterators for
// split, unsplit, and unsplittable slices based on the usage patterns.
iterator begin() const { return SI; }
iterator end() const { return SJ; }
/// @}
/// Get the sequence of split slice tails.
///
/// These tails are of slices which start before this partition but are
/// split and overlap into the partition. We accumulate these while forming
/// partitions.
ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
};
/// An iterator over partitions of the alloca's slices.
///
/// This iterator implements the core algorithm for partitioning the alloca's
/// slices. It is a forward iterator as we don't support backtracking for
/// efficiency reasons, and re-use a single storage area to maintain the
/// current set of split slices.
///
/// It is templated on the slice iterator type to use so that it can operate
/// with either const or non-const slice iterators.
class AllocaSlices::partition_iterator
: public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
Partition> {
friend class AllocaSlices;
/// Most of the state for walking the partitions is held in a class
/// with a nice interface for examining them.
Partition P;
/// We need to keep the end of the slices to know when to stop.
AllocaSlices::iterator SE;
/// We also need to keep track of the maximum split end offset seen.
/// FIXME: Do we really?
uint64_t MaxSplitSliceEndOffset = 0;
/// Sets the partition to be empty at the given iterator, and sets the
/// end iterator.
partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
: P(SI), SE(SE) {
// If not already at the end, advance our state to form the initial
// partition.
if (SI != SE)
advance();
}
/// Advance the iterator to the next partition.
///
/// Requires that the iterator not be at the end of the slices.
void advance() {
assert((P.SI != SE || !P.SplitTails.empty()) &&
"Cannot advance past the end of the slices!");
// Clear out any split uses which have ended.
if (!P.SplitTails.empty()) {
if (P.EndOffset >= MaxSplitSliceEndOffset) {
// If we've finished all splits, this is easy.
P.SplitTails.clear();
MaxSplitSliceEndOffset = 0;
} else {
// Remove the uses which have ended in the prior partition. This
// cannot change the max split slice end because we just checked that
// the prior partition ended prior to that max.
llvm::erase_if(P.SplitTails,
[&](Slice *S) { return S->endOffset() <= P.EndOffset; });
assert(llvm::any_of(P.SplitTails,
[&](Slice *S) {
return S->endOffset() == MaxSplitSliceEndOffset;
}) &&
"Could not find the current max split slice offset!");
assert(llvm::all_of(P.SplitTails,
[&](Slice *S) {
return S->endOffset() <= MaxSplitSliceEndOffset;
}) &&
"Max split slice end offset is not actually the max!");
}
}
// If P.SI is already at the end, then we've cleared the split tail and
// now have an end iterator.
if (P.SI == SE) {
assert(P.SplitTails.empty() && "Failed to clear the split slices!");
return;
}
// If we had a non-empty partition previously, set up the state for
// subsequent partitions.
if (P.SI != P.SJ) {
// Accumulate all the splittable slices which started in the old
// partition into the split list.
for (Slice &S : P)
if (S.isSplittable() && S.endOffset() > P.EndOffset) {
P.SplitTails.push_back(&S);
MaxSplitSliceEndOffset =
std::max(S.endOffset(), MaxSplitSliceEndOffset);
}
// Start from the end of the previous partition.
P.SI = P.SJ;
// If P.SI is now at the end, we at most have a tail of split slices.
if (P.SI == SE) {
P.BeginOffset = P.EndOffset;
P.EndOffset = MaxSplitSliceEndOffset;
return;
}
// If we have split slices and the next slice is after a gap and is
// not splittable immediately form an empty partition for the split
// slices up until the next slice begins.
if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
!P.SI->isSplittable()) {
P.BeginOffset = P.EndOffset;
P.EndOffset = P.SI->beginOffset();
return;
}
}
// OK, we need to consume new slices. Set the end offset based on the
// current slice, and step SJ past it. The beginning offset of the
// partition is the beginning offset of the next slice unless we have
// pre-existing split slices that are continuing, in which case we begin
// at the prior end offset.
P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
P.EndOffset = P.SI->endOffset();
++P.SJ;
// There are two strategies to form a partition based on whether the
// partition starts with an unsplittable slice or a splittable slice.
if (!P.SI->isSplittable()) {
// When we're forming an unsplittable region, it must always start at
// the first slice and will extend through its end.
assert(P.BeginOffset == P.SI->beginOffset());
// Form a partition including all of the overlapping slices with this
// unsplittable slice.
while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
if (!P.SJ->isSplittable())
P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
++P.SJ;
}
// We have a partition across a set of overlapping unsplittable
// slices.
return;
}
// If we're starting with a splittable slice, then we need to form
// a synthetic partition spanning it and any other overlapping splittable
// slices.
assert(P.SI->isSplittable() && "Forming a splittable partition!");
// Collect all of the overlapping splittable slices.
while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
P.SJ->isSplittable()) {
P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
++P.SJ;
}
// Back up P.EndOffset if we ended the span early when encountering an
// unsplittable slice. This synthesizes the early end offset of
// a partition spanning only splittable slices.
if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
assert(!P.SJ->isSplittable());
P.EndOffset = P.SJ->beginOffset();
}
}
public:
bool operator==(const partition_iterator &RHS) const {
assert(SE == RHS.SE &&
"End iterators don't match between compared partition iterators!");
// The observed positions of partitions are marked by the P.SI iterator and
// the emptiness of the split slices. The latter is only relevant when
// P.SI == SE, as the end iterator will additionally have an empty split
// slices list, but the prior may have the same P.SI and a tail of split
// slices.
if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
assert(P.SJ == RHS.P.SJ &&
"Same set of slices formed two different sized partitions!");
assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
"Same slice position with differently sized non-empty split "
"slice tails!");
return true;
}
return false;
}
partition_iterator &operator++() {
advance();
return *this;
}
Partition &operator*() { return P; }
};
/// A forward range over the partitions of the alloca's slices.
///
/// This accesses an iterator range over the partitions of the alloca's
/// slices. It computes these partitions on the fly based on the overlapping
/// offsets of the slices and the ability to split them. It will visit "empty"
/// partitions to cover regions of the alloca only accessed via split
/// slices.
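/// For example (illustrative): slices [0, 8) unsplittable and [0, 16)
/// splittable form two partitions: [0, 8) containing both slices, and an
/// "empty" [8, 16) covered only by the split tail of the second slice.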
iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
return make_range(partition_iterator(begin(), end()),
partition_iterator(end(), end()));
}
static Value *foldSelectInst(SelectInst &SI) {
// If the condition being selected on is a constant or the same value is
// being selected between, fold the select. Yes this does (rarely) happen
// early on.
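// E.g. (illustrative): 'select i1 true, i8* %a, i8* %b' folds to %a, and
// 'select i1 %c, i8* %a, i8* %a' folds to %a.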
if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
return SI.getOperand(1 + CI->isZero());
if (SI.getOperand(1) == SI.getOperand(2))
return SI.getOperand(1);
return nullptr;
}
/// A helper that folds a PHI node or a select.
static Value *foldPHINodeOrSelectInst(Instruction &I) {
if (PHINode *PN = dyn_cast<PHINode>(&I)) {
// If PN merges together the same value, return that value.
return PN->hasConstantValue();
}
return foldSelectInst(cast<SelectInst>(I));
}
/// Builder for the alloca slices.
///
/// This class builds a set of alloca slices by recursively visiting the uses
/// of an alloca and making a slice for each load and store at each offset.
class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
friend class PtrUseVisitor<SliceBuilder>;
friend class InstVisitor<SliceBuilder>;
using Base = PtrUseVisitor<SliceBuilder>;
const uint64_t AllocSize;
AllocaSlices &AS;
SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
/// Set to de-duplicate dead instructions found in the use walk.
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
: PtrUseVisitor<SliceBuilder>(DL),
AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize()),
AS(AS) {}
private:
void markAsDead(Instruction &I) {
if (VisitedDeadInsts.insert(&I).second)
AS.DeadUsers.push_back(&I);
}
void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
bool IsSplittable = false) {
// Completely skip uses which have a zero size or start either before or
// past the end of the allocation.
if (Size == 0 || Offset.uge(AllocSize)) {
LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
<< Offset
<< " which has zero size or starts outside of the "
<< AllocSize << " byte alloca:\n"
<< " alloca: " << AS.AI << "\n"
<< " use: " << I << "\n");
return markAsDead(I);
}
uint64_t BeginOffset = Offset.getZExtValue();
uint64_t EndOffset = BeginOffset + Size;
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
// This may appear superficially to be something we could ignore entirely,
// but that is not so! There may be widened loads or PHI-node uses where
// some instructions are dead but not others. We can't completely ignore
// them, and so have to record at least the information here.
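// E.g. (illustrative): a 16 byte use at offset 8 of a 16 byte alloca is
// recorded as the clamped slice [8, 16) rather than being dropped.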
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
<< Offset << " to remain within the " << AllocSize
<< " byte alloca:\n"
<< " alloca: " << AS.AI << "\n"
<< " use: " << I << "\n");
EndOffset = AllocSize;
}
AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
}
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
return Base::visitBitCastInst(BC);
}
void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
if (ASC.use_empty())
return markAsDead(ASC);
return Base::visitAddrSpaceCastInst(ASC);
}
void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (GEPI.use_empty())
return markAsDead(GEPI);
if (SROAStrictInbounds && GEPI.isInBounds()) {
// FIXME: This is a manually un-factored variant of the basic code inside
// of GEPs with checking of the inbounds invariant specified in the
// langref in a very strict sense. If we ever want to enable
// SROAStrictInbounds, this code should be factored cleanly into
// PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
// by writing out the code here where we have the underlying allocation
// size readily available.
APInt GEPOffset = Offset;
const DataLayout &DL = GEPI.getModule()->getDataLayout();
for (gep_type_iterator GTI = gep_type_begin(GEPI),
GTE = gep_type_end(GEPI);
GTI != GTE; ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
if (!OpC)
break;
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = GTI.getStructTypeOrNull()) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
GEPOffset +=
APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
} else {
// For array or vector indices, scale the index by the size of the
// type.
APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
GEPOffset +=
Index *
APInt(Offset.getBitWidth(),
DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize());
}
// If this index has computed an intermediate pointer which is not
// inbounds, then the result of the GEP is a poison value and we can
// delete it and all uses.
if (GEPOffset.ugt(AllocSize))
return markAsDead(GEPI);
}
}
return Base::visitGetElementPtrInst(GEPI);
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
uint64_t Size, bool IsVolatile) {
// We allow splitting of non-volatile loads and stores where the type is an
// integer type. These may be used to implement 'memcpy' or other "transfer
// of bits" patterns.
bool IsSplittable =
Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
insertUse(I, Offset, Size, IsSplittable);
}
void visitLoadInst(LoadInst &LI) {
assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
"All simple FCA loads should have been pre-split");
if (!IsOffsetKnown)
return PI.setAborted(&LI);
if (LI.isVolatile() &&
LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&LI);
if (isa<ScalableVectorType>(LI.getType()))
return PI.setAborted(&LI);
uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize();
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
Value *ValOp = SI.getValueOperand();
if (ValOp == *U)
return PI.setEscapedAndAborted(&SI);
if (!IsOffsetKnown)
return PI.setAborted(&SI);
if (SI.isVolatile() &&
SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&SI);
if (isa<ScalableVectorType>(ValOp->getType()))
return PI.setAborted(&SI);
uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();
// If this memory access can be shown to *statically* extend outside the
// bounds of the allocation, its behavior is undefined, so simply
// ignore it. Note that this is more strict than the generic clamping
// behavior of insertUse. We also try to handle cases which might run the
// risk of overflow.
// FIXME: We should instead consider the pointer to have escaped if this
// function is being instrumented for addressing bugs or race conditions.
if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
<< Offset << " which extends past the end of the "
<< AllocSize << " byte alloca:\n"
<< " alloca: " << AS.AI << "\n"
<< " use: " << SI << "\n");
return markAsDead(SI);
}
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
void visitMemSetInst(MemSetInst &II) {
assert(II.getRawDest() == *U && "Pointer use is not the destination?");
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
if ((Length && Length->getValue() == 0) ||
(IsOffsetKnown && Offset.uge(AllocSize)))
// Zero-length mem transfer intrinsics can be ignored entirely.
return markAsDead(II);
if (!IsOffsetKnown)
return PI.setAborted(&II);
// Don't replace this with a store with a different address space. TODO:
// Use a store with the casted new alloca?
if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&II);
insertUse(II, Offset, Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(),
(bool)Length);
}
void visitMemTransferInst(MemTransferInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
if (Length && Length->getValue() == 0)
// Zero-length mem transfer intrinsics can be ignored entirely.
return markAsDead(II);
// Because we can visit these intrinsics twice, also check to see if the
// first time marked this instruction as dead. If so, skip it.
if (VisitedDeadInsts.count(&II))
return;
if (!IsOffsetKnown)
return PI.setAborted(&II);
// Don't replace this with a load/store with a different address space.
// TODO: Use a store with the casted new alloca?
if (II.isVolatile() &&
(II.getDestAddressSpace() != DL.getAllocaAddrSpace() ||
II.getSourceAddressSpace() != DL.getAllocaAddrSpace()))
return PI.setAborted(&II);
// This side of the transfer is completely out-of-bounds, and so we can
// nuke the entire transfer. However, we also need to nuke the other side
// if already added to our partitions.
// FIXME: Yet another place we really should bypass this when
// instrumenting for ASan.
if (Offset.uge(AllocSize)) {
SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
MemTransferSliceMap.find(&II);
if (MTPI != MemTransferSliceMap.end())
AS.Slices[MTPI->second].kill();
return markAsDead(II);
}
uint64_t RawOffset = Offset.getLimitedValue();
uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
// Check for the special case where the same exact value is used for both
// source and dest.
if (*U == II.getRawDest() && *U == II.getRawSource()) {
// For non-volatile transfers this is a no-op.
if (!II.isVolatile())
return markAsDead(II);
return insertUse(II, Offset, Size, /*IsSplittable=*/false);
}
// If we have seen both source and destination for a mem transfer, then
// they both point to the same alloca.
bool Inserted;
SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
std::tie(MTPI, Inserted) =
MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
unsigned PrevIdx = MTPI->second;
if (!Inserted) {
Slice &PrevP = AS.Slices[PrevIdx];
// Check if the begin offsets match and this is a non-volatile transfer.
// In that case, we can completely elide the transfer.
if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
PrevP.kill();
return markAsDead(II);
}
// Otherwise we have an offset transfer within the same alloca. We can't
// split those.
PrevP.makeUnsplittable();
}
// Insert the use now that we've fixed up the splittable nature.
insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
// Check that we ended up with a valid index in the map.
assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
"Map index doesn't point back to a slice with this user.");
}
// Disable SRoA for any intrinsics except for lifetime invariants and
// invariant group.
// FIXME: What about debug intrinsics? This matches old behavior, but
// doesn't make sense.
void visitIntrinsicInst(IntrinsicInst &II) {
if (II.isDroppable()) {
AS.DeadUseIfPromotable.push_back(U);
return;
}
if (!IsOffsetKnown)
return PI.setAborted(&II);
if (II.isLifetimeStartOrEnd()) {
ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
Length->getLimitedValue());
insertUse(II, Offset, Size, true);
return;
}
if (II.isLaunderOrStripInvariantGroup()) {
enqueueUsers(II);
return;
}
Base::visitIntrinsicInst(II);
}
Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
// We consider any PHI or select that results in a direct load or store of
// the same offset to be a viable use for slicing purposes. These uses
// are considered unsplittable and the size is the maximum loaded or stored
// size.
SmallPtrSet<Instruction *, 4> Visited;
SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
Visited.insert(Root);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
const DataLayout &DL = Root->getModule()->getDataLayout();
// If there are no loads or stores, the access is dead. We mark that as
// a size zero access.
Size = 0;
do {
Instruction *I, *UsedI;
std::tie(UsedI, I) = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Size = std::max(Size,
DL.getTypeStoreSize(LI->getType()).getFixedSize());
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Op = SI->getOperand(0);
if (Op == UsedI)
return SI;
Size = std::max(Size,
DL.getTypeStoreSize(Op->getType()).getFixedSize());
continue;
}
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
if (!GEP->hasAllZeroIndices())
return GEP;
} else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
!isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
return I;
}
for (User *U : I->users())
if (Visited.insert(cast<Instruction>(U)).second)
Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
} while (!Uses.empty());
return nullptr;
}
void visitPHINodeOrSelectInst(Instruction &I) {
assert(isa<PHINode>(I) || isa<SelectInst>(I));
if (I.use_empty())
return markAsDead(I);
// If this is a PHI node before a catchswitch, we cannot insert any non-PHI
// instructions in this BB, which may be required during rewriting. Bail out
// on these cases.
if (isa<PHINode>(I) &&
I.getParent()->getFirstInsertionPt() == I.getParent()->end())
return PI.setAborted(&I);
// TODO: We could use simplifyInstruction here to fold PHINodes and
// SelectInsts. However, doing so requires to change the current
// dead-operand-tracking mechanism. For instance, suppose neither loading
// from %U nor %other traps. Then "load (select undef, %U, %other)" does not
// trap either. However, if we simply replace %U with undef using the
// current dead-operand-tracking mechanism, "load (select undef, undef,
// %other)" may trap because the select may return the first operand
// "undef".
if (Value *Result = foldPHINodeOrSelectInst(I)) {
if (Result == *U)
// If the result of the constant fold will be the pointer, recurse
// through the PHI/select as if we had RAUW'ed it.
enqueueUsers(I);
else
// Otherwise the operand to the PHI/select is dead, and we can replace
// it with poison.
AS.DeadOperands.push_back(U);
return;
}
if (!IsOffsetKnown)
return PI.setAborted(&I);
// See if we already have computed info on this node.
uint64_t &Size = PHIOrSelectSizes[&I];
if (!Size) {
// This is a new PHI/Select, check for an unsafe use of it.
if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
return PI.setAborted(UnsafeI);
}
// For PHI and select operands outside the alloca, we can't nuke the entire
// phi or select -- the other side might still be relevant, so we special
// case them here and use a separate structure to track the operands
// themselves which should be replaced with poison.
// FIXME: This should instead be escaped in the event we're instrumenting
// for address sanitization.
if (Offset.uge(AllocSize)) {
AS.DeadOperands.push_back(U);
return;
}
insertUse(I, Offset, Size);
}
void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
/// Disable SROA entirely if there are unhandled users of the alloca.
void visitInstruction(Instruction &I) { PI.setAborted(&I); }
};
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
PointerEscapingInstr(nullptr) {
SliceBuilder PB(DL, AI, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
// FIXME: We should sink the escape vs. abort info into the caller nicely,
// possibly by just storing the PtrInfo in the AllocaSlices.
PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
: PtrI.getAbortingInst();
assert(PointerEscapingInstr && "Did not track a bad instruction");
return;
}
llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
llvm::stable_sort(Slices);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void AllocaSlices::print(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
printSlice(OS, I, Indent);
OS << "\n";
printUse(OS, I, Indent);
}
void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
<< " slice #" << (I - begin())
<< (I->isSplittable() ? " (splittable)" : "");
}
void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
}
void AllocaSlices::print(raw_ostream &OS) const {
if (PointerEscapingInstr) {
OS << "Can't analyze slices for alloca: " << AI << "\n"
<< " A pointer to this alloca escaped by:\n"
<< " " << *PointerEscapingInstr << "\n";
return;
}
OS << "Slices of alloca: " << AI << "\n";
for (const_iterator I = begin(), E = end(); I != E; ++I)
print(OS, I);
}
LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
print(dbgs(), I);
}
LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Walk the range of a partitioning looking for a common type to cover this
/// sequence of slices.
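/// For example (illustrative): if every slice spanning the whole partition
/// loads or stores i64, the common type is i64; if the uses mix float and
/// i32, no common type is found and i32 is returned as the integer
/// fallback.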
static std::pair<Type *, IntegerType *>
findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
uint64_t EndOffset) {
Type *Ty = nullptr;
bool TyIsCommon = true;
IntegerType *ITy = nullptr;
// Note that we need to look at *every* alloca slice's Use to ensure we
// always get consistent results regardless of the order of slices.
for (AllocaSlices::const_iterator I = B; I != E; ++I) {
Use *U = I->getUse();
if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
continue;
Type *UserTy = nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
UserTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
UserTy = SI->getValueOperand()->getType();
}
if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
// this for split integer operations where we want to use the type of the
// entity causing the split. Also skip if the type is not a byte width
// multiple.
if (UserITy->getBitWidth() % 8 != 0 ||
UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
continue;
// Track the largest bitwidth integer type used in this way in case there
// is no common type.
if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
ITy = UserITy;
}
// To avoid depending on the order of slices, Ty and TyIsCommon must not
// depend on types skipped above.
if (!UserTy || (Ty && Ty != UserTy))
TyIsCommon = false; // Give up on anything but an iN type.
else
Ty = UserTy;
}
return {TyIsCommon ? Ty : nullptr, ITy};
}
/// PHI instructions that use an alloca and are subsequently loaded can be
/// rewritten to load both input pointers in the pred blocks and then PHI the
/// results, allowing the load of the alloca to be promoted.
/// From this:
/// %P2 = phi [i32* %Alloca, i32* %Other]
/// %V = load i32* %P2
/// to:
/// %V1 = load i32* %Alloca -> will be mem2reg'd
/// ...
/// %V2 = load i32* %Other
/// ...
/// %V = phi [i32 %V1, i32 %V2]
///
/// We can do this to a select if its only uses are loads and if the operands
/// to the select can be loaded unconditionally.
///
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
static bool isSafePHIToSpeculate(PHINode &PN) {
const DataLayout &DL = PN.getModule()->getDataLayout();
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
// TODO: Allow stores.
BasicBlock *BB = PN.getParent();
Align MaxAlign;
uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
- APInt MaxSize(APWidth, 0);
- bool HaveLoad = false;
+ Type *LoadType = nullptr;
for (User *U : PN.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
if (!LI || !LI->isSimple())
return false;
// For now we only allow loads in the same block as the PHI. This is
// a common case that happens when instcombine merges two loads through
// a PHI.
if (LI->getParent() != BB)
return false;
+ if (LoadType) {
+ if (LoadType != LI->getType())
+ return false;
+ } else {
+ LoadType = LI->getType();
+ }
+
// Ensure that there are no instructions between the PHI and the load that
// could store.
for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
- uint64_t Size = DL.getTypeStoreSize(LI->getType()).getFixedSize();
MaxAlign = std::max(MaxAlign, LI->getAlign());
- MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize;
- HaveLoad = true;
}
- if (!HaveLoad)
+ if (!LoadType)
return false;
+ APInt LoadSize = APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedSize());
+
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
// If the value is produced by the terminator of the predecessor (an
// invoke) or it has side-effects, there is no valid place to put a load
// in the predecessor.
if (TI == InVal || TI->mayHaveSideEffects())
return false;
// If the predecessor has a single successor, then the edge isn't
// critical.
if (TI->getNumSuccessors() == 1)
continue;
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (isSafeToLoadUnconditionally(InVal, MaxAlign, MaxSize, DL, TI))
+ if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
continue;
return false;
}
return true;
}
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
Type *LoadTy = SomeLoad->getType();
IRB.SetInsertPoint(&PN);
PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
// Get the AA tags and alignment to use from one of the loads. It does not
// matter which one we get and if any differ.
AAMDNodes AATags = SomeLoad->getAAMetadata();
Align Alignment = SomeLoad->getAlign();
// Rewrite all loads of the PN to use the new PHI.
while (!PN.use_empty()) {
LoadInst *LI = cast<LoadInst>(PN.user_back());
LI->replaceAllUsesWith(NewPN);
LI->eraseFromParent();
}
// Inject loads into all of the pred blocks.
DenseMap<BasicBlock*, Value*> InjectedLoads;
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
BasicBlock *Pred = PN.getIncomingBlock(Idx);
Value *InVal = PN.getIncomingValue(Idx);
// A PHI node is allowed to have multiple (duplicated) entries for the same
// basic block, as long as the value is the same. So if we already injected
// a load in the predecessor, then we should reuse the same load for all
// duplicated entries.
if (Value* V = InjectedLoads.lookup(Pred)) {
NewPN->addIncoming(V, Pred);
continue;
}
Instruction *TI = Pred->getTerminator();
IRB.SetInsertPoint(TI);
LoadInst *Load = IRB.CreateAlignedLoad(
LoadTy, InVal, Alignment,
(PN.getName() + ".sroa.speculate.load." + Pred->getName()));
++NumLoadsSpeculated;
if (AATags)
Load->setAAMetadata(AATags);
NewPN->addIncoming(Load, Pred);
InjectedLoads[Pred] = Load;
}
LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
PN.eraseFromParent();
}
/// Select instructions that use an alloca and are subsequently loaded can be
/// rewritten to load both input pointers and then select between the result,
/// allowing the load of the alloca to be promoted.
/// From this:
/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
/// %V = load i32* %P2
/// to:
/// %V1 = load i32* %Alloca -> will be mem2reg'd
/// %V2 = load i32* %Other
/// %V = select i1 %cond, i32 %V1, i32 %V2
///
/// We can do this to a select if its only uses are loads and if the operands
/// to the select can be loaded unconditionally. If we find an intervening
/// bitcast whose single use is the load, we still allow the promotion.
static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
const DataLayout &DL = SI.getModule()->getDataLayout();
for (User *U : SI.users()) {
LoadInst *LI;
BitCastInst *BC = dyn_cast<BitCastInst>(U);
if (BC && BC->hasOneUse())
LI = dyn_cast<LoadInst>(*BC->user_begin());
else
LI = dyn_cast<LoadInst>(U);
if (!LI || !LI->isSimple())
return false;
// Both operands to the select need to be dereferenceable, either
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
if (!isSafeToLoadUnconditionally(TValue, LI->getType(),
LI->getAlign(), DL, LI))
return false;
if (!isSafeToLoadUnconditionally(FValue, LI->getType(),
LI->getAlign(), DL, LI))
return false;
}
return true;
}
static void speculateSelectInstLoads(IRBuilderTy &IRB, SelectInst &SI) {
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
IRB.SetInsertPoint(&SI);
Value *TV = SI.getTrueValue();
Value *FV = SI.getFalseValue();
// Replace the loads of the select with a select of two loads.
while (!SI.use_empty()) {
LoadInst *LI;
BitCastInst *BC = dyn_cast<BitCastInst>(SI.user_back());
if (BC) {
assert(BC->hasOneUse() && "Bitcast should have a single use.");
LI = cast<LoadInst>(BC->user_back());
} else {
LI = cast<LoadInst>(SI.user_back());
}
assert(LI->isSimple() && "We only speculate simple loads");
IRB.SetInsertPoint(LI);
Value *NewTV =
BC ? IRB.CreateBitCast(TV, BC->getType(), TV->getName() + ".sroa.cast")
: TV;
Value *NewFV =
BC ? IRB.CreateBitCast(FV, BC->getType(), FV->getName() + ".sroa.cast")
: FV;
LoadInst *TL = IRB.CreateLoad(LI->getType(), NewTV,
LI->getName() + ".sroa.speculate.load.true");
LoadInst *FL = IRB.CreateLoad(LI->getType(), NewFV,
LI->getName() + ".sroa.speculate.load.false");
NumLoadsSpeculated += 2;
// Transfer alignment and AA info if present.
TL->setAlignment(LI->getAlign());
FL->setAlignment(LI->getAlign());
AAMDNodes Tags = LI->getAAMetadata();
if (Tags) {
TL->setAAMetadata(Tags);
FL->setAAMetadata(Tags);
}
Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
LI->getName() + ".sroa.speculated");
LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
LI->replaceAllUsesWith(V);
LI->eraseFromParent();
if (BC)
BC->eraseFromParent();
}
SI.eraseFromParent();
}
/// Build a GEP out of a base pointer and indices.
///
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
SmallVectorImpl<Value *> &Indices,
const Twine &NamePrefix) {
if (Indices.empty())
return BasePtr;
// A single zero index is a no-op, so check for this and avoid building a GEP
// in that case.
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
// buildGEP() is only called for non-opaque pointers.
return IRB.CreateInBoundsGEP(
BasePtr->getType()->getNonOpaquePointerElementType(), BasePtr, Indices,
NamePrefix + "sroa_idx");
}
/// Get a natural GEP off of the BasePtr walking through Ty toward
/// TargetTy without changing the offset of the pointer.
///
/// This routine assumes we've already established a properly offset GEP with
/// Indices, and arrived at the Ty type. The goal is to continue to GEP with
/// zero-indices down through type layers until we find one the same as
/// TargetTy. If we can't find one with the same type, we at least try to use
/// one with the same size. If none of that works, we just produce the GEP as
/// indicated by Indices to have the correct offset.
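/// For example (illustrative): walking a { [4 x i32] } type toward an i32
/// TargetTy appends a zero index for the struct's first field and another
/// zero index for the array's first element, reaching an i32 without
/// changing the pointer's offset.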
static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
Value *BasePtr, Type *Ty, Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
const Twine &NamePrefix) {
if (Ty == TargetTy)
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
// Offset size to use for the indices.
unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType());
// See if we can descend into a struct and locate a field with the correct
// type.
unsigned NumLayers = 0;
Type *ElementTy = Ty;
do {
if (ElementTy->isPointerTy())
break;
if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
ElementTy = ArrayTy->getElementType();
Indices.push_back(IRB.getIntN(OffsetSize, 0));
} else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
ElementTy = VectorTy->getElementType();
Indices.push_back(IRB.getInt32(0));
} else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
if (STy->element_begin() == STy->element_end())
break; // Nothing left to descend into.
ElementTy = *STy->element_begin();
Indices.push_back(IRB.getInt32(0));
} else {
break;
}
++NumLayers;
} while (ElementTy != TargetTy);
if (ElementTy != TargetTy)
Indices.erase(Indices.end() - NumLayers, Indices.end());
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
}
/// Get a natural GEP from a base pointer to a particular offset and
/// resulting in a particular type.
///
/// The goal is to produce a "natural" looking GEP that works with the existing
/// composite types to arrive at the appropriate offset and element type for
/// a pointer. TargetTy is the element type the returned GEP should point-to if
/// possible. We recurse by decreasing Offset, adding the appropriate index to
/// Indices, and setting Ty to the result subtype.
///
/// If no natural GEP can be constructed, this function returns null.
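/// For example (illustrative): with an [8 x i32] element type and a byte
/// Offset of 12, DL.getGEPIndicesForOffset selects array element 3 and
/// leaves Offset at zero, so a natural GEP can be built; an Offset of 14
/// would leave a nonzero remainder and the function returns null.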
static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, APInt Offset, Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
const Twine &NamePrefix) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
// an i8.
if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
return nullptr;
Type *ElementTy = Ty->getNonOpaquePointerElementType();
if (!ElementTy->isSized())
return nullptr; // We can't GEP through an unsized element.
SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(ElementTy, Offset);
if (Offset != 0)
return nullptr;
for (const APInt &Index : IntIndices)
Indices.push_back(IRB.getInt(Index));
return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices,
NamePrefix);
}
/// Compute an adjusted pointer from Ptr by Offset bytes where the
/// resulting pointer has PointerTy.
///
/// This tries very hard to compute a "natural" GEP which arrives at the offset
/// and produces the pointer type desired. Where it cannot, it will try to use
/// the natural GEP to arrive at the offset and bitcast to the type. Where that
/// fails, it will try to use an existing i8* and GEP to the byte offset and
/// bitcast to the type.
///
/// The strategy for finding the more natural GEPs is to peel off layers of the
/// pointer, walking back through bit casts and GEPs, searching for a base
/// pointer from which we can compute a natural GEP with the desired
/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
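/// For example (illustrative): given a bitcast of %base to i8* and a request
/// for byte offset 8 as an i32*, the walk peels the bitcast back to %base
/// and tries to fold the offset into the indices of a single natural GEP on
/// %base's type, falling back to a raw i8* GEP plus a bitcast only when no
/// such GEP exists.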
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
APInt Offset, Type *PointerTy,
const Twine &NamePrefix) {
// Create i8 GEP for opaque pointers.
if (Ptr->getType()->isOpaquePointerTy()) {
if (Offset != 0)
Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset),
NamePrefix + "sroa_idx");
return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
NamePrefix + "sroa_cast");
}
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
Visited.insert(Ptr);
SmallVector<Value *, 4> Indices;
// We may end up computing an offset pointer that has the wrong type. If we
// never are able to compute one directly that has the correct type, we'll
// fall back to it, so keep it and the base it was computed from around here.
Value *OffsetPtr = nullptr;
Value *OffsetBasePtr;
// Remember any i8 pointer we come across to re-use if we need to do a raw
// byte offset.
Value *Int8Ptr = nullptr;
APInt Int8PtrOffset(Offset.getBitWidth(), 0);
PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
Type *TargetTy = TargetPtrTy->getNonOpaquePointerElementType();
// As `addrspacecast` is not always a no-op cast, `Ptr` (the storage pointer)
// may have a different address space from the expected `PointerTy` (the
// pointer to be used). Adjust the pointer type based on the original storage
// pointer.
auto AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
PointerTy = TargetTy->getPointerTo(AS);
do {
// First fold any existing GEPs into the offset.
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
APInt GEPOffset(Offset.getBitWidth(), 0);
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
Offset += GEPOffset;
Ptr = GEP->getPointerOperand();
if (!Visited.insert(Ptr).second)
break;
}
// See if we can perform a natural GEP here.
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
Indices, NamePrefix)) {
// If we have a new natural pointer at the offset, clear out any old
// offset pointer we computed. Unless it is the base pointer or
// a non-instruction, we built a GEP we don't need. Zap it.
if (OffsetPtr && OffsetPtr != OffsetBasePtr)
if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
assert(I->use_empty() && "Built a GEP with uses somehow!");
I->eraseFromParent();
}
OffsetPtr = P;
OffsetBasePtr = Ptr;
// If we also found a pointer of the right type, we're done.
if (P->getType() == PointerTy)
break;
}
// Stash this pointer if we've found an i8*.
if (Ptr->getType()->isIntegerTy(8)) {
Int8Ptr = Ptr;
Int8PtrOffset = Offset;
}
// Peel off a layer of the pointer and update the offset appropriately.
if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
Ptr = cast<Operator>(Ptr)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
if (GA->isInterposable())
break;
Ptr = GA->getAliasee();
} else {
break;
}
assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(Ptr).second);
if (!OffsetPtr) {
if (!Int8Ptr) {
Int8Ptr = IRB.CreateBitCast(
Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
NamePrefix + "sroa_raw_cast");
Int8PtrOffset = Offset;
}
OffsetPtr = Int8PtrOffset == 0
? Int8Ptr
: IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
IRB.getInt(Int8PtrOffset),
NamePrefix + "sroa_raw_idx");
}
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
TargetPtrTy,
NamePrefix + "sroa_cast");
}
return Ptr;
}
/// Compute the adjusted alignment for a load or store from an offset.
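/// For example, a base alignment of 8 combined with a byte offset of 4
/// yields an alignment of 4, the largest power of two dividing both.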
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
return commonAlignment(getLoadStoreAlignment(I), Offset);
}
/// Test whether we can convert a value from the old to the new type.
///
/// This predicate should be used to guard calls to convertValue in order to
/// ensure that we only try to convert viable values. The strategy is that we
/// will peel off single element struct and array wrappings to get to an
/// underlying value, and convert that value.
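/// For example (assuming 64-bit integral pointers): an i64 and an i8* are
/// mutually convertible, while an i32 and an i64 never are, since integer
/// bit-width differences are rejected outright.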
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
// For integer types, we can't handle any bit-width differences. This would
// break both vector conversions with extension and introduce endianness
// issues when used in conjunction with loads and stores.
if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
assert(cast<IntegerType>(OldTy)->getBitWidth() !=
cast<IntegerType>(NewTy)->getBitWidth() &&
"We can't have the same bitwidth for different int types");
return false;
}
if (DL.getTypeSizeInBits(NewTy).getFixedSize() !=
DL.getTypeSizeInBits(OldTy).getFixedSize())
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
return false;
// We can convert pointers to integers and vice-versa. Same for vectors
// of pointers and integers.
OldTy = OldTy->getScalarType();
NewTy = NewTy->getScalarType();
if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
unsigned OldAS = OldTy->getPointerAddressSpace();
unsigned NewAS = NewTy->getPointerAddressSpace();
// Convert pointers if they are pointers from the same address space or
// different integral (not non-integral) address spaces with the same
// pointer size.
return OldAS == NewAS ||
(!DL.isNonIntegralAddressSpace(OldAS) &&
!DL.isNonIntegralAddressSpace(NewAS) &&
DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
}
// We can convert integers to integral pointers, but not to non-integral
// pointers.
if (OldTy->isIntegerTy())
return !DL.isNonIntegralPointerType(NewTy);
// We can convert integral pointers to integers, but non-integral pointers
// need to remain pointers.
if (!DL.isNonIntegralPointerType(OldTy))
return NewTy->isIntegerTy();
return false;
}
return true;
}
/// Generic routine to convert an SSA value to a value of a different
/// type.
///
/// This will try various different casting techniques, such as bitcasts,
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
Type *NewTy) {
Type *OldTy = V->getType();
assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
if (OldTy == NewTy)
return V;
assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
"Integer types must be the exact same to convert.");
// See if we need inttoptr for this type pair. May require additional bitcast.
if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
// Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
// Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
// Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
// Directly handle i64 to i8*
return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
NewTy);
}
// See if we need ptrtoint for this type pair. May require additional bitcast.
if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
// Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
// Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
// Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
// Expand i8* to i64 --> i8* to i64 to i64
return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
NewTy);
}
if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
unsigned OldAS = OldTy->getPointerAddressSpace();
unsigned NewAS = NewTy->getPointerAddressSpace();
// To convert pointers between different address spaces (they have already
// been checked to be convertible, i.e. they have the same pointer size), we
// cannot currently use `bitcast` (which is restricted to the same address
// space) or `addrspacecast` (which is not always a no-op cast). Instead,
// use a pair of no-op `ptrtoint`/`inttoptr` casts through an integer with
// the same bit size.
if (OldAS != NewAS) {
assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
NewTy);
}
}
return IRB.CreateBitCast(V, NewTy);
}
/// Test whether the given slice use can be promoted to a vector.
///
/// This function is called to test each entry in a partition which is slated
/// for a single slice.
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
VectorType *Ty,
uint64_t ElementSize,
const DataLayout &DL) {
// First validate the slice offsets.
uint64_t BeginOffset =
std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
uint64_t BeginIndex = BeginOffset / ElementSize;
if (BeginIndex * ElementSize != BeginOffset ||
BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
return false;
uint64_t EndOffset =
std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
uint64_t EndIndex = EndOffset / ElementSize;
if (EndIndex * ElementSize != EndOffset ||
EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
return false;
assert(EndIndex > BeginIndex && "Empty vector!");
uint64_t NumElements = EndIndex - BeginIndex;
Type *SliceTy = (NumElements == 1)
? Ty->getElementType()
: FixedVectorType::get(Ty->getElementType(), NumElements);
Type *SplitIntTy =
Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
Use *U = S.getUse();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
} else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
Type *LTy = LI->getType();
// Disable vector promotion when there are loads or stores of an FCA.
if (LTy->isStructTy())
return false;
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
assert(LTy->isIntegerTy());
LTy = SplitIntTy;
}
if (!canConvertValue(DL, SliceTy, LTy))
return false;
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
Type *STy = SI->getValueOperand()->getType();
// Disable vector promotion when there are loads or stores of an FCA.
if (STy->isStructTy())
return false;
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
assert(STy->isIntegerTy());
STy = SplitIntTy;
}
if (!canConvertValue(DL, STy, SliceTy))
return false;
} else {
return false;
}
return true;
}
/// Test whether the given alloca partitioning and range of slices can be
/// promoted to a vector.
///
/// This is a quick test to check whether we can rewrite a particular alloca
/// partition (and its newly formed alloca) into a vector alloca with only
/// whole-vector loads and stores such that it could be promoted to a vector
/// SSA value. We only can ensure this for a limited set of operations, and we
/// don't want to do the rewrites unless we are confident that the result will
/// be promotable, so we have an early test here.
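/// For example (illustrative): a 16-byte partition accessed by whole
/// <4 x i32> loads and stores plus one i32 store at offset 8 is viable; the
/// narrow store maps exactly onto element 2 of the vector.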
static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// Collect the candidate types for vector-based promotion. Also track whether
// we have different element types.
SmallVector<VectorType *, 4> CandidateTys;
Type *CommonEltTy = nullptr;
bool HaveCommonEltTy = true;
auto CheckCandidateType = [&](Type *Ty) {
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
// Bail out if the candidate vector types differ in total size in bits.
if (!CandidateTys.empty()) {
VectorType *V = CandidateTys[0];
if (DL.getTypeSizeInBits(VTy).getFixedSize() !=
DL.getTypeSizeInBits(V).getFixedSize()) {
CandidateTys.clear();
return;
}
}
CandidateTys.push_back(VTy);
if (!CommonEltTy)
CommonEltTy = VTy->getElementType();
else if (CommonEltTy != VTy->getElementType())
HaveCommonEltTy = false;
}
};
// Consider any loads or stores that are the exact size of the slice.
for (const Slice &S : P)
if (S.beginOffset() == P.beginOffset() &&
S.endOffset() == P.endOffset()) {
if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
CheckCandidateType(LI->getType());
else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
CheckCandidateType(SI->getValueOperand()->getType());
}
// If we didn't find a vector type, nothing to do here.
if (CandidateTys.empty())
return nullptr;
// Remove non-integer vector types if we had multiple common element types.
// FIXME: It'd be nice to replace them with integer vector types, but we can't
// do that until all the backends are known to produce good code for all
// integer vector types.
if (!HaveCommonEltTy) {
llvm::erase_if(CandidateTys, [](VectorType *VTy) {
return !VTy->getElementType()->isIntegerTy();
});
// If there were no integer vector types, give up.
if (CandidateTys.empty())
return nullptr;
// Rank the remaining candidate vector types. This is easy because we know
// they're all integer vectors. We sort by ascending number of elements.
auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
(void)DL;
assert(DL.getTypeSizeInBits(RHSTy).getFixedSize() ==
DL.getTypeSizeInBits(LHSTy).getFixedSize() &&
"Cannot have vector types of different sizes!");
assert(RHSTy->getElementType()->isIntegerTy() &&
"All non-integer types eliminated!");
assert(LHSTy->getElementType()->isIntegerTy() &&
"All non-integer types eliminated!");
return cast<FixedVectorType>(RHSTy)->getNumElements() <
cast<FixedVectorType>(LHSTy)->getNumElements();
};
llvm::sort(CandidateTys, RankVectorTypes);
CandidateTys.erase(
std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
CandidateTys.end());
} else {
// The only way to have the same element type in every vector type is to
// have the same vector type. Check that and remove all but one.
#ifndef NDEBUG
for (VectorType *VTy : CandidateTys) {
assert(VTy->getElementType() == CommonEltTy &&
"Unaccounted for element type!");
assert(VTy == CandidateTys[0] &&
"Different vector types with the same element type!");
}
#endif
CandidateTys.resize(1);
}
// Try each vector type, and return the one which works.
auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
uint64_t ElementSize =
DL.getTypeSizeInBits(VTy->getElementType()).getFixedSize();
// While LLVM vectors are defined as bit-packed, we don't support element
// sizes that aren't byte-sized.
if (ElementSize % 8)
return false;
assert((DL.getTypeSizeInBits(VTy).getFixedSize() % 8) == 0 &&
"vector size not a multiple of element size?");
ElementSize /= 8;
for (const Slice &S : P)
if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
return false;
for (const Slice *S : P.splitSliceTails())
if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
return false;
return true;
};
for (VectorType *VTy : CandidateTys)
if (CheckVectorTypeForPromotion(VTy))
return VTy;
return nullptr;
}
/// Test whether a slice of an alloca is valid for integer widening.
///
/// This implements the necessary checking for the \c isIntegerWideningViable
/// test below on a single slice of the alloca.
static bool isIntegerWideningViableForSlice(const Slice &S,
uint64_t AllocBeginOffset,
Type *AllocaTy,
const DataLayout &DL,
bool &WholeAllocaOp) {
uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedSize();
uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
Use *U = S.getUse();
// Lifetime intrinsics operate over the whole alloca, so their slices are
// usually larger than other load/store slices (RelEnd > Size). But lifetime
// intrinsics are always promotable and should not impact the promotability
// of the other slices in the partition.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->isLifetimeStartOrEnd() || II->isDroppable())
return true;
}
// We can't reasonably handle cases where the load or store extends past
// the end of the alloca's type and into its padding.
if (RelEnd > Size)
return false;
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
// We can't handle loads that extend past the allocated memory.
if (DL.getTypeStoreSize(LI->getType()).getFixedSize() > Size)
return false;
// So far, AllocaSliceRewriter does not support widening split slice tails
// in rewriteIntegerLoad.
if (S.beginOffset() < AllocBeginOffset)
return false;
// Note that we don't count vector loads or stores as whole-alloca
// operations which enable integer widening because we would prefer to use
// vector widening instead.
if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize())
return false;
} else if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(DL, AllocaTy, LI->getType())) {
// Non-integer loads need to be convertible from the alloca type so that
// they are promotable.
return false;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
// We can't handle stores that extend past the allocated memory.
if (DL.getTypeStoreSize(ValueTy).getFixedSize() > Size)
return false;
// So far, AllocaSliceRewriter does not support widening split slice tails
// in rewriteIntegerStore.
if (S.beginOffset() < AllocBeginOffset)
return false;
// Note that we don't count vector loads or stores as whole-alloca
// operations which enable integer widening because we would prefer to use
// vector widening instead.
if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize())
return false;
} else if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(DL, ValueTy, AllocaTy)) {
// Non-integer stores need to be convertible to the alloca type so that
// they are promotable.
return false;
}
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else {
return false;
}
return true;
}
/// Test whether the given alloca partition's integer operations can be
/// widened to promotable ones.
///
/// This is a quick test to check whether we can rewrite the integer loads and
/// stores to a particular alloca into wider loads and stores and be able to
/// promote the resulting alloca.
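/// For example (illustrative): an i64 alloca accessed by one whole i64 load
/// and two i32 stores at offsets 0 and 4 is viable; the load provides the
/// covering whole-alloca operation, and each i32 store can be rewritten as
/// a load-modify-store of the full i64.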
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
const DataLayout &DL) {
uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedSize();
// Don't create integer types larger than the maximum bitwidth.
if (SizeInBits > IntegerType::MAX_INT_BITS)
return false;
// Don't try to handle allocas with bit-padding.
if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedSize())
return false;
// We need to ensure that an integer type with the appropriate bitwidth can
// be converted to the alloca type, whatever that is. We don't want to force
// the alloca itself to have an integer type if there is a more suitable one.
Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
if (!canConvertValue(DL, AllocaTy, IntTy) ||
!canConvertValue(DL, IntTy, AllocaTy))
return false;
// While examining uses, we ensure that the alloca has a covering load or
// store. We don't want to widen the integer operations only to fail to
// promote due to some other unsplittable entry (which we may make splittable
// later). However, if there are only splittable uses, go ahead and assume
// that we cover the alloca.
// FIXME: We shouldn't consider split slices that happen to start in the
// partition here...
bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
for (const Slice &S : P)
if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
WholeAllocaOp))
return false;
for (const Slice *S : P.splitSliceTails())
if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
WholeAllocaOp))
return false;
return WholeAllocaOp;
}
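/// Extract a narrower integer of type Ty from the wider integer value V at
/// byte Offset, shifting and truncating as needed. The shift amount is
/// mirrored on big-endian targets so that Offset is always a byte offset
/// from the start of the value as laid out in memory.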
static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
IntegerType *IntTy = cast<IntegerType>(V->getType());
assert(DL.getTypeStoreSize(Ty).getFixedSize() + Offset <=
DL.getTypeStoreSize(IntTy).getFixedSize() &&
"Element extends past full value");
uint64_t ShAmt = 8 * Offset;
if (DL.isBigEndian())
ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() -
DL.getTypeStoreSize(Ty).getFixedSize() - Offset);
if (ShAmt) {
V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
}
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot extract to a larger integer!");
if (Ty != IntTy) {
V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
}
return V;
}
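/// Insert the integer value V into the wider integer Old at byte Offset,
/// the counterpart of extractInteger above: V is zero-extended, shifted
/// into position with the same endianness handling, and merged into Old
/// with a mask-and-or sequence.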
static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
Value *V, uint64_t Offset, const Twine &Name) {
IntegerType *IntTy = cast<IntegerType>(Old->getType());
IntegerType *Ty = cast<IntegerType>(V->getType());
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot insert a larger integer!");
LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
if (Ty != IntTy) {
V = IRB.CreateZExt(V, IntTy, Name + ".ext");
LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
}
assert(DL.getTypeStoreSize(Ty).getFixedSize() + Offset <=
DL.getTypeStoreSize(IntTy).getFixedSize() &&
"Element store outside of alloca store");
uint64_t ShAmt = 8 * Offset;
if (DL.isBigEndian())
ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() -
DL.getTypeStoreSize(Ty).getFixedSize() - Offset);
if (ShAmt) {
V = IRB.CreateShl(V, ShAmt, Name + ".shift");
LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
}
if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
V = IRB.CreateOr(Old, V, Name + ".insert");
LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
}
return V;
}
static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
unsigned EndIndex, const Twine &Name) {
auto *VecTy = cast<FixedVectorType>(V->getType());
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
if (NumElements == VecTy->getNumElements())
return V;
if (NumElements == 1) {
V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
Name + ".extract");
LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
return V;
}
auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
return V;
}
static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
unsigned BeginIndex, const Twine &Name) {
VectorType *VecTy = cast<VectorType>(Old->getType());
assert(VecTy && "Can only insert a vector into a vector");
VectorType *Ty = dyn_cast<VectorType>(V->getType());
if (!Ty) {
// Single element to insert.
V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
Name + ".insert");
LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
return V;
}
assert(cast<FixedVectorType>(Ty)->getNumElements() <=
cast<FixedVectorType>(VecTy)->getNumElements() &&
"Too many elements!");
if (cast<FixedVectorType>(Ty)->getNumElements() ==
cast<FixedVectorType>(VecTy)->getNumElements()) {
assert(V->getType() == VecTy && "Vector type mismatch");
return V;
}
unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
// When inserting a smaller vector into the larger one to store, we first
// use a shuffle vector to widen it with undef elements, and then a select
// with a constant mask to choose between the loaded vector and the incoming
// vector.
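// For example (illustrative): inserting a <2 x i32> at BeginIndex 1 into a
// <4 x i32> uses the widening mask <-1, 0, 1, -1> and then the select mask
// <0, 1, 1, 0>, taking lanes 1-2 from the new vector and lanes 0 and 3 from
// the old one.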
SmallVector<int, 8> Mask;
Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
if (i >= BeginIndex && i < EndIndex)
Mask.push_back(i - BeginIndex);
else
Mask.push_back(-1);
V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
SmallVector<Constant *, 8> Mask2;
Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
}
/// Visitor to rewrite instructions using a particular slice of an alloca
/// to use a new alloca.
///
/// Also implements the rewriting to vector-based accesses when the partition
/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
/// lives here.
class llvm::sroa::AllocaSliceRewriter
: public InstVisitor<AllocaSliceRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class InstVisitor<AllocaSliceRewriter, bool>;
using Base = InstVisitor<AllocaSliceRewriter, bool>;
const DataLayout &DL;
AllocaSlices &AS;
SROAPass &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
Type *NewAllocaTy;
// This is a convenience and flag variable that will be null unless the new
// alloca's integer operations should be widened to this integer type due to
// passing isIntegerWideningViable above. If it is non-null, the desired
// integer type will be stored here for easy access during rewriting.
IntegerType *IntTy;
// If we are rewriting an alloca partition which can be written as pure
// vector operations, we stash extra information here. When VecTy is
// non-null, we have some strict guarantees about the rewritten alloca:
// - The new alloca is exactly the size of the vector type here.
// - The accesses all either map to the entire vector or to a single
// element.
// - The set of accessing instructions is only one of those handled above
// in isVectorPromotionViable. Generally these are the same access kinds
// which are promotable via mem2reg.
VectorType *VecTy;
Type *ElementTy;
uint64_t ElementSize;
// The original offset of the slice currently being rewritten relative to
// the original alloca.
uint64_t BeginOffset = 0;
uint64_t EndOffset = 0;
// The new offsets of the slice currently being rewritten relative to the
// original alloca.
uint64_t NewBeginOffset = 0, NewEndOffset = 0;
uint64_t SliceSize = 0;
bool IsSplittable = false;
bool IsSplit = false;
Use *OldUse = nullptr;
Instruction *OldPtr = nullptr;
// Track post-rewrite users which are PHI nodes and Selects.
SmallSetVector<PHINode *, 8> &PHIUsers;
SmallSetVector<SelectInst *, 8> &SelectUsers;
// Utility IR builder, whose name prefix is setup for each visited use, and
// the insertion point is set to point to the user.
IRBuilderTy IRB;
public:
AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROAPass &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
uint64_t NewAllocaBeginOffset,
uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
VectorType *PromotableVecTy,
SmallSetVector<PHINode *, 8> &PHIUsers,
SmallSetVector<SelectInst *, 8> &SelectUsers)
: DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewAllocaBeginOffset),
NewAllocaEndOffset(NewAllocaEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
IntTy(
IsIntegerPromotable
? Type::getIntNTy(NewAI.getContext(),
DL.getTypeSizeInBits(NewAI.getAllocatedType())
.getFixedSize())
: nullptr),
VecTy(PromotableVecTy),
ElementTy(VecTy ? VecTy->getElementType() : nullptr),
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8
: 0),
PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
if (VecTy) {
assert((DL.getTypeSizeInBits(ElementTy).getFixedSize() % 8) == 0 &&
"Only multiple-of-8 sized vector elements are viable");
++NumVectorized;
}
assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
}
bool visit(AllocaSlices::const_iterator I) {
bool CanSROA = true;
BeginOffset = I->beginOffset();
EndOffset = I->endOffset();
IsSplittable = I->isSplittable();
IsSplit =
BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
LLVM_DEBUG(dbgs() << "\n");
// Compute the intersecting offset range.
assert(BeginOffset < NewAllocaEndOffset);
assert(EndOffset > NewAllocaBeginOffset);
NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
SliceSize = NewEndOffset - NewBeginOffset;
OldUse = I->getUse();
OldPtr = cast<Instruction>(OldUse->get());
Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
IRB.SetInsertPoint(OldUserI);
IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
IRB.getInserter().SetNamePrefix(
Twine(NewAI.getName()) + "." + Twine(BeginOffset) + ".");
CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
if (VecTy || IntTy)
assert(CanSROA);
return CanSROA;
}
private:
// Make sure the other visit overloads are visible.
using Base::visit;
// Every instruction which can end up as a user must have a rewrite rule.
bool visitInstruction(Instruction &I) {
LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
llvm_unreachable("No rewrite rule for this instruction!");
}
Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
// Note that the offset computation can use BeginOffset or NewBeginOffset
// interchangeably for unsplit slices.
assert(IsSplit || BeginOffset == NewBeginOffset);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
#ifndef NDEBUG
StringRef OldName = OldPtr->getName();
// Skip through the last '.sroa.' component of the name.
size_t LastSROAPrefix = OldName.rfind(".sroa.");
if (LastSROAPrefix != StringRef::npos) {
OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
// Look for an SROA slice index.
size_t IndexEnd = OldName.find_first_not_of("0123456789");
if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
// Strip the index and look for the offset.
OldName = OldName.substr(IndexEnd + 1);
size_t OffsetEnd = OldName.find_first_not_of("0123456789");
if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
// Strip the offset.
OldName = OldName.substr(OffsetEnd + 1);
}
}
// Strip any SROA suffixes as well.
OldName = OldName.substr(0, OldName.find(".sroa_"));
#endif
return getAdjustedPtr(IRB, DL, &NewAI,
APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
PointerTy,
#ifndef NDEBUG
Twine(OldName) + "."
#else
Twine()
#endif
);
}
/// Compute a suitable alignment to access this slice of the *new*
/// alloca.
Align getSliceAlign() {
return commonAlignment(NewAI.getAlign(),
NewBeginOffset - NewAllocaBeginOffset);
}
unsigned getIndex(uint64_t Offset) {
assert(VecTy && "Can only call getIndex when rewriting a vector");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
uint32_t Index = RelOffset / ElementSize;
assert(Index * ElementSize == RelOffset);
return Index;
}
void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
Pass.DeadInsts.push_back(I);
}
Value *rewriteVectorizedLoadInst(LoadInst &LI) {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "load");
Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
}
Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "load");
V = convertValue(DL, IRB, V, IntTy);
assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
}
// It is possible that the extracted type is not the load type. This
// happens if there is a load past the end of the alloca, and as
// a consequence the slice is narrower but still a candidate for integer
// lowering. To handle this case, we just zero extend the extracted
// integer.
assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
"Can only handle an extract for an overly wide load");
if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
V = IRB.CreateZExt(V, LI.getType());
return V;
}
bool visitLoadInst(LoadInst &LI) {
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
AAMDNodes AATags = LI.getAAMetadata();
unsigned AS = LI.getPointerAddressSpace();
Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
: LI.getType();
const bool IsLoadPastEnd =
DL.getTypeStoreSize(TargetTy).getFixedSize() > SliceSize;
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
V = rewriteVectorizedLoadInst(LI);
} else if (IntTy && LI.getType()->isIntegerTy()) {
V = rewriteIntegerLoad(LI);
} else if (NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset &&
(canConvertValue(DL, NewAllocaTy, TargetTy) ||
(IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
TargetTy->isIntegerTy()))) {
LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), LI.isVolatile(),
LI.getName());
if (AATags)
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
if (NewLI->isAtomic())
NewLI->setAlignment(LI.getAlign());
// Any !nonnull metadata or !range metadata on the old load is also valid
// on the new load. This is true in some cases even when the loads have
// different types, for example by mapping !nonnull metadata to !range
// metadata by modeling the null pointer constant converted to the
// integer type.
// FIXME: Add support for range metadata here. Currently the utilities
// for this don't propagate range metadata in trivial cases from one
// integer load to another, don't handle non-addrspace-0 null pointers
// correctly, and don't have any support for mapping ranges as the
// integer type becomes wider or narrower.
if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
copyNonnullMetadata(LI, N, *NewLI);
// Try to preserve nonnull metadata
V = NewLI;
// If this is an integer load past the end of the slice (which means the
// bytes outside the slice are undef or this load is dead) just forcibly
// fix the integer size with correct handling of endianness.
if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
if (AITy->getBitWidth() < TITy->getBitWidth()) {
V = IRB.CreateZExt(V, TITy, "load.ext");
if (DL.isBigEndian())
V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
"endian_shift");
}
} else {
Type *LTy = TargetTy->getPointerTo(AS);
LoadInst *NewLI =
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(), LI.isVolatile(), LI.getName());
if (AATags)
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
V = NewLI;
IsPtrAdjusted = true;
}
V = convertValue(DL, IRB, V, TargetTy);
if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedSize() &&
"Split load isn't smaller than original load");
assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
// LI only used for this computation.
Value *Placeholder = new LoadInst(
LI.getType(), PoisonValue::get(LI.getType()->getPointerTo(AS)), "",
false, Align(1));
V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
"insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
Placeholder->deleteValue();
} else {
LI.replaceAllUsesWith(V);
}
Pass.DeadInsts.push_back(&LI);
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
return !LI.isVolatile() && !IsPtrAdjusted;
}
bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
AAMDNodes AATags) {
if (V->getType() != VecTy) {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
"Too many elements!");
Type *SliceTy = (NumElements == 1)
? ElementTy
: FixedVectorType::get(ElementTy, NumElements);
if (V->getType() != SliceTy)
V = convertValue(DL, IRB, V, SliceTy);
// Mix in the existing elements.
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "load");
V = insertVector(IRB, Old, V, BeginIndex, "vec");
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (DL.getTypeSizeInBits(V->getType()).getFixedSize() !=
IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
}
V = convertValue(DL, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
bool visitStoreInst(StoreInst &SI) {
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
AAMDNodes AATags = SI.getAAMetadata();
Value *V = SI.getValueOperand();
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after promoting this alloca.
if (V->getType()->isPointerTy())
if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
Pass.PostPromotionWorklist.insert(AI);
if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedSize()) {
assert(!SI.isVolatile());
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
"Non-byte-multiple bit width");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
"extract");
}
if (VecTy)
return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
if (IntTy && V->getType()->isIntegerTy())
return rewriteIntegerStore(V, SI, AATags);
const bool IsStorePastEnd =
DL.getTypeStoreSize(V->getType()).getFixedSize() > SliceSize;
StoreInst *NewSI;
if (NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset &&
(canConvertValue(DL, V->getType(), NewAllocaTy) ||
(IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
V->getType()->isIntegerTy()))) {
// If this is an integer store past the end of slice (and thus the bytes
// past that point are irrelevant or this is unreachable), truncate the
// value prior to storing.
if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
if (VITy->getBitWidth() > AITy->getBitWidth()) {
if (DL.isBigEndian())
V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
"endian_shift");
V = IRB.CreateTrunc(V, AITy, "load.trunc");
}
V = convertValue(DL, IRB, V, NewAllocaTy);
NewSI =
IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), SI.isVolatile());
} else {
unsigned AS = SI.getPointerAddressSpace();
Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS));
NewSI =
IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
}
NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (SI.isVolatile())
NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
if (NewSI->isAtomic())
NewSI->setAlignment(SI.getAlign());
Pass.DeadInsts.push_back(&SI);
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
return NewSI->getPointerOperand() == &NewAI &&
NewSI->getValueOperand()->getType() == NewAllocaTy &&
!SI.isVolatile();
}
/// Compute an integer value from splatting an i8 across the given
/// number of bytes.
///
/// Note that this routine assumes an i8 is a byte. If that isn't true, don't
/// call this routine.
/// FIXME: Heed the advice above.
///
/// \param V The i8 value to splat.
/// \param Size The number of bytes in the output (assuming i8 is one byte)
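/// For example, splatting the byte 0xAB across 4 bytes computes
/// 0xAB * (0xFFFFFFFF u/ 0xFF) == 0xAB * 0x01010101 == 0xABABABAB.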
Value *getIntegerSplat(Value *V, unsigned Size) {
assert(Size > 0 && "Expected a positive number of bytes.");
IntegerType *VTy = cast<IntegerType>(V->getType());
assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
if (Size == 1)
return V;
Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
V = IRB.CreateMul(
IRB.CreateZExt(V, SplatIntTy, "zext"),
IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
SplatIntTy)),
"isplat");
return V;
}
/// Compute a vector splat for a given element value.
Value *getVectorSplat(Value *V, unsigned NumElements) {
V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
return V;
}
bool visitMemSetInst(MemSetInst &II) {
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getRawDest() == OldPtr);
AAMDNodes AATags = II.getAAMetadata();
// If the memset has a variable size, it cannot be split, just adjust the
// pointer to the new alloca.
if (!isa<ConstantInt>(II.getLength())) {
assert(!IsSplit);
assert(NewBeginOffset == BeginOffset);
II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
II.setDestAlignment(getSliceAlign());
deleteIfTriviallyDead(OldPtr);
return false;
}
// Record this instruction for deletion.
Pass.DeadInsts.push_back(&II);
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
const bool CanContinue = [&]() {
if (VecTy || IntTy)
return true;
if (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)
return false;
// Length must be in range for FixedVectorType.
auto *C = cast<ConstantInt>(II.getLength());
const uint64_t Len = C->getLimitedValue();
if (Len > std::numeric_limits<unsigned>::max())
return false;
auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
return canConvertValue(DL, SrcTy, AllocaTy) &&
DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedSize());
}();
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memset.
if (!CanContinue) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemSet(
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
MaybeAlign(getSliceAlign()), II.isVolatile());
if (AATags)
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
// If we can represent this as a simple value, we have to build the actual
// value to store, which requires expanding the byte present in memset to
// a sensible representation for the alloca type. This is essentially
// splatting the byte to a sufficiently wide integer, splatting it across
// any desired vector width, and bitcasting to the final type.
Value *V;
if (VecTy) {
// If this is a memset of a vectorized alloca, insert it.
assert(ElementTy == ScalarTy);
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
"Too many elements!");
Value *Splat = getIntegerSplat(
II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8);
Splat = convertValue(DL, IRB, Splat, ElementTy);
if (NumElements > 1)
Splat = getVectorSplat(Splat, NumElements);
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "oldload");
V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
} else if (IntTy) {
// If this is a memset on an alloca where we can widen stores, insert the
// set integer.
assert(!II.isVolatile());
uint64_t Size = NewEndOffset - NewBeginOffset;
V = getIntegerSplat(II.getValue(), Size);
if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaEndOffset)) {
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
V = insertInteger(DL, IRB, Old, V, Offset, "insert");
} else {
assert(V->getType() == IntTy &&
"Wrong type for an alloca wide integer!");
}
V = convertValue(DL, IRB, V, AllocaTy);
} else {
// Established these invariants above.
assert(NewBeginOffset == NewAllocaBeginOffset);
assert(NewEndOffset == NewAllocaEndOffset);
V = getIntegerSplat(II.getValue(),
DL.getTypeSizeInBits(ScalarTy).getFixedSize() / 8);
if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
V = getVectorSplat(
V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
V = convertValue(DL, IRB, V, AllocaTy);
}
StoreInst *New =
IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
}
bool visitMemTransferInst(MemTransferInst &II) {
// Rewriting of memory transfer instructions can be a bit tricky. We break
// them into two categories: split intrinsics and unsplit intrinsics.
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
AAMDNodes AATags = II.getAAMetadata();
bool IsDest = &II.getRawDestUse() == OldUse;
assert((IsDest && II.getRawDest() == OldPtr) ||
(!IsDest && II.getRawSource() == OldPtr));
Align SliceAlign = getSliceAlign();
// For unsplit intrinsics, we simply modify the source and destination
// pointers in place. This isn't just an optimization, it is a matter of
// correctness. With unsplit intrinsics we may be dealing with transfers
// within a single alloca before SROA ran, or with transfers that have
// a variable length. We may also be dealing with memmove instead of
// memcpy, and so simply updating the pointers in place is the only way for
// us to update both the source and dest of a single call.
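// For example (illustrative): an unsplit memmove that copies bytes [0, 6)
// of an alloca over bytes [2, 8) of the same alloca overlaps itself;
// rewriting it as anything other than a single retargeted call could read
// bytes that have already been overwritten.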
if (!IsSplittable) {
Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
if (IsDest) {
II.setDest(AdjustedPtr);
II.setDestAlignment(SliceAlign);
} else {
II.setSource(AdjustedPtr);
II.setSourceAlignment(SliceAlign);
}
LLVM_DEBUG(dbgs() << " to: " << II << "\n");
deleteIfTriviallyDead(OldPtr);
return false;
}
// For split transfer intrinsics we have an incredibly useful assurance:
// the source and destination do not reside within the same alloca, and at
// least one of them does not escape. This means that we can replace
// memmove with memcpy, and we don't need to worry about all manner of
// downsides to splitting and transforming the operations.
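// For example (illustrative): once split, a memmove between two distinct
// allocas cannot overlap, so each piece can safely become an independent
// load and store (or memcpy) without changing the original semantics.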
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memcpy.
bool EmitMemCpy =
!VecTy && !IntTy &&
(BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
SliceSize !=
DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedSize() ||
!NewAI.getAllocatedType()->isSingleValueType());
// If we're just going to emit a memcpy, the alloca hasn't changed, and the
// size hasn't been shrunk based on analysis of the viable range, this is
// a no-op.
if (EmitMemCpy && &OldAI == &NewAI) {
// Ensure the start lines up.
assert(NewBeginOffset == BeginOffset);
// Rewrite the size as needed.
if (NewEndOffset != EndOffset)
II.setLength(ConstantInt::get(II.getLength()->getType(),
NewEndOffset - NewBeginOffset));
return false;
}
// Record this instruction for deletion.
Pass.DeadInsts.push_back(&II);
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after rewriting this instruction.
Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
if (AllocaInst *AI =
dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
assert(AI != &OldAI && AI != &NewAI &&
"Splittable transfers cannot reach the same alloca on both ends.");
Pass.Worklist.insert(AI);
}
Type *OtherPtrTy = OtherPtr->getType();
unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
// Compute the relative offset for the other pointer within the transfer.
unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
Align OtherAlign =
(IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
OtherAlign =
commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
if (EmitMemCpy) {
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
Value *DestPtr, *SrcPtr;
MaybeAlign DestAlign, SrcAlign;
// Note: IsDest is true iff we're copying into the new alloca slice
if (IsDest) {
DestPtr = OurPtr;
DestAlign = SliceAlign;
SrcPtr = OtherPtr;
SrcAlign = OtherAlign;
} else {
DestPtr = OtherPtr;
DestAlign = OtherAlign;
SrcPtr = OurPtr;
SrcAlign = SliceAlign;
}
CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
Size, II.isVolatile());
if (AATags)
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset;
uint64_t Size = NewEndOffset - NewBeginOffset;
unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
unsigned NumElements = EndIndex - BeginIndex;
IntegerType *SubIntTy =
IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
// Reset the other pointer type to match the register type we're going to
// use, but using the address space of the original other pointer.
Type *OtherTy;
if (VecTy && !IsWholeAlloca) {
if (NumElements == 1)
OtherTy = VecTy->getElementType();
else
OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements);
} else if (IntTy && !IsWholeAlloca) {
OtherTy = SubIntTy;
} else {
OtherTy = NewAllocaTy;
}
OtherPtrTy = OtherTy->getPointerTo(OtherAS);
Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
MaybeAlign SrcAlign = OtherAlign;
Value *DstPtr = &NewAI;
MaybeAlign DstAlign = SliceAlign;
if (!IsDest) {
std::swap(SrcPtr, DstPtr);
std::swap(SrcAlign, DstAlign);
}
Value *Src;
if (VecTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "load");
Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "load");
Src = convertValue(DL, IRB, Src, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
} else {
LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
II.isVolatile(), "copyload");
Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Src = Load;
}
if (VecTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "oldload");
Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
} else if (IntTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
Src = convertValue(DL, IRB, Src, NewAllocaTy);
}
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
}
bool visitIntrinsicInst(IntrinsicInst &II) {
assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
"Unexpected intrinsic!");
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
// Record this instruction for deletion.
Pass.DeadInsts.push_back(&II);
if (II.isDroppable()) {
assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
// TODO For now we forget assumed information, this can be improved.
OldPtr->dropDroppableUsesIn(II);
return true;
}
assert(II.getArgOperand(1) == OldPtr);
// Lifetime intrinsics are only promotable if they cover the whole alloca.
// Therefore, we drop lifetime intrinsics which don't cover the whole
// alloca.
// (In theory, intrinsics which partially cover an alloca could be
// promoted, but PromoteMemToReg doesn't handle that case.)
// FIXME: Check whether the alloca is promotable before dropping the
// lifetime intrinsics?
if (NewBeginOffset != NewAllocaBeginOffset ||
NewEndOffset != NewAllocaEndOffset)
return true;
ConstantInt *Size =
ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
// Lifetime intrinsics always expect an i8* so directly get such a pointer
// for the new alloca slice.
Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
New = IRB.CreateLifetimeStart(Ptr, Size);
else
New = IRB.CreateLifetimeEnd(Ptr, Size);
(void)New;
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return true;
}
void fixLoadStoreAlign(Instruction &Root) {
// This algorithm implements the same visitor loop as
// hasUnsafePHIOrSelectUse, and fixes the alignment of each load
// or store found.
SmallPtrSet<Instruction *, 4> Visited;
SmallVector<Instruction *, 4> Uses;
Visited.insert(&Root);
Uses.push_back(&Root);
do {
Instruction *I = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
continue;
}
assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
isa<PHINode>(I) || isa<SelectInst>(I) ||
isa<GetElementPtrInst>(I));
for (User *U : I->users())
if (Visited.insert(cast<Instruction>(U)).second)
Uses.push_back(cast<Instruction>(U));
} while (!Uses.empty());
}
bool visitPHINode(PHINode &PN) {
LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
// We would like to compute a new pointer in only one place, but have it be
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
IRBuilderBase::InsertPointGuard Guard(IRB);
if (isa<PHINode>(OldPtr))
IRB.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt());
else
IRB.SetInsertPoint(OldPtr);
IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
// Replace the operands which were using the old pointer.
std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
// Fix the alignment of any loads or stores using this PHI node.
fixLoadStoreAlign(PN);
// PHIs can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
PHIUsers.insert(&PN);
return true;
}
bool visitSelectInst(SelectInst &SI) {
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
"Pointer isn't an operand!");
assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
// Replace the operands which were using the old pointer.
if (SI.getOperand(1) == OldPtr)
SI.setOperand(1, NewPtr);
if (SI.getOperand(2) == OldPtr)
SI.setOperand(2, NewPtr);
LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
// Fix the alignment of any loads or stores using this select.
fixLoadStoreAlign(SI);
// Selects can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
SelectUsers.insert(&SI);
return true;
}
};
namespace {
/// Visitor to rewrite aggregate loads and stores as scalar.
///
/// This pass aggressively rewrites all aggregate loads and stores on
/// a particular pointer (or any pointer derived from it which we can identify)
/// with scalar loads and stores.
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class InstVisitor<AggLoadStoreRewriter, bool>;
/// Queue of pointer uses to analyze and potentially rewrite.
SmallVector<Use *, 8> Queue;
/// Set to prevent us from cycling with phi nodes and loops.
SmallPtrSet<User *, 8> Visited;
/// The current pointer use being rewritten. This is used to dig up the used
/// value (as opposed to the user).
Use *U = nullptr;
/// Used to calculate offsets, and hence alignment, of subobjects.
const DataLayout &DL;
IRBuilderTy &IRB;
public:
AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
: DL(DL), IRB(IRB) {}
/// Rewrite loads and stores through a pointer and all pointers derived from
/// it.
bool rewrite(Instruction &I) {
LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
enqueueUsers(I);
bool Changed = false;
while (!Queue.empty()) {
U = Queue.pop_back_val();
Changed |= visit(cast<Instruction>(U->getUser()));
}
return Changed;
}
private:
/// Enqueue all the users of the given instruction for further processing.
/// This uses a set to de-duplicate users.
void enqueueUsers(Instruction &I) {
for (Use &U : I.uses())
if (Visited.insert(U.getUser()).second)
Queue.push_back(&U);
}
// Conservative default is to not rewrite anything.
bool visitInstruction(Instruction &I) { return false; }
/// Generic recursive split emission class.
template <typename Derived> class OpSplitter {
protected:
/// The builder used to form new instructions.
IRBuilderTy &IRB;
/// The indices to be used with insertvalue or extractvalue to select the
/// appropriate value within the aggregate.
SmallVector<unsigned, 4> Indices;
/// The indices to a GEP instruction which will move Ptr to the correct slot
/// within the aggregate.
SmallVector<Value *, 4> GEPIndices;
/// The base pointer of the original op, used as a base for GEPing the
/// split operations.
Value *Ptr;
/// The base pointee type being GEPed into.
Type *BaseTy;
/// Known alignment of the base pointer.
Align BaseAlign;
/// Used to calculate the offset of each component so we can correctly
/// deduce alignments.
const DataLayout &DL;
/// Initialize the splitter with an insertion point, Ptr and start with a
/// single zero GEP index.
OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
: IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
BaseAlign(BaseAlign), DL(DL) {
IRB.SetInsertPoint(InsertionPoint);
}
public:
/// Generic recursive split emission routine.
///
/// This method recursively splits an aggregate op (load or store) into
/// scalar or vector ops. It splits recursively until it hits a single value
/// and emits that single value operation via the template argument.
///
/// The logic of this routine relies on GEPs and insertvalue and
/// extractvalue all operating with the same fundamental index list, merely
/// formatted differently (GEPs need actual values).
///
/// \param Ty The type being split recursively into smaller ops.
/// \param Agg The aggregate value being built up or stored, depending on
/// whether this is splitting a load or a store respectively.
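///
/// For example (illustrative): splitting a load of {i32, {float, i8}}
/// reaches leaves with Indices = {0}, {1,0}, and {1,1}; the {1,0} leaf is
/// addressed via a GEP with indices (0, 1, 0) and merged back into the
/// aggregate with insertvalue %agg, %leaf, 1, 0.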
void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
if (Ty->isSingleValueType()) {
unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
return static_cast<Derived *>(this)->emitFunc(
Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
}
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned OldSize = Indices.size();
(void)OldSize;
for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
++Idx) {
assert(Indices.size() == OldSize && "Did not return to the old size");
Indices.push_back(Idx);
GEPIndices.push_back(IRB.getInt32(Idx));
emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
GEPIndices.pop_back();
Indices.pop_back();
}
return;
}
if (StructType *STy = dyn_cast<StructType>(Ty)) {
unsigned OldSize = Indices.size();
(void)OldSize;
for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
++Idx) {
assert(Indices.size() == OldSize && "Did not return to the old size");
Indices.push_back(Idx);
GEPIndices.push_back(IRB.getInt32(Idx));
emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
GEPIndices.pop_back();
Indices.pop_back();
}
return;
}
llvm_unreachable("Only arrays and structs are aggregate loadable types");
}
};
struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
AAMDNodes AATags;
LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
IRBuilderTy &IRB)
: OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
IRB),
AATags(AATags) {}
/// Emit a leaf load of a single value. This is called at the leaves of the
/// recursive emission to actually load values.
void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
Value *GEP =
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
LoadInst *Load =
IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
APInt Offset(
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
if (AATags &&
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
}
};
bool visitLoadInst(LoadInst &LI) {
assert(LI.getPointerOperand() == *U);
if (!LI.isSimple() || LI.getType()->isSingleValueType())
return false;
// We have an aggregate being loaded, split it apart.
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
getAdjustedAlignment(&LI, 0), DL, IRB);
Value *V = PoisonValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
Visited.erase(&LI);
LI.replaceAllUsesWith(V);
LI.eraseFromParent();
return true;
}
struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
IRBuilderTy &IRB)
: OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
DL, IRB),
AATags(AATags) {}
AAMDNodes AATags;
/// Emit a leaf store of a single value. This is called at the leaves of the
/// recursive emission to actually produce stores.
void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
assert(Ty->isSingleValueType());
// Extract the single value and store it using the indices.
//
// The gep and extractvalue values are factored out of the CreateStore
// call to make the output independent of the argument evaluation order.
Value *ExtractValue =
IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
Value *InBoundsGEP =
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
StoreInst *Store =
IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
APInt Offset(
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
if (AATags &&
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
}
};
bool visitStoreInst(StoreInst &SI) {
if (!SI.isSimple() || SI.getPointerOperand() != *U)
return false;
Value *V = SI.getValueOperand();
if (V->getType()->isSingleValueType())
return false;
// We have an aggregate being stored, split it apart.
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(),
getAdjustedAlignment(&SI, 0), DL, IRB);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
Visited.erase(&SI);
SI.eraseFromParent();
return true;
}
bool visitBitCastInst(BitCastInst &BC) {
enqueueUsers(BC);
return false;
}
bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
enqueueUsers(ASC);
return false;
}
// Fold gep (select cond, ptr1, ptr2) => select cond, gep(ptr1), gep(ptr2)
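// For example (illustrative names):
//   %gep = getelementptr inbounds [4 x i32], [4 x i32]* %sel, i64 0, i64 1
// where %sel selects between %a and %b becomes
//   %a.sroa.gep = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 1
//   %b.sroa.gep = getelementptr inbounds [4 x i32], [4 x i32]* %b, i64 0, i64 1
//   %sel.sroa.sel = select i1 %c, i32* %a.sroa.gep, i32* %b.sroa.gep
// which lets the rewriter reason about each pointer independently.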
bool foldGEPSelect(GetElementPtrInst &GEPI) {
if (!GEPI.hasAllConstantIndices())
return false;
SelectInst *Sel = cast<SelectInst>(GEPI.getPointerOperand());
LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):"
<< "\n original: " << *Sel
<< "\n " << GEPI);
IRB.SetInsertPoint(&GEPI);
SmallVector<Value *, 4> Index(GEPI.indices());
bool IsInBounds = GEPI.isInBounds();
Type *Ty = GEPI.getSourceElementType();
Value *True = Sel->getTrueValue();
Value *NTrue = IRB.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep",
IsInBounds);
Value *False = Sel->getFalseValue();
Value *NFalse = IRB.CreateGEP(Ty, False, Index,
False->getName() + ".sroa.gep", IsInBounds);
Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
Sel->getName() + ".sroa.sel");
Visited.erase(&GEPI);
GEPI.replaceAllUsesWith(NSel);
GEPI.eraseFromParent();
Instruction *NSelI = cast<Instruction>(NSel);
Visited.insert(NSelI);
enqueueUsers(*NSelI);
LLVM_DEBUG(dbgs() << "\n to: " << *NTrue
<< "\n " << *NFalse
<< "\n " << *NSel << '\n');
return true;
}
// Fold gep (phi ptr1, ptr2) => phi gep(ptr1), gep(ptr2)
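// For example (illustrative): a GEP whose pointer operand is
//   %phi = phi i32* [ %a, %bb1 ], [ %b, %bb2 ]
// is replaced by a new PHI of per-predecessor GEPs, each one emitted just
// after the instruction that defines its incoming pointer.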
bool foldGEPPhi(GetElementPtrInst &GEPI) {
if (!GEPI.hasAllConstantIndices())
return false;
PHINode *PHI = cast<PHINode>(GEPI.getPointerOperand());
if (GEPI.getParent() != PHI->getParent() ||
llvm::any_of(PHI->incoming_values(), [](Value *In) {
Instruction *I = dyn_cast<Instruction>(In);
return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) ||
succ_empty(I->getParent()) ||
!I->getParent()->isLegalToHoistInto();
}))
return false;
LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):"
<< "\n original: " << *PHI
<< "\n " << GEPI
<< "\n to: ");
SmallVector<Value *, 4> Index(GEPI.indices());
bool IsInBounds = GEPI.isInBounds();
IRB.SetInsertPoint(GEPI.getParent()->getFirstNonPHI());
PHINode *NewPN = IRB.CreatePHI(GEPI.getType(), PHI->getNumIncomingValues(),
PHI->getName() + ".sroa.phi");
for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
BasicBlock *B = PHI->getIncomingBlock(I);
Value *NewVal = nullptr;
int Idx = NewPN->getBasicBlockIndex(B);
if (Idx >= 0) {
NewVal = NewPN->getIncomingValue(Idx);
} else {
Instruction *In = cast<Instruction>(PHI->getIncomingValue(I));
IRB.SetInsertPoint(In->getParent(), std::next(In->getIterator()));
Type *Ty = GEPI.getSourceElementType();
NewVal = IRB.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep",
IsInBounds);
}
NewPN->addIncoming(NewVal, B);
}
Visited.erase(&GEPI);
GEPI.replaceAllUsesWith(NewPN);
GEPI.eraseFromParent();
Visited.insert(NewPN);
enqueueUsers(*NewPN);
LLVM_DEBUG(for (Value *In : NewPN->incoming_values())
dbgs() << "\n " << *In;
dbgs() << "\n " << *NewPN << '\n');
return true;
}
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (isa<SelectInst>(GEPI.getPointerOperand()) &&
foldGEPSelect(GEPI))
return true;
if (isa<PHINode>(GEPI.getPointerOperand()) &&
foldGEPPhi(GEPI))
return true;
enqueueUsers(GEPI);
return false;
}
bool visitPHINode(PHINode &PN) {
enqueueUsers(PN);
return false;
}
bool visitSelectInst(SelectInst &SI) {
enqueueUsers(SI);
return false;
}
};
} // end anonymous namespace
/// Strip aggregate type wrapping.
///
/// This removes no-op aggregate types wrapping an underlying type. It will
/// strip as many layers of types as it can without changing either the type
/// size or the allocated size.
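///
/// For example (illustrative): { { i32 } } and [1 x i32] both strip down to
/// i32, while { i32, i8 } is returned unchanged because stripping it to i32
/// would shrink the allocated size.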
static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
if (Ty->isSingleValueType())
return Ty;
uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedSize();
uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedSize();
Type *InnerTy;
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
InnerTy = ArrTy->getElementType();
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Index = SL->getElementContainingOffset(0);
InnerTy = STy->getElementType(Index);
} else {
return Ty;
}
if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedSize() ||
TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedSize())
return Ty;
return stripAggregateTypeWrapping(DL, InnerTy);
}
/// Try to find a partition of the aggregate type passed in for a given
/// offset and size.
///
/// This recurses through the aggregate type and tries to compute a subtype
/// based on the offset and size. When the offset and size span a sub-section
/// of an array, it will even compute a new array type for that sub-section,
/// and the same for structs.
///
/// Note that this routine is very strict and tries to find a partition of the
/// type which produces the *exact* right offset and size. It is not forgiving
/// when the size or offset causes either end of the type-based partition to
/// be off.
/// Also, this is a best-effort routine. It is reasonable to give up and not
/// return a type if necessary.
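///
/// For example (illustrative): with Ty = { i32, [4 x i16] }, an Offset of 4
/// and Size of 4 yields [2 x i16], while an Offset of 2 yields nullptr
/// because the partition would straddle the end of the i32 element.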
static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
uint64_t Size) {
if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedSize() == Size)
return stripAggregateTypeWrapping(DL, Ty);
if (Offset > DL.getTypeAllocSize(Ty).getFixedSize() ||
(DL.getTypeAllocSize(Ty).getFixedSize() - Offset) < Size)
return nullptr;
if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
Type *ElementTy;
uint64_t TyNumElements;
if (auto *AT = dyn_cast<ArrayType>(Ty)) {
ElementTy = AT->getElementType();
TyNumElements = AT->getNumElements();
} else {
// FIXME: This isn't right for vectors with non-byte-sized or
// non-power-of-two sized elements.
auto *VT = cast<FixedVectorType>(Ty);
ElementTy = VT->getElementType();
TyNumElements = VT->getNumElements();
}
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize();
uint64_t NumSkippedElements = Offset / ElementSize;
if (NumSkippedElements >= TyNumElements)
return nullptr;
Offset -= NumSkippedElements * ElementSize;
// First check if we need to recurse.
if (Offset > 0 || Size < ElementSize) {
// Bail if the partition ends in a different array element.
if ((Offset + Size) > ElementSize)
return nullptr;
// Recurse through the element type trying to peel off offset bytes.
return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
if (Size == ElementSize)
return stripAggregateTypeWrapping(DL, ElementTy);
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)
return nullptr;
return ArrayType::get(ElementTy, NumElements);
}
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
if (Offset >= SL->getSizeInBytes())
return nullptr;
uint64_t EndOffset = Offset + Size;
if (EndOffset > SL->getSizeInBytes())
return nullptr;
unsigned Index = SL->getElementContainingOffset(Offset);
Offset -= SL->getElementOffset(Index);
Type *ElementTy = STy->getElementType(Index);
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize();
if (Offset >= ElementSize)
return nullptr; // The offset points into alignment padding.
// See if any partition must be contained by the element.
if (Offset > 0 || Size < ElementSize) {
if ((Offset + Size) > ElementSize)
return nullptr;
return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
if (Size == ElementSize)
return stripAggregateTypeWrapping(DL, ElementTy);
StructType::element_iterator EI = STy->element_begin() + Index,
EE = STy->element_end();
if (EndOffset < SL->getSizeInBytes()) {
unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
if (Index == EndIndex)
return nullptr; // Within a single element and its padding.
// Don't try to form "natural" types if the elements don't line up with the
// expected size.
// FIXME: We could potentially recurse down through the last element in the
// sub-struct to find a natural end point.
if (SL->getElementOffset(EndIndex) != EndOffset)
return nullptr;
assert(Index < EndIndex);
EE = STy->element_begin() + EndIndex;
}
// Try to build up a sub-structure.
StructType *SubTy =
StructType::get(STy->getContext(), makeArrayRef(EI, EE), STy->isPacked());
const StructLayout *SubSL = DL.getStructLayout(SubTy);
if (Size != SubSL->getSizeInBytes())
return nullptr; // The sub-struct doesn't have quite the size needed.
return SubTy;
}
/// Pre-split loads and stores to simplify rewriting.
///
/// We want to break up the splittable load+store pairs as much as
/// possible. This is important to do as a preprocessing step, as once we
/// start rewriting the accesses to partitions of the alloca we lose the
/// necessary information to correctly split apart paired loads and stores
/// which both point into this alloca. The case to consider is something like
/// the following:
///
/// %a = alloca [12 x i8]
/// %gep1 = getelementptr [12 x i8]* %a, i32 0, i32 0
/// %gep2 = getelementptr [12 x i8]* %a, i32 0, i32 4
/// %gep3 = getelementptr [12 x i8]* %a, i32 0, i32 8
/// %iptr1 = bitcast i8* %gep1 to i64*
/// %iptr2 = bitcast i8* %gep2 to i64*
/// %fptr1 = bitcast i8* %gep1 to float*
/// %fptr2 = bitcast i8* %gep2 to float*
/// %fptr3 = bitcast i8* %gep3 to float*
/// store float 0.0, float* %fptr1
/// store float 1.0, float* %fptr2
/// %v = load i64* %iptr1
/// store i64 %v, i64* %iptr2
/// %f1 = load float* %fptr2
/// %f2 = load float* %fptr3
///
/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
/// promote everything so we recover the 2 SSA values that should have been
/// there all along.
///
/// \returns true if any changes are made.
bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
// Track the loads and stores which are candidates for pre-splitting here, in
// the order they first appear during the partition scan. These give stable
// iteration order and a basis for tracking which loads and stores we
// actually split.
SmallVector<LoadInst *, 4> Loads;
SmallVector<StoreInst *, 4> Stores;
// We need to accumulate the splits required of each load or store where we
// can find them via a direct lookup. This is important to cross-check loads
// and stores against each other. We also track the slice so that we can kill
// all the slices that end up split.
struct SplitOffsets {
Slice *S;
std::vector<uint64_t> Splits;
};
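// For example (illustrative): a splittable 12-byte slice starting at offset
// 0 that spans partitions [0,4), [4,8), and [8,12) records Splits = {4, 8};
// the final end offset is implied by the slice's own size.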
SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
// Track loads out of this alloca which cannot, for any reason, be pre-split.
// This is important as we also cannot pre-split stores of those loads!
// FIXME: This is all pretty gross. It means that we can be more aggressive
// in pre-splitting when the load feeding the store happens to come from
// a separate alloca. Put another way, the effectiveness of SROA would be
// decreased by a frontend which just concatenated all of its local allocas
// into one big flat alloca. But defeating such patterns is exactly the job
// SROA is tasked with! Sadly, to not have this discrepancy we would have
// change store pre-splitting to actually force pre-splitting of the load
// that feeds it *and all stores*. That makes pre-splitting much harder, but
// maybe it would make it more principled?
SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
for (auto &P : AS.partitions()) {
for (Slice &S : P) {
Instruction *I = cast<Instruction>(S.getUse()->getUser());
if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
// If this is a load we have to track that it can't participate in any
// pre-splitting. If this is a store of a load we have to track that
// that load also can't participate in any pre-splitting.
if (auto *LI = dyn_cast<LoadInst>(I))
UnsplittableLoads.insert(LI);
else if (auto *SI = dyn_cast<StoreInst>(I))
if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
UnsplittableLoads.insert(LI);
continue;
}
assert(P.endOffset() > S.beginOffset() &&
"Empty or backwards partition!");
// Determine if this is a pre-splittable slice.
if (auto *LI = dyn_cast<LoadInst>(I)) {
assert(!LI->isVolatile() && "Cannot split volatile loads!");
// The load must be used exclusively to store into other pointers for
// us to be able to arbitrarily pre-split it. The stores must also be
// simple to avoid changing semantics.
auto IsLoadSimplyStored = [](LoadInst *LI) {
for (User *LU : LI->users()) {
auto *SI = dyn_cast<StoreInst>(LU);
if (!SI || !SI->isSimple())
return false;
}
return true;
};
if (!IsLoadSimplyStored(LI)) {
UnsplittableLoads.insert(LI);
continue;
}
Loads.push_back(LI);
} else if (auto *SI = dyn_cast<StoreInst>(I)) {
if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
// Skip stores *of* pointers. FIXME: This shouldn't even be possible!
continue;
auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
if (!StoredLoad || !StoredLoad->isSimple())
continue;
assert(!SI->isVolatile() && "Cannot split volatile stores!");
Stores.push_back(SI);
} else {
// Other uses cannot be pre-split.
continue;
}
// Record the initial split.
LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
auto &Offsets = SplitOffsetsMap[I];
assert(Offsets.Splits.empty() &&
"Should not have splits the first time we see an instruction!");
Offsets.S = &S;
Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
}
// Now scan the already split slices, and add a split for any of them which
// we're going to pre-split.
for (Slice *S : P.splitSliceTails()) {
auto SplitOffsetsMapI =
SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
if (SplitOffsetsMapI == SplitOffsetsMap.end())
continue;
auto &Offsets = SplitOffsetsMapI->second;
assert(Offsets.S == S && "Found a mismatched slice!");
assert(!Offsets.Splits.empty() &&
"Cannot have an empty set of splits on the second partition!");
assert(Offsets.Splits.back() ==
P.beginOffset() - Offsets.S->beginOffset() &&
"Previous split does not end where this one begins!");
// Record each split. The last partition's end isn't needed as the size
// of the slice dictates that.
if (S->endOffset() > P.endOffset())
Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
}
}
// We may have split loads where some of their stores are split stores. For
// such loads and stores, we can only pre-split them if their splits exactly
// match relative to their starting offset. We have to verify this prior to
// any rewriting.
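// For example (illustrative): a load split at relative offsets {4, 8} whose
// store is split at {2, 6, 10} has no matching decomposition, so both are
// dropped from the candidate lists.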
llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
// Lookup the load we are storing in our map of split
// offsets.
auto *LI = cast<LoadInst>(SI->getValueOperand());
// If it was completely unsplittable, then we're done,
// and this store can't be pre-split.
if (UnsplittableLoads.count(LI))
return true;
auto LoadOffsetsI = SplitOffsetsMap.find(LI);
if (LoadOffsetsI == SplitOffsetsMap.end())
return false; // Unrelated loads are definitely safe.
auto &LoadOffsets = LoadOffsetsI->second;
// Now lookup the store's offsets.
auto &StoreOffsets = SplitOffsetsMap[SI];
// If the relative offsets of each split in the load and
// store match exactly, then we can split them and we
// don't need to remove them here.
if (LoadOffsets.Splits == StoreOffsets.Splits)
return false;
LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
<< " " << *LI << "\n"
<< " " << *SI << "\n");
// We've found a store and load that we need to split
// with mismatched relative splits. Just give up on them
// and remove both instructions from our list of
// candidates.
UnsplittableLoads.insert(LI);
return true;
});
// Now we have to go *back* through all the stores, because a later store may
// have caused an earlier store's load to become unsplittable and if it is
// unsplittable for the later store, then we can't rely on it being split in
// the earlier store either.
llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
auto *LI = cast<LoadInst>(SI->getValueOperand());
return UnsplittableLoads.count(LI);
});
// Once we've established all the loads that can't be split for some reason,
// filter any that made it into our list out.
llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
return UnsplittableLoads.count(LI);
});
// If no loads or stores are left, there is no pre-splitting to be done for
// this alloca.
if (Loads.empty() && Stores.empty())
return false;
// From here on, we can't fail and will be building new accesses, so rig up
// an IR builder.
IRBuilderTy IRB(&AI);
// Collect the new slices which we will merge into the alloca slices.
SmallVector<Slice, 4> NewSlices;
// Track any allocas we end up splitting loads and stores for so we iterate
// on them.
SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
// At this point, we have collected all of the loads and stores we can
// pre-split, and the specific splits needed for them. We actually do the
// splitting in a specific order so that we can handle the case where one of
// the loads is the value operand of one of the stores.
//
// First, we rewrite all of the split loads, and just accumulate each split
// load in a parallel structure. We also build the slices for them and append
// them to the alloca slices.
SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
std::vector<LoadInst *> SplitLoads;
const DataLayout &DL = AI.getModule()->getDataLayout();
for (LoadInst *LI : Loads) {
SplitLoads.clear();
auto &Offsets = SplitOffsetsMap[LI];
unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
"Load must have type size equal to store size");
assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
"Load must be >= slice size");
uint64_t BaseOffset = Offsets.S->beginOffset();
assert(BaseOffset + SliceSize > BaseOffset &&
"Cannot represent alloca access size using 64-bit integers!");
Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
IRB.SetInsertPoint(LI);
LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
auto AS = LI->getPointerAddressSpace();
auto *PartPtrTy = PartTy->getPointerTo(AS);
LoadInst *PLoad = IRB.CreateAlignedLoad(
PartTy,
getAdjustedPtr(IRB, DL, BasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset),
/*IsVolatile*/ false, LI->getName());
PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
// Append this load onto the list of split loads so we can find it later
// to rewrite the stores.
SplitLoads.push_back(PLoad);
// Now build a new slice for the alloca.
NewSlices.push_back(
Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
&PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
/*IsSplittable*/ false));
LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
<< ", " << NewSlices.back().endOffset()
<< "): " << *PLoad << "\n");
// See if we've handled all the splits.
if (Idx >= Size)
break;
// Setup the next partition.
PartOffset = Offsets.Splits[Idx];
++Idx;
PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
}
// Now that we have the split loads, do the slow walk over all uses of the
// load and rewrite them as split stores, or save the split loads to use
// below if the store is going to be split there anyways.
bool DeferredStores = false;
for (User *LU : LI->users()) {
StoreInst *SI = cast<StoreInst>(LU);
if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
DeferredStores = true;
LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
<< "\n");
continue;
}
Value *StoreBasePtr = SI->getPointerOperand();
IRB.SetInsertPoint(SI);
LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
LoadInst *PLoad = SplitLoads[Idx];
uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
auto *PartPtrTy =
PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
auto AS = SI->getPointerAddressSpace();
StoreInst *PStore = IRB.CreateAlignedStore(
PLoad,
getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset),
/*IsVolatile*/ false);
PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
// We want to immediately iterate on any allocas impacted by splitting
// this store, and we have to track any promotable alloca (indicated by
// a direct store) as needing to be resplit because it is no longer
// promotable.
if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
ResplitPromotableAllocas.insert(OtherAI);
Worklist.insert(OtherAI);
} else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
StoreBasePtr->stripInBoundsOffsets())) {
Worklist.insert(OtherAI);
}
// Mark the original store as dead.
DeadInsts.push_back(SI);
}
// Save the split loads if there are deferred stores among the users.
if (DeferredStores)
SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
// Mark the original load as dead and kill the original slice.
DeadInsts.push_back(LI);
Offsets.S->kill();
}
// Second, we rewrite all of the split stores. At this point, we know that
// all loads from this alloca have been split already. For stores of such
// loads, we can simply look up the pre-existing split loads. For stores of
// other loads, we split those loads first and then write split stores of
// them.
for (StoreInst *SI : Stores) {
auto *LI = cast<LoadInst>(SI->getValueOperand());
IntegerType *Ty = cast<IntegerType>(LI->getType());
assert(Ty->getBitWidth() % 8 == 0);
uint64_t StoreSize = Ty->getBitWidth() / 8;
assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
auto &Offsets = SplitOffsetsMap[SI];
assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
"Slice size should always match load size exactly!");
uint64_t BaseOffset = Offsets.S->beginOffset();
assert(BaseOffset + StoreSize > BaseOffset &&
"Cannot represent alloca access size using 64-bit integers!");
Value *LoadBasePtr = LI->getPointerOperand();
Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
// Check whether we have an already split load.
auto SplitLoadsMapI = SplitLoadsMap.find(LI);
std::vector<LoadInst *> *SplitLoads = nullptr;
if (SplitLoadsMapI != SplitLoadsMap.end()) {
SplitLoads = &SplitLoadsMapI->second;
assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
"Too few split loads for the number of splits in the store!");
} else {
LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
}
uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
// Either lookup a split load or create one.
LoadInst *PLoad;
if (SplitLoads) {
PLoad = (*SplitLoads)[Idx];
} else {
IRB.SetInsertPoint(LI);
auto AS = LI->getPointerAddressSpace();
PLoad = IRB.CreateAlignedLoad(
PartTy,
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
LoadPartPtrTy, LoadBasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset),
/*IsVolatile*/ false, LI->getName());
PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
}
// And store this partition.
IRB.SetInsertPoint(SI);
auto AS = SI->getPointerAddressSpace();
StoreInst *PStore = IRB.CreateAlignedStore(
PLoad,
getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
StorePartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset),
/*IsVolatile*/ false);
PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
// Now build a new slice for the alloca.
NewSlices.push_back(
Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
&PStore->getOperandUse(PStore->getPointerOperandIndex()),
/*IsSplittable*/ false));
LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
<< ", " << NewSlices.back().endOffset()
<< "): " << *PStore << "\n");
if (!SplitLoads) {
LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
}
// See if we've finished all the splits.
if (Idx >= Size)
break;
// Setup the next partition.
PartOffset = Offsets.Splits[Idx];
++Idx;
PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
}
// We want to immediately iterate on any allocas impacted by splitting
// this load, which is only relevant if it isn't a load of this alloca and
// thus we didn't already split the loads above. We also have to keep track
// of any promotable allocas we split loads on as they can no longer be
// promoted.
if (!SplitLoads) {
if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
assert(OtherAI != &AI && "We can't re-split our own alloca!");
ResplitPromotableAllocas.insert(OtherAI);
Worklist.insert(OtherAI);
} else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
LoadBasePtr->stripInBoundsOffsets())) {
assert(OtherAI != &AI && "We can't re-split our own alloca!");
Worklist.insert(OtherAI);
}
}
// Mark the original store as dead now that we've split it up and kill its
// slice. Note that we leave the original load in place unless this store
// was its only use. It may in turn be split up if it is an alloca load
// for some other alloca, but it may be a normal load. This may introduce
// redundant loads, but where those can be merged the rest of the optimizer
// should handle the merging, and this uncovers SSA splits which is more
// important. In practice, the original loads will almost always be fully
// split and removed eventually, and the splits will be merged by any
// trivial CSE, including instcombine.
if (LI->hasOneUse()) {
assert(*LI->user_begin() == SI && "Single use isn't this store!");
DeadInsts.push_back(LI);
}
DeadInsts.push_back(SI);
Offsets.S->kill();
}
// Remove the killed slices that have been pre-split.
llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
// Insert our new slices. This will sort and merge them into the sorted
// sequence.
AS.insert(NewSlices);
LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
#ifndef NDEBUG
for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
LLVM_DEBUG(AS.print(dbgs(), I, " "));
#endif
// Finally, don't try to promote any allocas that now require re-splitting.
// They have already been added to the worklist above.
llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) {
return ResplitPromotableAllocas.count(AI);
});
return true;
}
/// Rewrite an alloca partition's users.
///
/// This routine drives both of the rewriting goals of the SROA pass. It tries
/// to rewrite uses of an alloca partition to be conducive for SSA value
/// promotion. If the partition needs a new, more refined alloca, this will
/// build that new alloca, preserving as much type information as possible, and
/// rewrite the uses of the old alloca to point at the new one and have the
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
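// For example (illustrative): an 8-byte partition whose uses all operate on
// i64 gets SliceTy = i64, while a 3-byte partition with no common type
// typically falls back to [3 x i8] since i24 is rarely a legal integer.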
Type *SliceTy = nullptr;
const DataLayout &DL = AI.getModule()->getDataLayout();
std::pair<Type *, IntegerType *> CommonUseTy =
findCommonType(P.begin(), P.end(), P.endOffset());
// Do all uses operate on the same type?
if (CommonUseTy.first)
if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size())
SliceTy = CommonUseTy.first;
// If not, can we find an appropriate subtype in the original allocated type?
if (!SliceTy)
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;
// If still not, can we use the largest bitwidth integer type used?
if (!SliceTy && CommonUseTy.second)
if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size())
SliceTy = CommonUseTy.second;
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
DL.isLegalInteger(P.size() * 8))
SliceTy = Type::getIntNTy(*C, P.size() * 8);
if (!SliceTy)
SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
assert(DL.getTypeAllocSize(SliceTy).getFixedSize() >= P.size());
bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
VectorType *VecTy =
IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
if (VecTy)
SliceTy = VecTy;
// Check for the case where we're going to rewrite to a new alloca of the
// exact same type as the original, and with the same access offsets. In that
// case, re-use the existing alloca, but still run through the rewriter to
// perform phi and select speculation.
// P.beginOffset() can be non-zero even with the same type in a case with
// out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
AllocaInst *NewAI;
if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
NewAI = &AI;
// FIXME: We should be able to bail at this point with "nothing changed".
// FIXME: We might want to defer PHI speculation until after here.
// FIXME: return nullptr;
} else {
// Make sure the alignment is compatible with P.beginOffset().
const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
NewAI = new AllocaInst(
SliceTy, AI.getType()->getAddressSpace(), nullptr,
IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
// Copy the old AI debug location over to the new one.
NewAI->setDebugLoc(AI.getDebugLoc());
++NumNewAllocas;
}
LLVM_DEBUG(dbgs() << "Rewriting alloca partition "
<< "[" << P.beginOffset() << "," << P.endOffset()
<< ") to: " << *NewAI << "\n");
// Track the high watermark on the worklist as it is only relevant for
// promoted allocas. We will reset it to this point if the alloca is not in
// fact scheduled for promotion.
unsigned PPWOldSize = PostPromotionWorklist.size();
unsigned NumUses = 0;
SmallSetVector<PHINode *, 8> PHIUsers;
SmallSetVector<SelectInst *, 8> SelectUsers;
AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
P.endOffset(), IsIntegerPromotable, VecTy,
PHIUsers, SelectUsers);
bool Promotable = true;
for (Slice *S : P.splitSliceTails()) {
Promotable &= Rewriter.visit(S);
++NumUses;
}
for (Slice &S : P) {
Promotable &= Rewriter.visit(&S);
++NumUses;
}
NumAllocaPartitionUses += NumUses;
MaxUsesPerAllocaPartition.updateMax(NumUses);
// Now that we've processed all the slices in the new partition, check if any
// PHIs or Selects would block promotion.
for (PHINode *PHI : PHIUsers)
if (!isSafePHIToSpeculate(*PHI)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
break;
}
for (SelectInst *Sel : SelectUsers)
if (!isSafeSelectToSpeculate(*Sel)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
break;
}
if (Promotable) {
for (Use *U : AS.getDeadUsesIfPromotable()) {
auto *OldInst = dyn_cast<Instruction>(U->get());
Value::dropDroppableUse(*U);
if (OldInst)
if (isInstructionTriviallyDead(OldInst))
DeadInsts.push_back(OldInst);
}
if (PHIUsers.empty() && SelectUsers.empty()) {
// Promote the alloca.
PromotableAllocas.push_back(NewAI);
} else {
// If we have either PHIs or Selects to speculate, add them to those
// worklists and re-queue the new alloca so that we promote it on the
// next iteration.
for (PHINode *PHIUser : PHIUsers)
SpeculatablePHIs.insert(PHIUser);
for (SelectInst *SelectUser : SelectUsers)
SpeculatableSelects.insert(SelectUser);
Worklist.insert(NewAI);
}
} else {
// Drop any post-promotion work items if promotion didn't happen.
while (PostPromotionWorklist.size() > PPWOldSize)
PostPromotionWorklist.pop_back();
// We couldn't promote and we didn't create a new partition, nothing
// happened.
if (NewAI == &AI)
return nullptr;
// If we can't promote the alloca, iterate on it to check for new
// refinements exposed by splitting the current alloca. Don't iterate on an
// alloca which didn't actually change and didn't get promoted.
Worklist.insert(NewAI);
}
return NewAI;
}
/// Walks the slices of an alloca and forms partitions based on them,
/// rewriting each of their uses.
bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (AS.begin() == AS.end())
return false;
unsigned NumPartitions = 0;
bool Changed = false;
const DataLayout &DL = AI.getModule()->getDataLayout();
// First try to pre-split loads and stores.
Changed |= presplitLoadsAndStores(AI, AS);
// Now that we have identified any pre-splitting opportunities,
// mark loads and stores unsplittable except for the following case.
// We leave a slice splittable if all other slices are disjoint or fully
// included in the slice, such as whole-alloca loads and stores.
// If we fail to split these during pre-splitting, we want to force them
// to be rewritten into a partition.
bool IsSorted = true;
uint64_t AllocaSize =
DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize();
const uint64_t MaxBitVectorSize = 1024;
if (AllocaSize <= MaxBitVectorSize) {
// If a byte boundary is included in any load or store, a slice starting or
// ending at the boundary is not splittable.
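// For example (illustrative): in an 8-byte alloca, an i32 load covering
// [2, 6) clears offsets 3, 4, and 5, so a splittable slice ending at offset
// 4 would be made unsplittable below.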
SmallBitVector SplittableOffset(AllocaSize + 1, true);
for (Slice &S : AS)
for (unsigned O = S.beginOffset() + 1;
O < S.endOffset() && O < AllocaSize; O++)
SplittableOffset.reset(O);
for (Slice &S : AS) {
if (!S.isSplittable())
continue;
if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
(S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
continue;
if (isa<LoadInst>(S.getUse()->getUser()) ||
isa<StoreInst>(S.getUse()->getUser())) {
S.makeUnsplittable();
IsSorted = false;
}
}
} else {
// We only allow whole-alloca splittable loads and stores
// for a large alloca to avoid creating too large BitVector.
for (Slice &S : AS) {
if (!S.isSplittable())
continue;
if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
continue;
if (isa<LoadInst>(S.getUse()->getUser()) ||
isa<StoreInst>(S.getUse()->getUser())) {
S.makeUnsplittable();
IsSorted = false;
}
}
}
if (!IsSorted)
llvm::sort(AS);
/// Describes the allocas introduced by rewritePartition in order to migrate
/// the debug info.
struct Fragment {
AllocaInst *Alloca;
uint64_t Offset;
uint64_t Size;
Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
: Alloca(AI), Offset(O), Size(S) {}
};
SmallVector<Fragment, 4> Fragments;
// Rewrite each partition.
for (auto &P : AS.partitions()) {
if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
Changed = true;
if (NewAI != &AI) {
uint64_t SizeOfByte = 8;
uint64_t AllocaSize =
DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedSize();
// Don't include any padding.
uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
}
}
++NumPartitions;
}
NumAllocaPartitions += NumPartitions;
MaxPartitionsPerAlloca.updateMax(NumPartitions);
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) {
auto *Expr = DbgDeclare->getExpression();
DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
uint64_t AllocaSize =
DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedSize();
for (auto Fragment : Fragments) {
// Create a fragment expression describing the new partition or reuse AI's
// expression if there is only one partition.
auto *FragmentExpr = Expr;
if (Fragment.Size < AllocaSize || Expr->isFragment()) {
// If this alloca is already a scalar replacement of a larger aggregate,
// Fragment.Offset describes the offset inside the scalar.
auto ExprFragment = Expr->getFragmentInfo();
uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
uint64_t Start = Offset + Fragment.Offset;
uint64_t Size = Fragment.Size;
if (ExprFragment) {
uint64_t AbsEnd =
ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
if (Start >= AbsEnd)
// No need to describe a SROAed padding.
continue;
Size = std::min(Size, AbsEnd - Start);
}
// The new, smaller fragment is stenciled out from the old fragment.
if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
assert(Start >= OrigFragment->OffsetInBits &&
"new fragment is outside of original fragment");
Start -= OrigFragment->OffsetInBits;
}
// The alloca may be larger than the variable.
auto VarSize = DbgDeclare->getVariable()->getSizeInBits();
if (VarSize) {
if (Size > *VarSize)
Size = *VarSize;
if (Size == 0 || Start + Size > *VarSize)
continue;
}
// Avoid creating a fragment expression that covers the entire variable.
if (!VarSize || *VarSize != Size) {
if (auto E =
DIExpression::createFragmentExpression(Expr, Start, Size))
FragmentExpr = *E;
else
continue;
}
}
// Remove any existing intrinsics on the new alloca describing
// the variable fragment.
for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) {
auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS,
const DbgVariableIntrinsic *RHS) {
return LHS->getVariable() == RHS->getVariable() &&
LHS->getDebugLoc()->getInlinedAt() ==
RHS->getDebugLoc()->getInlinedAt();
};
if (SameVariableFragment(OldDII, DbgDeclare))
OldDII->eraseFromParent();
}
DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr,
DbgDeclare->getDebugLoc(), &AI);
}
}
return Changed;
}
/// Clobber a use with poison, deleting the used value if it becomes dead.
void SROAPass::clobberUse(Use &U) {
Value *OldV = U;
// Replace the use with a poison value.
U = PoisonValue::get(OldV->getType());
// Check for this making an instruction dead. We have to garbage collect
// all the dead instructions to ensure the uses of any alloca end up being
// minimal.
if (Instruction *OldI = dyn_cast<Instruction>(OldV))
if (isInstructionTriviallyDead(OldI)) {
DeadInsts.push_back(OldI);
}
}
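// For instance (editorial sketch): if the dead user is
//   %g = getelementptr i8, ptr %a, i64 4
//   %dead = load i32, ptr %g
// clobbering the load's pointer operand rewrites it to use poison, and %g,
// having lost its only use, becomes trivially dead and is queued as well.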
/// Analyze an alloca for SROA.
///
/// This analyzes the alloca to ensure we can reason about it, builds
/// the slices of the alloca, and then hands it off to be split and
/// rewritten as needed.
bool SROAPass::runOnAlloca(AllocaInst &AI) {
LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
++NumAllocasAnalyzed;
// Special case dead allocas, as they're trivial.
if (AI.use_empty()) {
AI.eraseFromParent();
return true;
}
const DataLayout &DL = AI.getModule()->getDataLayout();
// Skip alloca forms that this analysis can't handle.
auto *AT = AI.getAllocatedType();
if (AI.isArrayAllocation() || !AT->isSized() || isa<ScalableVectorType>(AT) ||
DL.getTypeAllocSize(AT).getFixedSize() == 0)
return false;
bool Changed = false;
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
IRBuilderTy IRB(&AI);
AggLoadStoreRewriter AggRewriter(DL, IRB);
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
AllocaSlices AS(DL, AI);
LLVM_DEBUG(AS.print(dbgs()));
if (AS.isEscaped())
return Changed;
// Delete all the dead users of this alloca before splitting and rewriting it.
for (Instruction *DeadUser : AS.getDeadUsers()) {
// Free up everything used by this instruction.
for (Use &DeadOp : DeadUser->operands())
clobberUse(DeadOp);
// Now replace the uses of this instruction.
DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
// And mark it for deletion.
DeadInsts.push_back(DeadUser);
Changed = true;
}
for (Use *DeadOp : AS.getDeadOperands()) {
clobberUse(*DeadOp);
Changed = true;
}
// No slices to split. Leave the dead alloca for a later pass to clean up.
if (AS.begin() == AS.end())
return Changed;
Changed |= splitAlloca(AI, AS);
LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
while (!SpeculatablePHIs.empty())
speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
LLVM_DEBUG(dbgs() << " Speculating Selects\n");
while (!SpeculatableSelects.empty())
speculateSelectInstLoads(IRB, *SpeculatableSelects.pop_back_val());
return Changed;
}
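// End-to-end sketch (editorial, hypothetical IR): for
//   %a = alloca { i32, i32 }
//   store i32 1, ptr %a
//   %v = load i32, ptr %a
// the aggregate rewriter has nothing to split, slice analysis yields a
// single [0,4) partition, rewritePartition emits a promotable i32 alloca,
// and the later mem2reg run replaces %v with the stored constant 1.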
/// Delete the dead instructions accumulated in this run.
///
/// Recursively deletes the dead instructions we've accumulated. This is done
/// at the very end to maximize locality of the recursive delete and to
/// minimize the problems of invalidated instruction pointers as such pointers
/// are used heavily in the intermediate stages of the algorithm.
///
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
bool SROAPass::deleteDeadInstructions(
SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
bool Changed = false;
while (!DeadInsts.empty()) {
Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
if (!I) continue;
LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
// If the instruction is an alloca, find the possible dbg.declare connected
// to it, and remove it too. We must do this before calling RAUW or we will
// not be able to find it.
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
DeletedAllocas.insert(AI);
for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(AI))
OldDII->eraseFromParent();
}
I->replaceAllUsesWith(UndefValue::get(I->getType()));
for (Use &Operand : I->operands())
if (Instruction *U = dyn_cast<Instruction>(Operand)) {
// Zero out the operand and see if it becomes trivially dead.
Operand = nullptr;
if (isInstructionTriviallyDead(U))
DeadInsts.push_back(U);
}
++NumDeleted;
I->eraseFromParent();
Changed = true;
}
return Changed;
}
/// Promote the allocas, using the best available technique.
///
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
/// This function returns whether any promotion occurred.
bool SROAPass::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
NumPromoted += PromotableAllocas.size();
LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
PromoteMemToReg(PromotableAllocas, *DT, AC);
PromotableAllocas.clear();
return true;
}
PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT,
AssumptionCache &RunAC) {
LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
DT = &RunDT;
AC = &RunAC;
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
I != E; ++I) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
if (isa<ScalableVectorType>(AI->getAllocatedType())) {
if (isAllocaPromotable(AI))
PromotableAllocas.push_back(AI);
} else {
Worklist.insert(AI);
}
}
}
bool Changed = false;
// A set of deleted alloca instruction pointers which should be removed from
// the list of promotable allocas.
SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
do {
while (!Worklist.empty()) {
Changed |= runOnAlloca(*Worklist.pop_back_val());
Changed |= deleteDeadInstructions(DeletedAllocas);
// Remove the deleted allocas from various lists so that we don't try to
// continue processing them.
if (!DeletedAllocas.empty()) {
auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
Worklist.remove_if(IsInSet);
PostPromotionWorklist.remove_if(IsInSet);
llvm::erase_if(PromotableAllocas, IsInSet);
DeletedAllocas.clear();
}
}
Changed |= promoteAllocas(F);
Worklist = PostPromotionWorklist;
PostPromotionWorklist.clear();
} while (!Worklist.empty());
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
return PA;
}
PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F));
}
/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
///
/// This is in the llvm namespace purely to allow it to be a friend of the \c
/// SROA pass.
class llvm::sroa::SROALegacyPass : public FunctionPass {
/// The SROA implementation.
SROAPass Impl;
public:
static char ID;
SROALegacyPass() : FunctionPass(ID) {
initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
auto PA = Impl.runImpl(
F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
return !PA.areAllPreserved();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
StringRef getPassName() const override { return "SROA"; }
};
char SROALegacyPass::ID = 0;
FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); }
INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
"Scalar Replacement Of Aggregates", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
false, false)
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 42be67f3cfc0..264da2187754 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -1,590 +1,590 @@
#include "llvm/Transforms/Utils/VNCoercion.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "vncoerce"
namespace llvm {
namespace VNCoercion {
static bool isFirstClassAggregateOrScalableType(Type *Ty) {
return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty);
}
/// Return true if coerceAvailableValueToLoadType will succeed.
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
const DataLayout &DL) {
Type *StoredTy = StoredVal->getType();
if (StoredTy == LoadTy)
return true;
// If the loaded/stored value is a first-class array, struct, or scalable type,
// don't try to transform them. We need to be able to bitcast to integer.
if (isFirstClassAggregateOrScalableType(LoadTy) ||
isFirstClassAggregateOrScalableType(StoredTy))
return false;
uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedSize();
// The store size must be byte-aligned to support future type casts.
if (llvm::alignTo(StoreSize, 8) != StoreSize)
return false;
// The store has to be at least as big as the load.
if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedSize())
return false;
bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType());
// Don't coerce non-integral pointers to integers or vice versa.
if (StoredNI != LoadNI) {
// As a special case, allow coercion of memset used to initialize
// an array with null. Despite non-integral pointers not generally having a
// specific bit pattern, we do assume null is zero.
if (auto *CI = dyn_cast<Constant>(StoredVal))
return CI->isNullValue();
return false;
} else if (StoredNI && LoadNI &&
StoredTy->getPointerAddressSpace() !=
LoadTy->getPointerAddressSpace()) {
return false;
}
// The implementation below uses inttoptr for vectors of unequal size; we
// can't allow this for non integral pointers. We could teach it to extract
// exact subvectors if desired.
if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize())
return false;
return true;
}
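// Examples of the rules above (editorial, hypothetical types):
//   store i64,       load i32   -> coercible: byte-sized store >= load
//   store <4 x i8>,  load i32   -> coercible: equal 32-bit sizes
//   store i32,       load i64   -> rejected: store smaller than load
//   store {i32,i32}, any load   -> rejected: first-class aggregate
//   store i17,       load i8    -> rejected: 17 bits is not byte-aligned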
/// If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace.
/// Helper is the IRBuilder used to insert new instructions.
///
/// If we can't do it, return null.
Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
IRBuilderBase &Helper,
const DataLayout &DL) {
assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
"precondition violation - materialization can't fail");
if (auto *C = dyn_cast<Constant>(StoredVal))
StoredVal = ConstantFoldConstant(C, DL);
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedSize();
uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedSize();
// If the store and reload are the same size, we can always reuse it.
if (StoredValSize == LoadedValSize) {
// Pointer to Pointer -> use bitcast.
if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {
StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
} else {
// Convert source pointers to integers, which can be bitcast.
if (StoredValTy->isPtrOrPtrVectorTy()) {
StoredValTy = DL.getIntPtrType(StoredValTy);
StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
}
Type *TypeToCastTo = LoadedTy;
if (TypeToCastTo->isPtrOrPtrVectorTy())
TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
if (StoredValTy != TypeToCastTo)
StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);
// Cast to pointer if the load needs a pointer type.
if (LoadedTy->isPtrOrPtrVectorTy())
StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
}
if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
StoredVal = ConstantFoldConstant(C, DL);
return StoredVal;
}
// If the loaded value is smaller than the available value, then we can
// extract out a piece from it. If the available value is too small, then we
// can't do anything.
assert(StoredValSize >= LoadedValSize &&
"canCoerceMustAliasedValueToLoad fail");
// Convert source pointers to integers, which can be manipulated.
if (StoredValTy->isPtrOrPtrVectorTy()) {
StoredValTy = DL.getIntPtrType(StoredValTy);
StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
}
// Convert vectors and fp to integer, which can be manipulated.
if (!StoredValTy->isIntegerTy()) {
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
}
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (DL.isBigEndian()) {
uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedSize() -
DL.getTypeStoreSizeInBits(LoadedTy).getFixedSize();
StoredVal = Helper.CreateLShr(
StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
}
// Truncate the integer to the right size now.
Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);
if (LoadedTy != NewIntTy) {
// If the result is a pointer, inttoptr.
if (LoadedTy->isPtrOrPtrVectorTy())
StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
else
// Otherwise, bitcast.
StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
}
if (auto *C = dyn_cast<Constant>(StoredVal))
StoredVal = ConstantFoldConstant(C, DL);
return StoredVal;
}
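// Worked example (editorial): coercing a stored i64 0x0102030405060708 down
// to a loaded i16. Little-endian: no shift, the truncate keeps the low
// bytes, yielding 0x0708. Big-endian: the value is first lshr'd by
// 64 - 16 = 48 bits so the truncate yields the high bytes, 0x0102.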
/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering memory write (store, memset, memcpy, memmove). This
/// means that the write *may* provide bits used by the load but we can't be
/// sure because the pointers don't must-alias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
const DataLayout &DL) {
// If the loaded/stored value is a first class array/struct, or scalable type,
// don't try to transform them. We need to be able to bitcast to integer.
if (isFirstClassAggregateOrScalableType(LoadTy))
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
Value *StoreBase =
GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
if (StoreBase != LoadBase)
return -1;
uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize();
if ((WriteSizeInBits & 7) | (LoadSize & 7))
return -1;
uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
LoadSize /= 8;
// If the Load isn't completely contained within the stored bits, we don't
// have all the bits to feed it. We could do something crazy in the future
// (issue a smaller load and then merge the bits in) but this seems unlikely to be
// valuable.
if (StoreOffset > LoadOffset ||
StoreOffset + int64_t(StoreSize) < LoadOffset + int64_t(LoadSize))
return -1;
// Okay, we can do this transformation. Return the number of bytes into the
// store that the load is.
return LoadOffset - StoreOffset;
}
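// Example (editorial): an 8-byte store at offset 0 from the common base and
// a 4-byte load at offset 4 are fully contained ([4,8) within [0,8)), so
// this returns 4: the load reads bytes [4,8) of the stored value. A 4-byte
// load at offset 6 would cover [6,10), spill past the store, and return -1.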
/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI, const DataLayout &DL) {
auto *StoredVal = DepSI->getValueOperand();
// Cannot handle reading from store of first-class aggregate or scalable type.
if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
return -1;
if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
return -1;
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =
DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedSize();
return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
DL);
}
/// Looks at a memory location for a load (specified by MemLocBase, MemLocOffs,
/// and MemLocSize) and compares it against another load.
///
/// If the specified load could be safely widened to a larger integer load
/// that is 1) still efficient, 2) safe for the target, and 3) would provide
/// the specified memory location value, then this function returns the size
/// in bytes of the load width to use. If not, this returns zero.
static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase,
int64_t MemLocOffs,
unsigned MemLocSize,
const LoadInst *LI) {
// We can only extend simple integer loads.
if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
return 0;
// Load widening is hostile to ThreadSanitizer: it may cause false positives
// or make the reports more cryptic (access sizes are wrong).
if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return 0;
const DataLayout &DL = LI->getModule()->getDataLayout();
// Get the base of this load.
int64_t LIOffs = 0;
const Value *LIBase =
GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
// If the two pointers are not based on the same pointer, we can't tell that
// they are related.
if (LIBase != MemLocBase)
return 0;
// Okay, the two values are based on the same pointer, but returned as
// no-alias. This happens when we have things like two byte loads at "P+1"
// and "P+3". Check to see if increasing the size of the "LI" load up to its
// alignment (or the largest native integer type) will allow us to load all
// the bits required by MemLoc.
// If MemLoc is before LI, then no widening of LI will help us out.
if (MemLocOffs < LIOffs)
return 0;
// Get the alignment of the load in bytes. We assume that it is safe to load
// any legal integer up to this size without a problem. For example, if we're
// looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
// widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
// to i16.
unsigned LoadAlign = LI->getAlign().value();
int64_t MemLocEnd = MemLocOffs + MemLocSize;
// If no amount of rounding up will let MemLoc fit into LI, then bail out.
if (LIOffs + LoadAlign < MemLocEnd)
return 0;
// This is the size of the load to try. Start with the next larger power of
// two.
unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
while (true) {
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
if (NewLoadByteSize > LoadAlign ||
!DL.fitsInLegalInteger(NewLoadByteSize * 8))
return 0;
if (LIOffs + NewLoadByteSize > MemLocEnd &&
(LI->getParent()->getParent()->hasFnAttribute(
Attribute::SanitizeAddress) ||
LI->getParent()->getParent()->hasFnAttribute(
Attribute::SanitizeHWAddress)))
// We will be reading past the location accessed by the original program.
// While this is safe in a regular build, Address Safety analysis tools
// may start reporting false warnings. So, don't do widening.
return 0;
// If a load of this width would include all of MemLoc, then we succeed.
if (LIOffs + NewLoadByteSize >= MemLocEnd)
return NewLoadByteSize;
NewLoadByteSize <<= 1;
}
}
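// Example (editorial): an i8 load at offset 0 known to be 4-byte aligned,
// queried against a 2-byte location at offsets [2,4). Starting from
// NextPowerOf2(1) = 2 bytes, the loop doubles to 4, which still fits the
// alignment and now covers the location, so 4 is returned and the caller
// may widen the i8 load to an i32 load.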
/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
const DataLayout &DL) {
// Cannot handle reading from store of first-class aggregate yet.
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
return -1;
Value *DepPtr = DepLI->getPointerOperand();
uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedSize();
int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
if (R != -1)
return R;
// If we have a load/load clobber and DepLI can be widened to cover this load,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
unsigned Size =
getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI);
if (Size == 0)
return -1;
// Check non-obvious conditions enforced by MDA which we rely on for being
// able to materialize this potentially available value.
assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
}
int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI, const DataLayout &DL) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
if (!SizeCst)
return -1;
uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
// If this is a memset, we just need to see if the offset is valid within the
// size of the memset.
- if (MI->getIntrinsicID() == Intrinsic::memset) {
+ if (const auto *memset_inst = dyn_cast<MemSetInst>(MI)) {
if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
- auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue());
+ auto *CI = dyn_cast<ConstantInt>(memset_inst->getValue());
if (!CI || !CI->isZero())
return -1;
}
return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, DL);
}
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
// constant memory.
MemTransferInst *MTI = cast<MemTransferInst>(MI);
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (!Src)
return -1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src));
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return -1;
// See if the access is within the bounds of the transfer.
int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, DL);
if (Offset == -1)
return Offset;
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
if (ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), DL))
return Offset;
return -1;
}
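// Example (editorial, hypothetical names): an i32 load clobbered by
//   memcpy(%p, @cst, 16)
// where @cst is a constant global with a definitive initializer. If the
// load sits 4 bytes into the copy, the containment check returns 4, and the
// ConstantFoldLoadFromConstPtr probe confirms the bytes at that offset fold
// to a constant before 4 is returned to the caller.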
static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
Type *LoadTy, IRBuilderBase &Builder,
const DataLayout &DL) {
LLVMContext &Ctx = SrcVal->getType()->getContext();
// If two pointers are in the same address space, they have the same size,
// so we don't need to do any truncation, etc. This avoids introducing
// ptrtoint instructions for pointers that may be non-integral.
if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
cast<PointerType>(LoadTy)->getAddressSpace()) {
return SrcVal;
}
uint64_t StoreSize =
(DL.getTypeSizeInBits(SrcVal->getType()).getFixedSize() + 7) / 8;
uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedSize() + 7) / 8;
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (SrcVal->getType()->isPtrOrPtrVectorTy())
SrcVal =
Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
if (!SrcVal->getType()->isIntegerTy())
SrcVal =
Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
if (DL.isLittleEndian())
ShiftAmt = Offset * 8;
else
ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
if (ShiftAmt)
SrcVal = Builder.CreateLShr(SrcVal,
ConstantInt::get(SrcVal->getType(), ShiftAmt));
if (LoadSize != StoreSize)
SrcVal = Builder.CreateTruncOrBitCast(SrcVal,
IntegerType::get(Ctx, LoadSize * 8));
return SrcVal;
}
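// Shift-amount example (editorial): extracting a 2-byte load at byte offset
// 4 from an 8-byte store. Little-endian: ShiftAmt = 4 * 8 = 32, moving the
// wanted bytes down from bits [32,48). Big-endian: ShiftAmt =
// (8 - 2 - 4) * 8 = 16, because the first stored byte occupies the most
// significant bits.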
/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering store. This means that the store provides bits used by
/// the load but the pointers don't must-alias. Check this case to see if
/// there is anything more we can do before we give up.
Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
Instruction *InsertPt, const DataLayout &DL) {
IRBuilder<> Builder(InsertPt);
SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
}
Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
Type *LoadTy, const DataLayout &DL) {
return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL);
}
/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering load. This means that the load *may* provide bits used
/// by the load but we can't be sure because the pointers don't must-alias.
/// Check this case to see if there is anything more we can do before we give
/// up.
Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
Instruction *InsertPt, const DataLayout &DL) {
// If Offset plus the size of LoadTy exceeds the size of SrcVal, then we
// must be widening SrcVal out to a larger load.
unsigned SrcValStoreSize =
DL.getTypeStoreSize(SrcVal->getType()).getFixedSize();
unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
if (Offset + LoadSize > SrcValStoreSize) {
assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
// If we have a load/load clobber and DepLI can be widened to cover this
// load, then we should widen it to the next power of 2 size big enough!
unsigned NewLoadSize = Offset + LoadSize;
if (!isPowerOf2_32(NewLoadSize))
NewLoadSize = NextPowerOf2(NewLoadSize);
Value *PtrVal = SrcVal->getPointerOperand();
// Insert the new load after the old load. This ensures that subsequent
// memdep queries will find the new load. We can't easily remove the old
// load completely because it is already in the value numbering table.
IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
Type *DestPTy =
PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace());
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
NewLoad->takeName(SrcVal);
NewLoad->setAlignment(SrcVal->getAlign());
LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
// Replace uses of the original load with the wider load. On a big endian
// system, we need to shift down to get the relevant bits.
Value *RV = NewLoad;
if (DL.isBigEndian())
RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
RV = Builder.CreateTrunc(RV, SrcVal->getType());
SrcVal->replaceAllUsesWith(RV);
SrcVal = NewLoad;
}
return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
}
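// Widening example (editorial): a 2-byte load at Offset 3 of a 4-byte
// SrcVal needs bytes [3,5), which runs past SrcVal, so NewLoadSize becomes
// NextPowerOf2(5) = 8; an 8-byte load replaces SrcVal in place and the
// normal extraction path then runs against the widened value.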
Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
Type *LoadTy, const DataLayout &DL) {
unsigned SrcValStoreSize =
DL.getTypeStoreSize(SrcVal->getType()).getFixedSize();
unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
if (Offset + LoadSize > SrcValStoreSize)
return nullptr;
return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
}
/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
const DataLayout &DL) {
LLVMContext &Ctx = LoadTy->getContext();
uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8;
IRBuilder<> Builder(InsertPt);
// We know that this method is only called when the mem transfer fully
// provides the bits for the load.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
// memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
// independently of what the offset is.
Value *Val = MSI->getValue();
if (LoadSize != 1)
Val =
Builder.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
Value *OneElt = Val;
// Splat the value out to the right number of bits.
for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
// If we can double the number of bytes set, do it.
if (NumBytesSet * 2 <= LoadSize) {
Value *ShVal = Builder.CreateShl(
Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
Val = Builder.CreateOr(Val, ShVal);
NumBytesSet <<= 1;
continue;
}
// Otherwise insert one byte at a time.
Value *ShVal =
Builder.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
Val = Builder.CreateOr(OneElt, ShVal);
++NumBytesSet;
}
return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
}
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
DL);
}
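// Splat example (editorial): for memset(%p, 0xAB, %n) feeding an i32 load,
// Val starts as 0x000000AB; the first doubling step ORs in Val << 8 to give
// 0x0000ABAB, the second ORs in Val << 16 to give 0xABABABAB, and the
// result is coerced to the load type.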
Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, const DataLayout &DL) {
LLVMContext &Ctx = LoadTy->getContext();
uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8;
// We know that this method is only called when the mem transfer fully
// provides the bits for the load.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
auto *Val = dyn_cast<ConstantInt>(MSI->getValue());
if (!Val)
return nullptr;
Val = ConstantInt::get(Ctx, APInt::getSplat(LoadSize * 8, Val->getValue()));
return ConstantFoldLoadFromConst(Val, LoadTy, DL);
}
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
DL);
}
} // namespace VNCoercion
} // namespace llvm
