Index: projects/clang800-import/contrib/compiler-rt
===================================================================
--- projects/clang800-import/contrib/compiler-rt	(revision 344547)
+++ projects/clang800-import/contrib/compiler-rt	(revision 344548)

Property changes on: projects/clang800-import/contrib/compiler-rt
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/compiler-rt/dist-release_80:r344177-344546
Index: projects/clang800-import/contrib/libc++
===================================================================
--- projects/clang800-import/contrib/libc++	(revision 344547)
+++ projects/clang800-import/contrib/libc++	(revision 344548)

Property changes on: projects/clang800-import/contrib/libc++
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/libc++/dist-release_80:r344177-344546
Index: projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp	(revision 344547)
+++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp	(revision 344548)
@@ -1,42738 +1,42747 @@
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86IntrinsicsInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cctype>
#include <numeric>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool> ExperimentalVectorWideningLegalization(
    "x86-experimental-vector-widening-legalization", cl::init(false),
    cl::desc("Enable an experimental vector type legalization through widening "
             "rather than promotion."),
    cl::Hidden);

static cl::opt<int> ExperimentalPrefLoopAlignment(
    "x86-experimental-pref-loop-alignment", cl::init(4),
    cl::desc("Sets the preferable loop alignment for experiments "
             "(the last x86-experimental-pref-loop-alignment bits"
             " of the loop header PC will be 0)."),
    cl::Hidden);

static cl::opt<bool> MulConstantOptimization(
    "mul-constant-optimization", cl::init(true),
    cl::desc("Replace 'mul x, Const' with more effective instructions like "
             "SHIFT, LEA, etc."),
    cl::Hidden);
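// Illustrative usage (editor's sketch; the flag spellings come from the
// cl::opt definitions above, the exact invocations are an assumption):
//   llc -x86-experimental-vector-widening-legalization foo.ll
//   llc -x86-experimental-pref-loop-alignment=5 foo.ll   // 2^5 = 32-byte align
//   llc -mul-constant-optimization=false foo.ll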
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
                             const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
}

X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                                     const X86Subtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
  X86ScalarSSEf64 = Subtarget.hasSSE2();
  X86ScalarSSEf32 = Subtarget.hasSSE1();
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));

  // Set up the TargetLowering object.

  // X86 is weird. It always uses i8 for shift amounts and setcc results.
  setBooleanContents(ZeroOrOneBooleanContent);
  // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // For 64-bit, since we have so many registers, use the ILP scheduler.
  // For 32-bit, use the register pressure specific scheduling.
  // For Atom, always use ILP scheduling.
  if (Subtarget.isAtom())
    setSchedulingPreference(Sched::ILP);
  else if (Subtarget.is64Bit())
    setSchedulingPreference(Sched::ILP);
  else
    setSchedulingPreference(Sched::RegPressure);
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());

  // Bypass expensive divides and use cheaper ones.
  if (TM.getOptLevel() >= CodeGenOpt::Default) {
    if (Subtarget.hasSlowDivide32())
      addBypassSlowDiv(32, 8);
    if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
      addBypassSlowDiv(64, 32);
  }

  if (Subtarget.isTargetKnownWindowsMSVC() ||
      Subtarget.isTargetWindowsItanium()) {
    // Setup Windows compiler runtime calls.
    setLibcallName(RTLIB::SDIV_I64, "_alldiv");
    setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
    setLibcallName(RTLIB::SREM_I64, "_allrem");
    setLibcallName(RTLIB::UREM_I64, "_aullrem");
    setLibcallName(RTLIB::MUL_I64, "_allmul");
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
  }

  if (Subtarget.isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget.isTargetWindowsGNU()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, &X86::GR8RegClass);
  addRegisterClass(MVT::i16, &X86::GR16RegClass);
  addRegisterClass(MVT::i32, &X86::GR32RegClass);
  if (Subtarget.is64Bit())
    addRegisterClass(MVT::i64, &X86::GR64RegClass);

  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i16, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SETOEQ and SETUNE require checking two conditions.
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);

  // Integer absolute.
  if (Subtarget.hasCMov()) {
    setOperationAction(ISD::ABS , MVT::i16 , Custom);
    setOperationAction(ISD::ABS , MVT::i32 , Custom);
    if (Subtarget.is64Bit())
      setOperationAction(ISD::ABS , MVT::i64 , Custom);
  }

  // Funnel shifts.
  for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
    setOperationAction(ShiftOp , MVT::i16 , Custom);
    setOperationAction(ShiftOp , MVT::i32 , Custom);
    if (Subtarget.is64Bit())
      setOperationAction(ShiftOp , MVT::i64 , Custom);
  }

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);

  if (Subtarget.is64Bit()) {
    if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
      // f32/f64 are legal, f80 is custom.
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
    else
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
    setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
  } else if (!Subtarget.useSoftFloat()) {
    // We have an algorithm for SSE2->double, and we turn this into a
    // 64-bit FILD followed by conditional FADD for other targets.
    setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
    // We have an algorithm for SSE2, and we turn this into a 64-bit
    // FILD or VCVTUSI2SS/SD for other targets.
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
  } else {
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);

  if (!Subtarget.useSoftFloat()) {
    // SSE has no i16 to fp conversion, only i32.
    if (X86ScalarSSEf32) {
      setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
      // f32 and f64 cases are Legal, f80 case is not
      setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
    } else {
      setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
      setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
    }
  } else {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);

  if (!Subtarget.useSoftFloat()) {
    // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
    // are Legal, f80 is custom lowered.
    setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
    setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);

    if (X86ScalarSSEf32) {
      setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
      // f32 and f64 cases are Legal, f80 case is not
      setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
    } else {
      setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
      setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
    }
  } else {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
    setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

  if (Subtarget.is64Bit()) {
    if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
      // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
      setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
    } else {
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
      setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
    }
  } else if (!Subtarget.useSoftFloat()) {
    // Since AVX is a superset of SSE3, only check for SSE here.
    if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
    else
      // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
      // With SSE3 we can use fisttpll to convert to a signed i64; without
      // SSE, we're stuck with a fistpll.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);

    setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
    setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
      // Without SSE, i64->f64 goes through memory.
      setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
    }
  } else if (!Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST , MVT::i64 , Custom);

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
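  // Illustrative example (editor's sketch, not part of the upstream change):
  // with SDIV/SREM Expanded below, IR such as
  //   %q = sdiv i32 %x, %y
  //   %r = srem i32 %x, %y
  // is combined into a single ISD::SDIVREM node, which matches x86's IDIV
  // producing the quotient in EAX and the remainder in EDX at once.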
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
  }

  setOperationAction(ISD::BR_JT , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND , MVT::Other, Custom);
  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
                   MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    setOperationAction(ISD::BR_CC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  if (Subtarget.is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);

  setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
  setOperationAction(ISD::FREM , MVT::f32 , Expand);
  setOperationAction(ISD::FREM , MVT::f64 , Expand);
  setOperationAction(ISD::FREM , MVT::f80 , Expand);
  setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);

  // Promote the i8 variants and force them on up to i32 which has a shorter
  // encoding.
  setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
  setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
  if (!Subtarget.hasBMI()) {
    setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
    setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
    }
  }

  if (Subtarget.hasLZCNT()) {
    // When promoting the i8 variants, force them to i32 for a shorter
    // encoding.
    setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
    setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
  } else {
    setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
    setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
    setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
    }
  }

  // Special handling for half-precision floating point conversions.
  // If we don't have F16C support, then lower half float conversions
  // into library calls.
  if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
  }
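  // Illustrative example (editor's sketch): without F16C, the Expand actions
  // above send half<->float conversions to compiler-rt (by default the
  // __gnu_h2f_ieee/__gnu_f2h_ieee libcalls), while F16C targets select the
  // VCVTPH2PS/VCVTPS2PH instructions instead.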
  // There's never any support for operations beyond MVT::f32.
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f80, MVT::f16, Expand);

  if (Subtarget.hasPOPCNT()) {
    setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
  } else {
    setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
    setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
    setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
    if (Subtarget.is64Bit())
      setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);

  if (!Subtarget.hasMOVBE())
    setOperationAction(ISD::BSWAP , MVT::i16 , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT , MVT::i1 , Promote);
  // X86 wants to expand cmov itself.
  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SETCC, VT, Custom);
  }
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
      continue;
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SETCC, VT, Custom);
  }

  // Custom action for SELECT MMX and expand action for SELECT_CC MMX
  setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);

  setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
  // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
  if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
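  // Illustrative example (editor's sketch): the Custom SELECT actions above
  // lower 'select i1 %c, i32 %a, i32 %b' to X86ISD::CMOV, i.e. a TEST of %c
  // followed by a CMOVcc when the subtarget has CMOV.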
  // Darwin ABI issue.
  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
      continue;
    setOperationAction(ISD::ConstantPool , VT, Custom);
    setOperationAction(ISD::JumpTable , VT, Custom);
    setOperationAction(ISD::GlobalAddress , VT, Custom);
    setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
    setOperationAction(ISD::ExternalSymbol , VT, Custom);
    setOperationAction(ISD::BlockAddress , VT, Custom);
  }

  // 64-bit shl, sra, srl (iff 32-bit x86)
  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
      continue;
    setOperationAction(ISD::SHL_PARTS, VT, Custom);
    setOperationAction(ISD::SRA_PARTS, VT, Custom);
    setOperationAction(ISD::SRL_PARTS, VT, Custom);
  }

  if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
    setOperationAction(ISD::PREFETCH , MVT::Other, Legal);

  setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);

  // Expand certain atomics
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
  }

  if (Subtarget.hasCmpxchg16b()) {
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
  }

  // FIXME - use subtarget debug flags
  if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
      !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
      TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);

  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  bool Is64Bit = Subtarget.is64Bit();
  setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
  setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
  setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);

  if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
                                                     : &X86::FR32RegClass);
    addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
                                                     : &X86::FR64RegClass);

    for (auto VT : { MVT::f32, MVT::f64 }) {
      // Use ANDPD to simulate FABS.
      setOperationAction(ISD::FABS, VT, Custom);

      // Use XORP to simulate FNEG.
      setOperationAction(ISD::FNEG, VT, Custom);

      // Use ANDPD and ORPD to simulate FCOPYSIGN.
      setOperationAction(ISD::FCOPYSIGN, VT, Custom);

      // These might be better off as horizontal vector ops.
      setOperationAction(ISD::FADD, VT, Custom);
      setOperationAction(ISD::FSUB, VT, Custom);

      // We don't support sin/cos/fmod
      setOperationAction(ISD::FSIN , VT, Expand);
      setOperationAction(ISD::FCOS , VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
    }

    // Lower this to MOVMSK plus an AND.
    setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
    setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
  } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
             (UseX87 || Is64Bit)) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    if (UseX87)
      addRegisterClass(MVT::f64, &X86::RFP64RegClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    if (UseX87)
      setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    if (UseX87)
      setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

    if (UseX87) {
      // Always expand sin/cos functions even though x87 has an instruction.
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    }
  } else if (UseX87) {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);
    addRegisterClass(MVT::f32, &X86::RFP32RegClass);

    for (auto VT : { MVT::f32, MVT::f64 }) {
      setOperationAction(ISD::UNDEF, VT, Expand);
      setOperationAction(ISD::FCOPYSIGN, VT, Expand);

      // Always expand sin/cos functions even though x87 has an instruction.
      setOperationAction(ISD::FSIN , VT, Expand);
      setOperationAction(ISD::FCOS , VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
    }
  }

  // Expand FP32 immediates into loads from the stack, save special cases.
  if (isTypeLegal(MVT::f32)) {
    if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
      addLegalFPImmediate(APFloat(+0.0f)); // FLD0
      addLegalFPImmediate(APFloat(+1.0f)); // FLD1
      addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
      addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
    } else // SSE immediates.
      addLegalFPImmediate(APFloat(+0.0f)); // xorps
  }

  // Expand FP64 immediates into loads from the stack, save special cases.
  if (isTypeLegal(MVT::f64)) {
    if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
      addLegalFPImmediate(APFloat(+0.0)); // FLD0
      addLegalFPImmediate(APFloat(+1.0)); // FLD1
      addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
      addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
    } else // SSE immediates.
      addLegalFPImmediate(APFloat(+0.0)); // xorpd
  }
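  // Illustrative example (editor's sketch): with the immediates registered
  // above, 'double f() { return -1.0; }' can be emitted on x87 as FLD1;FCHS
  // rather than a constant-pool load, while SSE only treats +0.0 as free
  // (materialized with xorpd).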
  // We don't support FMA.
  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  // Long double always uses X87, except f128 in MMX.
  if (UseX87) {
    if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
      addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                     : &X86::VR128RegClass);
      ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
      setOperationAction(ISD::FABS , MVT::f128, Custom);
      setOperationAction(ISD::FNEG , MVT::f128, Custom);
      setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
    }

    addRegisterClass(MVT::f80, &X86::RFP80RegClass);
    setOperationAction(ISD::UNDEF, MVT::f80, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
    {
      APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
      addLegalFPImmediate(TmpFlt); // FLD0
      TmpFlt.changeSign();
      addLegalFPImmediate(TmpFlt); // FLD0/FCHS

      bool ignored;
      APFloat TmpFlt2(+1.0);
      TmpFlt2.convert(APFloat::x87DoubleExtended(),
                      APFloat::rmNearestTiesToEven, &ignored);
      addLegalFPImmediate(TmpFlt2); // FLD1
      TmpFlt2.changeSign();
      addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
    }

    // Always expand sin/cos functions even though x87 has an instruction.
    setOperationAction(ISD::FSIN , MVT::f80, Expand);
    setOperationAction(ISD::FCOS , MVT::f80, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f80, Expand);

    setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
    setOperationAction(ISD::FCEIL, MVT::f80, Expand);
    setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
    setOperationAction(ISD::FRINT, MVT::f80, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
    setOperationAction(ISD::FMA, MVT::f80, Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW , MVT::f32 , Expand);
  setOperationAction(ISD::FPOW , MVT::f64 , Expand);
  setOperationAction(ISD::FPOW , MVT::f80 , Expand);

  setOperationAction(ISD::FLOG, MVT::f80, Expand);
  setOperationAction(ISD::FLOG2, MVT::f80, Expand);
  setOperationAction(ISD::FLOG10, MVT::f80, Expand);
  setOperationAction(ISD::FEXP, MVT::f80, Expand);
  setOperationAction(ISD::FEXP2, MVT::f80, Expand);
  setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
  setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);

  // Some FP actions are always expanded for vector types.
  for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
                   MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSINCOS, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::FLOG, VT, Expand);
    setOperationAction(ISD::FLOG2, VT, Expand);
    setOperationAction(ISD::FLOG10, VT, Expand);
    setOperationAction(ISD::FEXP, VT, Expand);
    setOperationAction(ISD::FEXP2, VT, Expand);
  }

  // First set operation action for all vector types to either promote
  // (for widening) or expand (for scalarization). Then we will selectively
  // turn on ones that can be effectively codegen'd.
  for (MVT VT : MVT::vector_valuetypes()) {
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
    setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FCEIL, VT, Expand);
    setOperationAction(ISD::FTRUNC, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FNEARBYINT, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    setOperationAction(ISD::TRUNCATE, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
    setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
    setOperationAction(ISD::ANY_EXTEND, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(InnerVT, VT, Expand);

      setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);

      // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
      // types, we have to deal with them whether we ask for Expansion or not.
      // Setting Expand causes its own optimisation problems though, so leave
      // them legal.
      if (VT.getVectorElementType() == MVT::i1)
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);

      // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
      // split/scalarized right now.
      if (VT.getVectorElementType() == MVT::f16)
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
    }
  }

  // FIXME: In order to prevent SSE instructions being expanded to MMX ones
  // with -msoft-float, disable use of MMX as well.
  if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
    addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
    // No operations on x86mmx supported, everything uses intrinsics.
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
    addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ?
                     &X86::VR128XRegClass : &X86::VR128RegClass);

    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::FABS, MVT::v4f32, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
    addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                    : &X86::VR128RegClass);

    // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
    // registers cannot be used even for integer operations.
    addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                    : &X86::VR128RegClass);
    addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                    : &X86::VR128RegClass);
    addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                    : &X86::VR128RegClass);
    addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                    : &X86::VR128RegClass);

    for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
                     MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
      setOperationAction(ISD::SDIV, VT, Custom);
      setOperationAction(ISD::SREM, VT, Custom);
      setOperationAction(ISD::UDIV, VT, Custom);
      setOperationAction(ISD::UREM, VT, Custom);
    }

    setOperationAction(ISD::MUL, MVT::v2i8, Custom);
    setOperationAction(ISD::MUL, MVT::v2i16, Custom);
    setOperationAction(ISD::MUL, MVT::v2i32, Custom);
    setOperationAction(ISD::MUL, MVT::v4i8, Custom);
    setOperationAction(ISD::MUL, MVT::v4i16, Custom);
    setOperationAction(ISD::MUL, MVT::v8i8, Custom);

    setOperationAction(ISD::MUL, MVT::v16i8, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
    setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
    setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
    setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
    setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
    setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
    setOperationAction(ISD::FABS, MVT::v2f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
      setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
      setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
      setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
      setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
    }

    setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
    setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
    setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
    setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
    setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
    setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
    setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
    setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
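    // Illustrative example (editor's sketch): the Legal v8i16/v16i8 actions
    // above map one-to-one onto SSE2, e.g. ISD::MULHS v8i16 selects PMULHW
    // and ISD::UADDSAT v16i8 selects PADDUSB.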
    if (!ExperimentalVectorWideningLegalization) {
      // Use widening instead of promotion.
      for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
                       MVT::v4i16, MVT::v2i16 }) {
        setOperationAction(ISD::UADDSAT, VT, Custom);
        setOperationAction(ISD::SADDSAT, VT, Custom);
        setOperationAction(ISD::USUBSAT, VT, Custom);
        setOperationAction(ISD::SSUBSAT, VT, Custom);
      }
    }

    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Provide custom widening for v2f32 setcc. This is really for VLX when
    // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
    // type legalization changing the result type to v4i1 during widening.
    // It works fine for SSE2 and is probably faster so no need to qualify with
    // VLX support.
    setOperationAction(ISD::SETCC, MVT::v2i32, Custom);

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::ABS, VT, Custom);

      // The condition codes aren't legal in SSE/AVX and under AVX512 we use
      // setcc all the way to isel and prefer SETGT in some isel patterns.
      setCondCodeAction(ISD::SETLT, VT, Custom);
      setCondCodeAction(ISD::SETLE, VT, Custom);
    }

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::VSELECT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    // We support custom legalizing of sext and anyext loads for specific
    // memory vector types which we can load as a scalar (or sequence of
    // scalars) and extend in-register to a legal 128-bit vector type. For sext
    // loads these must work with a single scalar load.
    for (MVT VT : MVT::integer_vector_valuetypes()) {
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
    }

    for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::VSELECT, VT, Custom);

      if (VT == MVT::v2i64 && !Subtarget.is64Bit())
        continue;

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
    setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
    setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
    setOperationAction(ISD::SELECT, MVT::v16i8, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
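    // Illustrative example (editor's sketch): the Legal action above keeps
    // 'fptosi <4 x float> %v to <4 x i32>' as a single node, which selects
    // SSE2's CVTTPS2DQ.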
    // Custom legalize these to avoid over promotion or custom promotion.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);

    // By marking FP_TO_SINT v8i16 as Custom, will trick type legalization into
    // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
    // split again based on the input type, this will cause an AssertSExt i16 to
    // be emitted instead of an AssertZExt. This will allow packssdw followed by
    // packuswb to be used to truncate to v8i8. This is necessary since packusdw
    // isn't available until sse4.1.
    setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);

    setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);

    // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);

    setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);

    // We want to legalize this to an f64 load rather than an i64 load on
    // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
    // store.
    setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
    setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
    setOperationAction(ISD::STORE, MVT::v2f32, Custom);
    setOperationAction(ISD::STORE, MVT::v2i32, Custom);
    setOperationAction(ISD::STORE, MVT::v4i16, Custom);
    setOperationAction(ISD::STORE, MVT::v8i8, Custom);

    setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
    if (!Subtarget.hasAVX512())
      setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);

    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
    if (ExperimentalVectorWideningLegalization) {
      setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);

      setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
      setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
      setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
      setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
      setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
      setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    } else {
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
    }

    // In the customized shift lowering, the legal v4i32/v2i64 cases
    // in AVX2 will be recognized.
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
    }

    setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
    setOperationAction(ISD::ROTL, MVT::v8i16, Custom);

    // With AVX512, expanding (and promoting the shifts) is better.
    if (!Subtarget.hasAVX512())
      setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
    setOperationAction(ISD::ABS, MVT::v16i8, Legal);
    setOperationAction(ISD::ABS, MVT::v8i16, Legal);
    setOperationAction(ISD::ABS, MVT::v4i32, Legal);
    setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
    setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
    setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
    setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);

    // These might be better off as horizontal vector ops.
    setOperationAction(ISD::ADD, MVT::i16, Custom);
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i16, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
    for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
      setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
      setOperationAction(ISD::FCEIL, RoundedTy, Legal);
      setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
      setOperationAction(ISD::FRINT, RoundedTy, Legal);
      setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
    }

    setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
    setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
    setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
    setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
    setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
    setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
    setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
    setOperationAction(ISD::UMIN, MVT::v4i32, Legal);

    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL, MVT::v4i32, Legal);

    // We directly match byte blends in the backend as they match the VSELECT
    // condition form.
    setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);

    // SSE41 brings specific instructions for doing vector sign extend even in
    // cases where we don't have SRA.
    for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
    }

    if (!ExperimentalVectorWideningLegalization) {
      // Avoid narrow result types when widening. The legal types are listed
      // in the next loop.
      for (MVT VT : MVT::integer_vector_valuetypes()) {
        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
      }
    }

    // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
    for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
      setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
      setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
      if (!ExperimentalVectorWideningLegalization)
        setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
      setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
      setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
      setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
      setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
    }

    // i8 vectors are custom because the source register and source memory
    // operand types are not the same width.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
  }
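  // Illustrative example (editor's sketch): a sign-extending load such as
  // 'load <4 x i8>' followed by 'sext to <4 x i32>' matches the Legal entries
  // above and selects a single PMOVSXBD from memory.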
  if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
      setOperationAction(ISD::ROTL, VT, Custom);

    // XOP can efficiently perform BITREVERSE with VPPERM.
    for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
      setOperationAction(ISD::BITREVERSE, VT, Custom);

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
      setOperationAction(ISD::BITREVERSE, VT, Custom);
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
    bool HasInt256 = Subtarget.hasInt256();

    addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                    : &X86::VR256RegClass);
    addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                     : &X86::VR256RegClass);
    addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                    : &X86::VR256RegClass);
    addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                    : &X86::VR256RegClass);
    addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                    : &X86::VR256RegClass);
    addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
                                                    : &X86::VR256RegClass);

    for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
      setOperationAction(ISD::FFLOOR, VT, Legal);
      setOperationAction(ISD::FCEIL, VT, Legal);
      setOperationAction(ISD::FTRUNC, VT, Legal);
      setOperationAction(ISD::FRINT, VT, Legal);
      setOperationAction(ISD::FNEARBYINT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Custom);
      setOperationAction(ISD::FABS, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Custom);
    }

    // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
    // even though v8i16 is a legal type.
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
    setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);

    setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
    setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);

    if (!Subtarget.hasAVX512())
      setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);

    // In the customized shift lowering, the legal v8i32/v4i64 cases
    // in AVX2 will be recognized.
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
    }

    if (ExperimentalVectorWideningLegalization) {
      // These types need custom splitting if their input is a 128-bit vector.
      setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
      setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
      setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
      setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
    }

    setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
    setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
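    // Illustrative example (editor's sketch): with the Legal rounding actions
    // above, 'llvm.floor.v8f32' selects a single 256-bit VROUNDPS with the
    // round-toward-negative-infinity immediate.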
    // With BWI, expanding (and promoting the shifts) is better.
    if (!Subtarget.hasBWI())
      setOperationAction(ISD::ROTL, MVT::v32i8, Custom);

    setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
    setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
    setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
    setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
    setOperationAction(ISD::SELECT, MVT::v8f32, Custom);

    for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
    }

    setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
    setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTLZ, VT, Custom);

      // TODO - remove this once 256-bit X86ISD::ANDNP correctly split.
      setOperationAction(ISD::CTTZ, VT, HasInt256 ? Expand : Custom);

      // The condition codes aren't legal in SSE/AVX and under AVX512 we use
      // setcc all the way to isel and prefer SETGT in some isel patterns.
      setCondCodeAction(ISD::SETLT, VT, Custom);
      setCondCodeAction(ISD::SETLE, VT, Custom);
    }

    if (Subtarget.hasAnyFMA()) {
      for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
                       MVT::v2f64, MVT::v4f64 })
        setOperationAction(ISD::FMA, VT, Legal);
    }

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
      setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
      setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
    }

    setOperationAction(ISD::MUL, MVT::v4i64, Custom);
    setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::MUL, MVT::v32i8, Custom);

    setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
    setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
    setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
    setOperationAction(ISD::MULHS, MVT::v32i8, Custom);

    setOperationAction(ISD::ABS, MVT::v4i64, Custom);
    setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
    setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
    setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
    setOperationAction(ISD::UMIN, MVT::v4i64, Custom);

    setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
      setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
      setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
      setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
      setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
      setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
    }
    for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
    }

    if (HasInt256) {
      // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
      // when we have a 256bit-wide blend with immediate.
      setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);

      // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
      for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
        setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
        setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
        setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
        setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
        setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
        setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
      }
    }

    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
      setOperationAction(ISD::MLOAD, VT, Legal);
      setOperationAction(ISD::MSTORE, VT, Legal);
    }

    // Extract subvector is special because the value type
    // (result) is 128-bit but the source is 256-bit wide.
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
    }

    // Custom lower several nodes for 256-bit types.
    for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                    MVT::v8f32, MVT::v4f64 }) {
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::VSELECT, VT, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
    }

    if (HasInt256)
      setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);

    if (HasInt256) {
      // Custom legalize 2x32 to get a little better code.
      setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
      setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);

      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                       MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
        setOperationAction(ISD::MGATHER, VT, Custom);
    }
  }

  // This block controls legalization of the mask vector sizes that are
  // available with AVX512. 512-bit vectors are in a separate block controlled
  // by useAVX512Regs.
  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
    addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
    addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
    addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
    addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
    addRegisterClass(MVT::v16i1, &X86::VK16RegClass);

    setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);

    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
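    // Illustrative example (editor's sketch): with the VK* register classes
    // above, 'icmp sgt <16 x i32> %a, %b' yields a v16i1 mask held in a
    // k-register (e.g. VPCMPGTD into %k1) instead of a full vector of lanes.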
    // There is no byte sized k-register load or store without AVX512DQ.
    if (!Subtarget.hasDQI()) {
      setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
      setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
      setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
      setOperationAction(ISD::LOAD, MVT::v8i1, Custom);

      setOperationAction(ISD::STORE, MVT::v1i1, Custom);
      setOperationAction(ISD::STORE, MVT::v2i1, Custom);
      setOperationAction(ISD::STORE, MVT::v4i1, Custom);
      setOperationAction(ISD::STORE, MVT::v8i1, Custom);
    }

    // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
    }

    for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
      setOperationAction(ISD::ADD, VT, Custom);
      setOperationAction(ISD::SUB, VT, Custom);
      setOperationAction(ISD::MUL, VT, Custom);
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::TRUNCATE, VT, Custom);
      setOperationAction(ISD::UADDSAT, VT, Custom);
      setOperationAction(ISD::SADDSAT, VT, Custom);
      setOperationAction(ISD::USUBSAT, VT, Custom);
      setOperationAction(ISD::SSUBSAT, VT, Custom);

      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::VSELECT, VT, Expand);
    }

    setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
    for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
  }

  // This block controls legalization for 512-bit operations with 32/64 bit
  // elements. 512-bits can be disabled based on prefer-vector-width and
  // required-vector-width function attributes.
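  // Illustrative example (editor's sketch): a function carrying the IR
  // attribute "prefer-vector-width"="256" can make useAVX512Regs() return
  // false below, so the 512-bit types stay disabled for that function.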
  if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
    addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
    addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
    addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
    addRegisterClass(MVT::v8f64, &X86::VR512RegClass);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);

    for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
      setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
      setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
      setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
      setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
      setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
    }

    for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
      setOperationAction(ISD::FNEG, VT, Custom);
      setOperationAction(ISD::FABS, VT, Custom);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FCOPYSIGN, VT, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
    setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
    setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);

    setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
    setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
    setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
    setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
    setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);

    if (!Subtarget.hasVLX()) {
      // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
      // to 512-bit rather than use the AVX2 instructions so that we can use
      // k-masks.
      for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                      MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
      }
    }

    setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
    if (ExperimentalVectorWideningLegalization) {
      // Need to custom widen this if we don't have AVX512BW.
      setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
      setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
      setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
    }

    for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
      setOperationAction(ISD::FFLOOR, VT, Legal);
      setOperationAction(ISD::FCEIL, VT, Legal);
      setOperationAction(ISD::FTRUNC, VT, Legal);
      setOperationAction(ISD::FRINT, VT, Legal);
      setOperationAction(ISD::FNEARBYINT, VT, Legal);
    }
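    // Illustrative example (editor's sketch): with the Legal actions above,
    // 'llvm.floor.v16f32' selects a single VRNDSCALEPS on a zmm register.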
for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); } setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); setOperationAction(ISD::MUL, MVT::v8i64, Custom); setOperationAction(ISD::MUL, MVT::v16i32, Legal); setOperationAction(ISD::MULHU, MVT::v16i32, Custom); setOperationAction(ISD::MULHS, MVT::v16i32, Custom); setOperationAction(ISD::SELECT, MVT::v8f64, Custom); setOperationAction(ISD::SELECT, MVT::v8i64, Custom); setOperationAction(ISD::SELECT, MVT::v16i32, Custom); setOperationAction(ISD::SELECT, MVT::v32i16, Custom); setOperationAction(ISD::SELECT, MVT::v64i8, Custom); setOperationAction(ISD::SELECT, MVT::v16f32, Custom); for (auto VT : { MVT::v16i32, MVT::v8i64 }) { setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::UMAX, VT, Legal); setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::ROTL, VT, Custom); setOperationAction(ISD::ROTR, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. setCondCodeAction(ISD::SETLT, VT, Custom); setCondCodeAction(ISD::SETLE, VT, Custom); } if (Subtarget.hasDQI()) { setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); setOperationAction(ISD::MUL, MVT::v8i64, Legal); } if (Subtarget.hasCDI()) { // NonVLX sub-targets extend 128/256 vectors to use the 512 version. for (auto VT : { MVT::v16i32, MVT::v8i64} ) { setOperationAction(ISD::CTLZ, VT, Legal); } } // Subtarget.hasCDI() if (Subtarget.hasVPOPCNTDQ()) { for (auto VT : { MVT::v16i32, MVT::v8i64 }) setOperationAction(ISD::CTPOP, VT, Legal); } // Extract subvector is special because the value type // (result) is 256-bit but the source is 512-bit wide. // 128-bit was made Legal under AVX1. for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64 }) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); setOperationAction(ISD::MLOAD, VT, Legal); setOperationAction(ISD::MSTORE, VT, Legal); setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); } // Need to custom split v32i16/v64i8 bitcasts. 
if (!Subtarget.hasBWI()) { setOperationAction(ISD::BITCAST, MVT::v32i16, Custom); setOperationAction(ISD::BITCAST, MVT::v64i8, Custom); } if (Subtarget.hasVBMI2()) { for (auto VT : { MVT::v16i32, MVT::v8i64 }) { setOperationAction(ISD::FSHL, VT, Custom); setOperationAction(ISD::FSHR, VT, Custom); } } } // has AVX-512 // This block controls legalization for operations that don't have // pre-AVX512 equivalents. Without VLX we use 512-bit operations for // narrower widths. if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { // These operations are handled on non-VLX by artificially widening in // isel patterns. // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); for (auto VT : { MVT::v2i64, MVT::v4i64 }) { setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::UMAX, VT, Legal); setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::ABS, VT, Legal); } for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { setOperationAction(ISD::ROTL, VT, Custom); setOperationAction(ISD::ROTR, VT, Custom); } // Custom legalize 2x32 to get a little better code. setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom); setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom); for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) setOperationAction(ISD::MSCATTER, VT, Custom); if (Subtarget.hasDQI()) { for (auto VT : { MVT::v2i64, MVT::v4i64 }) { setOperationAction(ISD::SINT_TO_FP, VT, Legal); setOperationAction(ISD::UINT_TO_FP, VT, Legal); setOperationAction(ISD::FP_TO_SINT, VT, Legal); setOperationAction(ISD::FP_TO_UINT, VT, Legal); setOperationAction(ISD::MUL, VT, Legal); } } if (Subtarget.hasCDI()) { for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { setOperationAction(ISD::CTLZ, VT, Legal); } } // Subtarget.hasCDI() if (Subtarget.hasVPOPCNTDQ()) { for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) setOperationAction(ISD::CTPOP, VT, Legal); } } // This block controls legalization of v32i1/v64i1 which are available with // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with // useBWIRegs.
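// With BWI these wide mask types live in k-registers, and moving one to or
// from a GPR is a single instruction, e.g. (illustrative assembly):
//   kmovd %k1, %eax   # v32i1 -> i32
//   kmovq %rax, %k1   # i64   -> v64i1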
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { addRegisterClass(MVT::v32i1, &X86::VK32RegClass); addRegisterClass(MVT::v64i1, &X86::VK64RegClass); for (auto VT : { MVT::v32i1, MVT::v64i1 }) { setOperationAction(ISD::ADD, VT, Custom); setOperationAction(ISD::SUB, VT, Custom); setOperationAction(ISD::MUL, VT, Custom); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::UADDSAT, VT, Custom); setOperationAction(ISD::SADDSAT, VT, Custom); setOperationAction(ISD::USUBSAT, VT, Custom); setOperationAction(ISD::SSUBSAT, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); for (auto VT : { MVT::v16i1, MVT::v32i1 }) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); // Extends from v32i1 masks to 256-bit vectors. setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); } // This block controls legalization for v32i16 and v64i8. 512-bits can be // disabled based on prefer-vector-width and required-vector-width function // attributes. if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) { addRegisterClass(MVT::v32i16, &X86::VR512RegClass); addRegisterClass(MVT::v64i8, &X86::VR512RegClass); // Extends from v64i1 masks to 512-bit vectors. 
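{
  // Illustrative scalar model (not part of the lowering) of the mask
  // extensions set up below: each set mask bit becomes an all-ones byte
  // lane, as the BWI vpmovm2b instruction does. The payload value here is
  // made up.
  unsigned long long Mask = 0x5; // hypothetical v64i1 payload
  signed char Lanes[64];
  for (int I = 0; I != 64; ++I)
    Lanes[I] = ((Mask >> I) & 1) ? -1 : 0; // i1 -> i8 sign extension
  (void)Lanes;
}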
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); setOperationAction(ISD::MUL, MVT::v32i16, Legal); setOperationAction(ISD::MUL, MVT::v64i8, Custom); setOperationAction(ISD::MULHS, MVT::v32i16, Legal); setOperationAction(ISD::MULHU, MVT::v32i16, Legal); setOperationAction(ISD::MULHS, MVT::v64i8, Custom); setOperationAction(ISD::MULHU, MVT::v64i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::MLOAD, VT, Legal); setOperationAction(ISD::MSTORE, VT, Legal); setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTLZ, VT, Custom); setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::UMAX, VT, Legal); setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::UADDSAT, VT, Legal); setOperationAction(ISD::SADDSAT, VT, Legal); setOperationAction(ISD::USUBSAT, VT, Legal); setOperationAction(ISD::SSUBSAT, VT, Legal); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. setCondCodeAction(ISD::SETLT, VT, Custom); setCondCodeAction(ISD::SETLE, VT, Custom); } for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); } if (Subtarget.hasBITALG()) { for (auto VT : { MVT::v64i8, MVT::v32i16 }) setOperationAction(ISD::CTPOP, VT, Legal); } if (Subtarget.hasVBMI2()) { setOperationAction(ISD::FSHL, MVT::v32i16, Custom); setOperationAction(ISD::FSHR, MVT::v32i16, Custom); } } if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? 
Legal : Custom); } // These operations are handled on non-VLX by artificially widening in // isel patterns. // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? if (Subtarget.hasBITALG()) { for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 }) setOperationAction(ISD::CTPOP, VT, Legal); } } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); if (Subtarget.hasDQI()) { // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion. // v2f32 UINT_TO_FP is already custom under SSE2. setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"); // v2i64 FP_TO_S/UINT(v2f32) custom conversion. setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); } if (Subtarget.hasBWI()) { setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); } if (Subtarget.hasVBMI2()) { // TODO: Make these legal even without VLX? for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { setOperationAction(ISD::FSHL, VT, Custom); setOperationAction(ISD::FSHR, VT, Custom); } } } // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); if (!Subtarget.is64Bit()) { setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); } // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. // // FIXME: We really should do custom legalization for addition and // subtraction on x86-32 once PR3203 is fixed. We really can't do much better // than generic legalization for 64-bit multiplication-with-overflow, though. for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { if (VT == MVT::i64 && !Subtarget.is64Bit()) continue; // Add/Sub/Mul with overflow operations are custom lowered. setOperationAction(ISD::SADDO, VT, Custom); setOperationAction(ISD::UADDO, VT, Custom); setOperationAction(ISD::SSUBO, VT, Custom); setOperationAction(ISD::USUBO, VT, Custom); setOperationAction(ISD::SMULO, VT, Custom); setOperationAction(ISD::UMULO, VT, Custom); // Support carry in as value rather than glue. setOperationAction(ISD::ADDCARRY, VT, Custom); setOperationAction(ISD::SUBCARRY, VT, Custom); setOperationAction(ISD::SETCCCARRY, VT, Custom); } if (!Subtarget.is64Bit()) { // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); setLibcallName(RTLIB::MUL_I128, nullptr); } // Combine sin / cos into _sincos_stret if it is available. 
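// That is, a sin(x) and a cos(x) of the same operand become one call that
// yields both results at once; on Darwin the runtime entry point is named
// along the lines of __sincosf_stret (name shown for illustration; the
// getLibcallName() checks below are what actually verify availability).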
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { setOperationAction(ISD::FSINCOS, MVT::f64, Custom); setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } if (Subtarget.isTargetWin64()) { setOperationAction(ISD::SDIV, MVT::i128, Custom); setOperationAction(ISD::UDIV, MVT::i128, Custom); setOperationAction(ISD::SREM, MVT::i128, Custom); setOperationAction(ISD::UREM, MVT::i128, Custom); setOperationAction(ISD::SDIVREM, MVT::i128, Custom); setOperationAction(ISD::UDIVREM, MVT::i128, Custom); } // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)` // is. We should promote the value to 64-bits to solve this. // This is what the CRT headers do - `fmodf` is an inline header // function casting to f64 and calling `fmod`. if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() || Subtarget.isTargetWindowsItanium())) for (ISD::NodeType Op : {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG, ISD::FLOG10, ISD::FPOW, ISD::FSIN}) if (isOperationExpand(Op, MVT::f32)) setOperationAction(Op, MVT::f32, Promote); // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::INSERT_SUBVECTOR); setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FNEG); setTargetDAGCombine(ISD::FMA); setTargetDAGCombine(ISD::FMINNUM); setTargetDAGCombine(ISD::FMAXNUM); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::MLOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::MSTORE); setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); setTargetDAGCombine(ISD::MSCATTER); setTargetDAGCombine(ISD::MGATHER); computeRegisterProperties(Subtarget.getRegisterInfo()); MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = 4; // TODO: These control memcmp expansion in CGP and could be raised higher, but // that needs to be benchmarked and balanced with the potential use of vector // load/store types (PR33329, PR33914). MaxLoadsPerMemcmp = 2; MaxLoadsPerMemcmpOptSize = 2; // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4). setPrefLoopAlignment(ExperimentalPrefLoopAlignment); // An out-of-order CPU can speculatively execute past a predictable branch, // but a conditional move could be stalled by an expensive earlier operation. PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder(); EnableExtLdPromotion = true; setPrefFunctionAlignment(4); // 2^4 bytes.
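{
  // Illustrative check of the encoding used by the two alignment calls
  // above: the argument is log2 of the byte alignment, so the default of 4
  // requests 16-byte alignment.
  unsigned AlignLog2 = 4;
  unsigned AlignBytes = 1u << AlignLog2; // == 16
  (void)AlignBytes;
}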
verifyIntrinsicTables(); } // This has so far only been implemented for 64-bit MachO. bool X86TargetLowering::useLoadStackGuardNode() const { return Subtarget.isTargetMachO() && Subtarget.is64Bit(); } bool X86TargetLowering::useStackGuardXorFP() const { // Currently only MSVC CRTs XOR the frame pointer into the stack guard value. return Subtarget.getTargetTriple().isOSMSVCRT(); } SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, const SDLoc &DL) const { EVT PtrTy = getPointerTy(DAG.getDataLayout()); unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP; MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val); return SDValue(Node, 0); } TargetLoweringBase::LegalizeTypeAction X86TargetLowering::getPreferredVectorAction(MVT VT) const { if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return TypeSplitVector; if (ExperimentalVectorWideningLegalization && VT.getVectorNumElements() != 1 && VT.getVectorElementType() != MVT::i1) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return MVT::v32i8; return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); } unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return 1; return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); } EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext& Context, EVT VT) const { if (!VT.isVector()) return MVT::i8; if (Subtarget.hasAVX512()) { const unsigned NumElts = VT.getVectorNumElements(); // Figure out what this type will be legalized to. EVT LegalVT = VT; while (getTypeAction(Context, LegalVT) != TypeLegal) LegalVT = getTypeToTransformTo(Context, LegalVT); // If we got a 512-bit vector then we'll definitely have a vXi1 compare. if (LegalVT.getSimpleVT().is512BitVector()) return EVT::getVectorVT(Context, MVT::i1, NumElts); if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) { // If we legalized to less than a 512-bit vector, then we will use a vXi1 // compare for vXi32/vXi64 for sure. If we have BWI we will also support // vXi16/vXi8. MVT EltVT = LegalVT.getSimpleVT().getVectorElementType(); if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32) return EVT::getVectorVT(Context, MVT::i1, NumElts); } } return VT.changeVectorElementTypeToInteger(); } /// Helper for getByValTypeAlignment to determine /// the desired ByVal argument alignment. static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { if (MaxAlign == 16) return; if (VectorType *VTy = dyn_cast(Ty)) { if (VTy->getBitWidth() == 128) MaxAlign = 16; } else if (ArrayType *ATy = dyn_cast(Ty)) { unsigned EltAlign = 0; getMaxByValAlign(ATy->getElementType(), EltAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; } else if (StructType *STy = dyn_cast(Ty)) { for (auto *EltTy : STy->elements()) { unsigned EltAlign = 0; getMaxByValAlign(EltTy, EltAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; if (MaxAlign == 16) break; } } } /// Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. For X86, aggregates /// that contain SSE vectors are placed at 16-byte boundaries while the rest /// are at 4-byte boundaries. 
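/// For example (illustrative types): on a 32-bit target with SSE enabled an
/// aggregate such as  struct { __m128 V; }  is placed on a 16-byte boundary,
/// while  struct { int X; }  keeps the default 4-byte boundary.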
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, const DataLayout &DL) const { if (Subtarget.is64Bit()) { // Max of 8 and alignment of type. unsigned TyAlign = DL.getABITypeAlignment(Ty); if (TyAlign > 8) return TyAlign; return 8; } unsigned Align = 4; if (Subtarget.hasSSE1()) getMaxByValAlign(Ty, Align); return Align; } /// Returns the target-specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero, the destination /// alignment can satisfy any constraint. Similarly, if SrcAlign is zero, /// there is no need to check it against an alignment requirement, /// probably because the source does not need to be loaded. If 'IsMemset' is /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function &F = MF.getFunction(); if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) { if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() || ((DstAlign == 0 || DstAlign >= 16) && (SrcAlign == 0 || SrcAlign >= 16)))) { // FIXME: Check if unaligned 32-byte accesses are slow. if (Size >= 32 && Subtarget.hasAVX()) { // Although this isn't a well-supported type for AVX1, we'll let // legalization and shuffle lowering produce the optimal codegen. If we // choose an optimal type with a vector element larger than a byte, // getMemsetStores() may create an intermediate splat (using an integer // multiply) before we splat as a vector. return MVT::v32i8; } if (Subtarget.hasSSE2()) return MVT::v16i8; // TODO: Can SSE1 handle a byte vector? // If we have SSE1 registers we should be able to use them. if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87())) return MVT::v4f32; } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) { // Do not use f64 to lower memcpy if source is string constant. It's // better to use i32 to avoid the loads. // Also, do not use f64 to lower memset unless this is a memset of zeros. // The gymnastics of splatting a byte value into an XMM register and then // only using 8-byte stores (because this is a CPU with slow unaligned // 16-byte accesses) makes that a loser. return MVT::f64; } } // This is a compromise. If we reach here, unaligned accesses may be slow on // this target. However, creating smaller, aligned accesses could be even // slower and would certainly be a lot more code. if (Subtarget.is64Bit() && Size >= 8) return MVT::i64; return MVT::i32; } bool X86TargetLowering::isSafeMemOpType(MVT VT) const { if (VT == MVT::f32) return X86ScalarSSEf32; else if (VT == MVT::f64) return X86ScalarSSEf64; return true; } bool X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, bool *Fast) const { if (Fast) { switch (VT.getSizeInBits()) { default: // 8-byte and under are always assumed to be fast. *Fast = true; break; case 128: *Fast = !Subtarget.isUnalignedMem16Slow(); break; case 256: *Fast = !Subtarget.isUnalignedMem32Slow(); break; // TODO: What about AVX-512 (512-bit) accesses? } } // Misaligned accesses of any size are always allowed.
return true; } /// Return the entry encoding for a jump table in the /// current function. The returned value is a member of the /// MachineJumpTableInfo::JTEntryKind enum. unsigned X86TargetLowering::getJumpTableEncoding() const { // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF // symbol. if (isPositionIndependent() && Subtarget.isPICStyleGOT()) return MachineJumpTableInfo::EK_Custom32; // Otherwise, use the normal jump table encoding heuristics. return TargetLowering::getJumpTableEncoding(); } bool X86TargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const { // Only relabel X86-32 for C / Stdcall CCs. if (Subtarget.is64Bit()) return; if (CC != CallingConv::C && CC != CallingConv::X86_StdCall) return; unsigned ParamRegs = 0; if (auto *M = MF->getFunction().getParent()) ParamRegs = M->getNumberRegisterParameters(); // Mark the first N int arguments as having reg for (unsigned Idx = 0; Idx < Args.size(); Idx++) { Type *T = Args[Idx].Ty; if (T->isIntOrPtrTy()) if (MF->getDataLayout().getTypeAllocSize(T) <= 8) { unsigned numRegs = 1; if (MF->getDataLayout().getTypeAllocSize(T) > 4) numRegs = 2; if (ParamRegs < numRegs) return; ParamRegs -= numRegs; Args[Idx].IsInReg = true; } } } const MCExpr * X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid,MCContext &Ctx) const{ assert(isPositionIndependent() && Subtarget.isPICStyleGOT()); // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF // entries. return MCSymbolRefExpr::create(MBB->getSymbol(), MCSymbolRefExpr::VK_GOTOFF, Ctx); } /// Returns relocation base for the given PIC jumptable. SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { if (!Subtarget.is64Bit()) // This doesn't have SDLoc associated with it, but is not really the // same as a Register. return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy(DAG.getDataLayout())); return Table; } /// This returns the relocation base for the given PIC jumptable, /// the same as getPICJumpTableRelocBase, but as an MCExpr. const MCExpr *X86TargetLowering:: getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const { // X86-64 uses RIP relative addressing based on the jump table label. if (Subtarget.isPICStyleRIPRel()) return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); // Otherwise, the reference is relative to the PIC base. return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); } std::pair X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const { const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(TRI, VT); case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: RRC = Subtarget.is64Bit() ? 
&X86::GR64RegClass : &X86::GR32RegClass; break; case MVT::x86mmx: RRC = &X86::VR64RegClass; break; case MVT::f32: case MVT::f64: case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32: case MVT::v4f64: case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: case MVT::v16f32: case MVT::v8f64: RRC = &X86::VR128XRegClass; break; } return std::make_pair(RRC, Cost); } unsigned X86TargetLowering::getAddressSpace() const { if (Subtarget.is64Bit()) return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257; return 256; } static bool hasStackGuardSlotTLS(const Triple &TargetTriple) { return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() || (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17)); } static Constant* SegmentOffset(IRBuilder<> &IRB, unsigned Offset, unsigned AddressSpace) { return ConstantExpr::getIntToPtr( ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); } Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const { // glibc, bionic, and Fuchsia have a special slot for the stack guard in // tcbhead_t; use it instead of the usual global variable (see // sysdeps/{i386,x86_64}/nptl/tls.h) if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) { if (Subtarget.isTargetFuchsia()) { // defines ZX_TLS_STACK_GUARD_OFFSET with this value. return SegmentOffset(IRB, 0x10, getAddressSpace()); } else { // %fs:0x28, unless we're using a Kernel code model, in which case // it's %gs:0x28. gs:0x14 on i386. unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14; return SegmentOffset(IRB, Offset, getAddressSpace()); } } return TargetLowering::getIRStackGuard(IRB); } void X86TargetLowering::insertSSPDeclarations(Module &M) const { // MSVC CRT provides functionalities for stack protection. if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { // MSVC CRT has a global variable holding security cookie. M.getOrInsertGlobal("__security_cookie", Type::getInt8PtrTy(M.getContext())); // MSVC CRT has a function to validate security cookie. auto *SecurityCheckCookie = cast( M.getOrInsertFunction("__security_check_cookie", Type::getVoidTy(M.getContext()), Type::getInt8PtrTy(M.getContext()))); SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall); SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg); return; } // glibc, bionic, and Fuchsia have a special slot for the stack guard. if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) return; TargetLowering::insertSSPDeclarations(M); } Value *X86TargetLowering::getSDagStackGuard(const Module &M) const { // MSVC CRT has a global variable holding security cookie. if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { return M.getGlobalVariable("__security_cookie"); } return TargetLowering::getSDagStackGuard(M); } Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { // MSVC CRT has a function to validate security cookie. 
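// Its C prototype is roughly (shown for illustration):
//   void __fastcall __security_check_cookie(uintptr_t StackCookie);
// which is why insertSSPDeclarations() above selects the X86_FastCall
// calling convention and marks the argument InReg.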
if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { return M.getFunction("__security_check_cookie"); } return TargetLowering::getSSPStackGuardCheck(M); } Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { if (Subtarget.getTargetTriple().isOSContiki()) return getDefaultSafeStackPointerLocation(IRB, false); // Android provides a fixed TLS slot for the SafeStack pointer. See the // definition of TLS_SLOT_SAFESTACK in // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h if (Subtarget.isTargetAndroid()) { // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs: // %gs:0x24 on i386 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24; return SegmentOffset(IRB, Offset, getAddressSpace()); } // Fuchsia is similar. if (Subtarget.isTargetFuchsia()) { // defines ZX_TLS_UNSAFE_SP_OFFSET with this value. return SegmentOffset(IRB, 0x18, getAddressSpace()); } return TargetLowering::getSafeStackPointerLocation(IRB); } bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { assert(SrcAS != DestAS && "Expected different address spaces!"); return SrcAS < 256 && DestAS < 256; } //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// #include "X86GenCallingConv.inc" bool X86TargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_X86); } const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { static const MCPhysReg ScratchRegs[] = { X86::R11, 0 }; return ScratchRegs; } /// Lowers masks values (v*i1) to the local register values /// \returns DAG node after lowering to register type static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &Dl, SelectionDAG &DAG) { EVT ValVT = ValArg.getValueType(); if (ValVT == MVT::v1i1) return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg, DAG.getIntPtrConstant(0, Dl)); if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) || (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) { // Two stage lowering might be required // bitcast: v8i1 -> i8 / v16i1 -> i16 // anyextend: i8 -> i32 / i16 -> i32 EVT TempValLoc = ValVT == MVT::v8i1 ? 
MVT::i8 : MVT::i16; SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg); if (ValLoc == MVT::i32) ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy); return ValToCopy; } if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) || (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) { // One stage lowering is required // bitcast: v32i1 -> i32 / v64i1 -> i64 return DAG.getBitcast(ValLoc, ValArg); } return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg); } /// Breaks v64i1 value into two registers and adds the new node to the DAG static void Passv64i1ArgInRegs( const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, SmallVector, 8> &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget) { assert(Subtarget.hasBWI() && "Expected AVX512BW target!"); assert(Subtarget.is32Bit() && "Expecting 32 bit target"); assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"); assert(VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"); // Before splitting the value we cast it to i64 Arg = DAG.getBitcast(MVT::i64, Arg); // Splitting the value into two i32 types SDValue Lo, Hi; Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg, DAG.getConstant(0, Dl, MVT::i32)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg, DAG.getConstant(1, Dl, MVT::i32)); // Attach the two i32 types into corresponding registers RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo)); RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi)); } SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); // In some cases we need to disable registers from the default CSR list. // For example, when they are used for argument passing. bool ShouldDisableCalleeSavedRegister = CallConv == CallingConv::X86_RegCall || MF.getFunction().hasFnAttribute("no_caller_saved_registers"); if (CallConv == CallingConv::X86_INTR && !Outs.empty()) report_fatal_error("X86 interrupts may not return any value"); SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl, MVT::i32)); // Copy the result values into the output registers. for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E; ++I, ++OutsIndex) { CCValAssign &VA = RVLocs[I]; assert(VA.isRegLoc() && "Can only return in registers!"); // Add the register to the CalleeSaveDisableRegs list. if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg()); SDValue ValToCopy = OutVals[OutsIndex]; EVT ValVT = ValToCopy.getValueType(); // Promote values to the appropriate types. 
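// For example (illustrative), an i1 return value tagged zeroext is widened
// here with ISD::ZERO_EXTEND to its i8 location type, so the caller sees a
// clean 0/1 in AL.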
if (VA.getLocInfo() == CCValAssign::SExt) ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); else if (VA.getLocInfo() == CCValAssign::ZExt) ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); else if (VA.getLocInfo() == CCValAssign::AExt) { if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG); else ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); } else if (VA.getLocInfo() == CCValAssign::BCvt) ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy); assert(VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."); // If this is x86-64, and we disabled SSE, we can't return FP values, // or SSE or MMX vectors. if ((ValVT == MVT::f32 || ValVT == MVT::f64 || VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) && (Subtarget.is64Bit() && !Subtarget.hasSSE1())) { errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. } else if (ValVT == MVT::f64 && (Subtarget.is64Bit() && !Subtarget.hasSSE2())) { // Likewise we can't return F64 values with SSE1 only. gcc does so, but // llvm-gcc has never done it right and no one has noticed, so this // should be OK for now. errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. } // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) { // If this is a copy from an xmm register to ST(0), use an FPExtend to // change the value to the FP stack register class. if (isScalarFPTypeInSSEReg(VA.getValVT())) ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy); RetOps.push_back(ValToCopy); // Don't emit a copytoreg. continue; } // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 // which is returned in RAX / RDX. if (Subtarget.is64Bit()) { if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy); ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget.hasSSE2()) ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy); } } } SmallVector, 8> RegsToPass; if (VA.needsCustom()) { assert(VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"); Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I], Subtarget); assert(2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"); // Add the second register to the CalleeSaveDisableRegs list. if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg()); } else { RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); } // Add nodes to the DAG and add the values into the RetOps list for (auto &Reg : RegsToPass) { Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); } } // Swift calling convention does not require we copy the sret argument // into %rax/%eax for the return, and SRetReturnReg is not set for Swift. 
// All x86 ABIs require that for returning structs by value we copy // the sret argument into %rax/%eax (depending on ABI) for the return. // We saved the argument into a virtual register in the entry block, // so now we copy the value out and into %rax/%eax. // // Checking Function.hasStructRetAttr() here is insufficient because the IR // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is // false, then an sret argument may be implicitly inserted in the SelDAG. In // either case FuncInfo->setSRetReturnReg() will have been called. if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { // When we have both sret and another return value, we should use the // original Chain stored in RetOps[0], instead of the current Chain updated // in the above loop. If we only have sret, RetOps[0] equals to Chain. // For the case of sret and another return value, we have // Chain_0 at the function entry // Chain_1 = getCopyToReg(Chain_0) in the above loop // If we use Chain_1 in getCopyFromReg, we will have // Val = getCopyFromReg(Chain_1) // Chain_2 = getCopyToReg(Chain_1, Val) from below // getCopyToReg(Chain_0) will be glued together with // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be // in Unit B, and we will have cyclic dependency between Unit A and Unit B: // Data dependency from Unit B to Unit A due to usage of Val in // getCopyToReg(Chain_1, Val) // Chain dependency from Unit A to Unit B // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg. SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg, getPointerTy(MF.getDataLayout())); unsigned RetValReg = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? X86::RAX : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); // RAX/EAX now acts like a return value. RetOps.push_back( DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); // Add the returned register to the CalleeSaveDisableRegs list. if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(RetValReg); } const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { for (; *I; ++I) { if (X86::GR64RegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::i64)); else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); } } RetOps[0] = Chain; // Update chain. // Add the flag if we have it. if (Flag.getNode()) RetOps.push_back(Flag); X86ISD::NodeType opcode = X86ISD::RET_FLAG; if (CallConv == CallingConv::X86_INTR) opcode = X86ISD::IRET; return DAG.getNode(opcode, dl, MVT::Other, RetOps); } bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0)) return false; SDValue TCChain = Chain; SDNode *Copy = *N->use_begin(); if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. 
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() != ISD::FP_EXTEND) return false; bool HasRet = false; for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != X86ISD::RET_FLAG) return false; // If we are returning more than one value, we can definitely // not make a tail call; see PR19530. if (UI->getNumOperands() > 4) return false; if (UI->getNumOperands() == 4 && UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue) return false; HasRet = true; } if (!HasRet) return false; Chain = TCChain; return true; } EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const { MVT ReturnMVT = MVT::i32; bool Darwin = Subtarget.getTargetTriple().isOSDarwin(); if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) { // The ABI does not require i1, i8 or i16 to be extended. // // On Darwin, there is code in the wild relying on Clang's old behaviour of // always extending i8/i16 return values, so keep doing that for now. // (PR26665). ReturnMVT = MVT::i8; } EVT MinVT = getRegisterType(Context, ReturnMVT); return VT.bitsLT(MinVT) ? MinVT : VT; } /// Reads two 32 bit registers and creates a 64 bit mask value. /// \param VA The current 32 bit value that needs to be assigned. /// \param NextVA The next 32 bit value that needs to be assigned. /// \param Root The parent DAG node. /// \param [in,out] InFlag Represents an SDValue in the parent DAG node for /// glue purposes. In case the DAG is already using a /// physical register instead of a virtual one, we should glue /// our new SDValue to the InFlag SDValue. /// \return a new SDValue of size 64 bits. static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &Dl, const X86Subtarget &Subtarget, SDValue *InFlag = nullptr) { assert((Subtarget.hasBWI()) && "Expected AVX512BW target!"); assert(Subtarget.is32Bit() && "Expecting 32 bit target"); assert(VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"); assert(NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"); assert(VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"); SDValue Lo, Hi; unsigned Reg; SDValue ArgValueLo, ArgValueHi; MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterClass *RC = &X86::GR32RegClass; // Read a 32 bit value from the registers. if (nullptr == InFlag) { // When no physical register is present, // create an intermediate virtual register. Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32); Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32); } else { // When a physical register is available read the value from it and glue // the reads together. ArgValueLo = DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag); *InFlag = ArgValueLo.getValue(2); ArgValueHi = DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag); *InFlag = ArgValueHi.getValue(2); } // Convert the i32 type into v32i1 type. Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo); // Convert the i32 type into v32i1 type. Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi); // Concatenate the two values together.
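{
  // Scalar model of the concatenation below (illustration only; the
  // payload values are made up): the two 32-bit register halves form one
  // 64-bit mask, with the low half in the first register.
  unsigned int LoBits = 0, HiBits = 0;
  unsigned long long Mask64 =
      (static_cast<unsigned long long>(HiBits) << 32) | LoBits;
  (void)Mask64;
}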
return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi); } /// The function will lower a register of various sizes (8/16/32/64) /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1) /// \returns a DAG node containing the operand after lowering to mask type. static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &Dl, SelectionDAG &DAG) { SDValue ValReturned = ValArg; if (ValVT == MVT::v1i1) return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned); if (ValVT == MVT::v64i1) { // On 32-bit machines this case is handled by getv64i1Argument. assert(ValLoc == MVT::i64 && "Expecting only i64 locations"); // On 64-bit machines there is no need to truncate the value, only to bitcast it. } else { MVT maskLen; switch (ValVT.getSimpleVT().SimpleTy) { case MVT::v8i1: maskLen = MVT::i8; break; case MVT::v16i1: maskLen = MVT::i16; break; case MVT::v32i1: maskLen = MVT::i32; break; default: llvm_unreachable("Expecting a vector of i1 types"); } ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned); } return DAG.getBitcast(ValVT, ValReturned); } /// Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. /// SDValue X86TargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, uint32_t *RegMask) const { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; bool Is64Bit = Subtarget.is64Bit(); CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); // Copy all of the result registers out of their specified physreg. for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E; ++I, ++InsIndex) { CCValAssign &VA = RVLocs[I]; EVT CopyVT = VA.getLocVT(); // In some calling conventions we need to remove the used registers // from the register mask. if (RegMask) { for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); } // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) { errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. } // If we prefer to use the value in xmm registers, copy it out as f80 and // use a truncate to move it from fp stack reg to xmm reg. bool RoundAfterCopy = false; if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && isScalarFPTypeInSSEReg(VA.getValVT())) { if (!Subtarget.hasX87()) report_fatal_error("X87 register return with X87 disabled"); CopyVT = MVT::f80; RoundAfterCopy = (CopyVT != VA.getLocVT()); } SDValue Val; if (VA.needsCustom()) { assert(VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"); Val = getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag); } else { Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag) .getValue(1); Val = Chain.getValue(0); InFlag = Chain.getValue(2); } if (RoundAfterCopy) Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, // This truncation won't change the value.
DAG.getIntPtrConstant(1, dl)); if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) { if (VA.getValVT().isVector() && ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG); } else Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); } InVals.push_back(Val); } return Chain; } //===----------------------------------------------------------------------===// // C & StdCall & Fast Calling Convention implementation //===----------------------------------------------------------------------===// // StdCall calling convention seems to be standard for many Windows' API // routines and around. It differs from C calling convention just a little: // callee should clean up the stack, not caller. Symbols should be also // decorated in some fancy way :) It doesn't support any vector arguments. // For info on fast calling convention see Fast Calling Convention (tail call) // implementation LowerX86_32FastCCCallTo. /// CallIsStructReturn - Determines whether a call uses struct return /// semantics. enum StructReturnType { NotStructReturn, RegStructReturn, StackStructReturn }; static StructReturnType callIsStructReturn(ArrayRef Outs, bool IsMCU) { if (Outs.empty()) return NotStructReturn; const ISD::ArgFlagsTy &Flags = Outs[0].Flags; if (!Flags.isSRet()) return NotStructReturn; if (Flags.isInReg() || IsMCU) return RegStructReturn; return StackStructReturn; } /// Determines whether a function uses struct return semantics. static StructReturnType argsAreStructReturn(ArrayRef Ins, bool IsMCU) { if (Ins.empty()) return NotStructReturn; const ISD::ArgFlagsTy &Flags = Ins[0].Flags; if (!Flags.isSRet()) return NotStructReturn; if (Flags.isInReg() || IsMCU) return RegStructReturn; return StackStructReturn; } /// Make a copy of an aggregate at address specified by "Src" to address /// "Dst" with size and alignment information specified by the specific /// parameter attribute. The copy will be passed as a byval function parameter. static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, /*isTailCall*/false, MachinePointerInfo(), MachinePointerInfo()); } /// Return true if the calling convention is one that we can guarantee TCO for. static bool canGuaranteeTCO(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || CC == CallingConv::HHVM); } /// Return true if we might ever do TCO for calls with this calling convention. static bool mayTailCallThisCC(CallingConv::ID CC) { switch (CC) { // C calling conventions: case CallingConv::C: case CallingConv::Win64: case CallingConv::X86_64_SysV: // Callee pop conventions: case CallingConv::X86_ThisCall: case CallingConv::X86_StdCall: case CallingConv::X86_VectorCall: case CallingConv::X86_FastCall: return true; default: return canGuaranteeTCO(CC); } } /// Return true if the function is being made into a tailcall target by /// changing its ABI. 
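/// This corresponds to the -tailcallopt (GuaranteedTailCallOpt) mode: for
/// example, a fastcc caller/callee pair built with -tailcallopt switches to
/// a callee-pops-arguments ABI so that a tail call can always be emitted.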
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { return GuaranteedTailCallOpt && canGuaranteeTCO(CC); } bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { auto Attr = CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); if (!CI->isTailCall() || Attr.getValueAsString() == "true") return false; ImmutableCallSite CS(CI); CallingConv::ID CalleeCC = CS.getCallingConv(); if (!mayTailCallThisCC(CalleeCC)) return false; return true; } SDValue X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo &MFI, unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; bool AlwaysUseMutable = shouldGuaranteeTCO( CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; MVT PtrVT = getPointerTy(DAG.getDataLayout()); // If the value is passed by pointer, we have the address passed instead of the // value itself. No need to extend if the mask value and location share the same // absolute size. bool ExtendedInMem = VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 && VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits(); if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem) ValVT = VA.getLocVT(); else ValVT = VA.getValVT(); // Calculate SP offset of interrupt parameter, re-arrange the slot normally // taken by a return address. int Offset = 0; if (CallConv == CallingConv::X86_INTR) { // X86 interrupts may take one or two arguments. // On the stack there will be no return address as in a regular call. // The offset of the last argument needs to be set to -4/-8 bytes, while the // offset of the first argument (when there are two) should be set to 0 bytes. Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1); if (Subtarget.is64Bit() && Ins.size() == 2) { // The stack pointer needs to be realigned for 64 bit handlers with error // code, so the argument offset changes by 8 bytes. Offset += 8; } } // FIXME: For now, all byval parameter objects are marked mutable. This can be // changed with more analysis. // In the case of tail call optimization, mark all arguments mutable, since they // could be overwritten by the lowering of arguments in case of a tail call. if (Flags.isByVal()) { unsigned Bytes = Flags.getByValSize(); if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. // FIXME: For now, all byval parameter objects are marked as aliasing. This // can be improved with deeper analysis. int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable, /*isAliased=*/true); // Adjust SP offset of interrupt parameter. if (CallConv == CallingConv::X86_INTR) { MFI.setObjectOffset(FI, Offset); } return DAG.getFrameIndex(FI, PtrVT); } // This is an argument in memory. We might be able to perform copy elision. if (Flags.isCopyElisionCandidate()) { EVT ArgVT = Ins[i].ArgVT; SDValue PartAddr; if (Ins[i].PartOffset == 0) { // If this is a one-part value or the first part of a multi-part value, // create a stack object for the entire argument value type and return a // load from our portion of it. This assumes that if the first part of an // argument is in memory, the rest will also be in memory.
int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(), /*Immutable=*/false); PartAddr = DAG.getFrameIndex(FI, PtrVT); return DAG.getLoad( ValVT, dl, Chain, PartAddr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); } else { // This is not the first piece of an argument in memory. See if there is // already a fixed stack object including this offset. If so, assume it // was created by the PartOffset == 0 branch above and create a load from // the appropriate offset into it. int64_t PartBegin = VA.getLocMemOffset(); int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8; int FI = MFI.getObjectIndexBegin(); for (; MFI.isFixedObjectIndex(FI); ++FI) { int64_t ObjBegin = MFI.getObjectOffset(FI); int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI); if (ObjBegin <= PartBegin && PartEnd <= ObjEnd) break; } if (MFI.isFixedObjectIndex(FI)) { SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT), DAG.getIntPtrConstant(Ins[i].PartOffset, dl)); return DAG.getLoad( ValVT, dl, Chain, Addr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI, Ins[i].PartOffset)); } } } int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, VA.getLocMemOffset(), isImmutable); // Set SExt or ZExt flag. if (VA.getLocInfo() == CCValAssign::ZExt) { MFI.setObjectZExt(FI, true); } else if (VA.getLocInfo() == CCValAssign::SExt) { MFI.setObjectSExt(FI, true); } // Adjust SP offset of interrupt parameter. if (CallConv == CallingConv::X86_INTR) { MFI.setObjectOffset(FI, Offset); } SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getLoad( ValVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); return ExtendedInMem ? (VA.getValVT().isVector() ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)) : Val; } // FIXME: Get this from tablegen. static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, const X86Subtarget &Subtarget) { assert(Subtarget.is64Bit()); if (Subtarget.isCallingConvWin64(CallConv)) { static const MCPhysReg GPR64ArgRegsWin64[] = { X86::RCX, X86::RDX, X86::R8, X86::R9 }; return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); } static const MCPhysReg GPR64ArgRegs64Bit[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 }; return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); } // FIXME: Get this from tablegen. static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, const X86Subtarget &Subtarget) { assert(Subtarget.is64Bit()); if (Subtarget.isCallingConvWin64(CallConv)) { // The XMM registers which might contain var arg parameters are shadowed // in their paired GPR. So we only need to save the GPR to their home // slots. // TODO: __vectorcall will change this. return None; } const Function &F = MF.getFunction(); bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat); bool isSoftFloat = Subtarget.useSoftFloat(); assert(!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"); if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1()) // Kernel mode asks for SSE to be disabled, so there are no XMM argument // registers.
return None; static const MCPhysReg XMMArgRegs64Bit[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); } #ifndef NDEBUG static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) { return std::is_sorted(ArgLocs.begin(), ArgLocs.end(), [](const CCValAssign &A, const CCValAssign &B) -> bool { return A.getValNo() < B.getValNo(); }); } #endif SDValue X86TargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); const Function &F = MF.getFunction(); if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() && F.getName() == "main") FuncInfo->setForceFramePointer(true); MachineFrameInfo &MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget.is64Bit(); bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); assert( !(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"); if (CallConv == CallingConv::X86_INTR) { bool isLegal = Ins.size() == 1 || (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) || (!Is64Bit && Ins[1].VT == MVT::i32))); if (!isLegal) report_fatal_error("X86 interrupts may take one or two arguments"); } // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64. if (IsWin64) CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeArguments(Ins, CC_X86); // In vectorcall calling convention a second pass is required for the HVA // types. if (CallingConv::X86_VectorCall == CallConv) { CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86); } // The next loop assumes that the locations are in the same order as the // input arguments. assert(isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"); SDValue ArgValue; for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E; ++I, ++InsIndex) { assert(InsIndex < Ins.size() && "Invalid Ins index"); CCValAssign &VA = ArgLocs[I]; if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); if (VA.needsCustom()) { assert( VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"); // In the regcall calling convention on 32-bit targets, v64i1 values // are split up into two registers. ArgValue = getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget); } else { const TargetRegisterClass *RC; if (RegVT == MVT::i8) RC = &X86::GR8RegClass; else if (RegVT == MVT::i16) RC = &X86::GR16RegClass; else if (RegVT == MVT::i32) RC = &X86::GR32RegClass; else if (Is64Bit && RegVT == MVT::i64) RC = &X86::GR64RegClass; else if (RegVT == MVT::f32) RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; else if (RegVT == MVT::f80) RC = &X86::RFP80RegClass; else if (RegVT == MVT::f128) RC = &X86::VR128RegClass; else if (RegVT.is512BitVector()) RC = &X86::VR512RegClass; else if (RegVT.is256BitVector()) RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass; else if (RegVT.is128BitVector()) RC = Subtarget.hasVLX() ?
&X86::VR128XRegClass : &X86::VR128RegClass; else if (RegVT == MVT::x86mmx) RC = &X86::VR64RegClass; else if (RegVT == MVT::v1i1) RC = &X86::VK1RegClass; else if (RegVT == MVT::v8i1) RC = &X86::VK8RegClass; else if (RegVT == MVT::v16i1) RC = &X86::VK16RegClass; else if (RegVT == MVT::v32i1) RC = &X86::VK32RegClass; else if (RegVT == MVT::v64i1) RC = &X86::VK64RegClass; else llvm_unreachable("Unknown argument type!"); unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); } // If this is an 8 or 16-bit value, it is really passed promoted to 32 // bits. Insert an assert[sz]ext to capture this, then truncate to the // right size. if (VA.getLocInfo() == CCValAssign::SExt) ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::BCvt) ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); else if (VA.getValVT().isVector() && VA.getValVT().getScalarType() == MVT::i1 && ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG); } else ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); } } else { assert(VA.isMemLoc()); ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex); } // If value is passed via pointer - do a load. if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal()) ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo()); InVals.push_back(ArgValue); } for (unsigned I = 0, E = Ins.size(); I != E; ++I) { // Swift calling convention does not require we copy the sret argument // into %rax/%eax for the return. We don't set SRetReturnReg for Swift. if (CallConv == CallingConv::Swift) continue; // All x86 ABIs require that for returning structs by value we copy the // sret argument into %rax/%eax (depending on ABI) for the return. Save // the argument into a virtual register so that we can access it from the // return points. if (Ins[I].Flags.isSRet()) { unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { MVT PtrTy = getPointerTy(DAG.getDataLayout()); Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); FuncInfo->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]); Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); break; } } unsigned StackSize = CCInfo.getNextStackOffset(); // Align stack specially for tail calls. if (shouldGuaranteeTCO(CallConv, MF.getTarget().Options.GuaranteedTailCallOpt)) StackSize = GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. We // can skip this if there are no va_start calls. 
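// For the SysV x86-64 ABI the register save area built below is 176 bytes:
// 6 GPRs x 8 bytes (RDI, RSI, RDX, RCX, R8, R9) followed by 8 XMM registers
// x 16 bytes; va_arg then walks it using the gp_offset/fp_offset fields of
// the va_list.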
if (MFI.hasVAStart() && (Is64Bit || (CallConv != CallingConv::X86_FastCall && CallConv != CallingConv::X86_ThisCall))) { FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true)); } // Figure out if XMM registers are in use. assert(!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"); // 64-bit calling conventions support varargs and register parameters, so we // have to do extra work to spill them in the prologue. if (Is64Bit && isVarArg && MFI.hasVAStart()) { // Find the first unallocated argument registers. ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget); ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget); unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs); unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs); assert(!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); // Gather all the live in physical registers. SmallVector<SDValue, 6> LiveGPRs; SmallVector<SDValue, 8> LiveXMMRegs; SDValue ALVal; for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) { unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass); LiveGPRs.push_back( DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64)); } if (!ArgXMMs.empty()) { unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass); ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8); for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) { unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass); LiveXMMRegs.push_back( DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32)); } } if (IsWin64) { // Get to the caller-allocated home save location. Add 8 to account // for the return address. int HomeOffset = TFI.getOffsetOfLocalArea() + 8; FuncInfo->setRegSaveFrameIndex( MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); // Fixup to set vararg frame on shadow area (4 x i64). if (NumIntRegs < 4) FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); } else { // For X86-64, if there are vararg parameters that are passed via // registers, then we must store them to their spots on the stack so // they may be loaded by dereferencing the result of va_next. FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16); FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject( ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false)); } // Store the integer parameter registers. SmallVector<SDValue, 8> MemOps; SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy(DAG.getDataLayout())); unsigned Offset = FuncInfo->getVarArgsGPOffset(); for (SDValue Val : LiveGPRs) { SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), RSFIN, DAG.getIntPtrConstant(Offset, dl)); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(), Offset)); MemOps.push_back(Store); Offset += 8; } if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) { // Now store the XMM (fp + vector) parameter registers.
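// Per the AMD64 ABI, AL holds an upper bound on the number of XMM registers
// actually used by a varargs call; the VASTART_SAVE_XMM_REGS pseudo emitted
// below tests it so the XMM spills can be skipped when the caller passed no
// floating-point arguments.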
SmallVector<SDValue, 12> SaveXMMOps; SaveXMMOps.push_back(Chain); SaveXMMOps.push_back(ALVal); SaveXMMOps.push_back(DAG.getIntPtrConstant( FuncInfo->getRegSaveFrameIndex(), dl)); SaveXMMOps.push_back(DAG.getIntPtrConstant( FuncInfo->getVarArgsFPOffset(), dl)); SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(), LiveXMMRegs.end()); MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, MVT::Other, SaveXMMOps)); } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); } if (isVarArg && MFI.hasMustTailInVarArgFunc()) { // Find the largest legal vector type. MVT VecVT = MVT::Other; // FIXME: Only some x86_32 calling conventions support AVX512. if (Subtarget.hasAVX512() && (Is64Bit || (CallConv == CallingConv::X86_VectorCall || CallConv == CallingConv::Intel_OCL_BI))) VecVT = MVT::v16f32; else if (Subtarget.hasAVX()) VecVT = MVT::v8f32; else if (Subtarget.hasSSE2()) VecVT = MVT::v4f32; // We forward some GPRs and some vector types. SmallVector<MVT, 2> RegParmTypes; MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32; RegParmTypes.push_back(IntVT); if (VecVT != MVT::Other) RegParmTypes.push_back(VecVT); // Compute the set of forwarded registers. The rest are scratch. SmallVectorImpl<ForwardedRegister> &Forwards = FuncInfo->getForwardedMustTailRegParms(); CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86); // Conservatively forward AL on x86_64, since it might be used for varargs. if (Is64Bit && !CCInfo.isAllocated(X86::AL)) { unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass); Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8)); } // Copy all forwards from physical to virtual registers. for (ForwardedRegister &F : Forwards) { // FIXME: Can we use a less constrained schedule? SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT); F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT)); Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal); } } // Some CCs need callee pop. if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, MF.getTarget().Options.GuaranteedTailCallOpt)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { // X86 interrupts must pop the error code (and the alignment padding) if // present. FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4); } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && !canGuaranteeTCO(CallConv) && !Subtarget.getTargetTriple().isOSMSVCRT() && argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn) FuncInfo->setBytesToPopOnReturn(4); } if (!Is64Bit) { // RegSaveFrameIndex is X86-64 only. FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); if (CallConv == CallingConv::X86_FastCall || CallConv == CallingConv::X86_ThisCall) // fastcc functions can't have varargs. FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } FuncInfo->setArgumentStackSize(StackSize); if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); if (Personality == EHPersonality::CoreCLR) { assert(Is64Bit); // TODO: Add a mechanism to frame lowering that will allow us to indicate // that we'd prefer this slot be allocated towards the bottom of the frame // (i.e. near the stack pointer after allocating the frame).
Every // funclet needs a copy of this slot in its (mostly empty) frame, and the // offset from the bottom of this and each funclet's frame must be the // same, so the size of funclets' (mostly empty) frames is dictated by // how far this slot is from the bottom (since they allocate just enough // space to accommodate holding this slot at the correct offset). int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false); EHInfo->PSPSymFrameIdx = PSPSymFI; } } if (CallConv == CallingConv::X86_RegCall || F.hasFnAttribute("no_caller_saved_registers")) { MachineRegisterInfo &MRI = MF.getRegInfo(); for (std::pair<unsigned, unsigned> Pair : MRI.liveins()) MRI.disableCalleeSavedRegister(Pair.first); } return Chain; } SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, PtrOff); if (Flags.isByVal()) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); return DAG.getStore( Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); } /// Emit a load of the return address if tail call /// optimization is performed and it is required. SDValue X86TargetLowering::EmitTailCallLoadRetAddr( SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, const SDLoc &dl) const { // Adjust the Return address stack slot. EVT VT = getPointerTy(DAG.getDataLayout()); OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo()); return SDValue(OutRetAddr.getNode(), 1); } /// Emit a store of the return address if tail call /// optimization is performed and it is required (FPDiff != 0). static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl) { // Store the return address to the appropriate stack slot. if (!FPDiff) return Chain; // Calculate the new stack slot for the return address. int NewReturnAddrFI = MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize, false); SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), NewReturnAddrFI)); return Chain; } /// Returns a vector_shuffle mask for a movs{s|d}, movd /// operation of specified width.
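/// For example, for a 4-element type the mask is <4, 1, 2, 3>: lane 0 is
/// taken from V2 and the remaining lanes are kept from V1, matching the
/// merge semantics of movss/movsd.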
static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector<int, 8> Mask; Mask.push_back(NumElems); for (unsigned i = 1; i != NumElems; ++i) Mask.push_back(i); return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); } SDValue X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; CallingConv::ID CallConv = CLI.CallConv; bool &isTailCall = CLI.IsTailCall; bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget.is64Bit(); bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction()); const Function *Fn = CI ? CI->getCalledFunction() : nullptr; bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) || (Fn && Fn->hasFnAttribute("no_caller_saved_registers")); const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction()); bool HasNoCfCheck = (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()); const Module *M = MF.getMMI().getModule(); Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); if (CallConv == CallingConv::X86_INTR) report_fatal_error("X86 interrupts may not be called directly"); if (Attr.getValueAsString() == "true") isTailCall = false; if (Subtarget.isPICStyleGOT() && !MF.getTarget().Options.GuaranteedTailCallOpt) { // If we are using a GOT, disable tail calls to external symbols with // default visibility. Tail calling such a symbol requires using a GOT // relocation, which forces early binding of the symbol. This breaks code // that requires lazy function symbol resolution. Using musttail or // GuaranteedTailCallOpt will override this. GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); if (!G || (!G->getGlobal()->hasLocalLinkage() && G->getGlobal()->hasDefaultVisibility())) isTailCall = false; } bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall(); if (IsMustTail) { // Force this to be a tail call. The verifier rules are enough to ensure // that we can lower this successfully without moving the return address // around. isTailCall = true; } else if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, SR != NotStructReturn, MF.getFunction().hasStructRetAttr(), CLI.RetTy, Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes. if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall) IsSibcall = true; if (isTailCall) ++NumTailCalls; } assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64. if (IsWin64) CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeArguments(Outs, CC_X86); // In vectorcall calling convention a second pass is required for the HVA // types.
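// An HVA (homogeneous vector aggregate) in vectorcall is only assigned
// registers after every other argument has been allocated, which is why a
// separate second pass over the operands is needed here.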
if (CallingConv::X86_VectorCall == CallConv) { CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86); } // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); if (IsSibcall) // This is a sibcall. The memory operands are available in the caller's // own caller's stack. NumBytes = 0; else if (MF.getTarget().Options.GuaranteedTailCallOpt && canGuaranteeTCO(CallConv)) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; if (isTailCall && !IsSibcall && !IsMustTail) { // Lower arguments at fp - stackoffset + fpdiff. unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); FPDiff = NumBytesCallerPushed - NumBytes; // Set the delta of movement of the returnaddr stackslot. // But only set if delta is greater than previous delta. if (FPDiff < X86Info->getTCReturnAddrDelta()) X86Info->setTCReturnAddrDelta(FPDiff); } unsigned NumBytesToPush = NumBytes; unsigned NumBytesToPop = NumBytes; // If we have an inalloca argument, all stack space has already been allocated // for us and is right at the top of the stack. We don't support multiple // arguments passed in memory when using inalloca. if (!Outs.empty() && Outs.back().Flags.isInAlloca()) { NumBytesToPush = 0; if (!ArgLocs.back().isMemLoc()) report_fatal_error("cannot use inalloca attribute on a register " "parameter"); if (ArgLocs.back().getLocMemOffset() != 0) report_fatal_error("any parameter with the inalloca attribute must be " "the only memory argument"); } if (!IsSibcall) Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush, NumBytes - NumBytesToPush, dl); SDValue RetAddrFrIdx; // Load return address for tail calls. if (isTailCall && FPDiff) Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit, FPDiff, dl); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; SDValue StackPtr; // The next loop assumes that the locations are in the same order as the // input arguments. assert(isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"); // Walk the register/memloc assignments, inserting copies/loads. In the case // of tail call optimization arguments are handled later. const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E; ++I, ++OutIndex) { assert(OutIndex < Outs.size() && "Invalid Out index"); // Skip inalloca arguments, they have already been written. ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags; if (Flags.isInAlloca()) continue; CCValAssign &VA = ArgLocs[I]; EVT RegVT = VA.getLocVT(); SDValue Arg = OutVals[OutIndex]; bool isByVal = Flags.isByVal(); // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); break; case CCValAssign::AExt: if (Arg.getValueType().isVector() && Arg.getValueType().getVectorElementType() == MVT::i1) Arg = lowerMasksToReg(Arg, RegVT, dl, DAG); else if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers.
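// The MMX value is bitcast to i64, placed in lane 0 of a v2i64 with
// SCALAR_TO_VECTOR, and then run through the getMOVL shuffle with an undef
// V1, so only the low 64-bit lane of the XMM register is defined.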
Arg = DAG.getBitcast(MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); } else Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); break; case CCValAssign::BCvt: Arg = DAG.getBitcast(RegVT, Arg); break; case CCValAssign::Indirect: { if (isByVal) { // Memcpy the argument to a temporary stack slot to prevent // the caller from seeing any modifications the callee may make // as guaranteed by the `byval` attribute. int FrameIdx = MF.getFrameInfo().CreateStackObject( Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()), false); SDValue StackSlot = DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout())); Chain = CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl); // From now on treat this as a regular pointer Arg = StackSlot; isByVal = false; } else { // Store the argument. SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); Chain = DAG.getStore( Chain, dl, Arg, SpillSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); Arg = SpillSlot; } break; } } if (VA.needsCustom()) { assert(VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"); // Split v64i1 value into two registers Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget); } else if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); if (isVarArg && IsWin64) { // The Win64 ABI requires an argument XMM reg to be copied to the // corresponding shadow reg if the callee is a varargs function. unsigned ShadowReg = 0; switch (VA.getLocReg()) { case X86::XMM0: ShadowReg = X86::RCX; break; case X86::XMM1: ShadowReg = X86::RDX; break; case X86::XMM2: ShadowReg = X86::R8; break; case X86::XMM3: ShadowReg = X86::R9; break; } if (ShadowReg) RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); } } else if (!IsSibcall && (!isTailCall || isByVal)) { assert(VA.isMemLoc()); if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), getPointerTy(DAG.getDataLayout())); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); if (Subtarget.isPICStyleGOT()) { // ELF / PIC requires the GOT pointer to be in the EBX register before // function calls via the PLT. if (!isTailCall) { RegsToPass.push_back(std::make_pair( unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy(DAG.getDataLayout())))); } else { // If we are tail calling and generating PIC/GOT style code load the // address of the callee into ECX. The value in ecx is used as target of // the tail jump. This is done to circumvent the ebx/callee-saved problem // for tail calls on PIC/GOT architectures. Normally we would just put the // address of GOT into ebx and then call target@PLT. But for tail calls // ebx would be restored (since ebx is callee saved) before jumping to the // target@PLT. // Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); if (G && !G->getGlobal()->hasLocalLinkage() && G->getGlobal()->hasDefaultVisibility()) Callee = LowerGlobalAddress(Callee, DAG); else if (isa<ExternalSymbolSDNode>(Callee)) Callee = LowerExternalSymbol(Callee, DAG); } } if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) { // From the AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in // the declaration) %al is used as a hidden argument to specify the number // of SSE registers used. The contents of %al do not need to match exactly // the number of registers, but must be an upper bound on the number of SSE // registers used and is in the range 0 - 8 inclusive. // Count the number of XMM registers allocated. static const MCPhysReg XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); assert((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"); RegsToPass.push_back(std::make_pair(unsigned(X86::AL), DAG.getConstant(NumXMMRegs, dl, MVT::i8))); } if (isVarArg && IsMustTail) { const auto &Forwards = X86Info->getForwardedMustTailRegParms(); for (const auto &F : Forwards) { SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT); RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val)); } } // For tail calls lower the arguments to the 'real' stack slots. Sibcalls // don't need this because the eligibility check rejects calls that require // shuffling arguments passed in memory. if (!IsSibcall && isTailCall) { // Force all the incoming stack arguments to be loaded from the stack // before any new outgoing arguments are stored to the stack, because the // outgoing stack slots may alias the incoming argument stack slots, and // the alias isn't otherwise explicit. This is slightly more conservative // than necessary, because it means that each store effectively depends // on every argument instead of just those arguments it would clobber. SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain); SmallVector<SDValue, 8> MemOpChains2; SDValue FIN; int FI = 0; for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E; ++I, ++OutsIndex) { CCValAssign &VA = ArgLocs[I]; if (VA.isRegLoc()) { if (VA.needsCustom()) { assert((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"); // This means that we are in a special case where one argument was // passed through two register locations - skip the next location. ++I; } continue; } assert(VA.isMemLoc()); SDValue Arg = OutVals[OutsIndex]; ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags; // Skip inalloca arguments. They don't require any work. if (Flags.isInAlloca()) continue; // Create frame index. int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); if (Flags.isByVal()) { // Copy relative to framepointer. SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), getPointerTy(DAG.getDataLayout())); Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, Source); MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, ArgChain, Flags, DAG, dl)); } else { // Store relative to framepointer.
MemOpChains2.push_back(DAG.getStore( ArgChain, dl, Arg, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); } } if (!MemOpChains2.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, getPointerTy(DAG.getDataLayout()), RegInfo->getSlotSize(), FPDiff, dl); } // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into registers. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } if (DAG.getTarget().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls // through a register, since the call instruction's 32-bit // pc-relative offset may not be large enough to hold the whole // address. } else if (Callee->getOpcode() == ISD::GlobalAddress) { // If the callee is a GlobalAddress node (quite common, every direct call // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack // it. GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee); // We should use an extra load for direct calls to dllimported functions in // non-JIT mode. const GlobalValue *GV = G->getGlobal(); if (!GV->hasDLLImportStorageClass()) { unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV); Callee = DAG.getTargetGlobalAddress( GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags); if (OpFlags == X86II::MO_GOTPCREL) { // Add a wrapper. Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(DAG.getDataLayout()), Callee); // Add extra indirection Callee = DAG.getLoad( getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(DAG.getMachineFunction())); } } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(nullptr, *Mod); Callee = DAG.getTargetExternalSymbol( S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags); if (OpFlags == X86II::MO_GOTPCREL) { Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(DAG.getDataLayout()), Callee); Callee = DAG.getLoad( getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(DAG.getMachineFunction())); } } else if (Subtarget.isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) { // Zero-extend the 32-bit Callee address into a 64-bit one according to the // x32 ABI. Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee); } // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; if (!IsSibcall && isTailCall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytesToPop, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } Ops.push_back(Chain); Ops.push_back(Callee); if (isTailCall) Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32)); // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers.
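// A register mask operand encodes the preserved registers as a bit vector,
// one bit per physical register packed into uint32_t words; clearing bit
// (Reg % 32) of word (Reg / 32) marks Reg as clobbered, which is how the
// argument registers are carved out of the RegCall/NCSR masks below.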
// If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we // set X86_INTR calling convention because it has the same CSR mask // (same preserved registers). const uint32_t *Mask = RegInfo->getCallPreservedMask( MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv); assert(Mask && "Missing call preserved mask for calling convention"); // If this is an invoke in a 32-bit function using a funclet-based // personality, assume the function clobbers all registers. If an exception // is thrown, the runtime will not restore CSRs. // FIXME: Model this more precisely so that we can register allocate across // the normal edge and spill and fill across the exceptional edge. if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) { const Function &CallerFn = MF.getFunction(); EHPersonality Pers = CallerFn.hasPersonalityFn() ? classifyEHPersonality(CallerFn.getPersonalityFn()) : EHPersonality::Unknown; if (isFuncletEHPersonality(Pers)) Mask = RegInfo->getNoPreservedMask(); } // Define a new register mask from the existing mask. uint32_t *RegMask = nullptr; // In some calling conventions we need to remove the used physical registers // from the reg mask. if (CallConv == CallingConv::X86_RegCall || HasNCSR) { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Allocate a new Reg Mask and copy Mask. RegMask = MF.allocateRegMask(); unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize); // Make sure all sub registers of the argument registers are reset // in the RegMask. for (auto const &RegPair : RegsToPass) for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); // Create the RegMask Operand according to our updated mask. Ops.push_back(DAG.getRegisterMask(RegMask)); } else { // Create the RegMask Operand according to the static mask. Ops.push_back(DAG.getRegisterMask(Mask)); } if (InFlag.getNode()) Ops.push_back(InFlag); if (isTailCall) { // We used to do: //// If this is the first return lowered for this function, add the regs //// to the liveout set for the function. // This isn't right, although it's probably harmless on x86; liveouts // should be computed from returns not tail calls. Consider a void // function making a tail call to a function returning int. MF.getFrameInfo().setHasTailCall(); return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops); } if (HasNoCfCheck && IsCFProtectionSupported) { Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops); } else { Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops); } InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. unsigned NumBytesForCalleeToPop; if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, DAG.getTarget().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPop = NumBytes; // Callee pops everything else if (!Is64Bit && !canGuaranteeTCO(CallConv) && !Subtarget.getTargetTriple().isOSMSVCRT() && SR == StackStructReturn) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. // For MSVC Win32 targets, the caller pops the hidden struct pointer. NumBytesForCalleeToPop = 4; else NumBytesForCalleeToPop = 0; // Callee pops nothing. if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) { // No need to reset the stack after the call if the call doesn't return. 
To // make the MI verify, we'll pretend the callee does it for us. NumBytesForCalleeToPop = NumBytes; } // Returns a flag for retval copy to use. if (!IsSibcall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytesToPop, dl, true), DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals, RegMask); } //===----------------------------------------------------------------------===// // Fast Calling Convention (tail call) implementation //===----------------------------------------------------------------------===// // Like std call, callee cleans arguments, convention except that ECX is // reserved for storing the tail called function address. Only 2 registers are // free for argument passing (inreg). Tail call optimization is performed // provided: // * tailcallopt is enabled // * caller/callee are fastcc // On X86_64 architecture with GOT-style position independent code only local // (within module) calls are supported at the moment. // To keep the stack aligned according to platform abi the function // GetAlignedArgumentStackSize ensures that argument delta is always multiples // of stack alignment. (Dynamic linkers need this - darwin's dyld for example) // If a tail called function callee has more arguments than the caller the // caller needs to make sure that there is room to move the RETADDR to. This is // achieved by reserving an area the size of the argument delta right after the // original RETADDR, but before the saved framepointer or the spilled registers // e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4) // stack layout: // arg1 // arg2 // RETADDR // [ new RETADDR // move area ] // (possible EBP) // ESI // EDI // local1 .. /// Make the stack size align e.g 16n + 12 aligned for a 16-byte align /// requirement. unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG) const { const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); unsigned StackAlignment = TFI.getStackAlignment(); uint64_t AlignMask = StackAlignment - 1; int64_t Offset = StackSize; unsigned SlotSize = RegInfo->getSlotSize(); if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) { // Number smaller than 12 so just add the difference. Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask)); } else { // Mask out lower bits, add stackalignment once plus the 12 bytes. Offset = ((~AlignMask) & Offset) + StackAlignment + (StackAlignment-SlotSize); } return Offset; } /// Return true if the given stack call argument is already available in the /// same position (relatively) of the caller's incoming argument stack. static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const X86InstrInfo *TII, const CCValAssign &VA) { unsigned Bytes = Arg.getValueSizeInBits() / 8; for (;;) { // Look through nodes that don't alter the bits of the incoming value. 
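// (ZERO_EXTEND, ANY_EXTEND, BITCAST, and a TRUNCATE of a matching AssertZext
// all preserve the low bits that the stack-slot comparison below relies on.)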
unsigned Op = Arg.getOpcode(); if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) { Arg = Arg.getOperand(0); continue; } if (Op == ISD::TRUNCATE) { const SDValue &TruncInput = Arg.getOperand(0); if (TruncInput.getOpcode() == ISD::AssertZext && cast<VTSDNode>(TruncInput.getOperand(1))->getVT() == Arg.getValueType()) { Arg = TruncInput.getOperand(0); continue; } } break; } int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); if (!TargetRegisterInfo::isVirtualRegister(VR)) return false; MachineInstr *Def = MRI->getVRegDef(VR); if (!Def) return false; if (!Flags.isByVal()) { if (!TII->isLoadFromStackSlot(*Def, FI)) return false; } else { unsigned Opcode = Def->getOpcode(); if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) && Def->getOperand(1).isFI()) { FI = Def->getOperand(1).getIndex(); Bytes = Flags.getByValSize(); } else return false; } } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { if (Flags.isByVal()) // A ByVal argument is passed in as a pointer but it's now being // dereferenced, e.g.: // define @foo(%struct.X* %A) { // tail call @bar(%struct.X* byval %A) // } return false; SDValue Ptr = Ld->getBasePtr(); FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); if (!FINode) return false; FI = FINode->getIndex(); } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg); FI = FINode->getIndex(); Bytes = Flags.getByValSize(); } else return false; assert(FI != INT_MAX); if (!MFI.isFixedObjectIndex(FI)) return false; if (Offset != MFI.getObjectOffset(FI)) return false; // If this is not byval, check that the argument stack object is immutable. // inalloca and argument copy elision can create mutable argument stack // objects. Byval objects can be mutated, but a byval call intends to pass the // mutated memory. if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI)) return false; if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) { // If the argument location is wider than the argument type, check that any // extension flags match. if (Flags.isZExt() != MFI.isObjectZExt(FI) || Flags.isSExt() != MFI.isObjectSExt(FI)) { return false; } } return Bytes == MFI.getObjectSize(FI); } /// Check whether the call is eligible for tail call optimization. Targets /// that want to do tail call optimization should implement this function. bool X86TargetLowering::IsEligibleForTailCallOptimization( SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const { if (!mayTailCallThisCC(CalleeCC)) return false; // If -tailcallopt is specified, make fastcc functions tail-callable. MachineFunction &MF = DAG.getMachineFunction(); const Function &CallerF = MF.getFunction(); // If the function return type is x86_fp80 and the callee return type is not, // then the FP_EXTEND of the call result is not a nop. It's not safe to // perform a tailcall optimization here. if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) return false; CallingConv::ID CallerCC = CallerF.getCallingConv(); bool CCMatch = CallerCC == CalleeCC; bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); // Win64 functions have extra shadow space for argument homing. Don't do the // sibcall if the caller and callee have mismatched expectations for this // space.
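// That shadow (home) space is the fixed 32 bytes (4 x 8) reserved just above
// the return address for homing RCX/RDX/R8/R9, matching the
// CCInfo.AllocateStack(32, 8) calls elsewhere in this file.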
if (IsCalleeWin64 != IsCallerWin64) return false; if (DAG.getTarget().Options.GuaranteedTailCallOpt) { if (canGuaranteeTCO(CalleeCC) && CCMatch) return true; return false; } // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to // emit a special epilogue. const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); if (RegInfo->needsStackRealignment(MF)) return false; // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. if (isCalleeStructRet || isCallerStructRet) return false; // Do not sibcall optimize vararg calls unless all arguments are passed via // registers. LLVMContext &C = *DAG.getContext(); if (isVarArg && !Outs.empty()) { // Optimizing for varargs on Win64 is unlikely to be safe without // additional testing. if (IsCalleeWin64 || IsCallerWin64) return false; SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); CCInfo.AnalyzeCallOperands(Outs, CC_X86); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) if (!ArgLocs[i].isRegLoc()) return false; } // If the call result is in ST0 / ST1, it needs to be popped off the x87 // stack. Therefore, if it's not used by the call it is not safe to optimize // this into a sibcall. bool Unused = false; for (unsigned i = 0, e = Ins.size(); i != e; ++i) { if (!Ins[i].Used) { Unused = true; break; } } if (Unused) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CalleeCC, false, MF, RVLocs, C); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) return false; } } // Check that the call results are passed in the same way. if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, RetCC_X86, RetCC_X86)) return false; // The callee has to preserve all registers the caller needs to preserve. const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (!CCMatch) { const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) return false; } unsigned StackArgsSize = 0; // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); // Allocate shadow area for Win64 if (IsCalleeWin64) CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeCallOperands(Outs, CC_X86); StackArgsSize = CCInfo.getNextStackOffset(); if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (VA.getLocInfo() == CCValAssign::Indirect) return false; if (!VA.isRegLoc()) { if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI, TII, VA)) return false; } } } bool PositionIndependent = isPositionIndependent(); // If the tailcall address may be in a register, then make sure it's // possible to register allocate for it. In 32-bit, the call address can // only target EAX, EDX, or ECX since the tail call must be scheduled after // callee-saved registers are restored. These happen to be the same // registers used to pass 'inreg' arguments so watch out for those. if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) && !isa<ExternalSymbolSDNode>(Callee)) || PositionIndependent)) { unsigned NumInRegs = 0; // In PIC we need an extra register to formulate the address computation // for the callee. unsigned MaxInRegs = PositionIndependent ? 2 : 3; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (!VA.isRegLoc()) continue; unsigned Reg = VA.getLocReg(); switch (Reg) { default: break; case X86::EAX: case X86::EDX: case X86::ECX: if (++NumInRegs == MaxInRegs) return false; break; } } } const MachineRegisterInfo &MRI = MF.getRegInfo(); if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) return false; } bool CalleeWillPop = X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg, MF.getTarget().Options.GuaranteedTailCallOpt); if (unsigned BytesToPop = MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) { // If we have bytes to pop, the callee must pop them. bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize; if (!CalleePopMatches) return false; } else if (CalleeWillPop && StackArgsSize > 0) { // If we don't have bytes to pop, make sure the callee doesn't pop any.
return false; } return true; } FastISel * X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { return X86::createFastISel(funcInfo, libInfo); } //===----------------------------------------------------------------------===// // Other Lowering Hooks //===----------------------------------------------------------------------===// static bool MayFoldLoad(SDValue Op) { return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); } static bool MayFoldIntoStore(SDValue Op) { return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); } static bool MayFoldIntoZeroExtend(SDValue Op) { if (Op.hasOneUse()) { unsigned Opcode = Op.getNode()->use_begin()->getOpcode(); return (ISD::ZERO_EXTEND == Opcode); } return false; } static bool isTargetShuffle(unsigned Opcode) { switch(Opcode) { default: return false; case X86ISD::BLENDI: case X86ISD::PSHUFB: case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::SHUFP: case X86ISD::INSERTPS: case X86ISD::EXTRQI: case X86ISD::INSERTQI: case X86ISD::PALIGNR: case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: case X86ISD::MOVLHPS: case X86ISD::MOVHLPS: case X86ISD::MOVSHDUP: case X86ISD::MOVSLDUP: case X86ISD::MOVDDUP: case X86ISD::MOVSS: case X86ISD::MOVSD: case X86ISD::UNPCKL: case X86ISD::UNPCKH: case X86ISD::VBROADCAST: case X86ISD::VPERMILPI: case X86ISD::VPERMILPV: case X86ISD::VPERM2X128: case X86ISD::SHUF128: case X86ISD::VPERMIL2: case X86ISD::VPERMI: case X86ISD::VPPERM: case X86ISD::VPERMV: case X86ISD::VPERMV3: case X86ISD::VZEXT_MOVL: return true; } } static bool isTargetShuffleVariableMask(unsigned Opcode) { switch (Opcode) { default: return false; // Target Shuffles. case X86ISD::PSHUFB: case X86ISD::VPERMILPV: case X86ISD::VPERMIL2: case X86ISD::VPPERM: case X86ISD::VPERMV: case X86ISD::VPERMV3: return true; // 'Faux' Target Shuffles. case ISD::OR: case ISD::AND: case X86ISD::ANDNP: return true; } } SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); int ReturnAddrIndex = FuncInfo->getRAIndex(); if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. unsigned SlotSize = RegInfo->getSlotSize(); ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize, -(int64_t)SlotSize, false); FuncInfo->setRAIndex(ReturnAddrIndex); } return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout())); } bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement) { // The offset should fit into a 32-bit immediate field. if (!isInt<32>(Offset)) return false; // If we don't have a symbolic displacement - we don't have any extra // restrictions. if (!hasSymbolicDisplacement) return true; // FIXME: Some tweaks might be needed for medium code model. if (M != CodeModel::Small && M != CodeModel::Kernel) return false; // For the small code model we assume that the latest object is 16MB before // the end of the 31-bit boundary. We may also accept pretty large negative // constants knowing that all objects are in the positive half of the address // space. if (M == CodeModel::Small && Offset < 16*1024*1024) return true; // For the kernel code model we know that all objects reside in the negative // half of the 32-bit address space. We may not accept negative offsets, since // they may be just off, but we may accept pretty large positive ones.
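// In the kernel code model, code and globals are linked in the top (negative)
// 2GB of the address space, so a symbol plus a non-negative 32-bit offset
// cannot wrap past the boundary, while a negative offset might fall outside
// the object.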
if (M == CodeModel::Kernel && Offset >= 0) return true; return false; } /// Determines whether the callee is required to pop its own arguments. /// Callee pop is necessary to support tail calls. bool X86::isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { // If GuaranteeTCO is true, we force some calls to be callee pop so that we // can guarantee TCO. if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO)) return true; switch (CallingConv) { default: return false; case CallingConv::X86_StdCall: case CallingConv::X86_FastCall: case CallingConv::X86_ThisCall: case CallingConv::X86_VectorCall: return !is64Bit; } } /// Return true if the condition is an unsigned comparison operation. static bool isX86CCUnsigned(unsigned X86CC) { switch (X86CC) { default: llvm_unreachable("Invalid integer condition!"); case X86::COND_E: case X86::COND_NE: case X86::COND_B: case X86::COND_A: case X86::COND_BE: case X86::COND_AE: return true; case X86::COND_G: case X86::COND_GE: case X86::COND_L: case X86::COND_LE: return false; } } static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { switch (SetCCOpcode) { default: llvm_unreachable("Invalid integer condition!"); case ISD::SETEQ: return X86::COND_E; case ISD::SETGT: return X86::COND_G; case ISD::SETGE: return X86::COND_GE; case ISD::SETLT: return X86::COND_L; case ISD::SETLE: return X86::COND_LE; case ISD::SETNE: return X86::COND_NE; case ISD::SETULT: return X86::COND_B; case ISD::SETUGT: return X86::COND_A; case ISD::SETULE: return X86::COND_BE; case ISD::SETUGE: return X86::COND_AE; } } /// Do a one-to-one translation of an ISD::CondCode to the X86-specific /// condition code, returning the condition code and the LHS/RHS of the /// comparison to make. static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL, bool isFP, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) { if (!isFP) { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { // X > -1 -> X == 0, jump !sign. RHS = DAG.getConstant(0, DL, RHS.getValueType()); return X86::COND_NS; } if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { // X < 0 -> X == 0, jump on sign. return X86::COND_S; } if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { // X < 1 -> X <= 0 RHS = DAG.getConstant(0, DL, RHS.getValueType()); return X86::COND_LE; } } return TranslateIntegerX86CC(SetCCOpcode); } // First determine if it is required or is profitable to flip the operands. // If LHS is a foldable load, but RHS is not, flip the condition.
  if (ISD::isNON_EXTLoad(LHS.getNode()) &&
      !ISD::isNON_EXTLoad(RHS.getNode())) {
    SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
    std::swap(LHS, RHS);
  }

  switch (SetCCOpcode) {
  default: break;
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    std::swap(LHS, RHS);
    break;
  }

  // On a floating point condition, the flags are set as follows:
  //  ZF  PF  CF   op
  //   0 | 0 | 0 | X > Y
  //   0 | 0 | 1 | X < Y
  //   1 | 0 | 0 | X == Y
  //   1 | 1 | 1 | unordered
  switch (SetCCOpcode) {
  default: llvm_unreachable("Condcode should be pre-legalized away");
  case ISD::SETUEQ:
  case ISD::SETEQ:   return X86::COND_E;
  case ISD::SETOLT:              // flipped
  case ISD::SETOGT:
  case ISD::SETGT:   return X86::COND_A;
  case ISD::SETOLE:              // flipped
  case ISD::SETOGE:
  case ISD::SETGE:   return X86::COND_AE;
  case ISD::SETUGT:              // flipped
  case ISD::SETULT:
  case ISD::SETLT:   return X86::COND_B;
  case ISD::SETUGE:              // flipped
  case ISD::SETULE:
  case ISD::SETLE:   return X86::COND_BE;
  case ISD::SETONE:
  case ISD::SETNE:   return X86::COND_NE;
  case ISD::SETUO:   return X86::COND_P;
  case ISD::SETO:    return X86::COND_NP;
  case ISD::SETOEQ:
  case ISD::SETUNE:  return X86::COND_INVALID;
  }
}

/// Is there a floating point cmov for the specific X86 condition code?
/// The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
  if (!IntrData)
    return false;

  Info.opc = ISD::INTRINSIC_W_CHAIN;
  Info.flags = MachineMemOperand::MONone;
  Info.offset = 0;

  switch (IntrData->Type) {
  case TRUNCATE_TO_MEM_VI8:
  case TRUNCATE_TO_MEM_VI16:
  case TRUNCATE_TO_MEM_VI32: {
    Info.ptrVal = I.getArgOperand(0);
    MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
    MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
    if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
      ScalarVT = MVT::i8;
    else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
      ScalarVT = MVT::i16;
    else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
      ScalarVT = MVT::i32;

    Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
    Info.align = 1;
    Info.flags |= MachineMemOperand::MOStore;
    break;
  }
  default:
    return false;
  }

  return true;
}

/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
    if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
      return true;
  }
  return false;
}

bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
                                              ISD::LoadExtType ExtTy,
                                              EVT NewVT) const {
  // "ELF Handling for Thread-Local Storage" specifies that the
  // R_X86_64_GOTTPOFF relocation targets a movq or addq instruction: don't
  // let the load shrink.
  SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
  if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
    if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
      return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
  return true;
}

/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0 || BitSize > 64)
    return false;
  return true;
}

bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
  // If we are using XMM registers in the ABI and the condition of the select is
  // a floating-point compare and we have blendv or conditional move, then it is
  // cheaper to select instead of doing a cross-register move and creating a
  // load that depends on the compare result.
  return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
}

bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
  // TODO: It might be a win to ease or lift this restriction, but the generic
  // folds in DAGCombiner conflict with vector folds for an AVX512 target.
  if (VT.isVector() && Subtarget.hasAVX512())
    return false;

  return true;
}

bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
  // TODO: We handle scalars using custom code, but generic combining could make
  // that unnecessary.
  APInt MulC;
  if (!ISD::isConstantSplatVector(C.getNode(), MulC))
    return false;

  // If vector multiply is legal, assume that's faster than shl + add/sub.
  // TODO: Multiply is a complex op with higher latency and lower throughput in
  // most implementations, so this check could be loosened based on type
  // and/or a CPU attribute.
  if (isOperationLegal(ISD::MUL, VT))
    return false;

  // shl+add, shl+sub, shl+add+neg; e.g. mul x, 9 -> (x << 3) + x since
  // 9 - 1 is a power of 2.
  return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
         (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
}

bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                                 bool IsSigned) const {
  // f80 UINT_TO_FP is more efficient using Strict code if FCMOV is available.
  return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
}

bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;

  // Mask vectors support all subregister combinations and operations that
  // extract half of a vector.
  if (ResVT.getVectorElementType() == MVT::i1)
    return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
                          (Index == ResVT.getVectorNumElements()));

  return (Index % ResVT.getVectorNumElements()) == 0;
}

bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
}

bool X86TargetLowering::isCheapToSpeculateCttz() const {
  // Speculate cttz only if we can directly use TZCNT.
  return Subtarget.hasBMI();
}

bool X86TargetLowering::isCheapToSpeculateCtlz() const {
  // Speculate ctlz only if we can directly use LZCNT.
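  // Illustrative note: BSR/BSF leave the destination undefined when the
  // source is zero, so speculating a generic ctlz/cttz would still need a
  // zero check; LZCNT and TZCNT instead define the result as the operand
  // width (e.g. lzcnt(0) == 32 for a 32-bit operand), which is what makes
  // branchless speculation safe here.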
return Subtarget.hasLZCNT(); } bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const { if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() && BitcastVT.getVectorElementType() == MVT::i1) return false; if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8) return false; return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT); } bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const SelectionDAG &DAG) const { // Do not merge to float value size (128 bytes) if no implicit // float attribute is set. bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat); if (NoFloat) { unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32; return (MemVT.getSizeInBits() <= MaxIntSize); } return true; } bool X86TargetLowering::isCtlzFast() const { return Subtarget.hasFastLZCNT(); } bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial( const Instruction &AndI) const { return true; } bool X86TargetLowering::hasAndNotCompare(SDValue Y) const { EVT VT = Y.getValueType(); if (VT.isVector()) return false; if (!Subtarget.hasBMI()) return false; // There are only 32-bit and 64-bit forms for 'andn'. if (VT != MVT::i32 && VT != MVT::i64) return false; return !isa(Y); } bool X86TargetLowering::hasAndNot(SDValue Y) const { EVT VT = Y.getValueType(); if (!VT.isVector()) return hasAndNotCompare(Y); // Vector. if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128) return false; if (VT == MVT::v4i32) return true; return Subtarget.hasSSE2(); } bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const { EVT VT = Y.getValueType(); // For vectors, we don't have a preference, but we probably want a mask. if (VT.isVector()) return false; // 64-bit shifts on 32-bit targets produce really bad bloated code. if (VT == MVT::i64 && !Subtarget.is64Bit()) return false; return true; } bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const { // Any legal vector type can be splatted more efficiently than // loading/spilling from memory. return isTypeLegal(VT); } MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const { MVT VT = MVT::getIntegerVT(NumBits); if (isTypeLegal(VT)) return VT; // PMOVMSKB can handle this. if (NumBits == 128 && isTypeLegal(MVT::v16i8)) return MVT::v16i8; // VPMOVMSKB can handle this. if (NumBits == 256 && isTypeLegal(MVT::v32i8)) return MVT::v32i8; // TODO: Allow 64-bit type for 32-bit target. // TODO: 512-bit types should be allowed, but make sure that those // cases are handled in combineVectorSizedSetCCEquality(). return MVT::INVALID_SIMPLE_VALUE_TYPE; } /// Val is the undef sentinel value or equal to the specified value. static bool isUndefOrEqual(int Val, int CmpVal) { return ((Val == SM_SentinelUndef) || (Val == CmpVal)); } /// Val is either the undef or zero sentinel value. static bool isUndefOrZero(int Val) { return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero)); } /// Return true if every element in Mask, beginning /// from position Pos and ending in Pos+Size is the undef sentinel value. static bool isUndefInRange(ArrayRef Mask, unsigned Pos, unsigned Size) { for (unsigned i = Pos, e = Pos + Size; i != e; ++i) if (Mask[i] != SM_SentinelUndef) return false; return true; } /// Return true if Val falls within the specified range (L, H]. static bool isInRange(int Val, int Low, int Hi) { return (Val >= Low && Val < Hi); } /// Return true if the value of any element in Mask falls within the specified /// range (L, H]. 
static bool isAnyInRange(ArrayRef Mask, int Low, int Hi) { for (int M : Mask) if (isInRange(M, Low, Hi)) return true; return false; } /// Return true if Val is undef or if its value falls within the /// specified range (L, H]. static bool isUndefOrInRange(int Val, int Low, int Hi) { return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi); } /// Return true if every element in Mask is undef or if its value /// falls within the specified range (L, H]. static bool isUndefOrInRange(ArrayRef Mask, int Low, int Hi) { for (int M : Mask) if (!isUndefOrInRange(M, Low, Hi)) return false; return true; } /// Return true if Val is undef, zero or if its value falls within the /// specified range (L, H]. static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) { return isUndefOrZero(Val) || isInRange(Val, Low, Hi); } /// Return true if every element in Mask is undef, zero or if its value /// falls within the specified range (L, H]. static bool isUndefOrZeroOrInRange(ArrayRef Mask, int Low, int Hi) { for (int M : Mask) if (!isUndefOrZeroOrInRange(M, Low, Hi)) return false; return true; } /// Return true if every element in Mask, beginning /// from position Pos and ending in Pos + Size, falls within the specified /// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef. static bool isSequentialOrUndefInRange(ArrayRef Mask, unsigned Pos, unsigned Size, int Low, int Step = 1) { for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) if (!isUndefOrEqual(Mask[i], Low)) return false; return true; } /// Return true if every element in Mask, beginning /// from position Pos and ending in Pos+Size, falls within the specified /// sequential range (Low, Low+Size], or is undef or is zero. static bool isSequentialOrUndefOrZeroInRange(ArrayRef Mask, unsigned Pos, unsigned Size, int Low) { for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low) if (!isUndefOrZero(Mask[i]) && Mask[i] != Low) return false; return true; } /// Return true if every element in Mask, beginning /// from position Pos and ending in Pos+Size is undef or is zero. static bool isUndefOrZeroInRange(ArrayRef Mask, unsigned Pos, unsigned Size) { for (unsigned i = Pos, e = Pos + Size; i != e; ++i) if (!isUndefOrZero(Mask[i])) return false; return true; } /// Helper function to test whether a shuffle mask could be /// simplified by widening the elements being shuffled. /// /// Appends the mask for wider elements in WidenedMask if valid. Otherwise /// leaves it in an unspecified state. /// /// NOTE: This must handle normal vector shuffle masks and *target* vector /// shuffle masks. The latter have the special property of a '-2' representing /// a zero-ed lane of a vector. static bool canWidenShuffleElements(ArrayRef Mask, SmallVectorImpl &WidenedMask) { WidenedMask.assign(Mask.size() / 2, 0); for (int i = 0, Size = Mask.size(); i < Size; i += 2) { int M0 = Mask[i]; int M1 = Mask[i + 1]; // If both elements are undef, its trivial. if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) { WidenedMask[i / 2] = SM_SentinelUndef; continue; } // Check for an undef mask and a mask value properly aligned to fit with // a pair of values. If we find such a case, use the non-undef mask's value. if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) { WidenedMask[i / 2] = M1 / 2; continue; } if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) { WidenedMask[i / 2] = M0 / 2; continue; } // When zeroing, we need to spread the zeroing across both lanes to widen. 
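// Worked example (illustrative): the v8i16 mask <0,1, 4,5, -1,7, 2,3> widens
// to the v4i32 mask <0,2,3,1>; a (zero,undef) pair, by contrast, must widen
// to a single zeroed lane, which is what the checks below enforce.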
if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) { if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) && (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) { WidenedMask[i / 2] = SM_SentinelZero; continue; } return false; } // Finally check if the two mask values are adjacent and aligned with // a pair. if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) { WidenedMask[i / 2] = M0 / 2; continue; } // Otherwise we can't safely widen the elements used in this shuffle. return false; } assert(WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"); return true; } static bool canWidenShuffleElements(ArrayRef Mask, const APInt &Zeroable, SmallVectorImpl &WidenedMask) { SmallVector TargetMask(Mask.begin(), Mask.end()); for (int i = 0, Size = TargetMask.size(); i < Size; ++i) { if (TargetMask[i] == SM_SentinelUndef) continue; if (Zeroable[i]) TargetMask[i] = SM_SentinelZero; } return canWidenShuffleElements(TargetMask, WidenedMask); } static bool canWidenShuffleElements(ArrayRef Mask) { SmallVector WidenedMask; return canWidenShuffleElements(Mask, WidenedMask); } /// Returns true if Elt is a constant zero or a floating point constant +0.0. bool X86::isZeroNode(SDValue Elt) { return isNullConstant(Elt) || isNullFPConstant(Elt); } // Build a vector of constants. // Use an UNDEF node if MaskElt == -1. // Split 64-bit constants in the 32-bit mode. static SDValue getConstVector(ArrayRef Values, MVT VT, SelectionDAG &DAG, const SDLoc &dl, bool IsMask = false) { SmallVector Ops; bool Split = false; MVT ConstVecVT = VT; unsigned NumElts = VT.getVectorNumElements(); bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64); if (!In64BitMode && VT.getVectorElementType() == MVT::i64) { ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2); Split = true; } MVT EltVT = ConstVecVT.getVectorElementType(); for (unsigned i = 0; i < NumElts; ++i) { bool IsUndef = Values[i] < 0 && IsMask; SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) : DAG.getConstant(Values[i], dl, EltVT); Ops.push_back(OpNode); if (Split) Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) : DAG.getConstant(0, dl, EltVT)); } SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops); if (Split) ConstsNode = DAG.getBitcast(VT, ConstsNode); return ConstsNode; } static SDValue getConstVector(ArrayRef Bits, APInt &Undefs, MVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"); SmallVector Ops; bool Split = false; MVT ConstVecVT = VT; unsigned NumElts = VT.getVectorNumElements(); bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64); if (!In64BitMode && VT.getVectorElementType() == MVT::i64) { ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2); Split = true; } MVT EltVT = ConstVecVT.getVectorElementType(); for (unsigned i = 0, e = Bits.size(); i != e; ++i) { if (Undefs[i]) { Ops.append(Split ? 
2 : 1, DAG.getUNDEF(EltVT)); continue; } const APInt &V = Bits[i]; assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"); if (Split) { Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT)); Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT)); } else if (EltVT == MVT::f32) { APFloat FV(APFloat::IEEEsingle(), V); Ops.push_back(DAG.getConstantFP(FV, dl, EltVT)); } else if (EltVT == MVT::f64) { APFloat FV(APFloat::IEEEdouble(), V); Ops.push_back(DAG.getConstantFP(FV, dl, EltVT)); } else { Ops.push_back(DAG.getConstant(V, dl, EltVT)); } } SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops); return DAG.getBitcast(VT, ConstsNode); } /// Returns a vector of specified type with all zero elements. static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl) { assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"); // Try to build SSE/AVX zero vectors as bitcasted to their dest // type. This ensures they get CSE'd. But if the integer type is not // available, use a floating-point +0.0 instead. SDValue Vec; if (!Subtarget.hasSSE2() && VT.is128BitVector()) { Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32); } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); Vec = DAG.getConstant(0, dl, VT); } else { unsigned Num32BitElts = VT.getSizeInBits() / 32; Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts)); } return DAG.getBitcast(VT, Vec); } static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl, unsigned vectorWidth) { EVT VT = Vec.getValueType(); EVT ElVT = VT.getVectorElementType(); unsigned Factor = VT.getSizeInBits()/vectorWidth; EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, VT.getVectorNumElements()/Factor); // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // This is the index of the first element of the vectorWidth-bit chunk // we want. Since ElemsPerChunk is a power of 2 just need to clear bits. IdxVal &= ~(ElemsPerChunk - 1); // If the input is a buildvector just emit a smaller one. if (Vec.getOpcode() == ISD::BUILD_VECTOR) return DAG.getBuildVector(ResultVT, dl, Vec->ops().slice(IdxVal, ElemsPerChunk)); SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); } /// Generate a DAG to grab 128-bits from a vector > 128 bits. This /// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4 /// instructions or a simple subregister reference. Idx is an index in the /// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes /// lowering EXTRACT_VECTOR_ELT operations easier. static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl) { assert((Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && "Unexpected vector size!"); return extractSubVector(Vec, IdxVal, DAG, dl, 128); } /// Generate a DAG to grab 256-bits from a 512-bit vector. 
static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl) { assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!"); return extractSubVector(Vec, IdxVal, DAG, dl, 256); } static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl, unsigned vectorWidth) { assert((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"); // Inserting UNDEF is Result if (Vec.isUndef()) return Result; EVT VT = Vec.getValueType(); EVT ElVT = VT.getVectorElementType(); EVT ResultVT = Result.getValueType(); // Insert the relevant vectorWidth bits. unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // This is the index of the first element of the vectorWidth-bit chunk // we want. Since ElemsPerChunk is a power of 2 just need to clear bits. IdxVal &= ~(ElemsPerChunk - 1); SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); } /// Generate a DAG to put 128-bits into a vector > 128 bits. This /// sets things up to match to an AVX VINSERTF128/VINSERTI128 or /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a /// simple superregister reference. Idx is an index in the 128 bits /// we want. It need not be aligned to a 128-bit boundary. That makes /// lowering INSERT_VECTOR_ELT operations easier. static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl) { assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!"); return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128); } /// Widen a vector to a larger size with the same scalar type, with the new /// elements either zero or undef. static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl) { assert(Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && "Unsupported vector widening type"); SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl) : DAG.getUNDEF(VT); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec, DAG.getIntPtrConstant(0, dl)); } // Helper for splitting operands of an operation to legal target size and // apply a function on each part. // Useful for operations that are available on SSE2 in 128-bit, on AVX2 in // 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for // deciding if/how to split Ops. Ops elements do *not* have to be of type VT. 
// The argument Builder is a function that will be applied on each split part:
// SDValue Builder(SelectionDAG &G, const SDLoc &DL, ArrayRef<SDValue> Ops)
template <typename F>
SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
                         const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
                         F Builder, bool CheckBWI = true) {
  assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
  unsigned NumSubs = 1;
  if ((CheckBWI && Subtarget.useBWIRegs()) ||
      (!CheckBWI && Subtarget.useAVX512Regs())) {
    if (VT.getSizeInBits() > 512) {
      NumSubs = VT.getSizeInBits() / 512;
      assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
    }
  } else if (Subtarget.hasAVX2()) {
    if (VT.getSizeInBits() > 256) {
      NumSubs = VT.getSizeInBits() / 256;
      assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size");
    }
  } else {
    if (VT.getSizeInBits() > 128) {
      NumSubs = VT.getSizeInBits() / 128;
      assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size");
    }
  }

  if (NumSubs == 1)
    return Builder(DAG, DL, Ops);

  SmallVector<SDValue, 4> Subs;
  for (unsigned i = 0; i != NumSubs; ++i) {
    SmallVector<SDValue, 2> SubOps;
    for (SDValue Op : Ops) {
      EVT OpVT = Op.getValueType();
      unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
      unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
      SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
    }
    Subs.push_back(Builder(DAG, DL, SubOps));
  }
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
}

// Return true if the instruction zeroes the unused upper part of the
// destination and accepts a mask.
static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
  switch (Opcode) {
  default:
    return false;
  case X86ISD::CMPM:
  case X86ISD::CMPM_RND:
  case ISD::SETCC:
    return true;
  }
}

/// Insert i1-subvector to i1-vector.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (!isa<ConstantSDNode>(Idx))
    return SDValue();

  // Inserting undef is a nop. We can just return the original vector.
  if (SubVec.isUndef())
    return Vec;

  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
  if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
    return Op;

  MVT OpVT = Op.getSimpleValueType();
  unsigned NumElems = OpVT.getVectorNumElements();

  SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);

  // Extend to natively supported kshift.
  MVT WideOpVT = OpVT;
  if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
    WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;

  // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
  // if necessary.
  if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
    // May need to promote to a legal type.
    Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                     getZeroVector(WideOpVT, Subtarget, DAG, dl),
                     SubVec, Idx);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  MVT SubVecVT = SubVec.getSimpleValueType();
  unsigned SubVecNumElems = SubVecVT.getVectorNumElements();

  assert(IdxVal + SubVecNumElems <= NumElems &&
         IdxVal % SubVecVT.getSizeInBits() == 0 &&
         "Unexpected index value in INSERT_SUBVECTOR");

  SDValue Undef = DAG.getUNDEF(WideOpVT);

  if (IdxVal == 0) {
    // Zero lower bits of the Vec
    SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
    Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
    Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
    // Merge them together, SubVec should be zero extended.
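    // Illustrative v16i1 example for this IdxVal == 0 path: kshiftr+kshiftl
    // by SubVecNumElems (say 4) clear the low 4 mask bits of Vec, and the OR
    // below fills them from the zero-extended SubVec, leaving bits 15..4 of
    // Vec untouched.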
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                         getZeroVector(WideOpVT, Subtarget, DAG, dl),
                         SubVec, ZeroIdx);
    Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                       Undef, SubVec, ZeroIdx);

  if (Vec.isUndef()) {
    assert(IdxVal != 0 && "Unexpected index");
    SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                         DAG.getConstant(IdxVal, dl, MVT::i8));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
  }

  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
    assert(IdxVal != 0 && "Unexpected index");
    NumElems = WideOpVT.getVectorNumElements();
    unsigned ShiftLeft = NumElems - SubVecNumElems;
    unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
    SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                         DAG.getConstant(ShiftLeft, dl, MVT::i8));
    if (ShiftRight != 0)
      SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
                           DAG.getConstant(ShiftRight, dl, MVT::i8));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
  }

  // Simple case when we put the subvector in the upper part.
  if (IdxVal + SubVecNumElems == NumElems) {
    SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                         DAG.getConstant(IdxVal, dl, MVT::i8));
    if (SubVecNumElems * 2 == NumElems) {
      // Special case, use legal zero extending insert_subvector. This allows
      // isel to optimize when bits are known zero.
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                        getZeroVector(WideOpVT, Subtarget, DAG, dl),
                        Vec, ZeroIdx);
    } else {
      // Otherwise use explicit shifts to zero the bits.
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                        Undef, Vec, ZeroIdx);
      NumElems = WideOpVT.getVectorNumElements();
      SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
      Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
      Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
    }
    Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  // Inserting into the middle is more complicated.

  NumElems = WideOpVT.getVectorNumElements();

  // Widen the vector if needed.
  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
  // Move the current value of the bits being replaced to the lsbs.
  Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
                   DAG.getConstant(IdxVal, dl, MVT::i8));
  // Xor with the new bits.
  Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
  // Shift to MSB, filling bottom bits with 0.
  unsigned ShiftLeft = NumElems - SubVecNumElems;
  Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
                   DAG.getConstant(ShiftLeft, dl, MVT::i8));
  // Shift to the final position, filling upper bits with 0.
  unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
  Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
                   DAG.getConstant(ShiftRight, dl, MVT::i8));
  // Xor with original vector leaving the new value.
  Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);

  // Reduce to original width if needed.
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}

static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT,
                                unsigned NumElems, SelectionDAG &DAG,
                                const SDLoc &dl, unsigned VectorWidth) {
  SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth);
  return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth);
}

/// Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>. /// Then bitcast to their original type, ensuring they get CSE'd. static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && "Expected a 128/256/512-bit vector type"); APInt Ones = APInt::getAllOnesValue(32); unsigned NumElts = VT.getSizeInBits() / 32; SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts)); return DAG.getBitcast(VT, Vec); } static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In, SelectionDAG &DAG) { EVT InVT = In.getValueType(); assert(VT.isVector() && InVT.isVector() && "Expected vector VTs."); // For 256-bit vectors, we only need the lower (128-bit) input half. // For 512-bit vectors, we only need the lower input half or quarter. if (InVT.getSizeInBits() > 128) { assert(VT.getSizeInBits() == InVT.getSizeInBits() && "Expected VTs to be the same size!"); unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits(); In = extractSubVector(In, 0, DAG, DL, std::max(128U, VT.getSizeInBits() / Scale)); InVT = In.getValueType(); } if (VT.getVectorNumElements() == InVT.getVectorNumElements()) return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, In); return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, In); } /// Returns a vector_shuffle node for an unpackl operation. static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, SDValue V2) { SmallVector Mask; createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false); return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); } /// Returns a vector_shuffle node for an unpackh operation. static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, SDValue V2) { SmallVector Mask; createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false); return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); } /// Return a vector_shuffle of the specified vector of zero or undef vector. /// This produces a shuffle where the low element of V2 is swizzled into the /// zero/undef vector, landing at element Idx. /// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, bool IsZero, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = V2.getSimpleValueType(); SDValue V1 = IsZero ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); int NumElems = VT.getVectorNumElements(); SmallVector MaskVec(NumElems); for (int i = 0; i != NumElems; ++i) // If this is the insertion idx, put the low elt of V2 here. MaskVec[i] = (i == Idx) ? NumElems : i; return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec); } // Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops. static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) { while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) V = V.getOperand(0); return V; } static const Constant *getTargetConstantFromNode(SDValue Op) { Op = peekThroughBitcasts(Op); auto *Load = dyn_cast(Op); if (!Load) return nullptr; SDValue Ptr = Load->getBasePtr(); if (Ptr->getOpcode() == X86ISD::Wrapper || Ptr->getOpcode() == X86ISD::WrapperRIP) Ptr = Ptr->getOperand(0); auto *CNode = dyn_cast(Ptr); if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0) return nullptr; return CNode->getConstVal(); } // Extract raw constant bits from constant pools. 
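// Worked example (illustrative): a v2i64 splat of 0x0000000100000002 queried
// with EltSizeInBits == 32 is repacked into the i32 elements {2, 1, 2, 1}
// (little-endian bit order), which is how bitcasted constant-pool data gets
// matched by the function below.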
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, APInt &UndefElts, SmallVectorImpl &EltBits, bool AllowWholeUndefs = true, bool AllowPartialUndefs = true) { assert(EltBits.empty() && "Expected an empty EltBits vector"); Op = peekThroughBitcasts(Op); EVT VT = Op.getValueType(); unsigned SizeInBits = VT.getSizeInBits(); assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"); unsigned NumElts = SizeInBits / EltSizeInBits; // Bitcast a source array of element bits to the target size. auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef SrcEltBits) { unsigned NumSrcElts = UndefSrcElts.getBitWidth(); unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth(); assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits && "Constant bit sizes don't match"); // Don't split if we don't allow undef bits. bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs; if (UndefSrcElts.getBoolValue() && !AllowUndefs) return false; // If we're already the right size, don't bother bitcasting. if (NumSrcElts == NumElts) { UndefElts = UndefSrcElts; EltBits.assign(SrcEltBits.begin(), SrcEltBits.end()); return true; } // Extract all the undef/constant element data and pack into single bitsets. APInt UndefBits(SizeInBits, 0); APInt MaskBits(SizeInBits, 0); for (unsigned i = 0; i != NumSrcElts; ++i) { unsigned BitOffset = i * SrcEltSizeInBits; if (UndefSrcElts[i]) UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); MaskBits.insertBits(SrcEltBits[i], BitOffset); } // Split the undef/constant single bitset data into the target elements. UndefElts = APInt(NumElts, 0); EltBits.resize(NumElts, APInt(EltSizeInBits, 0)); for (unsigned i = 0; i != NumElts; ++i) { unsigned BitOffset = i * EltSizeInBits; APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset); // Only treat an element as UNDEF if all bits are UNDEF. if (UndefEltBits.isAllOnesValue()) { if (!AllowWholeUndefs) return false; UndefElts.setBit(i); continue; } // If only some bits are UNDEF then treat them as zero (or bail if not // supported). if (UndefEltBits.getBoolValue() && !AllowPartialUndefs) return false; APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset); EltBits[i] = Bits.getZExtValue(); } return true; }; // Collect constant bits and insert into mask/undef bit masks. auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs, unsigned UndefBitIndex) { if (!Cst) return false; if (isa(Cst)) { Undefs.setBit(UndefBitIndex); return true; } if (auto *CInt = dyn_cast(Cst)) { Mask = CInt->getValue(); return true; } if (auto *CFP = dyn_cast(Cst)) { Mask = CFP->getValueAPF().bitcastToAPInt(); return true; } return false; }; // Handle UNDEFs. if (Op.isUndef()) { APInt UndefSrcElts = APInt::getAllOnesValue(NumElts); SmallVector SrcEltBits(NumElts, APInt(EltSizeInBits, 0)); return CastBitData(UndefSrcElts, SrcEltBits); } // Extract scalar constant bits. if (auto *Cst = dyn_cast(Op)) { APInt UndefSrcElts = APInt::getNullValue(1); SmallVector SrcEltBits(1, Cst->getAPIntValue()); return CastBitData(UndefSrcElts, SrcEltBits); } if (auto *Cst = dyn_cast(Op)) { APInt UndefSrcElts = APInt::getNullValue(1); APInt RawBits = Cst->getValueAPF().bitcastToAPInt(); SmallVector SrcEltBits(1, RawBits); return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from build vector. 
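  // e.g. (illustrative) build_vector (i32 5, i32 undef, i32 7, i32 9) yields
  // SrcEltBits == {5, 0, 7, 9} with bit 1 set in UndefSrcElts, before
  // CastBitData re-splits the bits to the requested element size.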
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; APInt UndefSrcElts(NumSrcElts, 0); SmallVector SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { const SDValue &Src = Op.getOperand(i); if (Src.isUndef()) { UndefSrcElts.setBit(i); continue; } auto *Cst = cast(Src); SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); } return CastBitData(UndefSrcElts, SrcEltBits); } if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) { unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; APInt UndefSrcElts(NumSrcElts, 0); SmallVector SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { const SDValue &Src = Op.getOperand(i); if (Src.isUndef()) { UndefSrcElts.setBit(i); continue; } auto *Cst = cast(Src); APInt RawBits = Cst->getValueAPF().bitcastToAPInt(); SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits); } return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from constant pool vector. if (auto *Cst = getTargetConstantFromNode(Op)) { Type *CstTy = Cst->getType(); unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0) return false; unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits(); unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; APInt UndefSrcElts(NumSrcElts, 0); SmallVector SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); for (unsigned i = 0; i != NumSrcElts; ++i) if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i], UndefSrcElts, i)) return false; return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from a broadcasted constant pool scalar. if (Op.getOpcode() == X86ISD::VBROADCAST && EltSizeInBits <= VT.getScalarSizeInBits()) { if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) { unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits(); unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; APInt UndefSrcElts(NumSrcElts, 0); SmallVector SrcEltBits(1, APInt(SrcEltSizeInBits, 0)); if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) { if (UndefSrcElts[0]) UndefSrcElts.setBits(0, NumSrcElts); SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]); return CastBitData(UndefSrcElts, SrcEltBits); } } } // Extract a rematerialized scalar constant insertion. if (Op.getOpcode() == X86ISD::VZEXT_MOVL && Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && isa(Op.getOperand(0).getOperand(0))) { unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; APInt UndefSrcElts(NumSrcElts, 0); SmallVector SrcEltBits; auto *CN = cast(Op.getOperand(0).getOperand(0)); SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits)); SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0)); return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from a subvector's source. if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && isa(Op.getOperand(1))) { // TODO - support extract_subvector through bitcasts. 
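    // Illustrative: for a v4i32 extract_subvector of a v8i32 constant at
    // index 4, the recursive call below collects all 8 source elements and
    // then trims UndefElts/EltBits down to elements [4, 8)
    // (BaseIdx == 4, NumSubElts == 4).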
if (EltSizeInBits != VT.getScalarSizeInBits()) return false; if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits, UndefElts, EltBits, AllowWholeUndefs, AllowPartialUndefs)) { EVT SrcVT = Op.getOperand(0).getValueType(); unsigned NumSrcElts = SrcVT.getVectorNumElements(); unsigned NumSubElts = VT.getVectorNumElements(); unsigned BaseIdx = Op.getConstantOperandVal(1); UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx); if ((BaseIdx + NumSubElts) != NumSrcElts) EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end()); if (BaseIdx != 0) EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx); return true; } } // Extract constant bits from shuffle node sources. if (auto *SVN = dyn_cast(Op)) { // TODO - support shuffle through bitcasts. if (EltSizeInBits != VT.getScalarSizeInBits()) return false; ArrayRef Mask = SVN->getMask(); if ((!AllowWholeUndefs || !AllowPartialUndefs) && llvm::any_of(Mask, [](int M) { return M < 0; })) return false; APInt UndefElts0, UndefElts1; SmallVector EltBits0, EltBits1; if (isAnyInRange(Mask, 0, NumElts) && !getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits, UndefElts0, EltBits0, AllowWholeUndefs, AllowPartialUndefs)) return false; if (isAnyInRange(Mask, NumElts, 2 * NumElts) && !getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits, UndefElts1, EltBits1, AllowWholeUndefs, AllowPartialUndefs)) return false; UndefElts = APInt::getNullValue(NumElts); for (int i = 0; i != (int)NumElts; ++i) { int M = Mask[i]; if (M < 0) { UndefElts.setBit(i); EltBits.push_back(APInt::getNullValue(EltSizeInBits)); } else if (M < (int)NumElts) { if (UndefElts0[M]) UndefElts.setBit(i); EltBits.push_back(EltBits0[M]); } else { if (UndefElts1[M - NumElts]) UndefElts.setBit(i); EltBits.push_back(EltBits1[M - NumElts]); } } return true; } return false; } static bool isConstantSplat(SDValue Op, APInt &SplatVal) { APInt UndefElts; SmallVector EltBits; if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(), UndefElts, EltBits, true, false)) { int SplatIndex = -1; for (int i = 0, e = EltBits.size(); i != e; ++i) { if (UndefElts[i]) continue; if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) { SplatIndex = -1; break; } SplatIndex = i; } if (0 <= SplatIndex) { SplatVal = EltBits[SplatIndex]; return true; } } return false; } static bool getTargetShuffleMaskIndices(SDValue MaskNode, unsigned MaskEltSizeInBits, SmallVectorImpl &RawMask, APInt &UndefElts) { // Extract the raw target constant bits. SmallVector EltBits; if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts, EltBits, /* AllowWholeUndefs */ true, /* AllowPartialUndefs */ false)) return false; // Insert the extracted elements into the mask. for (APInt Elt : EltBits) RawMask.push_back(Elt.getZExtValue()); return true; } /// Create a shuffle mask that matches the PACKSS/PACKUS truncation. /// Note: This ignores saturation, so inputs must be checked first. static void createPackShuffleMask(MVT VT, SmallVectorImpl &Mask, bool Unary) { assert(Mask.empty() && "Expected an empty shuffle mask vector"); unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = VT.getSizeInBits() / 128; unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits(); unsigned Offset = Unary ? 
0 : NumElts; for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2) Mask.push_back(Elt + (Lane * NumEltsPerLane)); for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2) Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset); } } // Split the demanded elts of a PACKSS/PACKUS node between its operands. static void getPackDemandedElts(EVT VT, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS) { int NumLanes = VT.getSizeInBits() / 128; int NumElts = DemandedElts.getBitWidth(); int NumInnerElts = NumElts / 2; int NumEltsPerLane = NumElts / NumLanes; int NumInnerEltsPerLane = NumInnerElts / NumLanes; DemandedLHS = APInt::getNullValue(NumInnerElts); DemandedRHS = APInt::getNullValue(NumInnerElts); // Map DemandedElts to the packed operands. for (int Lane = 0; Lane != NumLanes; ++Lane) { for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) { int OuterIdx = (Lane * NumEltsPerLane) + Elt; int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt; if (DemandedElts[OuterIdx]) DemandedLHS.setBit(InnerIdx); if (DemandedElts[OuterIdx + NumInnerEltsPerLane]) DemandedRHS.setBit(InnerIdx); } } } /// Calculates the shuffle mask corresponding to the target-specific opcode. /// If the mask could be calculated, returns it in \p Mask, returns the shuffle /// operands in \p Ops, and returns true. /// Sets \p IsUnary to true if only one source is used. Note that this will set /// IsUnary for shuffles which use a single input multiple times, and in those /// cases it will adjust the mask to only have indices within that single input. /// It is an error to call this with non-empty Mask/Ops vectors. static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, SmallVectorImpl &Ops, SmallVectorImpl &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); unsigned MaskEltSize = VT.getScalarSizeInBits(); SmallVector RawMask; APInt RawUndefs; SDValue ImmN; assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"); assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector"); IsUnary = false; bool IsFakeUnary = false; switch (N->getOpcode()) { case X86ISD::BLENDI: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodeBLENDMask(NumElems, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::SHUFP: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodeSHUFPMask(NumElems, MaskEltSize, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::INSERTPS: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodeINSERTPSMask(cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::EXTRQI: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); if (isa(N->getOperand(1)) && isa(N->getOperand(2))) { int BitLen = N->getConstantOperandVal(1); int BitIdx = N->getConstantOperandVal(2); DecodeEXTRQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask); IsUnary = true; } break; case 
X86ISD::INSERTQI: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); if (isa(N->getOperand(2)) && isa(N->getOperand(3))) { int BitLen = N->getConstantOperandVal(2); int BitIdx = N->getConstantOperandVal(3); DecodeINSERTQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); } break; case X86ISD::UNPCKH: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); DecodeUNPCKHMask(NumElems, MaskEltSize, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::UNPCKL: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); DecodeUNPCKLMask(NumElems, MaskEltSize, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVHLPS: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); DecodeMOVHLPSMask(NumElems, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVLHPS: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); DecodeMOVLHPSMask(NumElems, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::PALIGNR: assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodePALIGNRMask(NumElems, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); Ops.push_back(N->getOperand(1)); Ops.push_back(N->getOperand(0)); break; case X86ISD::VSHLDQ: assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodePSLLDQMask(NumElems, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::VSRLDQ: assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodePSRLDQMask(NumElems, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFD: case X86ISD::VPERMILPI: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodePSHUFMask(NumElems, MaskEltSize, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFHW: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodePSHUFHWMask(NumElems, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFLW: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodePSHUFLWMask(NumElems, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::VZEXT_MOVL: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); DecodeZeroMoveLowMask(NumElems, Mask); IsUnary = true; break; case X86ISD::VBROADCAST: { SDValue N0 = 
N->getOperand(0); // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so, // add the pre-extracted value to the Ops vector. if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.getOperand(0).getValueType() == VT && N0.getConstantOperandVal(1) == 0) Ops.push_back(N0.getOperand(0)); // We only decode broadcasts of same-sized vectors, unless the broadcast // came from an extract from the original width. If we found one, we // pushed it the Ops vector above. if (N0.getValueType() == VT || !Ops.empty()) { DecodeVectorBroadcast(NumElems, Mask); IsUnary = true; break; } return false; } case X86ISD::VPERMILPV: { assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); IsUnary = true; SDValue MaskNode = N->getOperand(1); if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, RawUndefs)) { DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, RawUndefs, Mask); break; } return false; } case X86ISD::PSHUFB: { assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); IsUnary = true; SDValue MaskNode = N->getOperand(1); if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) { DecodePSHUFBMask(RawMask, RawUndefs, Mask); break; } return false; } case X86ISD::VPERMI: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodeVPERMMask(NumElems, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::MOVSS: case X86ISD::MOVSD: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); DecodeScalarMoveMask(NumElems, /* IsLoad */ false, Mask); break; case X86ISD::VPERM2X128: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodeVPERM2X128Mask(NumElems, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::SHUF128: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); decodeVSHUF64x2FamilyMask(NumElems, MaskEltSize, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVSLDUP: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); DecodeMOVSLDUPMask(NumElems, Mask); IsUnary = true; break; case X86ISD::MOVSHDUP: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); DecodeMOVSHDUPMask(NumElems, Mask); IsUnary = true; break; case X86ISD::MOVDDUP: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); DecodeMOVDDUPMask(NumElems, Mask); IsUnary = true; break; case X86ISD::VPERMIL2: { assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); SDValue MaskNode = N->getOperand(2); SDValue CtrlNode = N->getOperand(3); if (ConstantSDNode *CtrlOp = dyn_cast(CtrlNode)) { unsigned CtrlImm = CtrlOp->getZExtValue(); if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, RawUndefs)) { DecodeVPERMIL2PMask(NumElems, MaskEltSize, 
CtrlImm, RawMask, RawUndefs, Mask); break; } } return false; } case X86ISD::VPPERM: { assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); SDValue MaskNode = N->getOperand(2); if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) { DecodeVPPERMMask(RawMask, RawUndefs, Mask); break; } return false; } case X86ISD::VPERMV: { assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); IsUnary = true; // Unlike most shuffle nodes, VPERMV's mask operand is operand 0. Ops.push_back(N->getOperand(1)); SDValue MaskNode = N->getOperand(0); if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, RawUndefs)) { DecodeVPERMVMask(RawMask, RawUndefs, Mask); break; } return false; } case X86ISD::VPERMV3: { assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(2).getValueType() == VT && "Unexpected value type"); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2); // Unlike most shuffle nodes, VPERMV3's mask operand is the middle one. Ops.push_back(N->getOperand(0)); Ops.push_back(N->getOperand(2)); SDValue MaskNode = N->getOperand(1); if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, RawUndefs)) { DecodeVPERMV3Mask(RawMask, RawUndefs, Mask); break; } return false; } default: llvm_unreachable("unknown target shuffle node"); } // Empty mask indicates the decode failed. if (Mask.empty()) return false; // Check if we're getting a shuffle mask with zero'd elements. if (!AllowSentinelZero) if (any_of(Mask, [](int M) { return M == SM_SentinelZero; })) return false; // If we have a fake unary shuffle, the shuffle mask is spread across two // inputs that are actually the same node. Re-map the mask to always point // into the first input. if (IsFakeUnary) for (int &M : Mask) if (M >= (int)Mask.size()) M -= Mask.size(); // If we didn't already add operands in the opcode-specific code, default to // adding 1 or 2 operands starting at 0. if (Ops.empty()) { Ops.push_back(N->getOperand(0)); if (!IsUnary || IsFakeUnary) Ops.push_back(N->getOperand(1)); } return true; } /// Check a target shuffle mask's inputs to see if we can set any values to /// SM_SentinelZero - this is for elements that are known to be zero /// (not just zeroable) from their inputs. /// Returns true if the target shuffle mask was decoded. static bool setTargetShuffleZeroElements(SDValue N, SmallVectorImpl &Mask, SmallVectorImpl &Ops) { bool IsUnary; if (!isTargetShuffle(N.getOpcode())) return false; MVT VT = N.getSimpleValueType(); if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary)) return false; SDValue V1 = Ops[0]; SDValue V2 = IsUnary ? V1 : Ops[1]; V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); assert((VT.getSizeInBits() % Mask.size()) == 0 && "Illegal split of shuffle value type"); unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size(); // Extract known constant input data. APInt UndefSrcElts[2]; SmallVector SrcEltBits[2]; bool IsSrcConstant[2] = { getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0], SrcEltBits[0], true, false), getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1], SrcEltBits[1], true, false)}; for (int i = 0, Size = Mask.size(); i < Size; ++i) { int M = Mask[i]; // Already decoded as SM_SentinelZero / SM_SentinelUndef. if (M < 0) continue; // Determine shuffle input and normalize the mask. 
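    // e.g. (illustrative) with Size == 4, M == 6 selects element 6 - 4 == 2
    // of V2 (SrcIdx == 1); that element is then checked against the operand's
    // known constant bits so it can be retagged SM_SentinelZero or
    // SM_SentinelUndef.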
unsigned SrcIdx = M / Size; SDValue V = M < Size ? V1 : V2; M %= Size; // We are referencing an UNDEF input. if (V.isUndef()) { Mask[i] = SM_SentinelUndef; continue; } // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF. // TODO: We currently only set UNDEF for integer types - floats use the same // registers as vectors and many of the scalar folded loads rely on the // SCALAR_TO_VECTOR pattern. if (V.getOpcode() == ISD::SCALAR_TO_VECTOR && (Size % V.getValueType().getVectorNumElements()) == 0) { int Scale = Size / V.getValueType().getVectorNumElements(); int Idx = M / Scale; if (Idx != 0 && !VT.isFloatingPoint()) Mask[i] = SM_SentinelUndef; else if (Idx == 0 && X86::isZeroNode(V.getOperand(0))) Mask[i] = SM_SentinelZero; continue; } // Attempt to extract from the source's constant bits. if (IsSrcConstant[SrcIdx]) { if (UndefSrcElts[SrcIdx][M]) Mask[i] = SM_SentinelUndef; else if (SrcEltBits[SrcIdx][M] == 0) Mask[i] = SM_SentinelZero; } } assert(VT.getVectorNumElements() == Mask.size() && "Different mask size from vector size!"); return true; } // Forward declaration (for getFauxShuffleMask recursive check). static bool resolveTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, SmallVectorImpl &Mask, const SelectionDAG &DAG); // Attempt to decode ops that could be represented as a shuffle mask. // The decoded shuffle mask may contain a different number of elements to the // destination value type. static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, SmallVectorImpl &Ops, const SelectionDAG &DAG) { Mask.clear(); Ops.clear(); MVT VT = N.getSimpleValueType(); unsigned NumElts = VT.getVectorNumElements(); unsigned NumSizeInBits = VT.getSizeInBits(); unsigned NumBitsPerElt = VT.getScalarSizeInBits(); assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 && "Expected byte aligned value types"); unsigned Opcode = N.getOpcode(); switch (Opcode) { case ISD::VECTOR_SHUFFLE: { // Don't treat ISD::VECTOR_SHUFFLE as a target shuffle so decode it here. ArrayRef ShuffleMask = cast(N)->getMask(); if (isUndefOrInRange(ShuffleMask, 0, 2 * NumElts)) { Mask.append(ShuffleMask.begin(), ShuffleMask.end()); Ops.push_back(N.getOperand(0)); Ops.push_back(N.getOperand(1)); return true; } return false; } case ISD::AND: case X86ISD::ANDNP: { // Attempt to decode as a per-byte mask. APInt UndefElts; SmallVector EltBits; SDValue N0 = N.getOperand(0); SDValue N1 = N.getOperand(1); bool IsAndN = (X86ISD::ANDNP == Opcode); uint64_t ZeroMask = IsAndN ? 255 : 0; if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits)) return false; for (int i = 0, e = (int)EltBits.size(); i != e; ++i) { if (UndefElts[i]) { Mask.push_back(SM_SentinelUndef); continue; } uint64_t ByteBits = EltBits[i].getZExtValue(); if (ByteBits != 0 && ByteBits != 255) return false; Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i); } Ops.push_back(IsAndN ? N1 : N0); return true; } case ISD::OR: { // Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other // is a valid shuffle index. 
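// Illustrative example (masks invented): with one input per side and
// MaskSize == 4, Mask0 == <0,Z,2,Z> and Mask1 == <Z,1,Z,3> merge into
// <0,5,2,7>, where indices >= 4 select from the second input list.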
SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0)); SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1)); if (!N0.getValueType().isVector() || !N1.getValueType().isVector()) return false; SmallVector SrcMask0, SrcMask1; SmallVector SrcInputs0, SrcInputs1; if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG) || !resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG)) return false; int MaskSize = std::max(SrcMask0.size(), SrcMask1.size()); SmallVector Mask0, Mask1; scaleShuffleMask(MaskSize / SrcMask0.size(), SrcMask0, Mask0); scaleShuffleMask(MaskSize / SrcMask1.size(), SrcMask1, Mask1); for (int i = 0; i != MaskSize; ++i) { if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef) Mask.push_back(SM_SentinelUndef); else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero) Mask.push_back(SM_SentinelZero); else if (Mask1[i] == SM_SentinelZero) Mask.push_back(Mask0[i]); else if (Mask0[i] == SM_SentinelZero) Mask.push_back(Mask1[i] + (MaskSize * SrcInputs0.size())); else return false; } for (SDValue &Op : SrcInputs0) Ops.push_back(Op); for (SDValue &Op : SrcInputs1) Ops.push_back(Op); return true; } case ISD::INSERT_SUBVECTOR: { // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(EXTRACT_SUBVECTOR(SRC1)) where // SRC0/SRC1 are both of the same valuetype VT. // TODO - add peekThroughOneUseBitcasts support. SDValue Src = N.getOperand(0); SDValue Sub = N.getOperand(1); EVT SubVT = Sub.getValueType(); unsigned NumSubElts = SubVT.getVectorNumElements(); if (!isa(N.getOperand(2)) || !N->isOnlyUserOf(Sub.getNode())) return false; SmallVector SubMask; SmallVector SubInputs; if (!resolveTargetShuffleInputs(Sub, SubInputs, SubMask, DAG) || SubMask.size() != NumSubElts) return false; Ops.push_back(Src); for (SDValue &SubInput : SubInputs) { if (SubInput.getOpcode() != ISD::EXTRACT_SUBVECTOR || SubInput.getOperand(0).getValueType() != VT || !isa(SubInput.getOperand(1))) return false; Ops.push_back(SubInput.getOperand(0)); } int InsertIdx = N.getConstantOperandVal(2); for (int i = 0; i != (int)NumElts; ++i) Mask.push_back(i); for (int i = 0; i != (int)NumSubElts; ++i) { int M = SubMask[i]; if (0 <= M) { int InputIdx = M / NumSubElts; int ExtractIdx = SubInputs[InputIdx].getConstantOperandVal(1); M = (NumElts * (1 + InputIdx)) + ExtractIdx + (M % NumSubElts); } Mask[i + InsertIdx] = M; } return true; } case ISD::SCALAR_TO_VECTOR: { // Match against a scalar_to_vector of an extract from a vector, // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar. 
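// Illustrative example: (v4i32 scalar_to_vector (PEXTRW v8i16 V, 2)) decodes
// below to the v8i16-level mask <2,Z,u,u,u,u,u,u>: one zero element for the
// implicit i16->i32 zext and undef for the remaining source elements.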
SDValue N0 = N.getOperand(0); SDValue SrcExtract; if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && N0.getOperand(0).getValueType() == VT) || (N0.getOpcode() == X86ISD::PEXTRW && N0.getOperand(0).getValueType() == MVT::v8i16) || (N0.getOpcode() == X86ISD::PEXTRB && N0.getOperand(0).getValueType() == MVT::v16i8)) { SrcExtract = N0; } if (!SrcExtract || !isa(SrcExtract.getOperand(1))) return false; SDValue SrcVec = SrcExtract.getOperand(0); EVT SrcVT = SrcVec.getValueType(); unsigned NumSrcElts = SrcVT.getVectorNumElements(); unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1; unsigned SrcIdx = SrcExtract.getConstantOperandVal(1); if (NumSrcElts <= SrcIdx) return false; Ops.push_back(SrcVec); Mask.push_back(SrcIdx); Mask.append(NumZeros, SM_SentinelZero); Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef); return true; } case X86ISD::PINSRB: case X86ISD::PINSRW: { SDValue InVec = N.getOperand(0); SDValue InScl = N.getOperand(1); SDValue InIndex = N.getOperand(2); if (!isa(InIndex) || cast(InIndex)->getAPIntValue().uge(NumElts)) return false; uint64_t InIdx = N.getConstantOperandVal(2); // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern. if (X86::isZeroNode(InScl)) { Ops.push_back(InVec); for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i); return true; } // Attempt to recognise a PINSR*(PEXTR*) shuffle pattern. // TODO: Expand this to support INSERT_VECTOR_ELT/etc. unsigned ExOp = (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW); if (InScl.getOpcode() != ExOp) return false; SDValue ExVec = InScl.getOperand(0); SDValue ExIndex = InScl.getOperand(1); if (!isa(ExIndex) || cast(ExIndex)->getAPIntValue().uge(NumElts)) return false; uint64_t ExIdx = InScl.getConstantOperandVal(1); Ops.push_back(InVec); Ops.push_back(ExVec); for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(i == InIdx ? NumElts + ExIdx : i); return true; } case X86ISD::PACKSS: case X86ISD::PACKUS: { SDValue N0 = N.getOperand(0); SDValue N1 = N.getOperand(1); assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) && N1.getValueType().getVectorNumElements() == (NumElts / 2) && "Unexpected input value type"); // If we know input saturation won't happen we can treat this // as a truncation shuffle. if (Opcode == X86ISD::PACKSS) { if ((!N0.isUndef() && DAG.ComputeNumSignBits(N0) <= NumBitsPerElt) || (!N1.isUndef() && DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)) return false; } else { APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt); if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask)) || (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask))) return false; } bool IsUnary = (N0 == N1); Ops.push_back(N0); if (!IsUnary) Ops.push_back(N1); createPackShuffleMask(VT, Mask, IsUnary); return true; } case X86ISD::VSHLI: case X86ISD::VSRLI: { uint64_t ShiftVal = N.getConstantOperandVal(1); // Out of range bit shifts are guaranteed to be zero. if (NumBitsPerElt <= ShiftVal) { Mask.append(NumElts, SM_SentinelZero); return true; } // We can only decode 'whole byte' bit shifts as shuffles. if ((ShiftVal % 8) != 0) break; uint64_t ByteShift = ShiftVal / 8; unsigned NumBytes = NumSizeInBits / 8; unsigned NumBytesPerElt = NumBitsPerElt / 8; Ops.push_back(N.getOperand(0)); // Clear mask to all zeros and insert the shifted byte indices. 
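// Illustrative example: for v2i64 VSHLI by 16 bits, ByteShift == 2 and each
// 8-byte lane gets the byte mask <Z,Z,0,1,2,3,4,5>; the VSRLI equivalent is
// <2,3,4,5,6,7,Z,Z>.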
Mask.append(NumBytes, SM_SentinelZero); if (X86ISD::VSHLI == Opcode) { for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt) for (unsigned j = ByteShift; j != NumBytesPerElt; ++j) Mask[i + j] = i + j - ByteShift; } else { for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt) for (unsigned j = ByteShift; j != NumBytesPerElt; ++j) Mask[i + j - ByteShift] = i + j; } return true; } case ISD::ZERO_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND: { // TODO - add support for VPMOVZX with smaller input vector types. SDValue Src = N.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); if (NumSizeInBits != SrcVT.getSizeInBits()) break; DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts, Mask); Ops.push_back(Src); return true; } } return false; } /// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly. static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, SmallVectorImpl &Mask) { int MaskWidth = Mask.size(); SmallVector UsedInputs; for (int i = 0, e = Inputs.size(); i < e; ++i) { int lo = UsedInputs.size() * MaskWidth; int hi = lo + MaskWidth; // Strip UNDEF input usage. if (Inputs[i].isUndef()) for (int &M : Mask) if ((lo <= M) && (M < hi)) M = SM_SentinelUndef; // Check for unused inputs. if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) { UsedInputs.push_back(Inputs[i]); continue; } for (int &M : Mask) if (lo <= M) M -= MaskWidth; } Inputs = UsedInputs; } /// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs /// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the /// remaining input indices in case we now have a unary shuffle and adjust the /// inputs accordingly. /// Returns true if the target shuffle mask was decoded. static bool resolveTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, SmallVectorImpl &Mask, const SelectionDAG &DAG) { if (!setTargetShuffleZeroElements(Op, Mask, Inputs)) if (!getFauxShuffleMask(Op, Mask, Inputs, DAG)) return false; resolveTargetShuffleInputsAndMask(Inputs, Mask); return true; } /// Returns the scalar element that will make up the ith /// element of the result of the vector shuffle. static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth) { if (Depth == 6) return SDValue(); // Limit search depth. SDValue V = SDValue(N, 0); EVT VT = V.getValueType(); unsigned Opcode = V.getOpcode(); // Recurse into ISD::VECTOR_SHUFFLE node to find scalars. if (const ShuffleVectorSDNode *SV = dyn_cast(N)) { int Elt = SV->getMaskElt(Index); if (Elt < 0) return DAG.getUNDEF(VT.getVectorElementType()); unsigned NumElems = VT.getVectorNumElements(); SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1); return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } // Recurse into target specific vector shuffles to find scalars. if (isTargetShuffle(Opcode)) { MVT ShufVT = V.getSimpleValueType(); MVT ShufSVT = ShufVT.getVectorElementType(); int NumElems = (int)ShufVT.getVectorNumElements(); SmallVector ShuffleMask; SmallVector ShuffleOps; bool IsUnary; if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary)) return SDValue(); int Elt = ShuffleMask[Index]; if (Elt == SM_SentinelZero) return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(N), ShufSVT) : DAG.getConstantFP(+0.0, SDLoc(N), ShufSVT); if (Elt == SM_SentinelUndef) return DAG.getUNDEF(ShufSVT); assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range"); SDValue NewV = (Elt < NumElems) ? 
ShuffleOps[0] : ShuffleOps[1]; return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } // Actual nodes that may contain scalar elements if (Opcode == ISD::BITCAST) { V = V.getOperand(0); EVT SrcVT = V.getValueType(); unsigned NumElems = VT.getVectorNumElements(); if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems) return SDValue(); } if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) return (Index == 0) ? V.getOperand(0) : DAG.getUNDEF(VT.getVectorElementType()); if (V.getOpcode() == ISD::BUILD_VECTOR) return V.getOperand(Index); return SDValue(); } // Use PINSRB/PINSRW/PINSRD to create a build vector. static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); unsigned NumElts = VT.getVectorNumElements(); assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) || ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && "Illegal vector insertion"); SDLoc dl(Op); SDValue V; bool First = true; for (unsigned i = 0; i < NumElts; ++i) { bool IsNonZero = (NonZeros & (1 << i)) != 0; if (!IsNonZero) continue; // If the build vector contains zeros or our first insertion is not the // first index then insert into zero vector to break any register // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL. if (First) { First = false; if (NumZero || 0 != i) V = getZeroVector(VT, Subtarget, DAG, dl); else { assert(0 == i && "Expected insertion into zero-index"); V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); V = DAG.getBitcast(VT, V); continue; } } V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V, Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); } return V; } /// Custom lower build_vector of v16i8. static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (NumNonZero > 8 && !Subtarget.hasSSE41()) return SDValue(); // SSE4.1 - use PINSRB to insert each byte directly. if (Subtarget.hasSSE41()) return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG, Subtarget); SDLoc dl(Op); SDValue V; bool First = true; // Pre-SSE4.1 - merge byte pairs and insert with PINSRW. for (unsigned i = 0; i < 16; ++i) { bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; if (ThisIsNonZero && First) { if (NumZero) V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); else V = DAG.getUNDEF(MVT::v8i16); First = false; } if ((i & 1) != 0) { // FIXME: Investigate extending to i32 instead of just i16. // FIXME: Investigate combining the first 4 bytes as a i32 instead. SDValue ThisElt, LastElt; bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0; if (LastIsNonZero) { LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1)); } if (ThisIsNonZero) { ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i)); ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt, DAG.getConstant(8, dl, MVT::i8)); if (LastIsNonZero) ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt); } else ThisElt = LastElt; if (ThisElt) { if (1 == i) { V = NumZero ? 
DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32) : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32); V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); V = DAG.getBitcast(MVT::v8i16, V); } else { V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt, DAG.getIntPtrConstant(i / 2, dl)); } } } } return DAG.getBitcast(MVT::v16i8, V); } /// Custom lower build_vector of v8i16. static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (NumNonZero > 4 && !Subtarget.hasSSE41()) return SDValue(); // Use PINSRW to insert each byte directly. return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG, Subtarget); } /// Custom lower build_vector of v4i32 or v4f32. static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // If this is a splat of a pair of elements, use MOVDDUP (unless the target // has XOP; in that case defer lowering to potentially use VPERMIL2PS). // Because we're creating a less complicated build vector here, we may enable // further folding of the MOVDDUP via shuffle transforms. if (Subtarget.hasSSE3() && !Subtarget.hasXOP() && Op.getOperand(0) == Op.getOperand(2) && Op.getOperand(1) == Op.getOperand(3) && Op.getOperand(0) != Op.getOperand(1)) { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); // Create a new build vector with the first 2 elements followed by undef // padding, bitcast to v2f64, duplicate, and bitcast back. SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops)); SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV); return DAG.getBitcast(VT, Dup); } // Find all zeroable elements. std::bitset<4> Zeroable; for (int i=0; i < 4; ++i) { SDValue Elt = Op->getOperand(i); Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt)); } assert(Zeroable.size() - Zeroable.count() > 1 && "We expect at least two non-zero elements!"); // We only know how to deal with build_vector nodes where elements are either // zeroable or extract_vector_elt with constant index. SDValue FirstNonZero; unsigned FirstNonZeroIdx; for (unsigned i=0; i < 4; ++i) { if (Zeroable[i]) continue; SDValue Elt = Op->getOperand(i); if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa(Elt.getOperand(1))) return SDValue(); // Make sure that this node is extracting from a 128-bit vector. MVT VT = Elt.getOperand(0).getSimpleValueType(); if (!VT.is128BitVector()) return SDValue(); if (!FirstNonZero.getNode()) { FirstNonZero = Elt; FirstNonZeroIdx = i; } } assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!"); SDValue V1 = FirstNonZero.getOperand(0); MVT VT = V1.getSimpleValueType(); // See if this build_vector can be lowered as a blend with zero. SDValue Elt; unsigned EltMaskIdx, EltIdx; int Mask[4]; for (EltIdx = 0; EltIdx < 4; ++EltIdx) { if (Zeroable[EltIdx]) { // The zero vector will be on the right hand side. Mask[EltIdx] = EltIdx+4; continue; } Elt = Op->getOperand(EltIdx); // By construction, Elt is a EXTRACT_VECTOR_ELT with constant index. EltMaskIdx = Elt.getConstantOperandVal(1); if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx) break; Mask[EltIdx] = EltIdx; } if (EltIdx == 4) { // Let the shuffle legalizer deal with blend operations. 
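// Illustrative example: had elements 1 and 3 been zeroable, Mask would now be
// <0,5,2,7>, i.e. a blend of V1 with the zero vector created below.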
SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op)); if (V1.getSimpleValueType() != VT) V1 = DAG.getBitcast(VT, V1); return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask); } // See if we can lower this build_vector to an INSERTPS. if (!Subtarget.hasSSE41()) return SDValue(); SDValue V2 = Elt.getOperand(0); if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx) V1 = SDValue(); bool CanFold = true; for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) { if (Zeroable[i]) continue; SDValue Current = Op->getOperand(i); SDValue SrcVector = Current->getOperand(0); if (!V1.getNode()) V1 = SrcVector; CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i); } if (!CanFold) return SDValue(); assert(V1.getNode() && "Expected at least two non-zero elements!"); if (V1.getSimpleValueType() != MVT::v4f32) V1 = DAG.getBitcast(MVT::v4f32, V1); if (V2.getSimpleValueType() != MVT::v4f32) V2 = DAG.getBitcast(MVT::v4f32, V2); // Ok, we can emit an INSERTPS instruction. unsigned ZMask = Zeroable.to_ulong(); unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask; assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); SDLoc DL(Op); SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, DAG.getIntPtrConstant(InsertPSMask, DL)); return DAG.getBitcast(VT, Result); } /// Return a vector logical shift node. static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &dl) { assert(VT.is128BitVector() && "Unknown type for VShift"); MVT ShVT = MVT::v16i8; unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getBitcast(ShVT, SrcOp); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, MVT::i8); return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); } static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, SelectionDAG &DAG) { // Check if the scalar load can be widened into a vector load. And if // the address is "base + cst" see if the cst can be "absorbed" into // the shuffle mask. if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) { SDValue Ptr = LD->getBasePtr(); if (!ISD::isNormalLoad(LD) || LD->isVolatile()) return SDValue(); EVT PVT = LD->getValueType(0); if (PVT != MVT::i32 && PVT != MVT::f32) return SDValue(); int FI = -1; int64_t Offset = 0; if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) { FI = FINode->getIndex(); Offset = 0; } else if (DAG.isBaseWithConstantOffset(Ptr) && isa<FrameIndexSDNode>(Ptr.getOperand(0))) { FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); Offset = Ptr.getConstantOperandVal(1); Ptr = Ptr.getOperand(0); } else { return SDValue(); } // FIXME: 256-bit vector instructions don't require a strict alignment, // improve this code to support it better. unsigned RequiredAlign = VT.getSizeInBits()/8; SDValue Chain = LD->getChain(); // Make sure the stack object alignment is at least 16 or 32. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) { if (MFI.isFixedObjectIndex(FI)) { // Can't change the alignment. FIXME: It's possible to compute // the exact stack offset and reference FI + adjusted offset instead, // if someone *really* cares about this. return SDValue(); } else { MFI.setObjectAlignment(FI, RequiredAlign); } } // (Offset % 16 or 32) must be a multiple of 4. The address is then // Ptr + (Offset & ~15).
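// Worked example (numbers invented): with RequiredAlign == 16 and Offset == 20,
// StartOffset == 16 below, the load is rebased to Ptr + 16, and the splat
// element index becomes (20 - 16) >> 2 == 1.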
if (Offset < 0) return SDValue(); if ((Offset % RequiredAlign) & 3) return SDValue(); int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1); if (StartOffset) { SDLoc DL(Ptr); Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getConstant(StartOffset, DL, Ptr.getValueType())); } int EltNo = (Offset - StartOffset) >> 2; unsigned NumElems = VT.getVectorNumElements(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems); SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(StartOffset)); SmallVector<int, 8> Mask(NumElems, EltNo); return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask); } return SDValue(); } /// Given the initializing elements 'Elts' of a vector of type 'VT', see if the /// elements can be replaced by a single large load which has the same value as /// a build_vector or insert_subvector whose loaded operands are 'Elts'. /// /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, bool isAfterLegalize) { unsigned NumElems = Elts.size(); int LastLoadedElt = -1; SmallBitVector LoadMask(NumElems, false); SmallBitVector ZeroMask(NumElems, false); SmallBitVector UndefMask(NumElems, false); // For each element in the initializer, see if we've found a load, zero or an // undef. for (unsigned i = 0; i < NumElems; ++i) { SDValue Elt = peekThroughBitcasts(Elts[i]); if (!Elt.getNode()) return SDValue(); if (Elt.isUndef()) UndefMask[i] = true; else if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode())) ZeroMask[i] = true; else if (ISD::isNON_EXTLoad(Elt.getNode())) { LoadMask[i] = true; LastLoadedElt = i; // Each loaded element must be the correct fractional portion of the // requested vector load. if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits()) return SDValue(); } else return SDValue(); } assert((ZeroMask | UndefMask | LoadMask).count() == NumElems && "Incomplete element masks"); // Handle Special Cases - all undef or undef/zero. if (UndefMask.count() == NumElems) return DAG.getUNDEF(VT); // FIXME: Should we return this as a BUILD_VECTOR instead? if ((ZeroMask | UndefMask).count() == NumElems) return VT.isInteger() ? DAG.getConstant(0, DL, VT) : DAG.getConstantFP(0.0, DL, VT); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); int FirstLoadedElt = LoadMask.find_first(); SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]); LoadSDNode *LDBase = cast<LoadSDNode>(EltBase); EVT LDBaseVT = EltBase.getValueType(); // Consecutive loads can contain UNDEFs but not ZERO elements. // Consecutive loads with UNDEF and ZERO elements require an additional // shuffle stage to clear the ZERO elements.
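// Illustrative example: a v4i32 built from <load a, zero, load a+8, load a+12>
// is consecutive-with-zeros; it can become one v4i32 load shuffled against a
// zero vector with ClearMask <0,5,2,3> (see below).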
bool IsConsecutiveLoad = true; bool IsConsecutiveLoadWithZeros = true; for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) { if (LoadMask[i]) { SDValue Elt = peekThroughBitcasts(Elts[i]); LoadSDNode *LD = cast(Elt); if (!DAG.areNonVolatileConsecutiveLoads( LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8, i - FirstLoadedElt)) { IsConsecutiveLoad = false; IsConsecutiveLoadWithZeros = false; break; } } else if (ZeroMask[i]) { IsConsecutiveLoad = false; } } SmallVector Loads; for (int i = FirstLoadedElt; i <= LastLoadedElt; ++i) if (LoadMask[i]) Loads.push_back(cast(peekThroughBitcasts(Elts[i]))); auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) { auto MMOFlags = LDBase->getMemOperand()->getFlags(); assert(!(MMOFlags & MachineMemOperand::MOVolatile) && "Cannot merge volatile loads."); SDValue NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags); for (auto *LD : Loads) DAG.makeEquivalentMemoryOrdering(LD, NewLd); return NewLd; }; // LOAD - all consecutive load/undefs (must start/end with a load). // If we have found an entire vector of loads and undefs, then return a large // load of the entire vector width starting at the base pointer. // If the vector contains zeros, then attempt to shuffle those elements. if (FirstLoadedElt == 0 && LastLoadedElt == (int)(NumElems - 1) && (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) { assert(LDBase && "Did not find base load for merging consecutive loads"); EVT EltVT = LDBase->getValueType(0); // Ensure that the input vector size for the merged loads matches the // cumulative size of the input elements. if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems) return SDValue(); if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT)) return SDValue(); // Don't create 256-bit non-temporal aligned loads without AVX2 as these // will lower to regular temporal loads and use the cache. if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 && VT.is256BitVector() && !Subtarget.hasInt256()) return SDValue(); if (IsConsecutiveLoad) return CreateLoad(VT, LDBase); // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded // vector and a zero vector to clear out the zero elements. if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) { SmallVector ClearMask(NumElems, -1); for (unsigned i = 0; i < NumElems; ++i) { if (ZeroMask[i]) ClearMask[i] = i + NumElems; else if (LoadMask[i]) ClearMask[i] = i; } SDValue V = CreateLoad(VT, LDBase); SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT) : DAG.getConstantFP(0.0, DL, VT); return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask); } } int LoadSize = (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits(); // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs. if (IsConsecutiveLoad && FirstLoadedElt == 0 && (LoadSize == 32 || LoadSize == 64) && ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) { MVT VecSVT = VT.isFloatingPoint() ? 
MVT::getFloatingPointVT(LoadSize) : MVT::getIntegerVT(LoadSize); MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize); if (TLI.isTypeLegal(VecVT)) { SDVTList Tys = DAG.getVTList(VecVT, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(), LDBase->getAlignment(), MachineMemOperand::MOLoad); for (auto *LD : Loads) DAG.makeEquivalentMemoryOrdering(LD, ResNode); return DAG.getBitcast(VT, ResNode); } } return SDValue(); } static Constant *getConstantVector(MVT VT, const APInt &SplatValue, unsigned SplatBitSize, LLVMContext &C) { unsigned ScalarSize = VT.getScalarSizeInBits(); unsigned NumElm = SplatBitSize / ScalarSize; SmallVector<Constant*, 32> ConstantVec; for (unsigned i = 0; i < NumElm; i++) { APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i); Constant *Const; if (VT.isFloatingPoint()) { if (ScalarSize == 32) { Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val)); } else { assert(ScalarSize == 64 && "Unsupported floating point scalar size"); Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val)); } } else Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val); ConstantVec.push_back(Const); } return ConstantVector::get(ArrayRef<Constant *>(ConstantVec)); } static bool isUseOfShuffle(SDNode *N) { for (auto *U : N->uses()) { if (isTargetShuffle(U->getOpcode())) return true; if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts return isUseOfShuffle(U); } return false; } // Check if the current build_vector node is a zero-extended vector. // // If so, return the extended value. // // For example: (0,0,0,a,0,0,0,a,0,0,0,a,0,0,0,a) returns a. // // NumElt - return the number of zero-extended identical values. // // EltType - return the type of the value including the zero extend. static SDValue isSplatZeroExtended(const BuildVectorSDNode *Op, unsigned &NumElt, MVT &EltType) { SDValue ExtValue = Op->getOperand(0); unsigned NumElts = Op->getNumOperands(); unsigned Delta = NumElts; for (unsigned i = 1; i < NumElts; i++) { if (Op->getOperand(i) == ExtValue) { Delta = i; break; } if (!(Op->getOperand(i).isUndef() || isNullConstant(Op->getOperand(i)))) return SDValue(); } if (!isPowerOf2_32(Delta) || Delta == 1) return SDValue(); for (unsigned i = Delta; i < NumElts; i++) { if (i % Delta == 0) { if (Op->getOperand(i) != ExtValue) return SDValue(); } else if (!(isNullConstant(Op->getOperand(i)) || Op->getOperand(i).isUndef())) return SDValue(); } unsigned EltSize = Op->getSimpleValueType(0).getScalarSizeInBits(); unsigned ExtVTSize = EltSize * Delta; EltType = MVT::getIntegerVT(ExtVTSize); NumElt = NumElts / Delta; return ExtValue; } /// Attempt to use the vbroadcast instruction to generate a splat value /// from a splat BUILD_VECTOR which uses: /// a. A single scalar load, or a constant. /// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>). /// /// The VBROADCAST node is returned when a pattern is found, /// or SDValue() otherwise. static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // VBROADCAST requires AVX. // TODO: Splats could be generated for non-AVX CPUs using SSE // instructions, but there's less potential gain for only 128-bit vectors.
if (!Subtarget.hasAVX()) return SDValue(); MVT VT = BVOp->getSimpleValueType(0); SDLoc dl(BVOp); assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && "Unsupported vector type for broadcast."); BitVector UndefElements; SDValue Ld = BVOp->getSplatValue(&UndefElements); // Attempt to use VBROADCASTM. // From this pattern: // a. t0 = (zext_i64 (bitcast_i8 v2i1 X)) // b. t1 = (build_vector t0 t0) // // Create (VBROADCASTM v2i1 X) if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) { MVT EltType = VT.getScalarType(); unsigned NumElts = VT.getVectorNumElements(); SDValue BOperand; SDValue ZeroExtended = isSplatZeroExtended(BVOp, NumElts, EltType); if ((ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) || (Ld && Ld.getOpcode() == ISD::ZERO_EXTEND && Ld.getOperand(0).getOpcode() == ISD::BITCAST)) { if (ZeroExtended) BOperand = ZeroExtended.getOperand(0); else BOperand = Ld.getOperand(0).getOperand(0); MVT MaskVT = BOperand.getSimpleValueType(); if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || // for broadcastmb2q (EltType == MVT::i32 && MaskVT == MVT::v16i1)) { // for broadcastmw2d SDValue Brdcst = DAG.getNode(X86ISD::VBROADCASTM, dl, MVT::getVectorVT(EltType, NumElts), BOperand); return DAG.getBitcast(VT, Brdcst); } } } unsigned NumElts = VT.getVectorNumElements(); unsigned NumUndefElts = UndefElements.count(); if (!Ld || (NumElts - NumUndefElts) <= 1) { APInt SplatValue, Undef; unsigned SplatBitSize; bool HasUndef; // Check if this is a repeated constant pattern suitable for broadcasting. if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) && SplatBitSize > VT.getScalarSizeInBits() && SplatBitSize < VT.getSizeInBits()) { // Avoid replacing with broadcast when it's used by a shuffle // instruction, to preserve the present custom lowering of shuffles. if (isUseOfShuffle(BVOp) || BVOp->hasOneUse()) return SDValue(); // Replace BUILD_VECTOR with a broadcast of the repeated constants. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); LLVMContext *Ctx = DAG.getContext(); MVT PVT = TLI.getPointerTy(DAG.getDataLayout()); if (Subtarget.hasAVX()) { if (SplatBitSize <= 64 && Subtarget.hasAVX2() && !(SplatBitSize == 64 && Subtarget.is32Bit())) { // Splatted value can fit in one INTEGER constant in constant pool. // Load the constant and broadcast it. MVT CVT = MVT::getIntegerVT(SplatBitSize); Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize); Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue); SDValue CP = DAG.getConstantPool(C, PVT); unsigned Repeat = VT.getSizeInBits() / SplatBitSize; unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); Ld = DAG.getLoad( CVT, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl, MVT::getVectorVT(CVT, Repeat), Ld); return DAG.getBitcast(VT, Brdcst); } else if (SplatBitSize == 32 || SplatBitSize == 64) { // Splatted value can fit in one FLOAT constant in constant pool. // Load the constant and broadcast it. // AVX has support for 32- and 64-bit broadcasts for floats only; // there is no 64-bit integer broadcast on a 32-bit subtarget. MVT CVT = MVT::getFloatingPointVT(SplatBitSize); // Lower the splat via APFloat directly, to avoid any conversion. Constant *C = SplatBitSize == 32 ?
ConstantFP::get(*Ctx, APFloat(APFloat::IEEEsingle(), SplatValue)) : ConstantFP::get(*Ctx, APFloat(APFloat::IEEEdouble(), SplatValue)); SDValue CP = DAG.getConstantPool(C, PVT); unsigned Repeat = VT.getSizeInBits() / SplatBitSize; unsigned Alignment = cast(CP)->getAlignment(); Ld = DAG.getLoad( CVT, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl, MVT::getVectorVT(CVT, Repeat), Ld); return DAG.getBitcast(VT, Brdcst); } else if (SplatBitSize > 64) { // Load the vector of constants and broadcast it. MVT CVT = VT.getScalarType(); Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize, *Ctx); SDValue VCP = DAG.getConstantPool(VecC, PVT); unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits(); unsigned Alignment = cast(VCP)->getAlignment(); Ld = DAG.getLoad( MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld); return DAG.getBitcast(VT, Brdcst); } } } // If we are moving a scalar into a vector (Ld must be set and all elements // but 1 are undef) and that operation is not obviously supported by // vmovd/vmovq/vmovss/vmovsd, then keep trying to form a broadcast. // That's better than general shuffling and may eliminate a load to GPR and // move from scalar to vector register. if (!Ld || NumElts - NumUndefElts != 1) return SDValue(); unsigned ScalarSize = Ld.getValueSizeInBits(); if (!(UndefElements[0] || (ScalarSize != 32 && ScalarSize != 64))) return SDValue(); } bool ConstSplatVal = (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP); // Make sure that all of the users of a non-constant load are from the // BUILD_VECTOR node. if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode())) return SDValue(); unsigned ScalarSize = Ld.getValueSizeInBits(); bool IsGE256 = (VT.getSizeInBits() >= 256); // When optimizing for size, generate up to 5 extra bytes for a broadcast // instruction to save 8 or more bytes of constant pool data. // TODO: If multiple splats are generated to load the same constant, // it may be detrimental to overall size. There needs to be a way to detect // that condition to know if this is truly a size win. bool OptForSize = DAG.getMachineFunction().getFunction().optForSize(); // Handle broadcasting a single constant scalar from the constant pool // into a vector. // On Sandybridge (no AVX2), it is still better to load a constant vector // from the constant pool and not to broadcast it from a scalar. // But override that restriction when optimizing for size. // TODO: Check if splatting is recommended for other AVX-capable CPUs. if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) { EVT CVT = Ld.getValueType(); assert(!CVT.isVector() && "Must not broadcast a vector type"); // Splat f32, i32, v4f64, v4i64 in all cases with AVX2. // For size optimization, also splat v2f64 and v2i64, and for size opt // with AVX2, also splat i8 and i16. // With pattern matching, the VBROADCAST node may become a VMOVDDUP. 
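// Illustrative example: a v2f64 constant splat compiled for size is emitted
// here as a VBROADCAST of an f64 constant-pool load, which the 128-bit
// patterns may later match as a single MOVDDUP.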
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) C = CI->getConstantIntValue(); else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld)) C = CF->getConstantFPValue(); assert(C && "Invalid constant type"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); Ld = DAG.getLoad( CVT, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } } bool IsLoad = ISD::isNormalLoad(Ld.getNode()); // Handle AVX2 in-register broadcasts. if (!IsLoad && Subtarget.hasInt256() && (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // The scalar source must be a normal load. if (!IsLoad) return SDValue(); if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || (Subtarget.hasVLX() && ScalarSize == 64)) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // The integer check is needed for the 64-bit into 128-bit case, so that it // doesn't match double, since there is no vbroadcastsd for xmm registers. if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) { if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } // Unsupported broadcast. return SDValue(); } /// For an EXTRACT_VECTOR_ELT with a constant index return the real /// underlying vector and index. /// /// Modifies \p ExtractedFromVec to the real vector and returns the real /// index. static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, SDValue ExtIdx) { int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue(); if (!isa<ShuffleVectorSDNode>(ExtractedFromVec)) return Idx; // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already // lowered this: // (extract_vector_elt (v8f32 %1), Constant<6>) // to: // (extract_vector_elt (vector_shuffle<2,u,u,u> // (extract_subvector (v8f32 %0), Constant<4>), // undef) // Constant<0>) // In this case the vector is the extract_subvector expression and the index // is 2, as specified by the shuffle. ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec); SDValue ShuffleVec = SVOp->getOperand(0); MVT ShuffleVecVT = ShuffleVec.getSimpleValueType(); assert(ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType().getVectorElementType()); int ShuffleIdx = SVOp->getMaskElt(Idx); if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) { ExtractedFromVec = ShuffleVec; return ShuffleIdx; } return Idx; } static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); // Skip if insert_vec_elt is not supported. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT)) return SDValue(); SDLoc DL(Op); unsigned NumElems = Op.getNumOperands(); SDValue VecIn1; SDValue VecIn2; SmallVector<unsigned, 4> InsertIndices; SmallVector<int, 8> Mask(NumElems, -1); for (unsigned i = 0; i != NumElems; ++i) { unsigned Opc = Op.getOperand(i).getOpcode(); if (Opc == ISD::UNDEF) continue; if (Opc != ISD::EXTRACT_VECTOR_ELT) { // Quit if more than 1 element needs inserting. if (InsertIndices.size() > 1) return SDValue(); InsertIndices.push_back(i); continue; } SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0); SDValue ExtIdx = Op.getOperand(i).getOperand(1); // Quit if non-constant index.
if (!isa(ExtIdx)) return SDValue(); int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx); // Quit if extracted from vector of different type. if (ExtractedFromVec.getValueType() != VT) return SDValue(); if (!VecIn1.getNode()) VecIn1 = ExtractedFromVec; else if (VecIn1 != ExtractedFromVec) { if (!VecIn2.getNode()) VecIn2 = ExtractedFromVec; else if (VecIn2 != ExtractedFromVec) // Quit if more than 2 vectors to shuffle return SDValue(); } if (ExtractedFromVec == VecIn1) Mask[i] = Idx; else if (ExtractedFromVec == VecIn2) Mask[i] = Idx + NumElems; } if (!VecIn1.getNode()) return SDValue(); VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask); for (unsigned Idx : InsertIndices) NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx), DAG.getIntPtrConstant(Idx, DL)); return NV; } static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) { assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"); uint64_t Immediate = 0; for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) { SDValue In = Op.getOperand(idx); if (!In.isUndef()) Immediate |= (cast(In)->getZExtValue() & 0x1) << idx; } SDLoc dl(Op); MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8)); return DAG.getConstant(Immediate, dl, VT); } // Lower BUILD_VECTOR operation for v8i1 and v16i1 types. static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); assert((VT.getVectorElementType() == MVT::i1) && "Unexpected type in LowerBUILD_VECTORvXi1!"); SDLoc dl(Op); if (ISD::isBuildVectorAllZeros(Op.getNode())) return Op; if (ISD::isBuildVectorAllOnes(Op.getNode())) return Op; if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { // Split the pieces. SDValue Lower = DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(0, 32)); SDValue Upper = DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32)); // We have to manually lower both halves so getNode doesn't try to // reassemble the build_vector. 
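// (A 32-bit target has no i64 register to hold a v64i1 bitmask, so each v32i1
// half is lowered to an i32 immediate below and the halves are concatenated.)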
Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget); Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget); return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper); } SDValue Imm = ConvertI1VectorToInteger(Op, DAG); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) return DAG.getBitcast(VT, Imm); SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, DAG.getIntPtrConstant(0, dl)); } // Vector has one or more non-const elements uint64_t Immediate = 0; SmallVector NonConstIdx; bool IsSplat = true; bool HasConstElts = false; int SplatIdx = -1; for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) { SDValue In = Op.getOperand(idx); if (In.isUndef()) continue; if (!isa(In)) NonConstIdx.push_back(idx); else { Immediate |= (cast(In)->getZExtValue() & 0x1) << idx; HasConstElts = true; } if (SplatIdx < 0) SplatIdx = idx; else if (In != Op.getOperand(SplatIdx)) IsSplat = false; } // for splat use " (select i1 splat_elt, all-ones, all-zeroes)" if (IsSplat) return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx), DAG.getConstant(1, dl, VT), DAG.getConstant(0, dl, VT)); // insert elements one by one SDValue DstVec; SDValue Imm; if (Immediate) { MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8)); Imm = DAG.getConstant(Immediate, dl, ImmVT); } else if (HasConstElts) Imm = DAG.getConstant(0, dl, VT); else Imm = DAG.getUNDEF(VT); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) DstVec = DAG.getBitcast(VT, Imm); else { SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, DAG.getIntPtrConstant(0, dl)); } for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) { unsigned InsertIdx = NonConstIdx[i]; DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, Op.getOperand(InsertIdx), DAG.getIntPtrConstant(InsertIdx, dl)); } return DstVec; } /// This is a helper function of LowerToHorizontalOp(). /// This function checks that the build_vector \p N in input implements a /// 128-bit partial horizontal operation on a 256-bit vector, but that operation /// may not match the layout of an x86 256-bit horizontal instruction. /// In other words, if this returns true, then some extraction/insertion will /// be required to produce a valid horizontal instruction. /// /// Parameter \p Opcode defines the kind of horizontal operation to match. /// For example, if \p Opcode is equal to ISD::ADD, then this function /// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode /// is equal to ISD::SUB, then this function checks if this is a horizontal /// arithmetic sub. /// /// This function only analyzes elements of \p N whose indices are /// in range [BaseIdx, LastIdx). /// /// TODO: This function was originally used to match both real and fake partial /// horizontal operations, but the index-matching logic is incorrect for that. /// See the corrected implementation in isHopBuildVector(). Can we reduce this /// code because it is only used for partial h-op matching now? 
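/// For example (indices invented), with \p Opcode == ISD::ADD and the range
/// [0, 4), operands (add (extractelt A, 0), (extractelt A, 1)) and
/// (add (extractelt A, 2), (extractelt A, 3)) match with V0 == A.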
static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode, SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1) { EVT VT = N->getValueType(0); assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops"); assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"); assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx && "Invalid Vector in input!"); bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD); bool CanFold = true; unsigned ExpectedVExtractIdx = BaseIdx; unsigned NumElts = LastIdx - BaseIdx; V0 = DAG.getUNDEF(VT); V1 = DAG.getUNDEF(VT); // Check if N implements a horizontal binop. for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) { SDValue Op = N->getOperand(i + BaseIdx); // Skip UNDEFs. if (Op->isUndef()) { // Update the expected vector extract index. if (i * 2 == NumElts) ExpectedVExtractIdx = BaseIdx; ExpectedVExtractIdx += 2; continue; } CanFold = Op->getOpcode() == Opcode && Op->hasOneUse(); if (!CanFold) break; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); // Try to match the following pattern: // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1)) CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0) == Op1.getOperand(0) && isa(Op0.getOperand(1)) && isa(Op1.getOperand(1))); if (!CanFold) break; unsigned I0 = cast(Op0.getOperand(1))->getZExtValue(); unsigned I1 = cast(Op1.getOperand(1))->getZExtValue(); if (i * 2 < NumElts) { if (V0.isUndef()) { V0 = Op0.getOperand(0); if (V0.getValueType() != VT) return false; } } else { if (V1.isUndef()) { V1 = Op0.getOperand(0); if (V1.getValueType() != VT) return false; } if (i * 2 == NumElts) ExpectedVExtractIdx = BaseIdx; } SDValue Expected = (i * 2 < NumElts) ? V0 : V1; if (I0 == ExpectedVExtractIdx) CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected; else if (IsCommutable && I1 == ExpectedVExtractIdx) { // Try to match the following dag sequence: // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I)) CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected; } else CanFold = false; ExpectedVExtractIdx += 2; } return CanFold; } /// Emit a sequence of two 128-bit horizontal add/sub followed by /// a concat_vector. /// /// This is a helper function of LowerToHorizontalOp(). /// This function expects two 256-bit vectors called V0 and V1. /// At first, each vector is split into two separate 128-bit vectors. /// Then, the resulting 128-bit vectors are used to implement two /// horizontal binary operations. /// /// The kind of horizontal binary operation is defined by \p X86Opcode. /// /// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to /// the two new horizontal binop. /// When Mode is set, the first horizontal binop dag node would take as input /// the lower 128-bit of V0 and the upper 128-bit of V0. The second /// horizontal binop dag node would take as input the lower 128-bit of V1 /// and the upper 128-bit of V1. /// Example: /// HADD V0_LO, V0_HI /// HADD V1_LO, V1_HI /// /// Otherwise, the first horizontal binop dag node takes as input the lower /// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop /// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1. /// Example: /// HADD V0_LO, V1_LO /// HADD V0_HI, V1_HI /// /// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower /// 128-bits of the result. 
/// If \p isUndefHI is set, then UNDEF is propagated to /// the upper 128-bits of the result. static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, const SDLoc &DL, SelectionDAG &DAG, unsigned X86Opcode, bool Mode, bool isUndefLO, bool isUndefHI) { MVT VT = V0.getSimpleValueType(); assert(VT.is256BitVector() && VT == V1.getSimpleValueType() && "Invalid nodes in input!"); unsigned NumElts = VT.getVectorNumElements(); SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL); SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL); SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL); SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL); MVT NewVT = V0_LO.getSimpleValueType(); SDValue LO = DAG.getUNDEF(NewVT); SDValue HI = DAG.getUNDEF(NewVT); if (Mode) { // Don't emit a horizontal binop if the result is expected to be UNDEF. if (!isUndefLO && !V0->isUndef()) LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); if (!isUndefHI && !V1->isUndef()) HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); } else { // Don't emit a horizontal binop if the result is expected to be UNDEF. if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef())) LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef())) HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI); } return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); } /// Returns true iff \p BV builds a vector with the result equivalent to /// the result of an ADDSUB/SUBADD operation. /// If true is returned, the operands of the ADDSUB = Opnd0 +- Opnd1 /// (SUBADD = Opnd0 -+ Opnd1) operation are written to the parameters /// \p Opnd0 and \p Opnd1. static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, unsigned &NumExtracts, bool &IsSubAdd) { MVT VT = BV->getSimpleValueType(0); if (!Subtarget.hasSSE3() || !VT.isFloatingPoint()) return false; unsigned NumElts = VT.getVectorNumElements(); SDValue InVec0 = DAG.getUNDEF(VT); SDValue InVec1 = DAG.getUNDEF(VT); NumExtracts = 0; // Odd-numbered elements in the input build vector are obtained from // adding/subtracting two integer/float elements. // Even-numbered elements in the input build vector are obtained from // subtracting/adding two integer/float elements. unsigned Opc[2] = {0, 0}; for (unsigned i = 0, e = NumElts; i != e; ++i) { SDValue Op = BV->getOperand(i); // Skip 'undef' values. unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::UNDEF) continue; // Early exit if we found an unexpected opcode. if (Opcode != ISD::FADD && Opcode != ISD::FSUB) return false; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); // Try to match the following pattern: // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i)) // Early exit if we cannot match that sequence. if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa<ConstantSDNode>(Op0.getOperand(1)) || !isa<ConstantSDNode>(Op1.getOperand(1)) || Op0.getOperand(1) != Op1.getOperand(1)) return false; unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue(); if (I0 != i) return false; // We found a valid add/sub node; make sure it's the same opcode as previous // elements for this parity. if (Opc[i % 2] != 0 && Opc[i % 2] != Opcode) return false; Opc[i % 2] = Opcode; // Update InVec0 and InVec1.
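// Illustrative example: the v4f32 build vector <(fsub a0,b0), (fadd a1,b1),
// (fsub a2,b2), (fadd a3,b3)> resolves here to Opc == {FSUB, FADD} with
// InVec0 == A and InVec1 == B, i.e. the ADDSUB(A, B) idiom.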
if (InVec0.isUndef()) { InVec0 = Op0.getOperand(0); if (InVec0.getSimpleValueType() != VT) return false; } if (InVec1.isUndef()) { InVec1 = Op1.getOperand(0); if (InVec1.getSimpleValueType() != VT) return false; } // Make sure that the operands of each add/sub node always // come from the same pair of vectors. if (InVec0 != Op0.getOperand(0)) { if (Opcode == ISD::FSUB) return false; // FADD is commutative. Try to commute the operands // and then test again. std::swap(Op0, Op1); if (InVec0 != Op0.getOperand(0)) return false; } if (InVec1 != Op1.getOperand(0)) return false; // Increment the number of extractions done. ++NumExtracts; } // Ensure we have found an opcode for both parities and that they are // different. Don't try to fold this build_vector into an ADDSUB/SUBADD if the // inputs are undef. if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] || InVec0.isUndef() || InVec1.isUndef()) return false; IsSubAdd = Opc[0] == ISD::FADD; Opnd0 = InVec0; Opnd1 = InVec1; return true; } /// Returns true if it is possible to fold MUL and an idiom that has already /// been recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into /// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the /// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2. /// /// Prior to calling this function it should be known that there is some /// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation /// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called /// before replacement of such SDNode with ADDSUB operation. Thus the number /// of \p Opnd0 uses is expected to be equal to 2. /// For example, this function may be called for the following IR: /// %AB = fmul fast <2 x double> %A, %B /// %Sub = fsub fast <2 x double> %AB, %C /// %Add = fadd fast <2 x double> %AB, %C /// %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add, /// <2 x i32> <i32 0, i32 3> /// There is a def for %Addsub here, which potentially can be replaced by /// X86ISD::ADDSUB operation: /// %Addsub = X86ISD::ADDSUB %AB, %C /// and such ADDSUB can further be replaced with FMADDSUB: /// %Addsub = FMADDSUB %A, %B, %C. /// /// The main reason why this method is called before the replacement of the /// recognized ADDSUB idiom with ADDSUB operation is that such replacement /// is sometimes illegal; e.g. 512-bit ADDSUB is not available, while 512-bit /// FMADDSUB is. static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2, unsigned ExpectedUses) { if (Opnd0.getOpcode() != ISD::FMUL || !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA()) return false; // FIXME: These checks must match the similar ones in // DAGCombiner::visitFADDForFMACombine. It would be good to have one // function that would answer if it is OK to fuse MUL + ADD to FMADD // or MUL + ADDSUB to FMADDSUB. const TargetOptions &Options = DAG.getTarget().Options; bool AllowFusion = (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); if (!AllowFusion) return false; Opnd2 = Opnd1; Opnd1 = Opnd0.getOperand(1); Opnd0 = Opnd0.getOperand(0); return true; } /// Try to fold a build_vector that performs an 'addsub', 'fmaddsub', or /// 'fmsubadd' operation into an X86ISD::ADDSUB, X86ISD::FMADDSUB, or /// X86ISD::FMSUBADD node.
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue Opnd0, Opnd1; unsigned NumExtracts; bool IsSubAdd; if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts, IsSubAdd)) return SDValue(); MVT VT = BV->getSimpleValueType(0); SDLoc DL(BV); // Try to generate X86ISD::FMADDSUB node here. SDValue Opnd2; if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) { unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2); } // We only support ADDSUB. if (IsSubAdd) return SDValue(); // Do not generate X86ISD::ADDSUB node for 512-bit types even though // the ADDSUB idiom has been successfully recognized. There are no known // X86 targets with 512-bit ADDSUB instructions! // 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom // recognition. if (VT.is512BitVector()) return SDValue(); return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); } static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG, unsigned &HOpcode, SDValue &V0, SDValue &V1) { // Initialize outputs to known values. MVT VT = BV->getSimpleValueType(0); HOpcode = ISD::DELETED_NODE; V0 = DAG.getUNDEF(VT); V1 = DAG.getUNDEF(VT); // x86 256-bit horizontal ops are defined in a non-obvious way. Each 128-bit // half of the result is calculated independently from the 128-bit halves of // the inputs, so that makes the index-checking logic below more complicated. unsigned NumElts = VT.getVectorNumElements(); unsigned GenericOpcode = ISD::DELETED_NODE; unsigned Num128BitChunks = VT.is256BitVector() ? 2 : 1; unsigned NumEltsIn128Bits = NumElts / Num128BitChunks; unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2; for (unsigned i = 0; i != Num128BitChunks; ++i) { for (unsigned j = 0; j != NumEltsIn128Bits; ++j) { // Ignore undef elements. SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j); if (Op.isUndef()) continue; // If there's an opcode mismatch, we're done. if (HOpcode != ISD::DELETED_NODE && Op.getOpcode() != GenericOpcode) return false; // Initialize horizontal opcode. if (HOpcode == ISD::DELETED_NODE) { GenericOpcode = Op.getOpcode(); switch (GenericOpcode) { case ISD::ADD: HOpcode = X86ISD::HADD; break; case ISD::SUB: HOpcode = X86ISD::HSUB; break; case ISD::FADD: HOpcode = X86ISD::FHADD; break; case ISD::FSUB: HOpcode = X86ISD::FHSUB; break; default: return false; } } SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Op0.getOperand(0) != Op1.getOperand(0) || !isa(Op0.getOperand(1)) || !isa(Op1.getOperand(1)) || !Op.hasOneUse()) return false; // The source vector is chosen based on which 64-bit half of the // destination vector is being calculated. if (j < NumEltsIn64Bits) { if (V0.isUndef()) V0 = Op0.getOperand(0); } else { if (V1.isUndef()) V1 = Op0.getOperand(0); } SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1; if (SourceVec != Op0.getOperand(0)) return false; // op (extract_vector_elt A, I), (extract_vector_elt A, I+1) unsigned ExtIndex0 = Op0.getConstantOperandVal(1); unsigned ExtIndex1 = Op1.getConstantOperandVal(1); unsigned ExpectedIndex = i * NumEltsIn128Bits + (j % NumEltsIn64Bits) * 2; if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1) continue; // If this is not a commutative op, this does not match. 
if (GenericOpcode != ISD::ADD && GenericOpcode != ISD::FADD) return false; // Addition is commutative, so try swapping the extract indexes. // op (extract_vector_elt A, I+1), (extract_vector_elt A, I) if (ExpectedIndex == ExtIndex1 && ExtIndex0 == ExtIndex1 + 1) continue; // Extract indexes do not match horizontal requirement. return false; } } // We matched. Opcode and operands are returned by reference as arguments. return true; } static SDValue getHopForBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG, unsigned HOpcode, SDValue V0, SDValue V1) { // If either input vector is not the same size as the build vector, // extract/insert the low bits to the correct size. // This is free (examples: zmm --> xmm, xmm --> ymm). MVT VT = BV->getSimpleValueType(0); unsigned Width = VT.getSizeInBits(); if (V0.getValueSizeInBits() > Width) V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), Width); else if (V0.getValueSizeInBits() < Width) V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, SDLoc(BV), Width); if (V1.getValueSizeInBits() > Width) V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), Width); else if (V1.getValueSizeInBits() < Width) V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width); return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1); } /// Lower BUILD_VECTOR to a horizontal add/sub operation if possible. static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // We need at least 2 non-undef elements to make this worthwhile by default. unsigned NumNonUndefs = 0; for (const SDValue &V : BV->op_values()) if (!V.isUndef()) ++NumNonUndefs; if (NumNonUndefs < 2) return SDValue(); // There are 4 sets of horizontal math operations distinguished by type: // int/FP at 128-bit/256-bit. Each type was introduced with a different // subtarget feature. Try to match those "native" patterns first. MVT VT = BV->getSimpleValueType(0); unsigned HOpcode; SDValue V0, V1; if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) if (isHopBuildVector(BV, DAG, HOpcode, V0, V1)) return getHopForBuildVector(BV, DAG, HOpcode, V0, V1); if ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) if (isHopBuildVector(BV, DAG, HOpcode, V0, V1)) return getHopForBuildVector(BV, DAG, HOpcode, V0, V1); if ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) if (isHopBuildVector(BV, DAG, HOpcode, V0, V1)) return getHopForBuildVector(BV, DAG, HOpcode, V0, V1); if ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2()) if (isHopBuildVector(BV, DAG, HOpcode, V0, V1)) return getHopForBuildVector(BV, DAG, HOpcode, V0, V1); // Try harder to match 256-bit ops by using extract/concat. if (!Subtarget.hasAVX() || !VT.is256BitVector()) return SDValue(); // Count the number of UNDEF operands in the build_vector in input. 
unsigned NumElts = VT.getVectorNumElements(); unsigned Half = NumElts / 2; unsigned NumUndefsLO = 0; unsigned NumUndefsHI = 0; for (unsigned i = 0, e = Half; i != e; ++i) if (BV->getOperand(i)->isUndef()) NumUndefsLO++; for (unsigned i = Half, e = NumElts; i != e; ++i) if (BV->getOperand(i)->isUndef()) NumUndefsHI++; SDLoc DL(BV); SDValue InVec0, InVec1; if (VT == MVT::v8i32 || VT == MVT::v16i16) { SDValue InVec2, InVec3; unsigned X86Opcode; bool CanFold = true; if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) && isHorizontalBinOpPart(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) && ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) && ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3)) X86Opcode = X86ISD::HADD; else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) && isHorizontalBinOpPart(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) && ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) && ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3)) X86Opcode = X86ISD::HSUB; else CanFold = false; if (CanFold) { // Do not try to expand this build_vector into a pair of horizontal // add/sub if we can emit a pair of scalar add/sub. if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) return SDValue(); // Convert this build_vector into a pair of horizontal binops followed by // a concat vector. We must adjust the outputs from the partial horizontal // matching calls above to account for undefined vector halves. SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0; SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1; assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?"); bool isUndefLO = NumUndefsLO == Half; bool isUndefHI = NumUndefsHI == Half; return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO, isUndefHI); } } if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || VT == MVT::v16i16) { unsigned X86Opcode; if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HADD; else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HSUB; else if (isHorizontalBinOpPart(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::FHADD; else if (isHorizontalBinOpPart(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::FHSUB; else return SDValue(); // Don't try to expand this build_vector into a pair of horizontal add/sub // if we can simply emit a pair of scalar add/sub. if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) return SDValue(); // Convert this build_vector into two horizontal add/sub followed by // a concat vector. bool isUndefLO = NumUndefsLO == Half; bool isUndefHI = NumUndefsHI == Half; return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true, isUndefLO, isUndefHI); } return SDValue(); } /// If a BUILD_VECTOR's source elements all apply the same bit operation and /// one of their operands is constant, lower to a pair of BUILD_VECTOR and /// just apply the bit to the vectors. /// NOTE: Its not in our interest to start make a general purpose vectorizer /// from this, but enough scalar bit operations are created from the later /// legalization + scalarization stages to need basic support. 
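/// For example (added illustration, not from the original source):
///   (build_vector (xor a, 1), (xor b, 2), (xor c, 3), (xor d, 4))
/// becomes
///   (xor (build_vector a, b, c, d), (build_vector 1, 2, 3, 4))
/// so that a single vector XOR replaces four scalar ones.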
static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  MVT VT = Op->getSimpleValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Check that all elements have the same opcode.
  // TODO: Should we allow UNDEFS and if so how many?
  unsigned Opcode = Op->getOperand(0).getOpcode();
  for (unsigned i = 1; i < NumElems; ++i)
    if (Opcode != Op->getOperand(i).getOpcode())
      return SDValue();

  // TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
  switch (Opcode) {
  default:
    return SDValue();
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR:
    // Don't do this if the buildvector is a splat - we'd replace one
    // constant with an entire vector.
    if (Op->getSplatValue())
      return SDValue();
    if (!TLI.isOperationLegalOrPromote(Opcode, VT))
      return SDValue();
    break;
  }

  SmallVector<SDValue, 4> LHSElts, RHSElts;
  for (SDValue Elt : Op->ops()) {
    SDValue LHS = Elt.getOperand(0);
    SDValue RHS = Elt.getOperand(1);

    // We expect the canonicalized RHS operand to be the constant.
    if (!isa<ConstantSDNode>(RHS))
      return SDValue();
    LHSElts.push_back(LHS);
    RHSElts.push_back(RHS);
  }

  SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
  SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
  return DAG.getNode(Opcode, DL, VT, LHS, RHS);
}

/// Create a vector constant without a load. SSE/AVX provide the bare minimum
/// functionality to do this, so it's all zeros, all ones, or some derivation
/// that is cheap to calculate.
static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  // Vectors containing all zeros can be matched by pxor and xorps.
  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
    // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
    // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
    if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
      return Op;

    return getZeroVector(VT, Subtarget, DAG, DL);
  }

  // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
  // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
  // vpcmpeqd on 256-bit vectors.
  if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
    if (VT == MVT::v4i32 || VT == MVT::v16i32 ||
        (VT == MVT::v8i32 && Subtarget.hasInt256()))
      return Op;

    return getOnesVector(VT, DAG, DL);
  }

  return SDValue();
}

/// Look for opportunities to create a VPERMV/VPERMILPV/PSHUFB variable permute
/// from a vector of source values and a vector of extraction indices.
/// The vectors might be manipulated to match the type of the permute op.
static SDValue createVariablePermute(MVT VT, SDValue SrcVec,
                                     SDValue IndicesVec, SDLoc &DL,
                                     SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  MVT ShuffleVT = VT;
  EVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned SizeInBits = VT.getSizeInBits();

  // Adjust IndicesVec to match VT size.
  assert(IndicesVec.getValueType().getVectorNumElements() >= NumElts &&
         "Illegal variable permute mask size");
  if (IndicesVec.getValueType().getVectorNumElements() > NumElts)
    IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
                                  NumElts * VT.getScalarSizeInBits());
  IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);

  // Handle a SrcVec whose size doesn't match VT.
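  // Added summary (for clarity, not from the original source): a SrcVec wider
  // than VT is handled by recursing at the wider type and extracting the low
  // VT-sized subvector of the permuted result; a narrower SrcVec is widened
  // with undef elements up to VT's size before continuing.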
if (SrcVec.getValueSizeInBits() != SizeInBits) { if ((SrcVec.getValueSizeInBits() % SizeInBits) == 0) { // Handle larger SrcVec by treating it as a larger permute. unsigned Scale = SrcVec.getValueSizeInBits() / SizeInBits; VT = MVT::getVectorVT(VT.getScalarType(), Scale * NumElts); IndicesVT = EVT(VT).changeVectorElementTypeToInteger(); IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false, Subtarget, DAG, SDLoc(IndicesVec)); return extractSubVector( createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0, DAG, DL, SizeInBits); } else if (SrcVec.getValueSizeInBits() < SizeInBits) { // Widen smaller SrcVec to match VT. SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec)); } else return SDValue(); } auto ScaleIndices = [&DAG](SDValue Idx, uint64_t Scale) { assert(isPowerOf2_64(Scale) && "Illegal variable permute shuffle scale"); EVT SrcVT = Idx.getValueType(); unsigned NumDstBits = SrcVT.getScalarSizeInBits() / Scale; uint64_t IndexScale = 0; uint64_t IndexOffset = 0; // If we're scaling a smaller permute op, then we need to repeat the // indices, scaling and offsetting them as well. // e.g. v4i32 -> v16i8 (Scale = 4) // IndexScale = v4i32 Splat(4 << 24 | 4 << 16 | 4 << 8 | 4) // IndexOffset = v4i32 Splat(3 << 24 | 2 << 16 | 1 << 8 | 0) for (uint64_t i = 0; i != Scale; ++i) { IndexScale |= Scale << (i * NumDstBits); IndexOffset |= i << (i * NumDstBits); } Idx = DAG.getNode(ISD::MUL, SDLoc(Idx), SrcVT, Idx, DAG.getConstant(IndexScale, SDLoc(Idx), SrcVT)); Idx = DAG.getNode(ISD::ADD, SDLoc(Idx), SrcVT, Idx, DAG.getConstant(IndexOffset, SDLoc(Idx), SrcVT)); return Idx; }; unsigned Opcode = 0; switch (VT.SimpleTy) { default: break; case MVT::v16i8: if (Subtarget.hasSSSE3()) Opcode = X86ISD::PSHUFB; break; case MVT::v8i16: if (Subtarget.hasVLX() && Subtarget.hasBWI()) Opcode = X86ISD::VPERMV; else if (Subtarget.hasSSSE3()) { Opcode = X86ISD::PSHUFB; ShuffleVT = MVT::v16i8; } break; case MVT::v4f32: case MVT::v4i32: if (Subtarget.hasAVX()) { Opcode = X86ISD::VPERMILPV; ShuffleVT = MVT::v4f32; } else if (Subtarget.hasSSSE3()) { Opcode = X86ISD::PSHUFB; ShuffleVT = MVT::v16i8; } break; case MVT::v2f64: case MVT::v2i64: if (Subtarget.hasAVX()) { // VPERMILPD selects using bit#1 of the index vector, so scale IndicesVec. IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec); Opcode = X86ISD::VPERMILPV; ShuffleVT = MVT::v2f64; } else if (Subtarget.hasSSE41()) { // SSE41 can compare v2i64 - select between indices 0 and 1. 
return DAG.getSelectCC( DL, IndicesVec, getZeroVector(IndicesVT.getSimpleVT(), Subtarget, DAG, DL), DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {0, 0}), DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {1, 1}), ISD::CondCode::SETEQ); } break; case MVT::v32i8: if (Subtarget.hasVLX() && Subtarget.hasVBMI()) Opcode = X86ISD::VPERMV; else if (Subtarget.hasXOP()) { SDValue LoSrc = extract128BitVector(SrcVec, 0, DAG, DL); SDValue HiSrc = extract128BitVector(SrcVec, 16, DAG, DL); SDValue LoIdx = extract128BitVector(IndicesVec, 0, DAG, DL); SDValue HiIdx = extract128BitVector(IndicesVec, 16, DAG, DL); return DAG.getNode( ISD::CONCAT_VECTORS, DL, VT, DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, LoIdx), DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, HiIdx)); } else if (Subtarget.hasAVX()) { SDValue Lo = extract128BitVector(SrcVec, 0, DAG, DL); SDValue Hi = extract128BitVector(SrcVec, 16, DAG, DL); SDValue LoLo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Lo); SDValue HiHi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Hi, Hi); auto PSHUFBBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef Ops) { // Permute Lo and Hi and then select based on index range. // This works as SHUFB uses bits[3:0] to permute elements and we don't // care about the bit[7] as its just an index vector. SDValue Idx = Ops[2]; EVT VT = Idx.getValueType(); return DAG.getSelectCC(DL, Idx, DAG.getConstant(15, DL, VT), DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[1], Idx), DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[0], Idx), ISD::CondCode::SETGT); }; SDValue Ops[] = {LoLo, HiHi, IndicesVec}; return SplitOpsAndApply(DAG, Subtarget, DL, MVT::v32i8, Ops, PSHUFBBuilder); } break; case MVT::v16i16: if (Subtarget.hasVLX() && Subtarget.hasBWI()) Opcode = X86ISD::VPERMV; else if (Subtarget.hasAVX()) { // Scale to v32i8 and perform as v32i8. IndicesVec = ScaleIndices(IndicesVec, 2); return DAG.getBitcast( VT, createVariablePermute( MVT::v32i8, DAG.getBitcast(MVT::v32i8, SrcVec), DAG.getBitcast(MVT::v32i8, IndicesVec), DL, DAG, Subtarget)); } break; case MVT::v8f32: case MVT::v8i32: if (Subtarget.hasAVX2()) Opcode = X86ISD::VPERMV; else if (Subtarget.hasAVX()) { SrcVec = DAG.getBitcast(MVT::v8f32, SrcVec); SDValue LoLo = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec, {0, 1, 2, 3, 0, 1, 2, 3}); SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec, {4, 5, 6, 7, 4, 5, 6, 7}); if (Subtarget.hasXOP()) return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi, IndicesVec, DAG.getConstant(0, DL, MVT::i8))); // Permute Lo and Hi and then select based on index range. // This works as VPERMILPS only uses index bits[0:1] to permute elements. 
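      // Added note (illustrative): e.g. index 5 has bits[1:0] == 1, so both
      // VPERMILPV nodes pick lane-relative element 1 (source element 1 for
      // LoLo, source element 5 for HiHi); the SETGT select below then keeps
      // the HiHi result because 5 > 3.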
SDValue Res = DAG.getSelectCC( DL, IndicesVec, DAG.getConstant(3, DL, MVT::v8i32), DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, HiHi, IndicesVec), DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, LoLo, IndicesVec), ISD::CondCode::SETGT); return DAG.getBitcast(VT, Res); } break; case MVT::v4i64: case MVT::v4f64: if (Subtarget.hasAVX512()) { if (!Subtarget.hasVLX()) { MVT WidenSrcVT = MVT::getVectorVT(VT.getScalarType(), 8); SrcVec = widenSubVector(WidenSrcVT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec)); IndicesVec = widenSubVector(MVT::v8i64, IndicesVec, false, Subtarget, DAG, SDLoc(IndicesVec)); SDValue Res = createVariablePermute(WidenSrcVT, SrcVec, IndicesVec, DL, DAG, Subtarget); return extract256BitVector(Res, 0, DAG, DL); } Opcode = X86ISD::VPERMV; } else if (Subtarget.hasAVX()) { SrcVec = DAG.getBitcast(MVT::v4f64, SrcVec); SDValue LoLo = DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {0, 1, 0, 1}); SDValue HiHi = DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {2, 3, 2, 3}); // VPERMIL2PD selects with bit#1 of the index vector, so scale IndicesVec. IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec); if (Subtarget.hasXOP()) return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi, IndicesVec, DAG.getConstant(0, DL, MVT::i8))); // Permute Lo and Hi and then select based on index range. // This works as VPERMILPD only uses index bit[1] to permute elements. SDValue Res = DAG.getSelectCC( DL, IndicesVec, DAG.getConstant(2, DL, MVT::v4i64), DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, HiHi, IndicesVec), DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, LoLo, IndicesVec), ISD::CondCode::SETGT); return DAG.getBitcast(VT, Res); } break; case MVT::v64i8: if (Subtarget.hasVBMI()) Opcode = X86ISD::VPERMV; break; case MVT::v32i16: if (Subtarget.hasBWI()) Opcode = X86ISD::VPERMV; break; case MVT::v16f32: case MVT::v16i32: case MVT::v8f64: case MVT::v8i64: if (Subtarget.hasAVX512()) Opcode = X86ISD::VPERMV; break; } if (!Opcode) return SDValue(); assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) && (VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 && "Illegal variable permute shuffle type"); uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits(); if (Scale > 1) IndicesVec = ScaleIndices(IndicesVec, Scale); EVT ShuffleIdxVT = EVT(ShuffleVT).changeVectorElementTypeToInteger(); IndicesVec = DAG.getBitcast(ShuffleIdxVT, IndicesVec); SrcVec = DAG.getBitcast(ShuffleVT, SrcVec); SDValue Res = Opcode == X86ISD::VPERMV ? DAG.getNode(Opcode, DL, ShuffleVT, IndicesVec, SrcVec) : DAG.getNode(Opcode, DL, ShuffleVT, SrcVec, IndicesVec); return DAG.getBitcast(VT, Res); } // Tries to lower a BUILD_VECTOR composed of extract-extract chains that can be // reasoned to be a permutation of a vector by indices in a non-constant vector. // (build_vector (extract_elt V, (extract_elt I, 0)), // (extract_elt V, (extract_elt I, 1)), // ... // -> // (vpermv I, V) // // TODO: Handle undefs // TODO: Utilize pshufb and zero mask blending to support more efficient // construction of vectors with constant-0 elements. static SDValue LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue SrcVec, IndicesVec; // Check for a match of the permute source vector and permute index elements. // This is done by checking that the i-th build_vector operand is of the form: // (extract_elt SrcVec, (extract_elt IndicesVec, i)). 
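// Added example (illustrative, not from the original source): with
// SrcVec = %v : v4i32 and IndicesVec = %i : v4i32,
//   (build_vector (extract %v, (extract %i, 0)), ...,
//                 (extract %v, (extract %i, 3)))
// lowers to a single variable permute of %v by %i (e.g. VPERMILPV on AVX)
// instead of four scalar extract/insert round trips.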
for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) { SDValue Op = V.getOperand(Idx); if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); // If this is the first extract encountered in V, set the source vector, // otherwise verify the extract is from the previously defined source // vector. if (!SrcVec) SrcVec = Op.getOperand(0); else if (SrcVec != Op.getOperand(0)) return SDValue(); SDValue ExtractedIndex = Op->getOperand(1); // Peek through extends. if (ExtractedIndex.getOpcode() == ISD::ZERO_EXTEND || ExtractedIndex.getOpcode() == ISD::SIGN_EXTEND) ExtractedIndex = ExtractedIndex.getOperand(0); if (ExtractedIndex.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); // If this is the first extract from the index vector candidate, set the // indices vector, otherwise verify the extract is from the previously // defined indices vector. if (!IndicesVec) IndicesVec = ExtractedIndex.getOperand(0); else if (IndicesVec != ExtractedIndex.getOperand(0)) return SDValue(); auto *PermIdx = dyn_cast(ExtractedIndex.getOperand(1)); if (!PermIdx || PermIdx->getZExtValue() != Idx) return SDValue(); } SDLoc DL(V); MVT VT = V.getSimpleValueType(); return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); } SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); unsigned NumElems = Op.getNumOperands(); // Generate vectors for predicate vectors. if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget); if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget)) return VectorConstant; BuildVectorSDNode *BV = cast(Op.getNode()); if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG)) return AddSub; if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG)) return HorizontalOp; if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG)) return Broadcast; if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG)) return BitOp; unsigned EVTBits = EltVT.getSizeInBits(); unsigned NumZero = 0; unsigned NumNonZero = 0; uint64_t NonZeros = 0; bool IsAllConstants = true; SmallSet Values; unsigned NumConstants = NumElems; for (unsigned i = 0; i < NumElems; ++i) { SDValue Elt = Op.getOperand(i); if (Elt.isUndef()) continue; Values.insert(Elt); if (!isa(Elt) && !isa(Elt)) { IsAllConstants = false; NumConstants--; } if (X86::isZeroNode(Elt)) NumZero++; else { assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range. NonZeros |= ((uint64_t)1 << i); NumNonZero++; } } // All undef vector. Return an UNDEF. All zero vectors were handled above. if (NumNonZero == 0) return DAG.getUNDEF(VT); // If we are inserting one variable into a vector of non-zero constants, try // to avoid loading each constant element as a scalar. Load the constants as a // vector and then insert the variable scalar element. If insertion is not // supported, fall back to a shuffle to get the scalar blended with the // constants. Insertion into a zero vector is handled as a special-case // somewhere below here. if (NumConstants == NumElems - 1 && NumNonZero != 1 && (isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) || isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) { // Create an all-constant vector. The variable element in the old // build vector is replaced by undef in the constant vector. Save the // variable scalar element and its index for use in the insertelement. 
    LLVMContext &Context = *DAG.getContext();
    Type *EltType = Op.getValueType().getScalarType().getTypeForEVT(Context);
    SmallVector<Constant *, 16> ConstVecOps(NumElems,
                                            UndefValue::get(EltType));
    SDValue VarElt;
    SDValue InsIndex;
    for (unsigned i = 0; i != NumElems; ++i) {
      SDValue Elt = Op.getOperand(i);
      if (auto *C = dyn_cast<ConstantSDNode>(Elt))
        ConstVecOps[i] = ConstantInt::get(Context, C->getAPIntValue());
      else if (auto *C = dyn_cast<ConstantFPSDNode>(Elt))
        ConstVecOps[i] = ConstantFP::get(Context, C->getValueAPF());
      else if (!Elt.isUndef()) {
        assert(!VarElt.getNode() && !InsIndex.getNode() &&
               "Expected one variable element in this vector");
        VarElt = Elt;
        InsIndex = DAG.getConstant(i, dl, getVectorIdxTy(DAG.getDataLayout()));
      }
    }
    Constant *CV = ConstantVector::get(ConstVecOps);
    SDValue DAGConstVec = DAG.getConstantPool(CV, VT);

    // The constants we just created may not be legal (e.g., floating point).
    // We must lower the vector right here because we can not guarantee that
    // we'll legalize it before loading it. This is also why we could not just
    // create a new build vector here. If the build vector contains illegal
    // constants, it could get split back up into a series of insert elements.
    // TODO: Improve this by using shorter loads with broadcast/VZEXT_LOAD.
    SDValue LegalDAGConstVec = LowerConstantPool(DAGConstVec, DAG);
    MachineFunction &MF = DAG.getMachineFunction();
    MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
    SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);
    unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue();
    unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits();
    if (InsertC < NumEltsInLow128Bits)
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);

    // There's no good way to insert into the high elements of a >128-bit
    // vector, so use shuffles to avoid an extract/insert sequence.
    assert(VT.getSizeInBits() > 128 && "Invalid insertion index?");
    assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector");
    SmallVector<int, 8> ShuffleMask;
    unsigned NumElts = VT.getVectorNumElements();
    for (unsigned i = 0; i != NumElts; ++i)
      ShuffleMask.push_back(i == InsertC ? NumElts : i);
    SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt);
    return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask);
  }

  // Special case for single non-zero, non-undef, element.
  if (NumNonZero == 1) {
    unsigned Idx = countTrailingZeros(NonZeros);
    SDValue Item = Op.getOperand(Idx);

    // If we have a constant or non-constant insertion into the low element of
    // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
    // the rest of the elements. This will be matched as movd/movq/movss/movsd
    // depending on what the source datatype is.
    if (Idx == 0) {
      if (NumZero == 0)
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);

      if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
          (EltVT == MVT::i64 && Subtarget.is64Bit())) {
        assert((VT.is128BitVector() || VT.is256BitVector() ||
                VT.is512BitVector()) &&
               "Expected an SSE value type!");
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
        // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
        return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
      }

      // We can't directly insert an i8 or i16 into a vector, so zero extend
      // it to i32 first.
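      // Added note (illustrative): e.g. a single non-zero i8 element in a
      // v16i8 is zero-extended to i32, moved into lane 0 of a v4i32 with the
      // remaining lanes zeroed, and the result is bitcast back to v16i8.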
if (EltVT == MVT::i16 || EltVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); if (VT.getSizeInBits() >= 256) { MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32); if (Subtarget.hasAVX()) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } else { // Without AVX, we need to extend to a 128-bit vector and then // insert into the 256-bit vector. Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); SDValue ZeroVec = getZeroVector(ShufVT, Subtarget, DAG, dl); Item = insert128BitVector(ZeroVec, Item, 0, DAG, dl); } } else { assert(VT.is128BitVector() && "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getBitcast(VT, Item); } } // Is it a vector logical left shift? if (NumElems == 2 && Idx == 1 && X86::isZeroNode(Op.getOperand(0)) && !X86::isZeroNode(Op.getOperand(1))) { unsigned NumBits = VT.getSizeInBits(); return getVShift(true, VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(1)), NumBits/2, DAG, *this, dl); } if (IsAllConstants) // Otherwise, it's better to do a constpool load. return SDValue(); // Otherwise, if this is a vector with i32 or f32 elements, and the element // is a non-constant being inserted into an element other than the low one, // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka // movd/movss) to move this into the low element, then shuffle it into // place. if (EVTBits == 32) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG); } } // Splat is obviously ok. Let legalizer expand it to a shuffle. if (Values.size() == 1) { if (EVTBits == 32) { // Instead of a shuffle like this: // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> // Check if it's possible to issue this instead. // shuffle (vload ptr)), undef, <1, 1, 1, 1> unsigned Idx = countTrailingZeros(NonZeros); SDValue Item = Op.getOperand(Idx); if (Op.getNode()->isOnlyUserOf(Item.getNode())) return LowerAsSplatVectorLoad(Item, VT, dl, DAG); } return SDValue(); } // A vector full of immediates; various special cases are already // handled, so this is best done with a single constant-pool load. if (IsAllConstants) return SDValue(); if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget)) return V; // See if we can use a vector load to get all of the elements. { SmallVector Ops(Op->op_begin(), Op->op_begin() + NumElems); if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) return LD; } // If this is a splat of pairs of 32-bit elements, we can use a narrower // build_vector and broadcast it. // TODO: We could probably generalize this more. if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) { SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef Ops) { // Make sure all the even/odd operands match. for (unsigned i = 2; i != NumElems; ++i) if (Ops[i % 2] != Op.getOperand(i)) return false; return true; }; if (CanSplat(Op, NumElems, Ops)) { MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64; MVT NarrowVT = MVT::getVectorVT(EltVT, 4); // Create a new build vector and cast to v2i64/v2f64. 
SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2), DAG.getBuildVector(NarrowVT, dl, Ops)); // Broadcast from v2i64/v2f64 and cast to final VT. MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems/2); return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, NewBV)); } } // For AVX-length vectors, build the individual 128-bit pieces and use // shuffles to put them in place. if (VT.getSizeInBits() > 128) { MVT HVT = MVT::getVectorVT(EltVT, NumElems/2); // Build both the lower and upper subvector. SDValue Lower = DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2)); SDValue Upper = DAG.getBuildVector( HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2)); // Recreate the wider vector with the lower and upper part. return concatSubVectors(Lower, Upper, VT, NumElems, DAG, dl, VT.getSizeInBits() / 2); } // Let legalizer expand 2-wide build_vectors. if (EVTBits == 64) { if (NumNonZero == 1) { // One half is zero or undef. unsigned Idx = countTrailingZeros(NonZeros); SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(Idx)); return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); } return SDValue(); } // If element VT is < 32 bits, convert it to inserts into a zero vector. if (EVTBits == 8 && NumElems == 16) if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG, Subtarget)) return V; if (EVTBits == 16 && NumElems == 8) if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG, Subtarget)) return V; // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS if (EVTBits == 32 && NumElems == 4) if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget)) return V; // If element VT is == 32 bits, turn it into a number of shuffles. if (NumElems == 4 && NumZero > 0) { SmallVector Ops(NumElems); for (unsigned i = 0; i < 4; ++i) { bool isZero = !(NonZeros & (1ULL << i)); if (isZero) Ops[i] = getZeroVector(VT, Subtarget, DAG, dl); else Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); } for (unsigned i = 0; i < 2; ++i) { switch ((NonZeros >> (i*2)) & 0x3) { default: llvm_unreachable("Unexpected NonZero count"); case 0: Ops[i] = Ops[i*2]; // Must be a zero vector. break; case 1: Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]); break; case 2: Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]); break; case 3: Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]); break; } } bool Reverse1 = (NonZeros & 0x3) == 2; bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2; int MaskVec[] = { Reverse1 ? 1 : 0, Reverse1 ? 0 : 1, static_cast(Reverse2 ? NumElems+1 : NumElems), static_cast(Reverse2 ? NumElems : NumElems+1) }; return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec); } assert(Values.size() > 1 && "Expected non-undef and non-splat vector"); // Check for a build vector from mostly shuffle plus few inserting. if (SDValue Sh = buildFromShuffleMostly(Op, DAG)) return Sh; // For SSE 4.1, use insertps to put the high elements into the low element. 
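  // Added example (illustrative): v4f32 {a, b, c, d} becomes
  // (scalar_to_vector a) followed by three INSERT_VECTOR_ELT nodes for b, c
  // and d, each of which SSE4.1 can match as a single insertps.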
if (Subtarget.hasSSE41()) { SDValue Result; if (!Op.getOperand(0).isUndef()) Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); else Result = DAG.getUNDEF(VT); for (unsigned i = 1; i < NumElems; ++i) { if (Op.getOperand(i).isUndef()) continue; Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result, Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); } return Result; } // Otherwise, expand into a number of unpckl*, start by extending each of // our (non-undef) elements to the full vector width with the element in the // bottom slot of the vector (which generates no code for SSE). SmallVector Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { if (!Op.getOperand(i).isUndef()) Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); else Ops[i] = DAG.getUNDEF(VT); } // Next, we iteratively mix elements, e.g. for v4f32: // Step 1: unpcklps 0, 1 ==> X: // : unpcklps 2, 3 ==> Y: // Step 2: unpcklpd X, Y ==> <3, 2, 1, 0> for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) { // Generate scaled UNPCKL shuffle mask. SmallVector Mask; for(unsigned i = 0; i != Scale; ++i) Mask.push_back(i); for (unsigned i = 0; i != Scale; ++i) Mask.push_back(NumElems+i); Mask.append(NumElems - Mask.size(), SM_SentinelUndef); for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i) Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask); } return Ops[0]; } // 256-bit AVX can use the vinsertf128 instruction // to create 256-bit vectors from two other 128-bit ones. // TODO: Detect subvector broadcast here instead of DAG combine? static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); assert((ResVT.is256BitVector() || ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide"); unsigned NumOperands = Op.getNumOperands(); unsigned NumZero = 0; unsigned NumNonZero = 0; unsigned NonZeros = 0; for (unsigned i = 0; i != NumOperands; ++i) { SDValue SubVec = Op.getOperand(i); if (SubVec.isUndef()) continue; if (ISD::isBuildVectorAllZeros(SubVec.getNode())) ++NumZero; else { assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range. NonZeros |= 1 << i; ++NumNonZero; } } // If we have more than 2 non-zeros, build each half separately. if (NumNonZero > 2) { MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), ResVT.getVectorNumElements()/2); ArrayRef Ops = Op->ops(); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops.slice(0, NumOperands/2)); SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops.slice(NumOperands/2)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } // Otherwise, build it up through insert_subvectors. SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl) : DAG.getUNDEF(ResVT); MVT SubVT = Op.getOperand(0).getSimpleValueType(); unsigned NumSubElems = SubVT.getVectorNumElements(); for (unsigned i = 0; i != NumOperands; ++i) { if ((NonZeros & (1 << i)) == 0) continue; Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(i), DAG.getIntPtrConstant(i * NumSubElems, dl)); } return Vec; } // Return true if all the operands of the given CONCAT_VECTORS node are zeros // except for the first one. 
(CONCAT_VECTORS Op, 0, 0,...,0) static bool isExpandWithZeros(const SDValue &Op) { assert(Op.getOpcode() == ISD::CONCAT_VECTORS && "Expand with zeros only possible in CONCAT_VECTORS nodes!"); for (unsigned i = 1; i < Op.getNumOperands(); i++) if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode())) return false; return true; } // Returns true if the given node is a type promotion (by concatenating i1 // zeros) of the result of a node that already zeros all upper bits of // k-register. static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) { unsigned Opc = Op.getOpcode(); assert(Opc == ISD::CONCAT_VECTORS && Op.getSimpleValueType().getVectorElementType() == MVT::i1 && "Unexpected node to check for type promotion!"); // As long as we are concatenating zeros to the upper part of a previous node // result, climb up the tree until a node with different opcode is // encountered while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) { if (Opc == ISD::INSERT_SUBVECTOR) { if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) && Op.getConstantOperandVal(2) == 0) Op = Op.getOperand(1); else return SDValue(); } else { // Opc == ISD::CONCAT_VECTORS if (isExpandWithZeros(Op)) Op = Op.getOperand(0); else return SDValue(); } Opc = Op.getOpcode(); } // Check if the first inserted node zeroes the upper bits, or an 'and' result // of a node that zeros the upper bits (its masked version). if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) || (Op.getOpcode() == ISD::AND && (isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) || isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) { return Op; } return SDValue(); } // TODO: Merge this with LowerAVXCONCAT_VECTORS? static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG & DAG) { SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); unsigned NumOperands = Op.getNumOperands(); assert(NumOperands > 1 && isPowerOf2_32(NumOperands) && "Unexpected number of operands in CONCAT_VECTORS"); // If this node promotes - by concatenating zeroes - the type of the result // of a node with instruction that zeroes all upper (irrelevant) bits of the // output register, mark it as legal and catch the pattern in instruction // selection to avoid emitting extra instructions (for zeroing upper bits). if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) return widenSubVector(ResVT, Promoted, true, Subtarget, DAG, dl); unsigned NumZero = 0; unsigned NumNonZero = 0; uint64_t NonZeros = 0; for (unsigned i = 0; i != NumOperands; ++i) { SDValue SubVec = Op.getOperand(i); if (SubVec.isUndef()) continue; if (ISD::isBuildVectorAllZeros(SubVec.getNode())) ++NumZero; else { assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range. NonZeros |= (uint64_t)1 << i; ++NumNonZero; } } // If there are zero or one non-zeros we can handle this very simply. if (NumNonZero <= 1) { SDValue Vec = NumZero ? 
getZeroVector(ResVT, Subtarget, DAG, dl) : DAG.getUNDEF(ResVT); if (!NumNonZero) return Vec; unsigned Idx = countTrailingZeros(NonZeros); SDValue SubVec = Op.getOperand(Idx); unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements(); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec, DAG.getIntPtrConstant(Idx * SubVecNumElts, dl)); } if (NumOperands > 2) { MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), ResVT.getVectorNumElements()/2); ArrayRef Ops = Op->ops(); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops.slice(0, NumOperands/2)); SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops.slice(NumOperands/2)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } assert(NumNonZero == 2 && "Simple cases not handled?"); if (ResVT.getVectorNumElements() >= 16) return Op; // The operation is legal with KUNPCK SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, DAG.getUNDEF(ResVT), Op.getOperand(0), DAG.getIntPtrConstant(0, dl)); unsigned NumElems = ResVT.getVectorNumElements(); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1), DAG.getIntPtrConstant(NumElems/2, dl)); } static SDValue LowerCONCAT_VECTORS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); if (VT.getVectorElementType() == MVT::i1) return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG); assert((VT.is256BitVector() && Op.getNumOperands() == 2) || (VT.is512BitVector() && (Op.getNumOperands() == 2 || Op.getNumOperands() == 4))); // AVX can use the vinsertf128 instruction to create 256-bit vectors // from two other 128-bit ones. // 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors return LowerAVXCONCAT_VECTORS(Op, DAG, Subtarget); } //===----------------------------------------------------------------------===// // Vector shuffle lowering // // This is an experimental code path for lowering vector shuffles on x86. It is // designed to handle arbitrary vector shuffles and blends, gracefully // degrading performance as necessary. It works hard to recognize idiomatic // shuffles and lower them to optimal instruction patterns without leaving // a framework that allows reasonably efficient handling of all vector shuffle // patterns. //===----------------------------------------------------------------------===// /// Tiny helper function to identify a no-op mask. /// /// This is a somewhat boring predicate function. It checks whether the mask /// array input, which is assumed to be a single-input shuffle mask of the kind /// used by the X86 shuffle instructions (not a fully general /// ShuffleVectorSDNode mask) requires any shuffles to occur. Both undef and an /// in-place shuffle are 'no-op's. static bool isNoopShuffleMask(ArrayRef Mask) { for (int i = 0, Size = Mask.size(); i < Size; ++i) { assert(Mask[i] >= -1 && "Out of bound mask element!"); if (Mask[i] >= 0 && Mask[i] != i) return false; } return true; } /// Test whether there are elements crossing 128-bit lanes in this /// shuffle mask. /// /// X86 divides up its shuffles into in-lane and cross-lane shuffle operations /// and we routinely test for these. static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef Mask) { int LaneSize = 128 / VT.getScalarSizeInBits(); int Size = Mask.size(); for (int i = 0; i < Size; ++i) if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize) return true; return false; } /// Test whether a shuffle mask is equivalent within each sub-lane. 
///
/// This checks a shuffle mask to see if it is performing the same
/// lane-relative shuffle in each sub-lane. This trivially implies
/// that it is also not lane-crossing. It may however involve a blend from the
/// same lane of a second vector.
///
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it
/// is non-trivial to compute in the face of undef lanes. The representation
/// is suitable for use with existing 128-bit shuffles as entries from the
/// second vector have been remapped to [LaneSize, 2*LaneSize).
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                  ArrayRef<int> Mask,
                                  SmallVectorImpl<int> &RepeatedMask) {
  auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
  RepeatedMask.assign(LaneSize, -1);
  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    assert(Mask[i] == SM_SentinelUndef || Mask[i] >= 0);
    if (Mask[i] < 0)
      continue;
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      // This entry crosses lanes, so there is no way to model this shuffle.
      return false;

    // Ok, handle the in-lane shuffles by detecting if and when they repeat.
    // Adjust second vector indices to start at LaneSize instead of Size.
    int LocalM = Mask[i] < Size ? Mask[i] % LaneSize
                                : Mask[i] % LaneSize + LaneSize;
    if (RepeatedMask[i % LaneSize] < 0)
      // This is the first non-undef entry in this slot of a 128-bit lane.
      RepeatedMask[i % LaneSize] = LocalM;
    else if (RepeatedMask[i % LaneSize] != LocalM)
      // Found a mismatch with the repeated mask.
      return false;
  }
  return true;
}

/// Test whether a shuffle mask is equivalent within each 128-bit lane.
static bool
is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &RepeatedMask) {
  return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}

static bool is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
  SmallVector<int, 32> RepeatedMask;
  return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}

/// Test whether a shuffle mask is equivalent within each 256-bit lane.
static bool
is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &RepeatedMask) {
  return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask);
}

/// Test whether a target shuffle mask is equivalent within each sub-lane.
/// Unlike isRepeatedShuffleMask we must respect SM_SentinelZero.
static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                        ArrayRef<int> Mask,
                                        SmallVectorImpl<int> &RepeatedMask) {
  int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
  RepeatedMask.assign(LaneSize, SM_SentinelUndef);
  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    assert(isUndefOrZero(Mask[i]) || (Mask[i] >= 0));
    if (Mask[i] == SM_SentinelUndef)
      continue;
    if (Mask[i] == SM_SentinelZero) {
      if (!isUndefOrZero(RepeatedMask[i % LaneSize]))
        return false;
      RepeatedMask[i % LaneSize] = SM_SentinelZero;
      continue;
    }
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      // This entry crosses lanes, so there is no way to model this shuffle.
      return false;

    // Ok, handle the in-lane shuffles by detecting if and when they repeat.
    // Adjust second vector indices to start at LaneSize instead of Size.
    int LocalM = Mask[i] < Size ? Mask[i] % LaneSize
                                : Mask[i] % LaneSize + LaneSize;
    if (RepeatedMask[i % LaneSize] == SM_SentinelUndef)
      // This is the first non-undef entry in this slot of a 128-bit lane.
      RepeatedMask[i % LaneSize] = LocalM;
    else if (RepeatedMask[i % LaneSize] != LocalM)
      // Found a mismatch with the repeated mask.
      return false;
  }
  return true;
}

/// Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
///
/// This is a fast way to test a shuffle mask against a fixed pattern:
///
///   if (isShuffleEquivalent(Mask, 3, 2, {1, 0})) { ... }
///
/// It returns true if the mask is exactly as wide as the argument list, and
/// each element of the mask is either -1 (signifying undef) or the value given
/// in the argument.
static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
                                ArrayRef<int> ExpectedMask) {
  if (Mask.size() != ExpectedMask.size())
    return false;

  int Size = Mask.size();

  // If the values are build vectors, we can look through them to find
  // equivalent inputs that make the shuffles equivalent.
  auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
  auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);

  for (int i = 0; i < Size; ++i) {
    assert(Mask[i] >= -1 && "Out of bound mask element!");
    if (Mask[i] >= 0 && Mask[i] != ExpectedMask[i]) {
      auto *MaskBV = Mask[i] < Size ? BV1 : BV2;
      auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2;
      if (!MaskBV || !ExpectedBV ||
          MaskBV->getOperand(Mask[i] % Size) !=
              ExpectedBV->getOperand(ExpectedMask[i] % Size))
        return false;
    }
  }

  return true;
}

/// Checks whether a target shuffle mask is equivalent to an explicit pattern.
///
/// The masks must be exactly the same width.
///
/// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding
/// value in ExpectedMask is always accepted. Otherwise the indices must match.
///
/// SM_SentinelZero is accepted as a valid negative index but must match in
/// both.
static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
                                      ArrayRef<int> ExpectedMask) {
  int Size = Mask.size();
  if (Size != (int)ExpectedMask.size())
    return false;

  for (int i = 0; i < Size; ++i)
    if (Mask[i] == SM_SentinelUndef)
      continue;
    else if (Mask[i] < 0 && Mask[i] != SM_SentinelZero)
      return false;
    else if (Mask[i] != ExpectedMask[i])
      return false;

  return true;
}

// Merges a general DAG shuffle mask and zeroable bit mask into a target
// shuffle mask.
static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
                                                    const APInt &Zeroable) {
  int NumElts = Mask.size();
  assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes");

  SmallVector<int, 64> TargetMask(NumElts, SM_SentinelUndef);
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M == SM_SentinelUndef)
      continue;
    assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index");
    TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M);
  }
  return TargetMask;
}

// Attempt to create a shuffle mask from a VSELECT condition mask.
static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
                                         SDValue Cond) {
  if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
    return false;

  unsigned Size = Cond.getValueType().getVectorNumElements();
  Mask.resize(Size, SM_SentinelUndef);

  for (int i = 0; i != (int)Size; ++i) {
    SDValue CondElt = Cond.getOperand(i);
    Mask[i] = i;
    // Arbitrarily choose from the 2nd operand if the select condition element
    // is undef.
    // TODO: Can we do better by matching patterns such as even/odd?
    if (CondElt.isUndef() || isNullConstant(CondElt))
      Mask[i] += Size;
  }

  return true;
}

// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
// instructions.
static bool isUnpackWdShuffleMask(ArrayRef Mask, MVT VT) { if (VT != MVT::v8i32 && VT != MVT::v8f32) return false; SmallVector Unpcklwd; createUnpackShuffleMask(MVT::v8i16, Unpcklwd, /* Lo = */ true, /* Unary = */ false); SmallVector Unpckhwd; createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false, /* Unary = */ false); bool IsUnpackwdMask = (isTargetShuffleEquivalent(Mask, Unpcklwd) || isTargetShuffleEquivalent(Mask, Unpckhwd)); return IsUnpackwdMask; } /// Get a 4-lane 8-bit shuffle immediate for a mask. /// /// This helper function produces an 8-bit shuffle immediate corresponding to /// the ubiquitous shuffle encoding scheme used in x86 instructions for /// shuffling 4 lanes. It can be used with most of the PSHUF instructions for /// example. /// /// NB: We rely heavily on "undef" masks preserving the input lane. static unsigned getV4X86ShuffleImm(ArrayRef Mask) { assert(Mask.size() == 4 && "Only 4-lane shuffle masks"); assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!"); assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!"); assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!"); assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!"); unsigned Imm = 0; Imm |= (Mask[0] < 0 ? 0 : Mask[0]) << 0; Imm |= (Mask[1] < 0 ? 1 : Mask[1]) << 2; Imm |= (Mask[2] < 0 ? 2 : Mask[2]) << 4; Imm |= (Mask[3] < 0 ? 3 : Mask[3]) << 6; return Imm; } static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, const SDLoc &DL, SelectionDAG &DAG) { return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8); } /// Compute whether each element of a shuffle is zeroable. /// /// A "zeroable" vector shuffle element is one which can be lowered to zero. /// Either it is an undef element in the shuffle mask, the element of the input /// referenced is undef, or the element of the input referenced is known to be /// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle /// as many lanes with this technique as possible to simplify the remaining /// shuffle. static APInt computeZeroableShuffleElements(ArrayRef Mask, SDValue V1, SDValue V2) { APInt Zeroable(Mask.size(), 0); V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); int VectorSizeInBits = V1.getValueSizeInBits(); int ScalarSizeInBits = VectorSizeInBits / Mask.size(); assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size"); for (int i = 0, Size = Mask.size(); i < Size; ++i) { int M = Mask[i]; // Handle the easy cases. if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { Zeroable.setBit(i); continue; } // Determine shuffle input and normalize the mask. SDValue V = M < Size ? V1 : V2; M %= Size; // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements. if (V.getOpcode() != ISD::BUILD_VECTOR) continue; // If the BUILD_VECTOR has fewer elements then the bitcasted portion of // the (larger) source element must be UNDEF/ZERO. 
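    // Added example (illustrative, not from the original source): a v2i64
    // build_vector inspected through a v4i32 shuffle mask has Scale == 2;
    // mask element M selects 32-bit piece (M % 2) of 64-bit operand (M / 2),
    // which the constant handling below isolates with a shift and truncate.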
    if ((Size % V.getNumOperands()) == 0) {
      int Scale = Size / V->getNumOperands();
      SDValue Op = V.getOperand(M / Scale);
      if (Op.isUndef() || X86::isZeroNode(Op))
        Zeroable.setBit(i);
      else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
        APInt Val = Cst->getAPIntValue();
        Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
        Val = Val.getLoBits(ScalarSizeInBits);
        if (Val == 0)
          Zeroable.setBit(i);
      } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
        APInt Val = Cst->getValueAPF().bitcastToAPInt();
        Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
        Val = Val.getLoBits(ScalarSizeInBits);
        if (Val == 0)
          Zeroable.setBit(i);
      }
      continue;
    }

    // If the BUILD_VECTOR has more elements, then all the (smaller) source
    // elements must be UNDEF or ZERO.
    if ((V.getNumOperands() % Size) == 0) {
      int Scale = V->getNumOperands() / Size;
      bool AllZeroable = true;
      for (int j = 0; j < Scale; ++j) {
        SDValue Op = V.getOperand((M * Scale) + j);
        AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
      }
      if (AllZeroable)
        Zeroable.setBit(i);
      continue;
    }
  }

  return Zeroable;
}

// The shuffle result is as follows:
//   0*a[0] 0*a[1] ... 0*a[n], n >= 0, where the a[] elements appear in
//   ascending order.
// Each element of Zeroable corresponds to a particular element of Mask, as
// described in the computeZeroableShuffleElements function.
//
// The function looks for a sub-mask whose nonzero elements are in increasing
// order. If such a sub-mask exists, the function returns true.
static bool isNonZeroElementsInOrder(const APInt &Zeroable,
                                     ArrayRef<int> Mask,
                                     const EVT &VectorType,
                                     bool &IsZeroSideLeft) {
  int NextElement = -1;
  // Check if the Mask's nonzero elements are in increasing order.
  for (int i = 0, e = Mask.size(); i < e; i++) {
    // Check that the mask's zero elements are built from only zeros.
    assert(Mask[i] >= -1 && "Out of bound mask element!");
    if (Mask[i] < 0)
      return false;
    if (Zeroable[i])
      continue;
    // Find the lowest non-zero element.
    if (NextElement < 0) {
      NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
      IsZeroSideLeft = NextElement != 0;
    }
    // Exit if the mask's non-zero elements are not in increasing order.
    if (NextElement != Mask[i])
      return false;
    NextElement++;
  }
  return true;
}

/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
                                            ArrayRef<int> Mask, SDValue V1,
                                            SDValue V2, const APInt &Zeroable,
                                            const X86Subtarget &Subtarget,
                                            SelectionDAG &DAG) {
  int Size = Mask.size();
  int LaneSize = 128 / VT.getScalarSizeInBits();
  const int NumBytes = VT.getSizeInBits() / 8;
  const int NumEltBytes = VT.getScalarSizeInBits() / 8;

  assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
         (Subtarget.hasAVX2() && VT.is256BitVector()) ||
         (Subtarget.hasBWI() && VT.is512BitVector()));

  SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
  // Sign bit set in i8 mask means zero element.
  SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);

  SDValue V;
  for (int i = 0; i < NumBytes; ++i) {
    int M = Mask[i / NumEltBytes];
    if (M < 0) {
      PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
      continue;
    }
    if (Zeroable[i / NumEltBytes]) {
      PSHUFBMask[i] = ZeroMask;
      continue;
    }

    // We can only use a single input of V1 or V2.
    SDValue SrcV = (M >= Size ? V2 : V1);
    if (V && V != SrcV)
      return SDValue();
    V = SrcV;
    M %= Size;

    // PSHUFB can't cross lanes, ensure this doesn't happen.
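    // (Added note, illustrative: each PSHUFB output byte may only source a
    // byte from its own 128-bit lane, so e.g. output byte 3 reading input
    // byte 17 is rejected by the lane check below.)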
    if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
      return SDValue();
    M = M % LaneSize;
    M = M * NumEltBytes + (i % NumEltBytes);
    PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
  }
  assert(V && "Failed to find a source input");

  MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
  return DAG.getBitcast(
      VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
                      DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
}

static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
                           const X86Subtarget &Subtarget, SelectionDAG &DAG,
                           const SDLoc &dl);

// X86 has a dedicated shuffle that can be lowered to VEXPAND.
static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
                                          const APInt &Zeroable,
                                          ArrayRef<int> Mask, SDValue &V1,
                                          SDValue &V2, SelectionDAG &DAG,
                                          const X86Subtarget &Subtarget) {
  bool IsLeftZeroSide = true;
  if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
                                IsLeftZeroSide))
    return SDValue();
  unsigned VEXPANDMask = (~Zeroable).getZExtValue();
  MVT IntegerType =
      MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
  SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
  unsigned NumElts = VT.getVectorNumElements();
  assert((NumElts == 4 || NumElts == 8 || NumElts == 16) &&
         "Unexpected number of vector elements");
  SDValue VMask = getMaskNode(MaskNode, MVT::getVectorVT(MVT::i1, NumElts),
                              Subtarget, DAG, DL);
  SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
  SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
  return DAG.getSelect(DL, VT, VMask,
                       DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector),
                       ZeroVector);
}

static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
                                        unsigned &UnpackOpcode, bool IsUnary,
                                        ArrayRef<int> TargetMask,
                                        const SDLoc &DL, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  int NumElts = VT.getVectorNumElements();

  bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
  for (int i = 0; i != NumElts; i += 2) {
    int M1 = TargetMask[i + 0];
    int M2 = TargetMask[i + 1];
    Undef1 &= (SM_SentinelUndef == M1);
    Undef2 &= (SM_SentinelUndef == M2);
    Zero1 &= isUndefOrZero(M1);
    Zero2 &= isUndefOrZero(M2);
  }
  assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
         "Zeroable shuffle detected");

  // Attempt to match the target mask against the unpack lo/hi mask patterns.
  // Inline element count below is an assumption of this reconstruction.
  SmallVector<int, 64> Unpckl, Unpckh;
  createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
  if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
    UnpackOpcode = X86ISD::UNPCKL;
    V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
    V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
    return true;
  }

  createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
  if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
    UnpackOpcode = X86ISD::UNPCKH;
    V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
    V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
    return true;
  }

  // If a unary shuffle, attempt to match as an unpack lo/hi with zero.
  if (IsUnary && (Zero1 || Zero2)) {
    // Don't bother if we can blend instead.
    if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&
        isSequentialOrUndefOrZeroInRange(TargetMask, 0, NumElts, 0))
      return false;

    bool MatchLo = true, MatchHi = true;
    for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) {
      int M = TargetMask[i];

      // Ignore if the input is known to be zero or the index is undef.
      if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) ||
          (M == SM_SentinelUndef))
        continue;

      MatchLo &= (M == Unpckl[i]);
      MatchHi &= (M == Unpckh[i]);
    }

    if (MatchLo || MatchHi) {
      UnpackOpcode = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
      V2 = Zero2 ?
getZeroVector(VT, Subtarget, DAG, DL) : V1; V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1; return true; } } // If a binary shuffle, commute and try again. if (!IsUnary) { ShuffleVectorSDNode::commuteMask(Unpckl); if (isTargetShuffleEquivalent(TargetMask, Unpckl)) { UnpackOpcode = X86ISD::UNPCKL; std::swap(V1, V2); return true; } ShuffleVectorSDNode::commuteMask(Unpckh); if (isTargetShuffleEquivalent(TargetMask, Unpckh)) { UnpackOpcode = X86ISD::UNPCKH; std::swap(V1, V2); return true; } } return false; } // X86 has dedicated unpack instructions that can handle specific blend // operations: UNPCKH and UNPCKL. static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG) { SmallVector Unpckl; createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false); if (isShuffleEquivalent(V1, V2, Mask, Unpckl)) return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); SmallVector Unpckh; createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, /* Unary = */ false); if (isShuffleEquivalent(V1, V2, Mask, Unpckh)) return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); // Commute and try again. ShuffleVectorSDNode::commuteMask(Unpckl); if (isShuffleEquivalent(V1, V2, Mask, Unpckl)) return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1); ShuffleVectorSDNode::commuteMask(Unpckh); if (isShuffleEquivalent(V1, V2, Mask, Unpckh)) return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1); return SDValue(); } static bool matchVectorShuffleAsVPMOV(ArrayRef Mask, bool SwappedOps, int Delta) { int Size = (int)Mask.size(); int Split = Size / Delta; int TruncatedVectorStart = SwappedOps ? Size : 0; // Match for mask starting with e.g.: <8, 10, 12, 14,... or <0, 2, 4, 6,... if (!isSequentialOrUndefInRange(Mask, 0, Split, TruncatedVectorStart, Delta)) return false; // The rest of the mask should not refer to the truncated vector's elements. if (isAnyInRange(Mask.slice(Split, Size - Split), TruncatedVectorStart, TruncatedVectorStart + Size)) return false; return true; } // Try to lower trunc+vector_shuffle to a vpmovdb or a vpmovdw instruction. // // An example is the following: // // t0: ch = EntryToken // t2: v4i64,ch = CopyFromReg t0, Register:v4i64 %0 // t25: v4i32 = truncate t2 // t41: v8i16 = bitcast t25 // t21: v8i16 = BUILD_VECTOR undef:i16, undef:i16, undef:i16, undef:i16, // Constant:i16<0>, Constant:i16<0>, Constant:i16<0>, Constant:i16<0> // t51: v8i16 = vector_shuffle<0,2,4,6,12,13,14,15> t41, t21 // t18: v2i64 = bitcast t51 // // Without avx512vl, this is lowered to: // // vpmovqd %zmm0, %ymm0 // vpshufb {{.*#+}} xmm0 = // xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero // // But when avx512vl is available, one can just use a single vpmovdw // instruction. static SDValue lowerVectorShuffleWithVPMOV(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (VT != MVT::v16i8 && VT != MVT::v8i16) return SDValue(); if (Mask.size() != VT.getVectorNumElements()) return SDValue(); bool SwappedOps = false; if (!ISD::isBuildVectorAllZeros(V2.getNode())) { if (!ISD::isBuildVectorAllZeros(V1.getNode())) return SDValue(); std::swap(V1, V2); SwappedOps = true; } // Look for: // // bitcast (truncate <8 x i32> %vec to <8 x i16>) to <16 x i8> // bitcast (truncate <4 x i64> %vec to <4 x i32>) to <8 x i16> // // and similar ones. 
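// e.g. with VT = v8i16 and Delta = 2, the matchVectorShuffleAsVPMOV check
// further below accepts a mask whose first Size / Delta = 4 elements are
// <0, 2, 4, 6> (or <8, 10, 12, 14> when the operands were swapped) and whose
// remaining elements never index back into the truncated vector.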
if (V1.getOpcode() != ISD::BITCAST) return SDValue(); if (V1.getOperand(0).getOpcode() != ISD::TRUNCATE) return SDValue(); SDValue Src = V1.getOperand(0).getOperand(0); MVT SrcVT = Src.getSimpleValueType(); // The vptrunc** instructions truncating 128 bit and 256 bit vectors // are only available with avx512vl. if (!SrcVT.is512BitVector() && !Subtarget.hasVLX()) return SDValue(); // Down Convert Word to Byte is only available with avx512bw. The case with // 256-bit output doesn't contain a shuffle and is therefore not handled here. if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 && !Subtarget.hasBWI()) return SDValue(); // The first half/quarter of the mask should refer to every second/fourth // element of the vector truncated and bitcasted. if (!matchVectorShuffleAsVPMOV(Mask, SwappedOps, 2) && !matchVectorShuffleAsVPMOV(Mask, SwappedOps, 4)) return SDValue(); return DAG.getNode(X86ISD::VTRUNC, DL, VT, Src); } // X86 has dedicated pack instructions that can handle specific truncation // operations: PACKSS and PACKUS. static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, unsigned &PackOpcode, ArrayRef TargetMask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { unsigned NumElts = VT.getVectorNumElements(); unsigned BitSize = VT.getScalarSizeInBits(); MVT PackSVT = MVT::getIntegerVT(BitSize * 2); MVT PackVT = MVT::getVectorVT(PackSVT, NumElts / 2); auto MatchPACK = [&](SDValue N1, SDValue N2) { SDValue VV1 = DAG.getBitcast(PackVT, N1); SDValue VV2 = DAG.getBitcast(PackVT, N2); if (Subtarget.hasSSE41() || PackSVT == MVT::i16) { APInt ZeroMask = APInt::getHighBitsSet(BitSize * 2, BitSize); if ((N1.isUndef() || DAG.MaskedValueIsZero(VV1, ZeroMask)) && (N2.isUndef() || DAG.MaskedValueIsZero(VV2, ZeroMask))) { V1 = VV1; V2 = VV2; SrcVT = PackVT; PackOpcode = X86ISD::PACKUS; return true; } } if ((N1.isUndef() || DAG.ComputeNumSignBits(VV1) > BitSize) && (N2.isUndef() || DAG.ComputeNumSignBits(VV2) > BitSize)) { V1 = VV1; V2 = VV2; SrcVT = PackVT; PackOpcode = X86ISD::PACKSS; return true; } return false; }; // Try binary shuffle. SmallVector BinaryMask; createPackShuffleMask(VT, BinaryMask, false); if (isTargetShuffleEquivalent(TargetMask, BinaryMask)) if (MatchPACK(V1, V2)) return true; // Try unary shuffle. SmallVector UnaryMask; createPackShuffleMask(VT, UnaryMask, true); if (isTargetShuffleEquivalent(TargetMask, UnaryMask)) if (MatchPACK(V1, V1)) return true; return false; } static SDValue lowerVectorShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT PackVT; unsigned PackOpcode; if (matchVectorShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG, Subtarget)) return DAG.getNode(PackOpcode, DL, VT, DAG.getBitcast(PackVT, V1), DAG.getBitcast(PackVT, V2)); return SDValue(); } /// Try to emit a bitmask instruction for a shuffle. /// /// This handles cases where we can model a blend exactly as a bitmask due to /// one of the inputs being zeroable. 
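///
/// For example, a v4i32 shuffle with mask <0, zz, 2, zz> (zz = zeroable)
/// lowers to an AND of V1 with the constant vector <-1, 0, -1, 0>.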
static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const APInt &Zeroable, SelectionDAG &DAG) { assert(!VT.isFloatingPoint() && "Floating point types are not supported"); MVT EltVT = VT.getVectorElementType(); SDValue Zero = DAG.getConstant(0, DL, EltVT); SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT); SmallVector VMaskOps(Mask.size(), Zero); SDValue V; for (int i = 0, Size = Mask.size(); i < Size; ++i) { if (Zeroable[i]) continue; if (Mask[i] % Size != i) return SDValue(); // Not a blend. if (!V) V = Mask[i] < Size ? V1 : V2; else if (V != (Mask[i] < Size ? V1 : V2)) return SDValue(); // Can only let one input through the mask. VMaskOps[i] = AllOnes; } if (!V) return SDValue(); // No non-zeroable elements! SDValue VMask = DAG.getBuildVector(VT, DL, VMaskOps); return DAG.getNode(ISD::AND, DL, VT, V, VMask); } /// Try to emit a blend instruction for a shuffle using bit math. /// /// This is used as a fallback approach when first class blend instructions are /// unavailable. Currently it is only suitable for integer vectors, but could /// be generalized for floating point vectors if desirable. static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { assert(VT.isInteger() && "Only supports integer vector types!"); MVT EltVT = VT.getVectorElementType(); SDValue Zero = DAG.getConstant(0, DL, EltVT); SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT); SmallVector MaskOps; for (int i = 0, Size = Mask.size(); i < Size; ++i) { if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + Size) return SDValue(); // Shuffled input! MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero); } SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps); V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask); V2 = DAG.getNode(X86ISD::ANDNP, DL, VT, V1Mask, V2); return DAG.getNode(ISD::OR, DL, VT, V1, V2); } static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget &Subtarget, SelectionDAG &DAG); static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, MutableArrayRef TargetMask, bool &ForceV1Zero, bool &ForceV2Zero, uint64_t &BlendMask) { bool V1IsZeroOrUndef = V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZeroOrUndef = V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode()); BlendMask = 0; ForceV1Zero = false, ForceV2Zero = false; assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask"); // Attempt to generate the binary blend mask. If an input is zero then // we can use any lane. // TODO: generalize the zero matching to any scalar like isShuffleEquivalent. for (int i = 0, Size = TargetMask.size(); i < Size; ++i) { int M = TargetMask[i]; if (M == SM_SentinelUndef) continue; if (M == i) continue; if (M == i + Size) { BlendMask |= 1ull << i; continue; } if (M == SM_SentinelZero) { if (V1IsZeroOrUndef) { ForceV1Zero = true; TargetMask[i] = i; continue; } if (V2IsZeroOrUndef) { ForceV2Zero = true; BlendMask |= 1ull << i; TargetMask[i] = i + Size; continue; } } return false; } return true; } static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size, int Scale) { uint64_t ScaledMask = 0; for (int i = 0; i != Size; ++i) if (BlendMask & (1ull << i)) ScaledMask |= ((1ull << Scale) - 1) << (i * Scale); return ScaledMask; } /// Try to emit a blend instruction for a shuffle. /// /// This doesn't do any checks for the availability of instructions for blending /// these values. 
It relies on the availability of the X86ISD::BLENDI pattern to /// be matched in the backend with the type given. What it does check for is /// that the shuffle mask is a blend, or convertible into a blend with zero. static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Original, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SmallVector Mask = createTargetShuffleMask(Original, Zeroable); uint64_t BlendMask = 0; bool ForceV1Zero = false, ForceV2Zero = false; if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero, BlendMask)) return SDValue(); // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs. if (ForceV1Zero) V1 = getZeroVector(VT, Subtarget, DAG, DL); if (ForceV2Zero) V2 = getZeroVector(VT, Subtarget, DAG, DL); switch (VT.SimpleTy) { case MVT::v2f64: case MVT::v4f32: case MVT::v4f64: case MVT::v8f32: return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8)); case MVT::v4i64: case MVT::v8i32: assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!"); LLVM_FALLTHROUGH; case MVT::v2i64: case MVT::v4i32: // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into // that instruction. if (Subtarget.hasAVX2()) { // Scale the blend by the number of 32-bit dwords per element. int Scale = VT.getScalarSizeInBits() / 32; BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale); MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32; V1 = DAG.getBitcast(BlendVT, V1); V2 = DAG.getBitcast(BlendVT, V2); return DAG.getBitcast( VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8))); } LLVM_FALLTHROUGH; case MVT::v8i16: { // For integer shuffles we need to expand the mask and cast the inputs to // v8i16s prior to blending. int Scale = 8 / VT.getVectorNumElements(); BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale); V1 = DAG.getBitcast(MVT::v8i16, V1); V2 = DAG.getBitcast(MVT::v8i16, V2); return DAG.getBitcast(VT, DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8))); } case MVT::v16i16: { assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!"); SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) { // We can lower these with PBLENDW which is mirrored across 128-bit lanes. assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!"); BlendMask = 0; for (int i = 0; i < 8; ++i) if (RepeatedMask[i] >= 8) BlendMask |= 1ull << i; return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8)); } // Use PBLENDW for lower/upper lanes and then blend lanes. // TODO - we should allow 2 PBLENDW here and leave shuffle combine to // merge to VSELECT where useful. uint64_t LoMask = BlendMask & 0xFF; uint64_t HiMask = (BlendMask >> 8) & 0xFF; if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) { SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, DAG.getConstant(LoMask, DL, MVT::i8)); SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, DAG.getConstant(HiMask, DL, MVT::i8)); return DAG.getVectorShuffle( MVT::v16i16, DL, Lo, Hi, {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}); } LLVM_FALLTHROUGH; } case MVT::v16i8: case MVT::v32i8: { assert((VT.is128BitVector() || Subtarget.hasAVX2()) && "256-bit byte-blends require AVX2 support!"); // Attempt to lower to a bitmask if we can. 
VPAND is faster than VPBLENDVB. if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG)) return Masked; if (Subtarget.hasBWI() && Subtarget.hasVLX()) { MVT IntegerType = MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType); return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG); } // Scale the blend by the number of bytes per element. int Scale = VT.getScalarSizeInBits() / 8; // This form of blend is always done on bytes. Compute the byte vector // type. MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); // x86 allows load folding with blendvb from the 2nd source operand. But // we are still using LLVM select here (see comment below), so that's V1. // If V2 can be load-folded and V1 cannot be load-folded, then commute to // allow that load-folding possibility. if (!ISD::isNormalLoad(V1.getNode()) && ISD::isNormalLoad(V2.getNode())) { ShuffleVectorSDNode::commuteMask(Mask); std::swap(V1, V2); } // Compute the VSELECT mask. Note that VSELECT is really confusing in the // mix of LLVM's code generator and the x86 backend. We tell the code // generator that boolean values in the elements of an x86 vector register // are -1 for true and 0 for false. We then use the LLVM semantics of 'true' // mapping a select to operand #1, and 'false' mapping to operand #2. The // reality in x86 is that vector masks (pre-AVX-512) use only the high bit // of the element (the remaining are ignored) and 0 in that high bit would // mean operand #1 while 1 in the high bit would mean operand #2. So while // the LLVM model for boolean values in vector elements gets the relevant // bit set, it is set backwards and over constrained relative to x86's // actual model. SmallVector VSELECTMask; for (int i = 0, Size = Mask.size(); i < Size; ++i) for (int j = 0; j < Scale; ++j) VSELECTMask.push_back( Mask[i] < 0 ? DAG.getUNDEF(MVT::i8) : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, MVT::i8)); V1 = DAG.getBitcast(BlendVT, V1); V2 = DAG.getBitcast(BlendVT, V2); return DAG.getBitcast( VT, DAG.getSelect(DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask), V1, V2)); } case MVT::v16f32: case MVT::v8f64: case MVT::v8i64: case MVT::v16i32: case MVT::v32i16: case MVT::v64i8: { MVT IntegerType = MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType); return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG); } default: llvm_unreachable("Not a supported integer vector type!"); } } /// Try to lower as a blend of elements from two inputs followed by /// a single-input permutation. /// /// This matches the pattern where we can blend elements from two inputs and /// then reduce the shuffle to a single-input permutation. static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG, bool ImmBlends = false) { // We build up the blend mask while checking whether a blend is a viable way // to reduce the shuffle. SmallVector BlendMask(Mask.size(), -1); SmallVector PermuteMask(Mask.size(), -1); for (int i = 0, Size = Mask.size(); i < Size; ++i) { if (Mask[i] < 0) continue; assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds."); if (BlendMask[Mask[i] % Size] < 0) BlendMask[Mask[i] % Size] = Mask[i]; else if (BlendMask[Mask[i] % Size] != Mask[i]) return SDValue(); // Can't blend in the needed input! 
PermuteMask[i] = Mask[i] % Size; } // If only immediate blends, then bail if the blend mask can't be widened to // i16. unsigned EltSize = VT.getScalarSizeInBits(); if (ImmBlends && EltSize == 8 && !canWidenShuffleElements(BlendMask)) return SDValue(); SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask); return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask); } /// Try to lower as an unpack of elements from two inputs followed by /// a single-input permutation. /// /// This matches the pattern where we can unpack elements from two inputs and /// then reduce the shuffle to a single-input (wider) permutation. static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { int NumElts = Mask.size(); int NumLanes = VT.getSizeInBits() / 128; int NumLaneElts = NumElts / NumLanes; int NumHalfLaneElts = NumLaneElts / 2; bool MatchLo = true, MatchHi = true; SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)}; // Determine UNPCKL/UNPCKH type and operand order. for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) { for (int Elt = 0; Elt != NumLaneElts; ++Elt) { int M = Mask[Lane + Elt]; if (M < 0) continue; SDValue &Op = Ops[Elt & 1]; if (M < NumElts && (Op.isUndef() || Op == V1)) Op = V1; else if (NumElts <= M && (Op.isUndef() || Op == V2)) Op = V2; else return SDValue(); int Lo = Lane, Mid = Lane + NumHalfLaneElts, Hi = Lane + NumLaneElts; MatchLo &= isUndefOrInRange(M, Lo, Mid) || isUndefOrInRange(M, NumElts + Lo, NumElts + Mid); MatchHi &= isUndefOrInRange(M, Mid, Hi) || isUndefOrInRange(M, NumElts + Mid, NumElts + Hi); if (!MatchLo && !MatchHi) return SDValue(); } } assert((MatchLo ^ MatchHi) && "Failed to match UNPCKLO/UNPCKHI"); // Now check that each pair of elts come from the same unpack pair // and set the permute mask based on each pair. // TODO - Investigate cases where we permute individual elements. SmallVector PermuteMask(NumElts, -1); for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) { for (int Elt = 0; Elt != NumLaneElts; Elt += 2) { int M0 = Mask[Lane + Elt + 0]; int M1 = Mask[Lane + Elt + 1]; if (0 <= M0 && 0 <= M1 && (M0 % NumHalfLaneElts) != (M1 % NumHalfLaneElts)) return SDValue(); if (0 <= M0) PermuteMask[Lane + Elt + 0] = Lane + (2 * (M0 % NumHalfLaneElts)); if (0 <= M1) PermuteMask[Lane + Elt + 1] = Lane + (2 * (M1 % NumHalfLaneElts)) + 1; } } unsigned UnpckOp = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH; SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops); return DAG.getVectorShuffle(VT, DL, Unpck, DAG.getUNDEF(VT), PermuteMask); } /// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then /// permuting the elements of the result in place. static SDValue lowerVectorShuffleAsByteRotateAndPermute( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) || (VT.is256BitVector() && !Subtarget.hasAVX2()) || (VT.is512BitVector() && !Subtarget.hasBWI())) return SDValue(); // We don't currently support lane crossing permutes. if (is128BitLaneCrossingShuffleMask(VT, Mask)) return SDValue(); int Scale = VT.getScalarSizeInBits() / 8; int NumLanes = VT.getSizeInBits() / 128; int NumElts = VT.getVectorNumElements(); int NumEltsPerLane = NumElts / NumLanes; // Determine range of mask elts. 
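  // e.g. for a v16i8 shuffle where V1 supplies in-lane bytes 12..15 and V2
  // supplies in-lane bytes 0..2, Range1 = {12, 15} and Range2 = {0, 2}, so a
  // PALIGNR by 12 brings both ranges into one vector for the final permute.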
bool Blend1 = true; bool Blend2 = true; std::pair Range1 = std::make_pair(INT_MAX, INT_MIN); std::pair Range2 = std::make_pair(INT_MAX, INT_MIN); for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) { for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) { int M = Mask[Lane + Elt]; if (M < 0) continue; if (M < NumElts) { Blend1 &= (M == (Lane + Elt)); assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask"); M = M % NumEltsPerLane; Range1.first = std::min(Range1.first, M); Range1.second = std::max(Range1.second, M); } else { M -= NumElts; Blend2 &= (M == (Lane + Elt)); assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask"); M = M % NumEltsPerLane; Range2.first = std::min(Range2.first, M); Range2.second = std::max(Range2.second, M); } } } // Bail if we don't need both elements. // TODO - it might be worth doing this for unary shuffles if the permute // can be widened. if (!(0 <= Range1.first && Range1.second < NumEltsPerLane) || !(0 <= Range2.first && Range2.second < NumEltsPerLane)) return SDValue(); if (VT.getSizeInBits() > 128 && (Blend1 || Blend2)) return SDValue(); // Rotate the 2 ops so we can access both ranges, then permute the result. auto RotateAndPermute = [&](SDValue Lo, SDValue Hi, int RotAmt, int Ofs) { MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); SDValue Rotate = DAG.getBitcast( VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi), DAG.getBitcast(ByteVT, Lo), DAG.getConstant(Scale * RotAmt, DL, MVT::i8))); SmallVector PermMask(NumElts, SM_SentinelUndef); for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) { for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) { int M = Mask[Lane + Elt]; if (M < 0) continue; if (M < NumElts) PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane); else PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane); } } return DAG.getVectorShuffle(VT, DL, Rotate, DAG.getUNDEF(VT), PermMask); }; // Check if the ranges are small enough to rotate from either direction. if (Range2.second < Range1.first) return RotateAndPermute(V1, V2, Range1.first, 0); if (Range1.second < Range2.first) return RotateAndPermute(V2, V1, Range2.first, NumElts); return SDValue(); } /// Generic routine to decompose a shuffle and blend into independent /// blends and permutes. /// /// This matches the extremely common pattern for handling combined /// shuffle+blend operations on newer X86 ISAs where we have very fast blend /// operations. It will try to pick the best arrangement of shuffles and /// blends. static SDValue lowerVectorShuffleAsDecomposedShuffleBlend( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // Shuffle the input elements into the desired positions in V1 and V2 and // blend them together. SmallVector V1Mask(Mask.size(), -1); SmallVector V2Mask(Mask.size(), -1); SmallVector BlendMask(Mask.size(), -1); for (int i = 0, Size = Mask.size(); i < Size; ++i) if (Mask[i] >= 0 && Mask[i] < Size) { V1Mask[i] = Mask[i]; BlendMask[i] = i; } else if (Mask[i] >= Size) { V2Mask[i] = Mask[i] - Size; BlendMask[i] = i + Size; } // Try to lower with the simpler initial blend/unpack/rotate strategies unless // one of the input shuffles would be a no-op. We prefer to shuffle inputs as // the shuffle may be able to fold with a load or other benefit. However, when // we'll have to do 2x as many shuffles in order to achieve this, a 2-input // pre-shuffle first is a better strategy. 
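  // e.g. a v4i32 mask <0, 5, 2, 7> decomposes into V1Mask <0, -1, 2, -1>,
  // V2Mask <-1, 1, -1, 3> and BlendMask <0, 5, 2, 7>; both per-input masks
  // are no-ops there, so only the final blend does real work.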
  if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) {
    // Only prefer immediate blends to unpack/rotate.
    if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
            DL, VT, V1, V2, Mask, DAG, true))
      return BlendPerm;
    if (SDValue UnpackPerm =
            lowerVectorShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG))
      return UnpackPerm;
    if (SDValue RotatePerm = lowerVectorShuffleAsByteRotateAndPermute(
            DL, VT, V1, V2, Mask, Subtarget, DAG))
      return RotatePerm;
    // Unpack/rotate failed - try again with variable blends.
    if (SDValue BlendPerm =
            lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
      return BlendPerm;
  }

  V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
  V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
  return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
}

/// Try to lower a vector shuffle as a rotation.
///
/// This is used to support PALIGNR for SSSE3 or VALIGND/Q for AVX512.
static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2,
                                      ArrayRef<int> Mask) {
  int NumElts = Mask.size();

  // We need to detect various ways of spelling a rotation:
  //   [11, 12, 13, 14, 15,  0,  1,  2]
  //   [-1, 12, 13, 14, -1, -1,  1, -1]
  //   [-1, -1, -1, -1, -1, -1,  1,  2]
  //   [ 3,  4,  5,  6,  7,  8,  9, 10]
  //   [-1,  4,  5,  6, -1, -1,  9, -1]
  //   [-1,  4,  5,  6, -1, -1, -1, -1]
  int Rotation = 0;
  SDValue Lo, Hi;
  for (int i = 0; i < NumElts; ++i) {
    int M = Mask[i];
    assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // Determine where a rotated vector would have started.
    int StartIdx = i - (M % NumElts);
    if (StartIdx == 0)
      // The identity rotation isn't interesting, stop.
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      // The rotations don't match, so we can't match this mask.
      return -1;

    // Compute which value this mask is pointing at.
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Compute which of the two target values this index should be assigned
    // to. This reflects whether the high elements are remaining or the low
    // elements are remaining.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      // This may be a rotation, but it pulls from the inputs in some
      // unsupported interleaving.
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  V1 = Lo;
  V2 = Hi;

  return Rotation;
}

/// Try to lower a vector shuffle as a byte rotation.
///
/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
/// try to generically lower a vector shuffle through such a pattern. It
/// does not check for the profitability of lowering either as PALIGNR or
/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
/// This matches shuffle vectors that look like:
///
///   v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
///
/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
                                          ArrayRef<int> Mask) {
  // Don't accept any shuffles with zero elements.
  if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
    return -1;

  // PALIGNR works on 128-bit lanes.
  // Inline element count below is an assumption of this reconstruction.
  SmallVector<int, 16> RepeatedMask;
  if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
    return -1;

  int Rotation = matchVectorShuffleAsRotate(V1, V2, RepeatedMask);
  if (Rotation <= 0)
    return -1;

  // PALIGNR rotates bytes, so we need to scale the
  // rotation based on how many bytes are in the vector lane.
  int NumElts = RepeatedMask.size();
  int Scale = 16 / NumElts;
  return Rotation * Scale;
}

static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
                                              SDValue V1, SDValue V2,
                                              ArrayRef<int> Mask,
                                              const X86Subtarget &Subtarget,
                                              SelectionDAG &DAG) {
  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");

  SDValue Lo = V1, Hi = V2;
  int ByteRotation = matchVectorShuffleAsByteRotate(VT, Lo, Hi, Mask);
  if (ByteRotation <= 0)
    return SDValue();

  // Cast the inputs to i8 vector of correct length to match PALIGNR or
  // PSLLDQ/PSRLDQ.
  MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  Lo = DAG.getBitcast(ByteVT, Lo);
  Hi = DAG.getBitcast(ByteVT, Hi);

  // SSSE3 targets can use the palignr instruction.
  if (Subtarget.hasSSSE3()) {
    assert((!VT.is512BitVector() || Subtarget.hasBWI()) &&
           "512-bit PALIGNR requires BWI instructions");
    return DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
                        DAG.getConstant(ByteRotation, DL, MVT::i8)));
  }

  assert(VT.is128BitVector() &&
         "Rotate-based lowering only supports 128-bit lowering!");
  assert(Mask.size() <= 16 &&
         "Can shuffle at most 16 bytes in a 128-bit vector!");
  assert(ByteVT == MVT::v16i8 &&
         "SSE2 rotate lowering only needed for v16i8!");

  // Default SSE2 implementation.
  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;

  SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
                                DAG.getConstant(LoByteShift, DL, MVT::i8));
  SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
                                DAG.getConstant(HiByteShift, DL, MVT::i8));
  return DAG.getBitcast(VT,
                        DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}

/// Try to lower a vector shuffle as a dword/qword rotation.
///
/// AVX512 has VALIGND/VALIGNQ instructions that will do an arbitrary
/// rotation of the concatenation of two vectors; this routine will
/// try to generically lower a vector shuffle through such a pattern.
///
/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
                                          SDValue V1, SDValue V2,
                                          ArrayRef<int> Mask,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
  assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
         "Only 32-bit and 64-bit elements are supported!");

  // 128/256-bit vectors are only supported with VLX.
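  // e.g. a v8i32 mask <3, 4, 5, 6, 7, 8, 9, 10> is a rotation of the
  // concatenated inputs by three dwords (Rotation = 3), which lowers to a
  // VALIGND with an immediate of 3.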
assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) && "VLX required for 128/256-bit vectors"); SDValue Lo = V1, Hi = V2; int Rotation = matchVectorShuffleAsRotate(Lo, Hi, Mask); if (Rotation <= 0) return SDValue(); return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi, DAG.getConstant(Rotation, DL, MVT::i8)); } /// Try to lower a vector shuffle as a bit shift (shifts in zeros). /// /// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and /// PSRL(W/D/Q/DQ) SSE2 and AVX2 logical bit-shift instructions. The function /// matches elements from one of the input vectors shuffled to the left or /// right with zeroable elements 'shifted in'. It handles both the strictly /// bit-wise element shifts and the byte shift across an entire 128-bit double /// quad word lane. /// /// PSHL : (little-endian) left bit shift. /// [ zz, 0, zz, 2 ] /// [ -1, 4, zz, -1 ] /// PSRL : (little-endian) right bit shift. /// [ 1, zz, 3, zz] /// [ -1, -1, 7, zz] /// PSLLDQ : (little-endian) left byte shift /// [ zz, 0, 1, 2, 3, 4, 5, 6] /// [ zz, zz, -1, -1, 2, 3, 4, -1] /// [ zz, zz, zz, zz, zz, zz, -1, 1] /// PSRLDQ : (little-endian) right byte shift /// [ 5, 6, 7, zz, zz, zz, zz, zz] /// [ -1, 5, 6, 7, zz, zz, zz, zz] /// [ 1, 2, -1, -1, -1, -1, zz, zz] static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef Mask, int MaskOffset, const APInt &Zeroable, const X86Subtarget &Subtarget) { int Size = Mask.size(); unsigned SizeInBits = Size * ScalarSizeInBits; auto CheckZeros = [&](int Shift, int Scale, bool Left) { for (int i = 0; i < Size; i += Scale) for (int j = 0; j < Shift; ++j) if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))]) return false; return true; }; auto MatchShift = [&](int Shift, int Scale, bool Left) { for (int i = 0; i != Size; i += Scale) { unsigned Pos = Left ? i + Shift : i; unsigned Low = Left ? i : i + Shift; unsigned Len = Scale - Shift; if (!isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset)) return -1; } int ShiftEltBits = ScalarSizeInBits * Scale; bool ByteShift = ShiftEltBits > 64; Opcode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI) : (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI); int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1); // Normalize the scale for byte shifts to still produce an i64 element // type. Scale = ByteShift ? Scale / 2 : Scale; // We need to round trip through the appropriate type for the shift. MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale); ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8) : MVT::getVectorVT(ShiftSVT, Size / Scale); return (int)ShiftAmt; }; // SSE/AVX supports logical shifts up to 64-bit integers - so we can just // keep doubling the size of the integer elements up to that. We can // then shift the elements of the integer vector by whole multiples of // their width within the elements of the larger integer vector. Test each // multiple to see if we can find a match with the moved element indices // and that the shifted in elements are all zeroable. unsigned MaxWidth = ((SizeInBits == 512) && !Subtarget.hasBWI() ? 
64 : 128); for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2) for (int Shift = 1; Shift != Scale; ++Shift) for (bool Left : {true, false}) if (CheckZeros(Shift, Scale, Left)) { int ShiftAmt = MatchShift(Shift, Scale, Left); if (0 < ShiftAmt) return ShiftAmt; } // no match return -1; } static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { int Size = Mask.size(); assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); MVT ShiftVT; SDValue V = V1; unsigned Opcode; // Try to match shuffle against V1 shift. int ShiftAmt = matchVectorShuffleAsShift( ShiftVT, Opcode, VT.getScalarSizeInBits(), Mask, 0, Zeroable, Subtarget); // If V1 failed, try to match shuffle against V2 shift. if (ShiftAmt < 0) { ShiftAmt = matchVectorShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(), Mask, Size, Zeroable, Subtarget); V = V2; } if (ShiftAmt < 0) return SDValue(); assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) && "Illegal integer vector type"); V = DAG.getBitcast(ShiftVT, V); V = DAG.getNode(Opcode, DL, ShiftVT, V, DAG.getConstant(ShiftAmt, DL, MVT::i8)); return DAG.getBitcast(VT, V); } // EXTRQ: Extract Len elements from lower half of source, starting at Idx. // Remainder of lower half result is zero and upper half is all undef. static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, ArrayRef Mask, uint64_t &BitLen, uint64_t &BitIdx, const APInt &Zeroable) { int Size = Mask.size(); int HalfSize = Size / 2; assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask"); // Upper half must be undefined. if (!isUndefInRange(Mask, HalfSize, HalfSize)) return false; // Determine the extraction length from the part of the // lower half that isn't zeroable. int Len = HalfSize; for (; Len > 0; --Len) if (!Zeroable[Len - 1]) break; assert(Len > 0 && "Zeroable shuffle mask"); // Attempt to match first Len sequential elements from the lower half. SDValue Src; int Idx = -1; for (int i = 0; i != Len; ++i) { int M = Mask[i]; if (M == SM_SentinelUndef) continue; SDValue &V = (M < Size ? V1 : V2); M = M % Size; // The extracted elements must start at a valid index and all mask // elements must be in the lower half. if (i > M || M >= HalfSize) return false; if (Idx < 0 || (Src == V && Idx == (M - i))) { Src = V; Idx = M - i; continue; } return false; } if (!Src || Idx < 0) return false; assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; V1 = Src; return true; } // INSERTQ: Extract lowest Len elements from lower half of second source and // insert over first source, starting at Idx. // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, ArrayRef Mask, uint64_t &BitLen, uint64_t &BitIdx) { int Size = Mask.size(); int HalfSize = Size / 2; assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); // Upper half must be undefined. if (!isUndefInRange(Mask, HalfSize, HalfSize)) return false; for (int Idx = 0; Idx != HalfSize; ++Idx) { SDValue Base; // Attempt to match first source from mask before insertion point. 
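    // e.g. for a v8i16 mask <0, 8, 9, 3, u, u, u, u>, the scan settles on
    // Idx = 1 with Base = V1, matches Len = 2 inserted elements from V2, and
    // encodes BitLen = 32, BitIdx = 16.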
    if (isUndefInRange(Mask, 0, Idx)) {
      /* EMPTY */
    } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
      Base = V1;
    } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
      Base = V2;
    } else {
      continue;
    }

    // Extend the extraction length looking to match both the insertion of
    // the second source and the remaining elements of the first.
    for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
      SDValue Insert;
      int Len = Hi - Idx;

      // Match insertion.
      if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
        Insert = V1;
      } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
        Insert = V2;
      } else {
        continue;
      }

      // Match the remaining elements of the lower half.
      if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
        /* EMPTY */
      } else if ((!Base || (Base == V1)) &&
                 isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
        Base = V1;
      } else if ((!Base || (Base == V2)) &&
                 isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
                                            Size + Hi)) {
        Base = V2;
      } else {
        continue;
      }

      BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
      BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
      V1 = Base;
      V2 = Insert;
      return true;
    }
  }

  return false;
}

/// Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
                                           SDValue V2, ArrayRef<int> Mask,
                                           const APInt &Zeroable,
                                           SelectionDAG &DAG) {
  uint64_t BitLen, BitIdx;
  if (matchVectorShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
    return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
                       DAG.getConstant(BitLen, DL, MVT::i8),
                       DAG.getConstant(BitIdx, DL, MVT::i8));

  if (matchVectorShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
    return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
                       V2 ? V2 : DAG.getUNDEF(VT),
                       DAG.getConstant(BitLen, DL, MVT::i8),
                       DAG.getConstant(BitIdx, DL, MVT::i8));

  return SDValue();
}

/// Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available
/// features of the subtarget. The extended elements are consecutive and can
/// start from an offset element index in the input; to avoid excess shuffling
/// the offset must either be in the bottom lane or at the start of a higher
/// lane. All extended elements must be from the same lane.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
    const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
    ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  assert(Scale > 1 && "Need a scale to extend.");
  int EltBits = VT.getScalarSizeInBits();
  int NumElements = VT.getVectorNumElements();
  int NumEltsPerLane = 128 / EltBits;
  int OffsetLane = Offset / NumEltsPerLane;
  assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
         "Only 8, 16, and 32 bit elements can be extended.");
  assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
  assert(0 <= Offset && "Extension offset must be positive.");
  assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) &&
         "Extension offset must be in the first lane or start an upper lane.");

  // Check that an index is in same lane as the base offset.
  auto SafeOffset = [&](int Idx) {
    return OffsetLane == (Idx / NumEltsPerLane);
  };

  // Shift along an input so that the offset base moves to the first element.
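  // (e.g. with NumElements = 8, Scale = 2 and Offset = 2, the lambda below
  //  produces ShMask = <2, 3, 4, 5, -1, -1, -1, -1>.)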
auto ShuffleOffset = [&](SDValue V) { if (!Offset) return V; SmallVector ShMask((unsigned)NumElements, -1); for (int i = 0; i * Scale < NumElements; ++i) { int SrcIdx = i + Offset; ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1; } return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask); }; // Found a valid zext mask! Try various lowering strategies based on the // input type and available ISA extensions. if (Subtarget.hasSSE41()) { // Not worth offsetting 128-bit vectors if scale == 2, a pattern using // PUNPCK will catch this in a later shuffle match. if (Offset && Scale == 2 && VT.is128BitVector()) return SDValue(); MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale), NumElements / Scale); InputV = ShuffleOffset(InputV); InputV = getExtendInVec(/*Signed*/false, DL, ExtVT, InputV, DAG); return DAG.getBitcast(VT, InputV); } assert(VT.is128BitVector() && "Only 128-bit vectors can be extended."); // For any extends we can cheat for larger element sizes and use shuffle // instructions that can fold with a load and/or copy. if (AnyExt && EltBits == 32) { int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1, -1}; return DAG.getBitcast( VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, InputV), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } if (AnyExt && EltBits == 16 && Scale > 2) { int PSHUFDMask[4] = {Offset / 2, -1, SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1}; InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, InputV), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)); int PSHUFWMask[4] = {1, -1, -1, -1}; unsigned OddEvenOp = (Offset & 1 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW); return DAG.getBitcast( VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, InputV), getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG))); } // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes // to 64-bits. if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) { assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!"); assert(VT.is128BitVector() && "Unexpected vector width!"); int LoIdx = Offset * EltBits; SDValue Lo = DAG.getBitcast( MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, DAG.getConstant(EltBits, DL, MVT::i8), DAG.getConstant(LoIdx, DL, MVT::i8))); if (isUndefInRange(Mask, NumElements / 2, NumElements / 2) || !SafeOffset(Offset + 1)) return DAG.getBitcast(VT, Lo); int HiIdx = (Offset + 1) * EltBits; SDValue Hi = DAG.getBitcast( MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, DAG.getConstant(EltBits, DL, MVT::i8), DAG.getConstant(HiIdx, DL, MVT::i8))); return DAG.getBitcast(VT, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi)); } // If this would require more than 2 unpack instructions to expand, use // pshufb when available. We can only use more than 2 unpack instructions // when zero extending i8 elements which also makes it easier to use pshufb. if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) { assert(NumElements == 16 && "Unexpected byte vector width!"); SDValue PSHUFBMask[16]; for (int i = 0; i < 16; ++i) { int Idx = Offset + (i / Scale); PSHUFBMask[i] = DAG.getConstant( (i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8); } InputV = DAG.getBitcast(MVT::v16i8, InputV); return DAG.getBitcast( VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, DAG.getBuildVector(MVT::v16i8, DL, PSHUFBMask))); } // If we are extending from an offset, ensure we start on a boundary that // we can unpack from. 
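  // e.g. extending every fourth byte of a v16i8 (Scale = 4) consumes
  // NumElements / Scale = 4 source elements per round, so an Offset of 6 is
  // pre-shifted by AlignToUnpack = 2 to land on an unpack boundary.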
int AlignToUnpack = Offset % (NumElements / Scale); if (AlignToUnpack) { SmallVector ShMask((unsigned)NumElements, -1); for (int i = AlignToUnpack; i < NumElements; ++i) ShMask[i - AlignToUnpack] = i; InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask); Offset -= AlignToUnpack; } // Otherwise emit a sequence of unpacks. do { unsigned UnpackLoHi = X86ISD::UNPCKL; if (Offset >= (NumElements / 2)) { UnpackLoHi = X86ISD::UNPCKH; Offset -= (NumElements / 2); } MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements); SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT) : getZeroVector(InputVT, Subtarget, DAG, DL); InputV = DAG.getBitcast(InputVT, InputV); InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext); Scale /= 2; EltBits *= 2; NumElements /= 2; } while (Scale > 1); return DAG.getBitcast(VT, InputV); } /// Try to lower a vector shuffle as a zero extension on any microarch. /// /// This routine will try to do everything in its power to cleverly lower /// a shuffle which happens to match the pattern of a zero extend. It doesn't /// check for the profitability of this lowering, it tries to aggressively /// match this pattern. It will use all of the micro-architectural details it /// can to emit an efficient lowering. It handles both blends with all-zero /// inputs to explicitly zero-extend and undef-lanes (sometimes undef due to /// masking out later). /// /// The reason we have dedicated lowering for zext-style shuffles is that they /// are both incredibly common and often quite performance sensitive. static SDValue lowerVectorShuffleAsZeroOrAnyExtend( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { int Bits = VT.getSizeInBits(); int NumLanes = Bits / 128; int NumElements = VT.getVectorNumElements(); int NumEltsPerLane = NumElements / NumLanes; assert(VT.getScalarSizeInBits() <= 32 && "Exceeds 32-bit integer zero extension limit"); assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size"); // Define a helper function to check a particular ext-scale and lower to it if // valid. auto Lower = [&](int Scale) -> SDValue { SDValue InputV; bool AnyExt = true; int Offset = 0; int Matches = 0; for (int i = 0; i < NumElements; ++i) { int M = Mask[i]; if (M < 0) continue; // Valid anywhere but doesn't tell us anything. if (i % Scale != 0) { // Each of the extended elements need to be zeroable. if (!Zeroable[i]) return SDValue(); // We no longer are in the anyext case. AnyExt = false; continue; } // Each of the base elements needs to be consecutive indices into the // same input vector. SDValue V = M < NumElements ? V1 : V2; M = M % NumElements; if (!InputV) { InputV = V; Offset = M - (i / Scale); } else if (InputV != V) return SDValue(); // Flip-flopping inputs. // Offset must start in the lowest 128-bit lane or at the start of an // upper lane. // FIXME: Is it ever worth allowing a negative base offset? if (!((0 <= Offset && Offset < NumEltsPerLane) || (Offset % NumEltsPerLane) == 0)) return SDValue(); // If we are offsetting, all referenced entries must come from the same // lane. if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane)) return SDValue(); if ((M % NumElements) != (Offset + (i / Scale))) return SDValue(); // Non-consecutive strided elements. Matches++; } // If we fail to find an input, we have a zero-shuffle which should always // have already been handled. // FIXME: Maybe handle this here in case during blending we end up with one? 
if (!InputV) return SDValue(); // If we are offsetting, don't extend if we only match a single input, we // can always do better by using a basic PSHUF or PUNPCK. if (Offset != 0 && Matches < 2) return SDValue(); return lowerVectorShuffleAsSpecificZeroOrAnyExtend( DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG); }; // The widest scale possible for extending is to a 64-bit integer. assert(Bits % 64 == 0 && "The number of bits in a vector must be divisible by 64 on x86!"); int NumExtElements = Bits / 64; // Each iteration, try extending the elements half as much, but into twice as // many elements. for (; NumExtElements < NumElements; NumExtElements *= 2) { assert(NumElements % NumExtElements == 0 && "The input vector size must be divisible by the extended size."); if (SDValue V = Lower(NumElements / NumExtElements)) return V; } // General extends failed, but 128-bit vectors may be able to use MOVQ. if (Bits != 128) return SDValue(); // Returns one of the source operands if the shuffle can be reduced to a // MOVQ, copying the lower 64-bits and zero-extending to the upper 64-bits. auto CanZExtLowHalf = [&]() { for (int i = NumElements / 2; i != NumElements; ++i) if (!Zeroable[i]) return SDValue(); if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0)) return V1; if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements)) return V2; return SDValue(); }; if (SDValue V = CanZExtLowHalf()) { V = DAG.getBitcast(MVT::v2i64, V); V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V); return DAG.getBitcast(VT, V); } // No viable ext lowering found. return SDValue(); } /// Try to get a scalar value for a specific element of a vector. /// /// Looks through BUILD_VECTOR and SCALAR_TO_VECTOR nodes to find a scalar. static SDValue getScalarValueForVectorElement(SDValue V, int Idx, SelectionDAG &DAG) { MVT VT = V.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); V = peekThroughBitcasts(V); // If the bitcasts shift the element size, we can't extract an equivalent // element from it. MVT NewVT = V.getSimpleValueType(); if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) return SDValue(); if (V.getOpcode() == ISD::BUILD_VECTOR || (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) { // Ensure the scalar operand is the same size as the destination. // FIXME: Add support for scalar truncation where possible. SDValue S = V.getOperand(Idx); if (EltVT.getSizeInBits() == S.getSimpleValueType().getSizeInBits()) return DAG.getBitcast(EltVT, S); } return SDValue(); } /// Helper to test for a load that can be folded with x86 shuffles. /// /// This is particularly important because the set of instructions varies /// significantly based on whether the operand is a load or not. static bool isShuffleFoldableLoad(SDValue V) { V = peekThroughBitcasts(V); return ISD::isNON_EXTLoad(V.getNode()); } /// Try to lower insertion of a single element into a zero vector. /// /// This is a common pattern that we have especially efficient patterns to lower /// across all subtarget feature sets. 
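///
/// For example, a v4f32 shuffle with mask <4, zz, zz, zz> (zz = zeroable)
/// inserts the low element of V2 into a zero vector; on the floating-point
/// path below this can become a single VZEXT_MOVL (MOVSS-style) node.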
static SDValue lowerVectorShuffleAsElementInsertion( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT ExtVT = VT; MVT EltVT = VT.getVectorElementType(); int V2Index = find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) - Mask.begin(); bool IsV1Zeroable = true; for (int i = 0, Size = Mask.size(); i < Size; ++i) if (i != V2Index && !Zeroable[i]) { IsV1Zeroable = false; break; } // Check for a single input from a SCALAR_TO_VECTOR node. // FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and // all the smarts here sunk into that routine. However, the current // lowering of BUILD_VECTOR makes that nearly impossible until the old // vector shuffle lowering is dead. SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), DAG); if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) { // We need to zext the scalar if it is smaller than an i32. V2S = DAG.getBitcast(EltVT, V2S); if (EltVT == MVT::i8 || EltVT == MVT::i16) { // Using zext to expand a narrow element won't work for non-zero // insertions. if (!IsV1Zeroable) return SDValue(); // Zero-extend directly to i32. ExtVT = MVT::getVectorVT(MVT::i32, ExtVT.getSizeInBits() / 32); V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S); } V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S); } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 || EltVT == MVT::i16) { // Either not inserting from the low element of the input or the input // element size is too small to use VZEXT_MOVL to clear the high bits. return SDValue(); } if (!IsV1Zeroable) { // If V1 can't be treated as a zero vector we have fewer options to lower // this. We can't support integer vectors or non-zero targets cheaply, and // the V1 elements can't be permuted in any way. assert(VT == ExtVT && "Cannot change extended type when non-zeroable!"); if (!VT.isFloatingPoint() || V2Index != 0) return SDValue(); SmallVector V1Mask(Mask.begin(), Mask.end()); V1Mask[V2Index] = -1; if (!isNoopShuffleMask(V1Mask)) return SDValue(); if (!VT.is128BitVector()) return SDValue(); // Otherwise, use MOVSD or MOVSS. assert((EltVT == MVT::f32 || EltVT == MVT::f64) && "Only two types of floating point element types to handle!"); return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL, ExtVT, V1, V2); } // This lowering only works for the low element with floating point vectors. if (VT.isFloatingPoint() && V2Index != 0) return SDValue(); V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2); if (ExtVT != VT) V2 = DAG.getBitcast(VT, V2); if (V2Index != 0) { // If we have 4 or fewer lanes we can cheaply shuffle the element into // the desired position. Otherwise it is more efficient to do a vector // shift left. We know that we can do a vector shift left because all // the inputs are zero. if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) { SmallVector V2Shuffle(Mask.size(), 1); V2Shuffle[V2Index] = 0; V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle); } else { V2 = DAG.getBitcast(MVT::v16i8, V2); V2 = DAG.getNode( X86ISD::VSHLDQ, DL, MVT::v16i8, V2, DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8)); V2 = DAG.getBitcast(VT, V2); } } return V2; } /// Try to lower broadcast of a single - truncated - integer element, /// coming from a scalar_to_vector/build_vector node \p V0 with larger elements. /// /// This assumes we have AVX2. 
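///
/// e.g. broadcasting i32 element 3 of a build_vector of i64s uses Scale = 2:
/// the routine reads scalar operand 1, shifts it right by 32 bits, truncates
/// to i32, and emits a VBROADCAST of the result.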
static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT,
                                                  SDValue V0, int BroadcastIdx,
                                                  const X86Subtarget &Subtarget,
                                                  SelectionDAG &DAG) {
  assert(Subtarget.hasAVX2() &&
         "We can only lower integer broadcasts with AVX2!");

  EVT EltVT = VT.getVectorElementType();
  EVT V0VT = V0.getValueType();

  assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!");
  assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!");

  EVT V0EltVT = V0VT.getVectorElementType();
  if (!V0EltVT.isInteger())
    return SDValue();

  const unsigned EltSize = EltVT.getSizeInBits();
  const unsigned V0EltSize = V0EltVT.getSizeInBits();

  // This is only a truncation if the original element type is larger.
  if (V0EltSize <= EltSize)
    return SDValue();

  assert(((V0EltSize % EltSize) == 0) &&
         "Scalar type sizes must all be powers of 2 on x86!");

  const unsigned V0Opc = V0.getOpcode();
  const unsigned Scale = V0EltSize / EltSize;
  const unsigned V0BroadcastIdx = BroadcastIdx / Scale;

  if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) &&
      V0Opc != ISD::BUILD_VECTOR)
    return SDValue();

  SDValue Scalar = V0.getOperand(V0BroadcastIdx);

  // If we're extracting non-least-significant bits, shift so we can truncate.
  // Hopefully, we can fold away the trunc/srl/load into the broadcast.
  // Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer
  // vpbroadcast+vmovd+shr to vpshufb(m)+vmovd.
  if (const int OffsetIdx = BroadcastIdx % Scale)
    Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar,
                         DAG.getConstant(OffsetIdx * EltSize, DL, MVT::i8));

  return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                     DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
}

/// Try to lower broadcast of a single element.
///
/// For convenience, this code also bundles all of the subtarget feature set
/// filtering. While a little annoying to re-dispatch on type here, there isn't
/// a convenient way to factor it out.
static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             const X86Subtarget &Subtarget,
                                             SelectionDAG &DAG) {
  if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
        (Subtarget.hasAVX() && VT.isFloatingPoint()) ||
        (Subtarget.hasAVX2() && VT.isInteger())))
    return SDValue();

  // With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise
  // we can only broadcast from a register with AVX2.
  unsigned NumElts = Mask.size();
  unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2())
                        ? X86ISD::MOVDDUP
                        : X86ISD::VBROADCAST;
  bool BroadcastFromReg = (Opcode == X86ISD::MOVDDUP) || Subtarget.hasAVX2();

  // Check that the mask is a broadcast.
  int BroadcastIdx = -1;
  for (int i = 0; i != (int)NumElts; ++i) {
    SmallVector<int, 8> BroadcastMask(NumElts, i);
    if (isShuffleEquivalent(V1, V2, Mask, BroadcastMask)) {
      BroadcastIdx = i;
      break;
    }
  }

  if (BroadcastIdx < 0)
    return SDValue();
  assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
                                            "a sorted mask where the broadcast "
                                            "comes from V1.");

  // Go up the chain of (vector) values to find a scalar load that we can
  // combine with the broadcast.
  SDValue V = V1;
  for (;;) {
    switch (V.getOpcode()) {
    case ISD::BITCAST: {
      // Peek through bitcasts as long as BroadcastIdx can be adjusted.
      SDValue VSrc = V.getOperand(0);
      unsigned NumEltBits = V.getScalarValueSizeInBits();
      unsigned NumSrcBits = VSrc.getScalarValueSizeInBits();
      if ((NumEltBits % NumSrcBits) == 0)
        BroadcastIdx *= (NumEltBits / NumSrcBits);
      else if ((NumSrcBits % NumEltBits) == 0 &&
               (BroadcastIdx % (NumSrcBits / NumEltBits)) == 0)
        BroadcastIdx /= (NumSrcBits / NumEltBits);
      else
        break;
      V = VSrc;
      continue;
    }
    case ISD::CONCAT_VECTORS: {
      int OperandSize =
          V.getOperand(0).getSimpleValueType().getVectorNumElements();
      V = V.getOperand(BroadcastIdx / OperandSize);
      BroadcastIdx %= OperandSize;
      continue;
    }
    case ISD::INSERT_SUBVECTOR: {
      SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
      auto ConstantIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
      if (!ConstantIdx)
        break;

      int BeginIdx = (int)ConstantIdx->getZExtValue();
      int EndIdx =
          BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements();
      if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
        BroadcastIdx -= BeginIdx;
        V = VInner;
      } else {
        V = VOuter;
      }
      continue;
    }
    }
    break;
  }

  // Ensure the source vector and BroadcastIdx are for a suitable type.
  if (VT.getScalarSizeInBits() != V.getScalarValueSizeInBits()) {
    unsigned NumEltBits = VT.getScalarSizeInBits();
    unsigned NumSrcBits = V.getScalarValueSizeInBits();
    if ((NumSrcBits % NumEltBits) == 0)
      BroadcastIdx *= (NumSrcBits / NumEltBits);
    else if ((NumEltBits % NumSrcBits) == 0 &&
             (BroadcastIdx % (NumEltBits / NumSrcBits)) == 0)
      BroadcastIdx /= (NumEltBits / NumSrcBits);
    else
      return SDValue();

    unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
    MVT SrcVT = MVT::getVectorVT(VT.getScalarType(), NumSrcElts);
    V = DAG.getBitcast(SrcVT, V);
  }

  // Check if this is a broadcast of a scalar. We special case lowering
  // for scalars so that we can more effectively fold with loads.
  // First, look through bitcast: if the original value has a larger element
  // type than the shuffle, the broadcast element is in essence truncated.
  // Make that explicit to ease folding.
  if (V.getOpcode() == ISD::BITCAST && VT.isInteger())
    if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast(
            DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
      return TruncBroadcast;

  MVT BroadcastVT = VT;

  // Peek through any bitcast (only useful for loads).
  SDValue BC = peekThroughBitcasts(V);

  // Also check the simpler case, where we can directly reuse the scalar.
  if ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
      (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
    V = V.getOperand(BroadcastIdx);

    // If we can't broadcast from a register, check that the input is a load.
    if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
      return SDValue();
  } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
    // 32-bit targets need to load i64 as a f64 and then bitcast the result.
    if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) {
      BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
      Opcode = (BroadcastVT.is128BitVector() && !Subtarget.hasAVX2())
                   ? X86ISD::MOVDDUP
                   : Opcode;
    }

    // If we are broadcasting a load that is only used by the shuffle
    // then we can reduce the vector load to the broadcasted scalar load.
    LoadSDNode *Ld = cast<LoadSDNode>(BC);
    SDValue BaseAddr = Ld->getOperand(1);
    EVT SVT = BroadcastVT.getScalarType();
    unsigned Offset = BroadcastIdx * SVT.getStoreSize();
    SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
    V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                    DAG.getMachineFunction().getMachineMemOperand(
                        Ld->getMemOperand(), Offset, SVT.getStoreSize()));
    DAG.makeEquivalentMemoryOrdering(Ld, V);
  } else if (!BroadcastFromReg) {
    // We can't broadcast from a vector register.
    return SDValue();
  } else if (BroadcastIdx != 0) {
    // We can only broadcast from the zero-element of a vector register,
    // but it can be advantageous to broadcast from the zero-element of a
    // subvector.
    if (!VT.is256BitVector() && !VT.is512BitVector())
      return SDValue();

    // VPERMQ/VPERMPD can perform the cross-lane shuffle directly.
    if (VT == MVT::v4f64 || VT == MVT::v4i64)
      return SDValue();

    // Only broadcast the zero-element of a 128-bit subvector.
    unsigned EltSize = VT.getScalarSizeInBits();
    if (((BroadcastIdx * EltSize) % 128) != 0)
      return SDValue();

    // The shuffle input might have been a bitcast we looked through; look at
    // the original input vector. Emit an EXTRACT_SUBVECTOR of that type; we'll
    // later bitcast it to BroadcastVT.
    assert(V.getScalarValueSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
           "Unexpected vector element size");
    assert((V.getValueSizeInBits() == 256 || V.getValueSizeInBits() == 512) &&
           "Unexpected vector size");
    V = extract128BitVector(V, BroadcastIdx, DAG, DL);
  }

  if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector())
    V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
                    DAG.getBitcast(MVT::f64, V));

  // Bitcast back to the same scalar type as BroadcastVT.
  MVT SrcVT = V.getSimpleValueType();
  if (SrcVT.getScalarType() != BroadcastVT.getScalarType()) {
    assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
           "Unexpected vector element size");
    if (SrcVT.isVector()) {
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      SrcVT = MVT::getVectorVT(BroadcastVT.getScalarType(), NumSrcElts);
    } else {
      SrcVT = BroadcastVT.getScalarType();
    }
    V = DAG.getBitcast(SrcVT, V);
  }

  // 32-bit targets need to load i64 as a f64 and then bitcast the result.
  if (!Subtarget.is64Bit() && SrcVT == MVT::i64) {
    V = DAG.getBitcast(MVT::f64, V);
    unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
    BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
  }

  // We only support broadcasting from 128-bit vectors to minimize the
  // number of patterns we need to deal with in isel. So extract down to
  // 128-bits, removing as many bitcasts as possible.
  if (SrcVT.getSizeInBits() > 128) {
    MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(),
                                 128 / SrcVT.getScalarSizeInBits());
    V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
    V = DAG.getBitcast(ExtVT, V);
  }

  return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}

// Check for whether we can use INSERTPS to perform the shuffle. We only use
// INSERTPS when the V1 elements are already in the correct locations
// because otherwise we can just always use two SHUFPS instructions which
// are much smaller to encode than a SHUFPS and an INSERTPS. We can also
// perform INSERTPS if a single V1 element is out of place and all V2
// elements are zeroable.
static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
                                         unsigned &InsertPSMask,
                                         const APInt &Zeroable,
                                         ArrayRef<int> Mask,
                                         SelectionDAG &DAG) {
  assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
  assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  // Attempt to match INSERTPS with one element from VA or VB being
  // inserted into VA (or undef). If successful, V1, V2 and InsertPSMask
  // are updated.
  auto matchAsInsertPS = [&](SDValue VA, SDValue VB,
                             ArrayRef<int> CandidateMask) {
    unsigned ZMask = 0;
    int VADstIndex = -1;
    int VBDstIndex = -1;
    bool VAUsedInPlace = false;

    for (int i = 0; i < 4; ++i) {
      // Synthesize a zero mask from the zeroable elements (includes undefs).
      if (Zeroable[i]) {
        ZMask |= 1 << i;
        continue;
      }

      // Flag if we use any VA inputs in place.
      if (i == CandidateMask[i]) {
        VAUsedInPlace = true;
        continue;
      }

      // We can only insert a single non-zeroable element.
      if (VADstIndex >= 0 || VBDstIndex >= 0)
        return false;

      if (CandidateMask[i] < 4) {
        // VA input out of place for insertion.
        VADstIndex = i;
      } else {
        // VB input for insertion.
        VBDstIndex = i;
      }
    }

    // Don't bother if we have no (non-zeroable) element for insertion.
    if (VADstIndex < 0 && VBDstIndex < 0)
      return false;

    // Determine element insertion src/dst indices. The src index is from the
    // start of the inserted vector, not the start of the concatenated vector.
    unsigned VBSrcIndex = 0;
    if (VADstIndex >= 0) {
      // If we have a VA input out of place, we use VA as the V2 element
      // insertion and don't use the original V2 at all.
      VBSrcIndex = CandidateMask[VADstIndex];
      VBDstIndex = VADstIndex;
      VB = VA;
    } else {
      VBSrcIndex = CandidateMask[VBDstIndex] - 4;
    }

    // If no V1 inputs are used in place, then the result is created only from
    // the zero mask and the V2 insertion - so remove V1 dependency.
    if (!VAUsedInPlace)
      VA = DAG.getUNDEF(MVT::v4f32);

    // Update V1, V2 and InsertPSMask accordingly.
    V1 = VA;
    V2 = VB;

    // Insert the V2 element into the desired position.
    InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask;
    assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
    return true;
  };

  if (matchAsInsertPS(V1, V2, Mask))
    return true;

  // Commute and try again.
  SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end());
  ShuffleVectorSDNode::commuteMask(CommutedMask);
  if (matchAsInsertPS(V2, V1, CommutedMask))
    return true;

  return false;
}

static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
                                            SDValue V2, ArrayRef<int> Mask,
                                            const APInt &Zeroable,
                                            SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");

  // Attempt to match the insertps pattern.
  unsigned InsertPSMask;
  if (!matchVectorShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
    return SDValue();

  // Insert the V2 element into the desired position.
  return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
                     DAG.getConstant(InsertPSMask, DL, MVT::i8));
}

/// Try to lower a shuffle as a permute of the inputs followed by an
/// UNPCK instruction.
///
/// This specifically targets cases where we end up with alternating between
/// the two inputs, and so can permute them into something that feeds a single
/// UNPCK instruction.
/// Note that this routine only targets integer vectors
/// because for floating point vectors we have a generalized SHUFPS lowering
/// strategy that handles everything that doesn't *exactly* match an unpack,
/// making this clever lowering unnecessary.
static SDValue lowerVectorShuffleAsPermuteAndUnpack(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  assert(!VT.isFloatingPoint() &&
         "This routine only supports integer vectors.");
  assert(VT.is128BitVector() &&
         "This routine only works on 128-bit vectors.");
  assert(!V2.isUndef() &&
         "This routine should only be used when blending two inputs.");
  assert(Mask.size() >= 2 && "Single element masks are invalid.");

  int Size = Mask.size();

  int NumLoInputs =
      count_if(Mask, [Size](int M) { return M >= 0 && M % Size < Size / 2; });
  int NumHiInputs =
      count_if(Mask, [Size](int M) { return M % Size >= Size / 2; });

  bool UnpackLo = NumLoInputs >= NumHiInputs;

  auto TryUnpack = [&](int ScalarSize, int Scale) {
    SmallVector<int, 16> V1Mask((unsigned)Size, -1);
    SmallVector<int, 16> V2Mask((unsigned)Size, -1);

    for (int i = 0; i < Size; ++i) {
      if (Mask[i] < 0)
        continue;

      // Each element of the unpack contains Scale elements from this mask.
      int UnpackIdx = i / Scale;

      // We only handle the case where V1 feeds the first slots of the unpack.
      // We rely on canonicalization to ensure this is the case.
      if ((UnpackIdx % 2 == 0) != (Mask[i] < Size))
        return SDValue();

      // Setup the mask for this input. The indexing is tricky as we have to
      // handle the unpack stride.
      SmallVectorImpl<int> &VMask = (UnpackIdx % 2 == 0) ? V1Mask : V2Mask;
      VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] =
          Mask[i] % Size;
    }

    // If we will have to shuffle both inputs to use the unpack, check whether
    // we can just unpack first and shuffle the result. If so, skip this unpack.
    if ((NumLoInputs == 0 || NumHiInputs == 0) && !isNoopShuffleMask(V1Mask) &&
        !isNoopShuffleMask(V2Mask))
      return SDValue();

    // Shuffle the inputs into place.
    V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
    V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);

    // Cast the inputs to the type we will use to unpack them.
    MVT UnpackVT =
        MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale);
    V1 = DAG.getBitcast(UnpackVT, V1);
    V2 = DAG.getBitcast(UnpackVT, V2);

    // Unpack the inputs and cast the result back to the desired type.
    return DAG.getBitcast(
        VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
                        UnpackVT, V1, V2));
  };

  // We try each unpack from the largest to the smallest to try and find one
  // that fits this mask.
  int OrigScalarSize = VT.getScalarSizeInBits();
  for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2)
    if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
      return Unpack;

  // If we're shuffling with a zero vector then we're better off not doing
  // VECTOR_SHUFFLE(UNPCK()) as we lose track of those zero elements.
  if (ISD::isBuildVectorAllZeros(V1.getNode()) ||
      ISD::isBuildVectorAllZeros(V2.getNode()))
    return SDValue();

  // If none of the unpack-rooted lowerings worked (or were profitable) try an
  // initial unpack.
  if (NumLoInputs == 0 || NumHiInputs == 0) {
    assert((NumLoInputs > 0 || NumHiInputs > 0) &&
           "We have to have *some* inputs!");
    int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0;

    // FIXME: We could consider the total complexity of the permute of each
    // possible unpacking. Or at the least we should consider how many
    // half-crossings are created.
    // FIXME: We could consider commuting the unpacks.

    SmallVector<int, 32> PermMask((unsigned)Size, -1);
    for (int i = 0; i < Size; ++i) {
      if (Mask[i] < 0)
        continue;

      assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!");

      PermMask[i] =
          2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1);
    }
    return DAG.getVectorShuffle(
        VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL,
                            DL, VT, V1, V2),
        DAG.getUNDEF(VT), PermMask);
  }

  return SDValue();
}

/// Handle lowering of 2-lane 64-bit floating point shuffles.
///
/// This is the basis function for the 2-lane 64-bit shuffles as we have full
/// support for floating point shuffles but not integer shuffles. These
/// instructions will incur a domain crossing penalty on some chips though so
/// it is better to avoid lowering through this for integer vectors where
/// possible.
static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                       const APInt &Zeroable, SDValue V1,
                                       SDValue V2,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
  assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");

  if (V2.isUndef()) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
            DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG))
      return Broadcast;

    // Straight shuffle of a single input vector. Simulate this by using the
    // single input as both of the "inputs" to this instruction.
    unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);

    if (Subtarget.hasAVX()) {
      // If we have AVX, we can use VPERMILPS which will allow folding a load
      // into the shuffle.
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
                         DAG.getConstant(SHUFPDMask, DL, MVT::i8));
    }

    return DAG.getNode(
        X86ISD::SHUFP, DL, MVT::v2f64,
        Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
        Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
        DAG.getConstant(SHUFPDMask, DL, MVT::i8));
  }
  assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!");
  assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!");
  assert(Mask[0] < 2 && "We sort V1 to be the first input.");
  assert(Mask[1] >= 2 && "We sort V2 to be the second input.");

  // When loading a scalar and then shuffling it into a vector we can often do
  // the insertion cheaply.
  if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
          DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG))
    return Insertion;

  // Try inverting the insertion since for v2 masks it is easy to do and we
  // can't reliably sort the mask one way or the other.
  int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
                        Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
  if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
          DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
    return Insertion;

  // Try to use one of the special instruction patterns to handle two common
  // blend patterns if a zero-blend above didn't work.
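  // For example (illustrative): Mask = {0, 3} keeps V2's high double and
  // replaces its low double with V1's low element, which is exactly the MOVSD
  // pattern built below.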
  if (isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
      isShuffleEquivalent(V1, V2, Mask, {1, 3}))
    if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
      // We can either use a special instruction to load over the low double or
      // to move just the low double.
      return DAG.getNode(
          X86ISD::MOVSD, DL, MVT::v2f64, V2,
          DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));

  if (Subtarget.hasSSE41())
    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
                                                  Zeroable, Subtarget, DAG))
      return Blend;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V =
          lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
    return V;

  unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
  return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
                     DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}

/// Handle lowering of 2-lane 64-bit integer shuffles.
///
/// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by
/// the integer unit to minimize domain crossing penalties. However, for blends
/// it falls back to the floating point shuffle operation with appropriate bit
/// casting.
static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                       const APInt &Zeroable, SDValue V1,
                                       SDValue V2,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
  assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");

  if (V2.isUndef()) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
            DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
      return Broadcast;

    // Straight shuffle of a single input vector. For everything from SSE2
    // onward this has a single fast instruction with no scary immediates.
    // We have to map the mask as it is actually a v4i32 shuffle instruction.
    V1 = DAG.getBitcast(MVT::v4i32, V1);
    int WidenedMask[4] = {
        std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
        std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
    return DAG.getBitcast(
        MVT::v2i64,
        DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
                    getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG)));
  }
  assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!");
  assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!");
  assert(Mask[0] < 2 && "We sort V1 to be the first input.");
  assert(Mask[1] >= 2 && "We sort V2 to be the second input.");

  // Try to use shift instructions.
  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
                                                Zeroable, Subtarget, DAG))
    return Shift;

  // When loading a scalar and then shuffling it into a vector we can often do
  // the insertion cheaply.
  if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
          DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG))
    return Insertion;

  // Try inverting the insertion since for v2 masks it is easy to do and we
  // can't reliably sort the mask one way or the other.
  int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
  if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
          DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
    return Insertion;

  // We have different paths for blend lowering, but they all must use the
  // *exact* same predicate.
  bool IsBlendSupported = Subtarget.hasSSE41();
  if (IsBlendSupported)
    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
                                                  Zeroable, Subtarget, DAG))
      return Blend;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V =
          lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
    return V;

  // Try to use byte rotation instructions.
  // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
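  // For example (illustrative): for v2i64 a mask of {1, 2} takes V1's high
  // element followed by V2's low element, which a PALIGNR can produce as a
  // single 8-byte rotate across the concatenated pair.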
  if (Subtarget.hasSSSE3()) {
    if (Subtarget.hasVLX())
      if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v2i64, V1, V2,
                                                      Mask, Subtarget, DAG))
        return Rotate;

    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
            DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
      return Rotate;
  }

  // If we have direct support for blends, we should lower by decomposing into
  // a permute. That will be faster than the domain cross.
  if (IsBlendSupported)
    return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2,
                                                      Mask, Subtarget, DAG);

  // We implement this with SHUFPD which is pretty lame because it will likely
  // incur 2 cycles of stall for integer vectors on Nehalem and older chips.
  // However, all the alternatives are still more cycles and newer chips don't
  // have this problem. It would be really nice if x86 had better shuffles here.
  V1 = DAG.getBitcast(MVT::v2f64, V1);
  V2 = DAG.getBitcast(MVT::v2f64, V2);
  return DAG.getBitcast(MVT::v2i64,
                        DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}

/// Test whether this can be lowered with a single SHUFPS instruction.
///
/// This is used to disable more specialized lowerings when the shufps lowering
/// will happen to be efficient.
static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
  // This routine only handles 128-bit shufps.
  assert(Mask.size() == 4 && "Unsupported mask size!");
  assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!");
  assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!");
  assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!");
  assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!");

  // To lower with a single SHUFPS we need to have the low half and high half
  // each requiring a single input.
  if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4))
    return false;
  if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4))
    return false;

  return true;
}

/// Lower a vector shuffle using the SHUFPS instruction.
///
/// This is a helper routine dedicated to lowering vector shuffles using SHUFPS.
/// It makes no assumptions about whether this is the *best* lowering, it simply
/// uses it.
static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
                                            ArrayRef<int> Mask, SDValue V1,
                                            SDValue V2, SelectionDAG &DAG) {
  SDValue LowV = V1, HighV = V2;
  int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};

  int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });

  if (NumV2Elements == 1) {
    int V2Index = find_if(Mask, [](int M) { return M >= 4; }) - Mask.begin();

    // Compute the index adjacent to V2Index and in the same half by toggling
    // the low bit.
    int V2AdjIndex = V2Index ^ 1;

    if (Mask[V2AdjIndex] < 0) {
      // Handles all the cases where we have a single V2 element and an undef.
      // This will only ever happen in the high lanes because we commute the
      // vector otherwise.
      if (V2Index < 2)
        std::swap(LowV, HighV);
      NewMask[V2Index] -= 4;
    } else {
      // Handle the case where the V2 element ends up adjacent to a V1 element.
      // To make this work, blend them together as the first step.
      int V1Index = V2AdjIndex;
      int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
      V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
                       getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));

      // Now proceed to reconstruct the final blend as we have the necessary
      // high or low half formed.
      if (V2Index < 2) {
        LowV = V2;
        HighV = V1;
      } else {
        HighV = V2;
      }
      NewMask[V1Index] = 2; // We put the V1 element in V2[2].
      NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
    }
  } else if (NumV2Elements == 2) {
    if (Mask[0] < 4 && Mask[1] < 4) {
      // Handle the easy case where we have V1 in the low lanes and V2 in the
      // high lanes.
      NewMask[2] -= 4;
      NewMask[3] -= 4;
    } else if (Mask[2] < 4 && Mask[3] < 4) {
      // We also handle the reversed case because this utility may get called
      // when we detect a SHUFPS pattern but can't easily commute the shuffle to
      // arrange things in the right direction.
      NewMask[0] -= 4;
      NewMask[1] -= 4;
      HighV = V1;
      LowV = V2;
    } else {
      // We have a mixture of V1 and V2 in both low and high lanes. Rather than
      // trying to place elements directly, just blend them and set up the final
      // shuffle to place them.

      // The first two blend mask elements are for V1, the second two are for
      // V2.
      int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
                          Mask[2] < 4 ? Mask[2] : Mask[3],
                          (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
                          (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
      V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
                       getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));

      // Now we do a normal shuffle of V1 by giving V1 as both operands to
      // a blend.
      LowV = HighV = V1;
      NewMask[0] = Mask[0] < 4 ? 0 : 2;
      NewMask[1] = Mask[0] < 4 ? 2 : 0;
      NewMask[2] = Mask[2] < 4 ? 1 : 3;
      NewMask[3] = Mask[2] < 4 ? 3 : 1;
    }
  }
  return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
                     getV4X86ShuffleImm8ForMask(NewMask, DL, DAG));
}

/// Lower 4-lane 32-bit floating point shuffles.
///
/// Uses instructions exclusively from the floating point unit to minimize
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                       const APInt &Zeroable, SDValue V1,
                                       SDValue V2,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });

  if (NumV2Elements == 0) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
            DL, MVT::v4f32, V1, V2, Mask, Subtarget, DAG))
      return Broadcast;

    // Use even/odd duplicate instructions for masks that match their pattern.
    if (Subtarget.hasSSE3()) {
      if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
        return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
      if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3}))
        return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
    }

    if (Subtarget.hasAVX()) {
      // If we have AVX, we can use VPERMILPS which will allow folding a load
      // into the shuffle.
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
                         getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
    }

    // Use MOVLHPS/MOVHLPS to simulate unary shuffles. These are only valid
    // in SSE1 because otherwise they are widened to v2f64 and never get here.
    if (!Subtarget.hasSSE2()) {
      if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}))
        return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V1);
      if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 2, 3}))
        return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V1, V1);
    }

    // Otherwise, use a straight shuffle of a single input vector. We pass the
    // input vector to both operands to simulate this with a SHUFPS.
    return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
                       getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
  }
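  // As a reminder of the immediate encoding used above: the SHUFPS immediate
  // packs four 2-bit source indices, low element first, so a mask of
  // {3, 1, 2, 0} becomes 3 | (1 << 2) | (2 << 4) | (0 << 6) == 0x27.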
  // There are special ways we can lower some single-element blends. However,
  // we have custom ways we can lower more complex single-element blends below
  // that we defer to if both this and BLENDPS fail to match, so restrict this
  // to when the V2 input is targeting element 0 of the mask -- that is the
  // fast case here.
  if (NumV2Elements == 1 && Mask[0] >= 4)
    if (SDValue V = lowerVectorShuffleAsElementInsertion(
            DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return V;

  if (Subtarget.hasSSE41()) {
    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
                                                  Zeroable, Subtarget, DAG))
      return Blend;

    // Use INSERTPS if we can complete the shuffle efficiently.
    if (SDValue V =
            lowerVectorShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
      return V;

    if (!isSingleSHUFPSMask(Mask))
      if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
              DL, MVT::v4f32, V1, V2, Mask, DAG))
        return BlendPerm;
  }

  // Use low/high mov instructions. These are only valid in SSE1 because
  // otherwise they are widened to v2f64 and never get here.
  if (!Subtarget.hasSSE2()) {
    if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
      return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
    if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
      return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
  }

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V =
          lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
    return V;

  // Otherwise fall back to a SHUFPS lowering strategy.
  return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}

/// Lower 4-lane i32 vector shuffles.
///
/// We try to handle these with integer-domain shuffles where we can, but for
/// blends we use the floating point domain blend instructions.
static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                       const APInt &Zeroable, SDValue V1,
                                       SDValue V2,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  // Whenever we can lower this as a zext, that instruction is strictly faster
  // than any alternative. It also allows us to fold memory operands into the
  // shuffle in many cases.
  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
          DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
    return ZExt;

  int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });

  if (NumV2Elements == 0) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
            DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
      return Broadcast;

    // Straight shuffle of a single input vector. For everything from SSE2
    // onward this has a single fast instruction with no scary immediates.
    // We coerce the shuffle pattern to be compatible with UNPCK instructions
    // but we aren't actually going to use the UNPCK instruction because doing
    // so prevents folding a load into this instruction or making a copy.
    const int UnpackLoMask[] = {0, 0, 1, 1};
    const int UnpackHiMask[] = {2, 2, 3, 3};
    if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 1, 1}))
      Mask = UnpackLoMask;
    else if (isShuffleEquivalent(V1, V2, Mask, {2, 2, 3, 3}))
      Mask = UnpackHiMask;

    return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
                       getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
  }

  // Try to use shift instructions.
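  // (For example, a v4i32 mask of {Z, 0, 1, 2} with a zeroable first lane can
  // be emitted as a whole-vector byte shift left, e.g. PSLLDQ by 4 bytes --
  // an illustrative sketch of the patterns matched here.)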
  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
                                                Zeroable, Subtarget, DAG))
    return Shift;

  // There are special ways we can lower some single-element blends.
  if (NumV2Elements == 1)
    if (SDValue V = lowerVectorShuffleAsElementInsertion(
            DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return V;

  // We have different paths for blend lowering, but they all must use the
  // *exact* same predicate.
  bool IsBlendSupported = Subtarget.hasSSE41();
  if (IsBlendSupported)
    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
                                                  Zeroable, Subtarget, DAG))
      return Blend;

  if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2,
                                                   Mask, Zeroable, DAG))
    return Masked;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V =
          lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
    return V;

  // Try to use byte rotation instructions.
  // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
  if (Subtarget.hasSSSE3()) {
    if (Subtarget.hasVLX())
      if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i32, V1, V2,
                                                      Mask, Subtarget, DAG))
        return Rotate;

    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
            DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
      return Rotate;
  }

  // Assume that a single SHUFPS is faster than an alternative sequence of
  // multiple instructions (even if the CPU has a domain penalty).
  // If some CPU is harmed by the domain switch, we can fix it in a later pass.
  if (!isSingleSHUFPSMask(Mask)) {
    // If we have direct support for blends, we should lower by decomposing
    // into a permute. That will be faster than the domain cross.
    if (IsBlendSupported)
      return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
                                                        Mask, Subtarget, DAG);

    // Try to lower by permuting the inputs into an unpack instruction.
    if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
            DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
      return Unpack;
  }

  // We implement this with SHUFPS because it can blend from two vectors.
  // Because we're going to eventually use SHUFPS, we use SHUFPS even to build
  // up the inputs, bypassing domain shift penalties that we would incur if we
  // directly used PSHUFD on Nehalem and older. For newer chips, this isn't
  // relevant.
  SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1);
  SDValue CastV2 = DAG.getBitcast(MVT::v4f32, V2);
  SDValue ShufPS = DAG.getVectorShuffle(MVT::v4f32, DL, CastV1, CastV2, Mask);
  return DAG.getBitcast(MVT::v4i32, ShufPS);
}

/// Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
/// shuffle lowering, and the most complex part.
///
/// The lowering strategy is to try to form pairs of input lanes which are
/// targeted at the same half of the final vector, and then use a dword shuffle
/// to place them onto the right half, and finally unpack the paired lanes into
/// their final position.
///
/// The exact breakdown of how to form these dword pairs and align them on the
/// correct sides is really tricky. See the comments within the function for
/// more of the details.
///
/// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each
/// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to
/// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
/// vector, form the analogous 128-bit 8-element Mask.
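/// As an illustrative example: a single-input Mask of {2, 3, 0, 1, 6, 7, 4, 5}
/// already pairs words into dwords, so it reduces to the single dword shuffle
/// PSHUFD {1, 0, 3, 2} with no word shuffles needed.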
static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
    const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
  MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);

  assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
  MutableArrayRef<int> LoMask = Mask.slice(0, 4);
  MutableArrayRef<int> HiMask = Mask.slice(4, 4);

  // Attempt to directly match PSHUFLW or PSHUFHW.
  if (isUndefOrInRange(LoMask, 0, 4) &&
      isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
    return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
                       getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));
  }
  if (isUndefOrInRange(HiMask, 4, 8) &&
      isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
    for (int i = 0; i != 4; ++i)
      HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4));
    return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
                       getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));
  }

  SmallVector<int, 4> LoInputs;
  copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
  array_pod_sort(LoInputs.begin(), LoInputs.end());
  LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
                 LoInputs.end());
  SmallVector<int, 4> HiInputs;
  copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
  array_pod_sort(HiInputs.begin(), HiInputs.end());
  HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
                 HiInputs.end());
  int NumLToL =
      std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin();
  int NumHToL = LoInputs.size() - NumLToL;
  int NumLToH =
      std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin();
  int NumHToH = HiInputs.size() - NumLToH;
  MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
  MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
  MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
  MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);

  // If we are shuffling values from one half - check how many different DWORD
  // pairs we need to create. If only 1 or 2 then we can perform this as a
  // PSHUFLW/PSHUFHW + PSHUFD instead of the PSHUFD+PSHUFLW+PSHUFHW chain below.
  auto ShuffleDWordPairs = [&](ArrayRef<int> PSHUFHalfMask,
                               ArrayRef<int> PSHUFDMask, unsigned ShufWOp) {
    V = DAG.getNode(ShufWOp, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
    V = DAG.getBitcast(PSHUFDVT, V);
    V = DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, V,
                    getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
    return DAG.getBitcast(VT, V);
  };

  if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) {
    int PSHUFDMask[4] = { -1, -1, -1, -1 };
    SmallVector<std::pair<int, int>, 4> DWordPairs;
    int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2);

    // Collect the different DWORD pairs.
    for (int DWord = 0; DWord != 4; ++DWord) {
      int M0 = Mask[2 * DWord + 0];
      int M1 = Mask[2 * DWord + 1];
      M0 = (M0 >= 0 ? M0 % 4 : M0);
      M1 = (M1 >= 0 ? M1 % 4 : M1);
      if (M0 < 0 && M1 < 0)
        continue;
      bool Match = false;
      for (int j = 0, e = DWordPairs.size(); j < e; ++j) {
        auto &DWordPair = DWordPairs[j];
        if ((M0 < 0 || isUndefOrEqual(DWordPair.first, M0)) &&
            (M1 < 0 || isUndefOrEqual(DWordPair.second, M1))) {
          DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first);
          DWordPair.second = (M1 >= 0 ? M1 : DWordPair.second);
          PSHUFDMask[DWord] = DOffset + j;
          Match = true;
          break;
        }
      }
      if (!Match) {
        PSHUFDMask[DWord] = DOffset + DWordPairs.size();
        DWordPairs.push_back(std::make_pair(M0, M1));
      }
    }

    if (DWordPairs.size() <= 2) {
      DWordPairs.resize(2, std::make_pair(-1, -1));
      int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second,
                              DWordPairs[1].first, DWordPairs[1].second};
      if ((NumHToL + NumHToH) == 0)
        return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW);
      if ((NumLToL + NumLToH) == 0)
        return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW);
    }
  }

  // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
  // such inputs we can swap two of the dwords across the half mark and end up
  // with <=2 inputs to each half in each half. Once there, we can fall through
  // to the generic code below. For example:
  //
  // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
  // Mask:  [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]
  //
  // However in some very rare cases we have a 1-into-3 or 3-into-1 on one half
  // and an existing 2-into-2 on the other half. In this case we may have to
  // pre-shuffle the 2-into-2 half to avoid turning it into a 3-into-1 or
  // 1-into-3 which could cause us to cycle endlessly fixing each side in turn.
  // Fortunately, we don't have to handle anything but a 2-into-2 pattern
  // because any other situation (including a 3-into-1 or 1-into-3 in the other
  // half than the one we target for fixing) will be fixed when we re-enter this
  // path. We will also combine away any sequence of PSHUFD instructions that
  // result into a single instruction. Here is an example of the tricky case:
  //
  // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
  // Mask:  [3, 7, 1, 0, 2, 7, 3, 5] -THIS-IS-BAD!!!!-> [5, 7, 1, 0, 4, 7, 5, 3]
  //
  // This now has a 1-into-3 in the high half! Instead, we do two shuffles:
  //
  // Input: [a, b, c, d, e, f, g, h] PSHUFHW[0,2,1,3]-> [a, b, c, d, e, g, f, h]
  // Mask:  [3, 7, 1, 0, 2, 7, 3, 5] -----------------> [3, 7, 1, 0, 2, 7, 3, 6]
  //
  // Input: [a, b, c, d, e, g, f, h] -PSHUFD[0,2,1,3]-> [a, b, e, g, c, d, f, h]
  // Mask:  [3, 7, 1, 0, 2, 7, 3, 6] -----------------> [5, 7, 1, 0, 4, 7, 5, 6]
  //
  // The result is fine to be handled by the generic logic.
  auto balanceSides = [&](ArrayRef<int> AToAInputs, ArrayRef<int> BToAInputs,
                          ArrayRef<int> BToBInputs, ArrayRef<int> AToBInputs,
                          int AOffset, int BOffset) {
    assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) &&
           "Must call this with A having 3 or 1 inputs from the A half.");
    assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) &&
           "Must call this with B having 1 or 3 inputs from the B half.");
    assert(AToAInputs.size() + BToAInputs.size() == 4 &&
           "Must call this with either 3:1 or 1:3 inputs (summing to 4).");

    bool ThreeAInputs = AToAInputs.size() == 3;

    // Compute the index of dword with only one word among the three inputs in
    // a half by taking the sum of the half with three inputs and subtracting
    // the sum of the actual three inputs. The difference is the remaining
    // slot.
    int ADWord, BDWord;
    int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
    int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
    int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
    ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs;
    int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
    int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
    int TripleNonInputIdx =
        TripleInputSum - std::accumulate(TripleInputs.begin(),
                                         TripleInputs.end(), 0);
    TripleDWord = TripleNonInputIdx / 2;

    // We use xor with one to compute the adjacent DWord to whichever one the
    // OneInput is in.
    OneInputDWord = (OneInput / 2) ^ 1;

    // Check for one tricky case: We're fixing a 3<-1 or a 1<-3 shuffle for AToA
    // and BToA inputs. If there is also such a problem with the BToB and AToB
    // inputs, we don't try to fix it necessarily -- we'll recurse and see it in
    // the next pass. However, if we have a 2<-2 in the BToB and AToB inputs, it
    // is essential that we don't *create* a 3<-1 as then we might oscillate.
    if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
      // Compute how many inputs will be flipped by swapping these DWords. We
      // need to balance this to ensure we don't form a 3-1 shuffle in the
      // other half.
      int NumFlippedAToBInputs =
          std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) +
          std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1);
      int NumFlippedBToBInputs =
          std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) +
          std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1);
      if ((NumFlippedAToBInputs == 1 &&
           (NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
          (NumFlippedBToBInputs == 1 &&
           (NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) {
        // We choose whether to fix the A half or B half based on whether that
        // half has zero flipped inputs. At zero, we may not be able to fix it
        // with that half. We also bias towards fixing the B half because that
        // will more commonly be the high half, and we have to bias one way.
        auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
                                                       ArrayRef<int> Inputs) {
          int FixIdx = PinnedIdx ^ 1; // The adjacent slot to the pinned slot.
          bool IsFixIdxInput = is_contained(Inputs, PinnedIdx ^ 1);
          // Determine whether the free index is in the flipped dword or the
          // unflipped dword based on where the pinned index is. We use this bit
          // in an xor to conditionally select the adjacent dword.
          int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
          bool IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
          if (IsFixIdxInput == IsFixFreeIdxInput)
            FixFreeIdx += 1;
          IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
          assert(IsFixIdxInput != IsFixFreeIdxInput &&
                 "We need to be changing the number of flipped inputs!");
          int PSHUFHalfMask[] = {0, 1, 2, 3};
          std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
          V = DAG.getNode(
              FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
              MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V,
              getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));

          for (int &M : Mask)
            if (M >= 0 && M == FixIdx)
              M = FixFreeIdx;
            else if (M >= 0 && M == FixFreeIdx)
              M = FixIdx;
        };
        if (NumFlippedBToBInputs != 0) {
          int BPinnedIdx =
              BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
          FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
        } else {
          assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
          int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput;
          FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
        }
      }
    }

    int PSHUFDMask[] = {0, 1, 2, 3};
    PSHUFDMask[ADWord] = BDWord;
    PSHUFDMask[BDWord] = ADWord;
    V = DAG.getBitcast(
        VT,
        DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT,
                    DAG.getBitcast(PSHUFDVT, V),
                    getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));

    // Adjust the mask to match the new locations of A and B.
    for (int &M : Mask)
      if (M >= 0 && M/2 == ADWord)
        M = 2 * BDWord + M % 2;
      else if (M >= 0 && M/2 == BDWord)
        M = 2 * ADWord + M % 2;

    // Recurse back into this routine to re-compute state now that this isn't
    // a 3 and 1 problem.
    return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask,
                                                     Subtarget, DAG);
  };
  if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
    return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
  if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
    return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);

  // At this point there are at most two inputs to the low and high halves from
  // each half. That means the inputs can always be grouped into dwords and
  // those dwords can then be moved to the correct half with a dword shuffle.
  // We use at most one low and one high word shuffle to collect these paired
  // inputs into dwords, and finally a dword shuffle to place them.
  int PSHUFLMask[4] = {-1, -1, -1, -1};
  int PSHUFHMask[4] = {-1, -1, -1, -1};
  int PSHUFDMask[4] = {-1, -1, -1, -1};

  // First fix the masks for all the inputs that are staying in their
  // original halves. This will then dictate the targets of the cross-half
  // shuffles.
  auto fixInPlaceInputs =
      [&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs,
                    MutableArrayRef<int> SourceHalfMask,
                    MutableArrayRef<int> HalfMask, int HalfOffset) {
    if (InPlaceInputs.empty())
      return;
    if (InPlaceInputs.size() == 1) {
      SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
          InPlaceInputs[0] - HalfOffset;
      PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
      return;
    }
    if (IncomingInputs.empty()) {
      // Just fix all of the in place inputs.
      for (int Input : InPlaceInputs) {
        SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
        PSHUFDMask[Input / 2] = Input / 2;
      }
      return;
    }

    assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
    SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
        InPlaceInputs[0] - HalfOffset;
    // Put the second input next to the first so that they are packed into
    // a dword. We find the adjacent index by toggling the low bit.
    int AdjIndex = InPlaceInputs[0] ^ 1;
    SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
    std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
    PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
  };
  fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
  fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);

  // Now gather the cross-half inputs and place them into a free dword of
  // their target half.
  // FIXME: This operation could almost certainly be simplified dramatically to
  // look more like the 3-1 fixing operation.
  auto moveInputsToRightHalf = [&PSHUFDMask](
      MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
      MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
      MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset,
      int DestOffset) {
    auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
      return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word;
    };
    auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
                                               int Word) {
      int LowWord = Word & ~1;
      int HighWord = Word | 1;
      return isWordClobbered(SourceHalfMask, LowWord) ||
             isWordClobbered(SourceHalfMask, HighWord);
    };

    if (IncomingInputs.empty())
      return;

    if (ExistingInputs.empty()) {
      // Map any dwords with inputs from them into the right half.
      for (int Input : IncomingInputs) {
        // If the source half mask maps over the inputs, turn those into
        // swaps and use the swapped lane.
        if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
          if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) {
            SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
                Input - SourceOffset;

            // We have to swap the uses in our half mask in one sweep.
            for (int &M : HalfMask)
              if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)
                M = Input;
              else if (M == Input)
                M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
          } else {
            assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
                       Input - SourceOffset &&
                   "Previous placement doesn't match!");
          }
          // Note that this correctly re-maps both when we do a swap and when
          // we observe the other side of the swap above. We rely on that to
          // avoid swapping the members of the input list directly.
          Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
        }

        // Map the input's dword into the correct half.
        if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0)
          PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
        else
          assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
                     Input / 2 &&
                 "Previous placement doesn't match!");
      }

      // And just directly shift any other-half mask elements to be same-half
      // as we will have mirrored the dword containing the element into the
      // same position within that half.
      for (int &M : HalfMask)
        if (M >= SourceOffset && M < SourceOffset + 4) {
          M = M - SourceOffset + DestOffset;
          assert(M >= 0 && "This should never wrap below zero!");
        }
      return;
    }

    // Ensure we have the input in a viable dword of its current half. This
    // is particularly tricky because the original position may be clobbered
    // by inputs being moved and *staying* in that half.
    if (IncomingInputs.size() == 1) {
      if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
        int InputFixed = find(SourceHalfMask, -1) -
                         std::begin(SourceHalfMask) + SourceOffset;
        SourceHalfMask[InputFixed - SourceOffset] =
            IncomingInputs[0] - SourceOffset;
        std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
                     InputFixed);
        IncomingInputs[0] = InputFixed;
      }
    } else if (IncomingInputs.size() == 2) {
      if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
          isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
        // We have two non-adjacent or clobbered inputs we need to extract from
        // the source half. To do this, we need to map them into some adjacent
        // dword slot in the source mask.
        int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,
                              IncomingInputs[1] - SourceOffset};

        // If there is a free slot in the source half mask adjacent to one of
        // the inputs, place the other input in it. We use (Index XOR 1) to
        // compute an adjacent index.
        if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) &&
            SourceHalfMask[InputsFixed[0] ^ 1] < 0) {
          SourceHalfMask[InputsFixed[0]] = InputsFixed[0];
          SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
          InputsFixed[1] = InputsFixed[0] ^ 1;
        } else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) &&
                   SourceHalfMask[InputsFixed[1] ^ 1] < 0) {
          SourceHalfMask[InputsFixed[1]] = InputsFixed[1];
          SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0];
          InputsFixed[0] = InputsFixed[1] ^ 1;
        } else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] < 0 &&
                   SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] < 0) {
          // The two inputs are in the same DWord but it is clobbered and the
          // adjacent DWord isn't used at all. Move both inputs to the free
          // slot.
          SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0];
          SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1];
          InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1);
          InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1;
        } else {
          // The only way we hit this point is if there is no clobbering
          // (because there are no off-half inputs to this half) and there is
          // no free slot adjacent to one of the inputs. In this case, we have
          // to swap an input with a non-input.
          for (int i = 0; i < 4; ++i)
            assert((SourceHalfMask[i] < 0 || SourceHalfMask[i] == i) &&
                   "We can't handle any clobbers here!");
          assert(InputsFixed[1] != (InputsFixed[0] ^ 1) &&
                 "Cannot have adjacent inputs here!");

          SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
          SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1;

          // We also have to update the final source mask in this case because
          // it may need to undo the above swap.
          for (int &M : FinalSourceHalfMask)
            if (M == (InputsFixed[0] ^ 1) + SourceOffset)
              M = InputsFixed[1] + SourceOffset;
            else if (M == InputsFixed[1] + SourceOffset)
              M = (InputsFixed[0] ^ 1) + SourceOffset;

          InputsFixed[1] = InputsFixed[0] ^ 1;
        }

        // Point everything at the fixed inputs.
        for (int &M : HalfMask)
          if (M == IncomingInputs[0])
            M = InputsFixed[0] + SourceOffset;
          else if (M == IncomingInputs[1])
            M = InputsFixed[1] + SourceOffset;

        IncomingInputs[0] = InputsFixed[0] + SourceOffset;
        IncomingInputs[1] = InputsFixed[1] + SourceOffset;
      }
    } else {
      llvm_unreachable("Unhandled input size!");
    }

    // Now hoist the DWord down to the right half.
    int FreeDWord = (PSHUFDMask[DestOffset / 2] < 0 ? 0 : 1) + DestOffset / 2;
    assert(PSHUFDMask[FreeDWord] < 0 && "DWord not free");
    PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
    for (int &M : HalfMask)
      for (int Input : IncomingInputs)
        if (M == Input)
          M = FreeDWord * 2 + Input % 2;
  };
  moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask,
                        /*SourceOffset*/ 4, /*DestOffset*/ 0);
  moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask,
                        /*SourceOffset*/ 0, /*DestOffset*/ 4);

  // Now enact all the shuffles we've computed to move the inputs into their
  // target half.
  if (!isNoopShuffleMask(PSHUFLMask))
    V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(PSHUFLMask, DL, DAG));
  if (!isNoopShuffleMask(PSHUFHMask))
    V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG));
  if (!isNoopShuffleMask(PSHUFDMask))
    V = DAG.getBitcast(
        VT,
        DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT,
                    DAG.getBitcast(PSHUFDVT, V),
                    getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));

  // At this point, each half should contain all its inputs, and we can then
  // just shuffle them into their final position.
  assert(count_if(LoMask, [](int M) { return M >= 4; }) == 0 &&
         "Failed to lift all the high half inputs to the low mask!");
  assert(count_if(HiMask, [](int M) { return M >= 0 && M < 4; }) == 0 &&
         "Failed to lift all the low half inputs to the high mask!");

  // Do a half shuffle for the low mask.
  if (!isNoopShuffleMask(LoMask))
    V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));

  // Do a half shuffle with the high mask after shifting its values down.
  for (int &M : HiMask)
    if (M >= 0)
      M -= 4;
  if (!isNoopShuffleMask(HiMask))
    V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));

  return V;
}

/// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
/// blend if only one input is used.
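/// As a reminder of the PSHUFB semantics relied on below: each control byte
/// selects a source byte by index, and a control byte with its high bit set
/// (0x80) writes a zero into that lane instead.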
static SDValue lowerVectorShuffleAsBlendOfPSHUFBs( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) { assert(!is128BitLaneCrossingShuffleMask(VT, Mask) && "Lane crossing shuffle masks not supported"); int NumBytes = VT.getSizeInBits() / 8; int Size = Mask.size(); int Scale = NumBytes / Size; SmallVector V1Mask(NumBytes, DAG.getUNDEF(MVT::i8)); SmallVector V2Mask(NumBytes, DAG.getUNDEF(MVT::i8)); V1InUse = false; V2InUse = false; for (int i = 0; i < NumBytes; ++i) { int M = Mask[i / Scale]; if (M < 0) continue; const int ZeroMask = 0x80; int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask; int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale; if (Zeroable[i / Scale]) V1Idx = V2Idx = ZeroMask; V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8); V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8); V1InUse |= (ZeroMask != V1Idx); V2InUse |= (ZeroMask != V2Idx); } MVT ShufVT = MVT::getVectorVT(MVT::i8, NumBytes); if (V1InUse) V1 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V1), DAG.getBuildVector(ShufVT, DL, V1Mask)); if (V2InUse) V2 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V2), DAG.getBuildVector(ShufVT, DL, V2Mask)); // If we need shuffled inputs from both, blend the two. SDValue V; if (V1InUse && V2InUse) V = DAG.getNode(ISD::OR, DL, ShufVT, V1, V2); else V = V1InUse ? V1 : V2; // Cast the result back to the correct type. return DAG.getBitcast(VT, V); } /// Generic lowering of 8-lane i16 shuffles. /// /// This handles both single-input shuffles and combined shuffle/blends with /// two inputs. The single input shuffles are immediately delegated to /// a dedicated lowering routine. /// /// The blends are lowered in one of three fundamental ways. If there are few /// enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle /// of the input is significantly cheaper when lowered as an interleaving of /// the two inputs, try to interleave them. Otherwise, blend the low and high /// halves of the inputs separately (making them have relatively few inputs) /// and then concatenate them. static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; }); if (NumV2Inputs == 0) { // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast( DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask, Zeroable, Subtarget, DAG)) return Shift; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) return V; // Use dedicated pack instructions for masks that match their pattern. 
if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask, Subtarget, DAG)) return Rotate; // Make a copy of the mask so it can be modified. SmallVector MutableMask(Mask.begin(), Mask.end()); return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1, MutableMask, Subtarget, DAG); } assert(llvm::any_of(Mask, [](int M) { return M >= 0 && M < 8; }) && "All single-input shuffles should be canonicalized to be V1-input " "shuffles."); // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // See if we can use SSE4A Extraction / Insertion. if (Subtarget.hasSSE4A()) if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, Zeroable, DAG)) return V; // There are special ways we can lower some single-element blends. if (NumV2Inputs == 1) if (SDValue V = lowerVectorShuffleAsElementInsertion( DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return V; // We have different paths for blend lowering, but they all must use the // *exact* same predicate. bool IsBlendSupported = Subtarget.hasSSE41(); if (IsBlendSupported) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, Zeroable, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) return V; // Use dedicated pack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (SDValue BitBlend = lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG)) return BitBlend; // Try to lower by permuting the inputs into an unpack instruction. if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) return Unpack; // If we can't directly blend but can use PSHUFB, that will be better as it // can both shuffle and set up the inefficient blend. if (!IsBlendSupported && Subtarget.hasSSSE3()) { bool V1InUse, V2InUse; return lowerVectorShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse); } // We can always bit-blend if we have to so the fallback strategy is to // decompose into single-input permutes and blends. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG); } /// Check whether a compaction lowering can be done by dropping even /// elements and compute how many times even elements must be dropped. /// /// This handles shuffles which take every Nth element where N is a power of /// two. Example shuffle masks: /// /// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14 /// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 /// N = 2: 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12 /// N = 2: 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28 /// N = 3: 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8 /// N = 3: 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24 /// /// Any of these lanes can of course be undef. 
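/// As a worked check (single input, so the modulus is the mask size): for the /// first N = 1 mask above, element 9 must equal (9 << 1) & 15 == 2, which it /// does, so N == 1 remains viable for that mask.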
/// /// This routine only supports N <= 3. /// FIXME: Evaluate whether either AVX or AVX-512 have any opportunities here /// for larger N. /// /// \returns N above, or the number of times even elements must be dropped if /// there is such a number. Otherwise returns zero. static int canLowerByDroppingEvenElements(ArrayRef Mask, bool IsSingleInput) { // The modulus for the shuffle vector entries is based on whether this is // a single input or not. int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2); assert(isPowerOf2_32((uint32_t)ShuffleModulus) && "We should only be called with masks with a power-of-2 size!"); uint64_t ModMask = (uint64_t)ShuffleModulus - 1; // We track whether the input is viable for all power-of-2 strides 2^1, 2^2, // and 2^3 simultaneously. This is because we may have ambiguity with // partially undef inputs. bool ViableForN[3] = {true, true, true}; for (int i = 0, e = Mask.size(); i < e; ++i) { // Ignore undef lanes, we'll optimistically collapse them to the pattern we // want. if (Mask[i] < 0) continue; bool IsAnyViable = false; for (unsigned j = 0; j != array_lengthof(ViableForN); ++j) if (ViableForN[j]) { uint64_t N = j + 1; // The shuffle mask must be equal to (i * 2^N) % M. if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask)) IsAnyViable = true; else ViableForN[j] = false; } // Early exit if we exhaust the possible powers of two. if (!IsAnyViable) break; } for (unsigned j = 0; j != array_lengthof(ViableForN); ++j) if (ViableForN[j]) return j + 1; // Return 0 as there is no viable power of two. return 0; } static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG) { MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true); if (V2.isUndef()) return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1); return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2); } /// Generic lowering of v16i8 shuffles. /// /// This is a hybrid strategy to lower v16i8 vectors. It first attempts to /// detect any complexity reducing interleaving. If that doesn't help, it uses /// UNPCK to spread the i8 elements across two i16-element vectors, and uses /// the existing lowering for v8i16 blends on each half, finally PACK-ing them /// back together. static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!"); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Use dedicated pack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use a zext lowering. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; // See if we can use SSE4A Extraction / Insertion. 
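// (SSE4A's EXTRQ/INSERTQ move a bit field described by a length and an index // within the low 64 bits, zeroing the remainder of the low half; e.g. keeping // bytes 1-4 of V1 in lanes 0-3 with the rest of the low half zeroable roughly // corresponds to EXTRQ with bit length 32 and bit index 8.)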
if (Subtarget.hasSSE4A()) if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG)) return V; int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; }); // For single-input shuffles, there are some nicer lowering tricks we can use. if (NumV2Elements == 0) { // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast( DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return Broadcast; if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) return V; // Check whether we can widen this to an i16 shuffle by duplicating bytes. // Notably, this handles splat and partial-splat shuffles more efficiently. // However, it only makes sense if the pre-duplication shuffle simplifies // things significantly. Currently, this means we need to be able to // express the pre-duplication shuffle as an i16 shuffle. // // FIXME: We should check for other patterns which can be widened into an // i16 shuffle as well. auto canWidenViaDuplication = [](ArrayRef Mask) { for (int i = 0; i < 16; i += 2) if (Mask[i] >= 0 && Mask[i + 1] >= 0 && Mask[i] != Mask[i + 1]) return false; return true; }; auto tryToWidenViaDuplication = [&]() -> SDValue { if (!canWidenViaDuplication(Mask)) return SDValue(); SmallVector LoInputs; copy_if(Mask, std::back_inserter(LoInputs), [](int M) { return M >= 0 && M < 8; }); array_pod_sort(LoInputs.begin(), LoInputs.end()); LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end()); SmallVector HiInputs; copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; }); array_pod_sort(HiInputs.begin(), HiInputs.end()); HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end()); bool TargetLo = LoInputs.size() >= HiInputs.size(); ArrayRef InPlaceInputs = TargetLo ? LoInputs : HiInputs; ArrayRef MovingInputs = TargetLo ? HiInputs : LoInputs; int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1}; SmallDenseMap LaneMap; for (int I : InPlaceInputs) { PreDupI16Shuffle[I/2] = I/2; LaneMap[I] = I; } int j = TargetLo ? 0 : 4, je = j + 4; for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) { // Check if j is already a shuffle of this input. This happens when // there are two adjacent bytes after we move the low one. if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) { // If we haven't yet mapped the input, search for a slot into which // we can map it. while (j < je && PreDupI16Shuffle[j] >= 0) ++j; if (j == je) // We can't place the inputs into a single half with a simple i16 shuffle, so bail. return SDValue(); // Map this input with the i16 shuffle. PreDupI16Shuffle[j] = MovingInputs[i] / 2; } // Update the lane map based on the mapping we ended up with. LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2; } V1 = DAG.getBitcast( MVT::v16i8, DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1), DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle)); // Unpack the bytes to form the i16s that will be shuffled into place. V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, MVT::v16i8, V1, V1); int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; for (int i = 0; i < 16; ++i) if (Mask[i] >= 0) { int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 
0 : 8); assert(MappedMask < 8 && "Invalid v8 shuffle mask!"); if (PostDupI16Shuffle[i / 2] < 0) PostDupI16Shuffle[i / 2] = MappedMask; else assert(PostDupI16Shuffle[i / 2] == MappedMask && "Conflicting entries in the original shuffle!"); } return DAG.getBitcast( MVT::v16i8, DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1), DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle)); }; if (SDValue V = tryToWidenViaDuplication()) return V; } if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) return V; // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly // with PSHUFB. It is important to do this before we attempt to generate any // blends but after all of the single-input lowerings. If the single input // lowerings can find an instruction sequence that is faster than a PSHUFB, we // want to preserve that and we can DAG combine any longer sequences into // a PSHUFB in the end. But once we start blending from multiple inputs, // the complexity of DAG combining bad patterns back into PSHUFB is too high, // and there are *very* few patterns that would actually be faster than the // PSHUFB approach because of its ability to zero lanes. // // FIXME: The only exceptions to the above are blends which are exact // interleavings with direct instructions supporting them. We currently don't // handle those well here. if (Subtarget.hasSSSE3()) { bool V1InUse = false; bool V2InUse = false; SDValue PSHUFB = lowerVectorShuffleAsBlendOfPSHUFBs( DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse); // If both V1 and V2 are in use and we can use a direct blend or an unpack, // do so. This avoids using them to handle blends-with-zero which is // important as a single pshufb is significantly faster for that. if (V1InUse && V2InUse) { if (Subtarget.hasSSE41()) if (SDValue Blend = lowerVectorShuffleAsBlend( DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // We can use an unpack to do the blending rather than an or in some // cases. Even though the or may be (very minorly) more efficient, we // preference this lowering because there are common cases where part of // the complexity of the shuffles goes away when we do the final blend as // an unpack. // FIXME: It might be worth trying to detect if the unpack-feeding // shuffles will both be pshufb, in which case we shouldn't bother with // this. if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return Unpack; // If we have VBMI we can use one VPERM instead of multiple PSHUFBs. if (Subtarget.hasVBMI() && Subtarget.hasVLX()) return lowerVectorShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG); // Use PALIGNR+Permute if possible - permute might become PSHUFB but the // PALIGNR will be cheaper than the second PSHUFB+OR. if (SDValue V = lowerVectorShuffleAsByteRotateAndPermute( DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return V; } return PSHUFB; } // There are special ways we can lower some single-element blends. if (NumV2Elements == 1) if (SDValue V = lowerVectorShuffleAsElementInsertion( DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return V; if (SDValue BitBlend = lowerVectorShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG)) return BitBlend; // Check whether a compaction lowering can be done. 
This handles shuffles // which take every Nth element for some even N. See the helper function for // details. // // We special case these as they can be particularly efficiently handled with // the PACKUSWB instruction on x86 and they show up in common patterns of // rearranging bytes to truncate wide elements. bool IsSingleInput = V2.isUndef(); if (int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput)) { // NumEvenDrops is the power of two stride of the elements. Another way of // thinking about it is that we need to drop the even elements this many // times to get the original input. // First we need to zero all the dropped bytes. assert(NumEvenDrops <= 3 && "No support for dropping even elements more than 3 times."); // We use the mask type to pick which bytes are preserved based on how many // elements are dropped. MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 }; SDValue ByteClearMask = DAG.getBitcast( MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask); if (!IsSingleInput) V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask); // Now pack things back together. V1 = DAG.getBitcast(MVT::v8i16, V1); V2 = IsSingleInput ? V1 : DAG.getBitcast(MVT::v8i16, V2); SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2); for (int i = 1; i < NumEvenDrops; ++i) { Result = DAG.getBitcast(MVT::v8i16, Result); Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result); } return Result; } // Handle multi-input cases by blending single-input shuffles. if (NumV2Elements > 0) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG); // The fallback path for single-input shuffles widens this into two v8i16 // vectors with unpacks, shuffles those, and then pulls them back together // with a pack. SDValue V = V1; std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}}; std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}}; for (int i = 0; i < 16; ++i) if (Mask[i] >= 0) (i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i]; SDValue VLoHalf, VHiHalf; // Check if any of the odd lanes in the v16i8 are used. If not, we can mask // them out and avoid using UNPCK{L,H} to extract the elements of V as // i16s. if (none_of(LoBlendMask, [](int M) { return M >= 0 && M % 2 == 1; }) && none_of(HiBlendMask, [](int M) { return M >= 0 && M % 2 == 1; })) { // Use a mask to drop the high bytes. VLoHalf = DAG.getBitcast(MVT::v8i16, V); VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf, DAG.getConstant(0x00FF, DL, MVT::v8i16)); // This will be a single vector shuffle instead of a blend so nuke VHiHalf. VHiHalf = DAG.getUNDEF(MVT::v8i16); // Squash the masks to point directly into VLoHalf. for (int &M : LoBlendMask) if (M >= 0) M /= 2; for (int &M : HiBlendMask) if (M >= 0) M /= 2; } else { // Otherwise just unpack the low half of V into VLoHalf and the high half into // VHiHalf so that we can blend them as i16s.
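// (Unpacking against a zero vector zero-extends bytes in place: UNPCKL of // <b0..b15> with zero yields <b0,0,b1,0,...,b7,0>, which read as v8i16 is the // low eight bytes widened to i16.)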
SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL); VLoHalf = DAG.getBitcast( MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); VHiHalf = DAG.getBitcast( MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); } SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask); SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask); return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV); } /// Dispatching routine to lower various 128-bit x86 vector shuffles. /// /// This routine breaks down the specific type of 128-bit shuffle and /// dispatches to the lowering routines accordingly. static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { switch (VT.SimpleTy) { case MVT::v2i64: return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v2f64: return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v4i32: return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v4f32: return lowerV4F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8i16: return lowerV8I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i8: return lowerV16I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); default: llvm_unreachable("Unimplemented!"); } } /// Generic routine to split vector shuffle into half-sized shuffles. /// /// This routine just extracts two subvectors, shuffles them independently, and /// then concatenates them back together. This should work effectively with all /// AVX vector shuffle types. static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { assert(VT.getSizeInBits() >= 256 && "Only for 256-bit or wider vector shuffles!"); assert(V1.getSimpleValueType() == VT && "Bad operand type!"); assert(V2.getSimpleValueType() == VT && "Bad operand type!"); ArrayRef LoMask = Mask.slice(0, Mask.size() / 2); ArrayRef HiMask = Mask.slice(Mask.size() / 2); int NumElements = VT.getVectorNumElements(); int SplitNumElements = NumElements / 2; MVT ScalarVT = VT.getVectorElementType(); MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2); // Rather than splitting build-vectors, just build two narrower build // vectors. This helps shuffling with splats and zeros. 
auto SplitVector = [&](SDValue V) { V = peekThroughBitcasts(V); MVT OrigVT = V.getSimpleValueType(); int OrigNumElements = OrigVT.getVectorNumElements(); int OrigSplitNumElements = OrigNumElements / 2; MVT OrigScalarVT = OrigVT.getVectorElementType(); MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2); SDValue LoV, HiV; auto *BV = dyn_cast(V); if (!BV) { LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V, DAG.getIntPtrConstant(0, DL)); HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V, DAG.getIntPtrConstant(OrigSplitNumElements, DL)); } else { SmallVector LoOps, HiOps; for (int i = 0; i < OrigSplitNumElements; ++i) { LoOps.push_back(BV->getOperand(i)); HiOps.push_back(BV->getOperand(i + OrigSplitNumElements)); } LoV = DAG.getBuildVector(OrigSplitVT, DL, LoOps); HiV = DAG.getBuildVector(OrigSplitVT, DL, HiOps); } return std::make_pair(DAG.getBitcast(SplitVT, LoV), DAG.getBitcast(SplitVT, HiV)); }; SDValue LoV1, HiV1, LoV2, HiV2; std::tie(LoV1, HiV1) = SplitVector(V1); std::tie(LoV2, HiV2) = SplitVector(V2); // Now create two 4-way blends of these half-width vectors. auto HalfBlend = [&](ArrayRef HalfMask) { bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false; SmallVector V1BlendMask((unsigned)SplitNumElements, -1); SmallVector V2BlendMask((unsigned)SplitNumElements, -1); SmallVector BlendMask((unsigned)SplitNumElements, -1); for (int i = 0; i < SplitNumElements; ++i) { int M = HalfMask[i]; if (M >= NumElements) { if (M >= NumElements + SplitNumElements) UseHiV2 = true; else UseLoV2 = true; V2BlendMask[i] = M - NumElements; BlendMask[i] = SplitNumElements + i; } else if (M >= 0) { if (M >= SplitNumElements) UseHiV1 = true; else UseLoV1 = true; V1BlendMask[i] = M; BlendMask[i] = i; } } // Because the lowering happens after all combining takes place, we need to // manually combine these blend masks as much as possible so that we create // a minimal number of high-level vector shuffle nodes. // First try just blending the halves of V1 or V2. if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2) return DAG.getUNDEF(SplitVT); if (!UseLoV2 && !UseHiV2) return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask); if (!UseLoV1 && !UseHiV1) return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask); SDValue V1Blend, V2Blend; if (UseLoV1 && UseHiV1) { V1Blend = DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask); } else { // We only use half of V1 so map the usage down into the final blend mask. V1Blend = UseLoV1 ? LoV1 : HiV1; for (int i = 0; i < SplitNumElements; ++i) if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements) BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements); } if (UseLoV2 && UseHiV2) { V2Blend = DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask); } else { // We only use half of V2 so map the usage down into the final blend mask. V2Blend = UseLoV2 ? LoV2 : HiV2; for (int i = 0; i < SplitNumElements; ++i) if (BlendMask[i] >= SplitNumElements) BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0); } return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask); }; SDValue Lo = HalfBlend(LoMask); SDValue Hi = HalfBlend(HiMask); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); } /// Either split a vector in halves or decompose the shuffles and the /// blend. /// /// This is provided as a good fallback for many lowerings of non-single-input /// shuffles with more than one 128-bit lane. 
In those cases, we want to select /// between splitting the shuffle into 128-bit components and stitching those /// back together vs. extracting the single-input shuffles and blending those /// results. static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(!V2.isUndef() && "This routine must not be used to lower single-input " "shuffles as it could then recurse on itself."); int Size = Mask.size(); // If this can be modeled as a broadcast of two elements followed by a blend, // prefer that lowering. This is especially important because broadcasts can // often fold with memory operands. auto DoBothBroadcast = [&] { int V1BroadcastIdx = -1, V2BroadcastIdx = -1; for (int M : Mask) if (M >= Size) { if (V2BroadcastIdx < 0) V2BroadcastIdx = M - Size; else if (M - Size != V2BroadcastIdx) return false; } else if (M >= 0) { if (V1BroadcastIdx < 0) V1BroadcastIdx = M; else if (M != V1BroadcastIdx) return false; } return true; }; if (DoBothBroadcast()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, Subtarget, DAG); // If the inputs all stem from a single 128-bit lane of each input, then we // split them rather than blending because the split will decompose to // unusually few instructions. int LaneCount = VT.getSizeInBits() / 128; int LaneSize = Size / LaneCount; SmallBitVector LaneInputs[2]; LaneInputs[0].resize(LaneCount, false); LaneInputs[1].resize(LaneCount, false); for (int i = 0; i < Size; ++i) if (Mask[i] >= 0) LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true; if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1) return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); // Otherwise, just fall back to decomposed shuffles and a blend. This requires // that the decomposed single-input shuffles don't end up here. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, Subtarget, DAG); } /// Lower a vector shuffle crossing multiple 128-bit lanes as /// a lane permutation followed by a per-lane permutation. /// /// This is mainly for cases where we can have non-repeating permutes /// in each lane. /// /// TODO: This is very similar to lowerVectorShuffleByMerging128BitLanes, /// we should investigate merging them. static SDValue lowerVectorShuffleAsLanePermuteAndPermute( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { int NumElts = VT.getVectorNumElements(); int NumLanes = VT.getSizeInBits() / 128; int NumEltsPerLane = NumElts / NumLanes; SmallVector SrcLaneMask(NumLanes, SM_SentinelUndef); - SmallVector LaneMask(NumElts, SM_SentinelUndef); SmallVector PermMask(NumElts, SM_SentinelUndef); for (int i = 0; i != NumElts; ++i) { int M = Mask[i]; if (M < 0) continue; // Ensure that each lane comes from a single source lane. int SrcLane = M / NumEltsPerLane; int DstLane = i / NumEltsPerLane; if (!isUndefOrEqual(SrcLaneMask[DstLane], SrcLane)) return SDValue(); SrcLaneMask[DstLane] = SrcLane; - LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane); PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane); + } + + // Make sure we set all elements of the lane mask, to avoid undef propagation. 
+ SmallVector LaneMask(NumElts, SM_SentinelUndef); + for (int DstLane = 0; DstLane != NumLanes; ++DstLane) { + int SrcLane = SrcLaneMask[DstLane]; + if (0 <= SrcLane) + for (int j = 0; j != NumEltsPerLane; ++j) { + LaneMask[(DstLane * NumEltsPerLane) + j] = + (SrcLane * NumEltsPerLane) + j; + } } // If we're only shuffling a single lowest lane and the rest are identity // then don't bother. // TODO - isShuffleMaskInputInPlace could be extended to something like this. int NumIdentityLanes = 0; bool OnlyShuffleLowestLane = true; for (int i = 0; i != NumLanes; ++i) { if (isSequentialOrUndefInRange(PermMask, i * NumEltsPerLane, NumEltsPerLane, i * NumEltsPerLane)) NumIdentityLanes++; else if (SrcLaneMask[i] != 0 && SrcLaneMask[i] != NumLanes) OnlyShuffleLowestLane = false; } if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1)) return SDValue(); SDValue LanePermute = DAG.getVectorShuffle(VT, DL, V1, V2, LaneMask); return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask); } /// Lower a vector shuffle crossing multiple 128-bit lanes as /// a permutation and blend of those lanes. /// /// This essentially blends the out-of-lane inputs to each lane into the lane /// from a permuted copy of the vector. This lowering strategy results in four /// instructions in the worst case for a single-input cross lane shuffle which /// is lower than any other fully general cross-lane shuffle strategy I'm aware /// of. Special cases for each particular shuffle pattern should be handled /// prior to trying this lowering. static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // FIXME: This should probably be generalized for 512-bit vectors as well. assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!"); int Size = Mask.size(); int LaneSize = Size / 2; // If there are only inputs from one 128-bit lane, splitting will in fact be // less expensive. The flags track whether the given lane contains an element // that crosses to another lane. if (!Subtarget.hasAVX2()) { bool LaneCrossing[2] = {false, false}; for (int i = 0; i < Size; ++i) if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize) LaneCrossing[(Mask[i] % Size) / LaneSize] = true; if (!LaneCrossing[0] || !LaneCrossing[1]) return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); } else { bool LaneUsed[2] = {false, false}; for (int i = 0; i < Size; ++i) if (Mask[i] >= 0) LaneUsed[(Mask[i] / LaneSize)] = true; if (!LaneUsed[0] || !LaneUsed[1]) return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); } assert(V2.isUndef() && "This last part of this routine only works on single input shuffles"); SmallVector FlippedBlendMask(Size); for (int i = 0; i < Size; ++i) FlippedBlendMask[i] = Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize) ? Mask[i] : Mask[i] % LaneSize + (i / LaneSize) * LaneSize + Size); // Flip the vector, and blend the results which should now be in-lane. MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; SDValue Flipped = DAG.getBitcast(PVT, V1); Flipped = DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), { 2, 3, 0, 1 }); Flipped = DAG.getBitcast(VT, Flipped); return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask); } /// Handle lowering 2-lane 128-bit shuffles. 
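/// e.g. a v4f64 mask <4, 5, 0, 1> widens to <2, 0>: the destination's low half /// takes V2's low half and its high half takes V1's low half, which VPERM2X128 /// encodes as the immediate (2 << 0) | (0 << 4) == 0x02.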
static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding. if (Subtarget.hasAVX2() && V2.isUndef()) return SDValue(); SmallVector<int, 4> WidenedMask; if (!canWidenShuffleElements(Mask, Zeroable, WidenedMask)) return SDValue(); bool IsLowZero = (Zeroable & 0x3) == 0x3; bool IsHighZero = (Zeroable & 0xc) == 0xc; // Try to use an insert into a zero vector. if (WidenedMask[0] == 0 && IsHighZero) { MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, getZeroVector(VT, Subtarget, DAG, DL), LoV, DAG.getIntPtrConstant(0, DL)); } // TODO: If minimizing size and one of the inputs is a zero vector and the // zero vector has only one use, we could use a VPERM2X128 to save the // instruction bytes needed to explicitly generate the zero vector. // Blends are faster and handle all the non-lane-crossing cases. if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // If either input operand is a zero vector, use VPERM2X128 because its mask // allows us to replace the zero input with an implicit zero. if (!IsLowZero && !IsHighZero) { // Check for patterns which can be matched with a single insert of a 128-bit // subvector. bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}); if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) { // With AVX1, use vperm2f128 (below) to allow load folding. Otherwise, // this will likely become vinsertf128 which can't fold a 256-bit memop. if (!isa<LoadSDNode>(peekThroughBitcasts(V1))) { MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec, DAG.getIntPtrConstant(2, DL)); } } // Try to use SHUF128 if possible. if (Subtarget.hasVLX()) { if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) { unsigned PermMask = ((WidenedMask[0] % 2) << 0) | ((WidenedMask[1] % 2) << 1); return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, DAG.getConstant(PermMask, DL, MVT::i8)); } } } // Otherwise form a 128-bit permutation. After accounting for undefs, // convert the 64-bit shuffle mask selection values into 128-bit // selection bits by dividing the indexes by 2 and shifting into positions // defined by a vperm2*128 instruction's immediate control byte. // The immediate permute control byte looks like this: // [1:0] - select 128 bits from sources for low half of destination // [2] - ignore // [3] - zero low half of destination // [5:4] - select 128 bits from sources for high half of destination // [6] - ignore // [7] - zero high half of destination assert((WidenedMask[0] >= 0 || IsLowZero) && (WidenedMask[1] >= 0 || IsHighZero) && "Undef half?"); unsigned PermMask = 0; PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0); PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4); // Check the immediate mask and replace unused sources with undef.
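// In the immediate, bit 1 (low half taken from V2) or bit 3 (low half zeroed) // set means the low half does not read V1; bits 5 and 7 encode the same for // the high half. E.g. with PermMask == 0x02 the low half reads V2 and the high // half reads V1, so neither operand can be dropped.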
if ((PermMask & 0x0a) != 0x00 && (PermMask & 0xa0) != 0x00) V1 = DAG.getUNDEF(VT); if ((PermMask & 0x0a) != 0x02 && (PermMask & 0xa0) != 0x20) V2 = DAG.getUNDEF(VT); return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, DAG.getConstant(PermMask, DL, MVT::i8)); } /// Lower a vector shuffle by first fixing the 128-bit lanes and then /// shuffling each lane. /// /// This attempts to create a repeated lane shuffle where each lane uses one /// or two of the lanes of the inputs. The lanes of the input vectors are /// shuffled in one or two independent shuffles to get the lanes into the /// position needed by the final shuffle. /// /// FIXME: This should be generalized to 512-bit shuffles. static SDValue lowerVectorShuffleByMerging128BitLanes( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(!V2.isUndef() && "This is only useful with multiple inputs."); if (is128BitLaneRepeatedShuffleMask(VT, Mask)) return SDValue(); int Size = Mask.size(); int LaneSize = 128 / VT.getScalarSizeInBits(); int NumLanes = Size / LaneSize; assert(NumLanes == 2 && "Only handles 256-bit shuffles."); SmallVector RepeatMask(LaneSize, -1); int LaneSrcs[2][2] = { { -1, -1 }, { -1 , -1 } }; // First pass will try to fill in the RepeatMask from lanes that need two // sources. for (int Lane = 0; Lane != NumLanes; ++Lane) { int Srcs[2] = { -1, -1 }; SmallVector InLaneMask(LaneSize, -1); for (int i = 0; i != LaneSize; ++i) { int M = Mask[(Lane * LaneSize) + i]; if (M < 0) continue; // Determine which of the 4 possible input lanes (2 from each source) // this element comes from. Assign that as one of the sources for this // lane. We can assign up to 2 sources for this lane. If we run out // sources we can't do anything. int LaneSrc = M / LaneSize; int Src; if (Srcs[0] < 0 || Srcs[0] == LaneSrc) Src = 0; else if (Srcs[1] < 0 || Srcs[1] == LaneSrc) Src = 1; else return SDValue(); Srcs[Src] = LaneSrc; InLaneMask[i] = (M % LaneSize) + Src * Size; } // If this lane has two sources, see if it fits with the repeat mask so far. if (Srcs[1] < 0) continue; LaneSrcs[Lane][0] = Srcs[0]; LaneSrcs[Lane][1] = Srcs[1]; auto MatchMasks = [](ArrayRef M1, ArrayRef M2) { assert(M1.size() == M2.size() && "Unexpected mask size"); for (int i = 0, e = M1.size(); i != e; ++i) if (M1[i] >= 0 && M2[i] >= 0 && M1[i] != M2[i]) return false; return true; }; auto MergeMasks = [](ArrayRef Mask, MutableArrayRef MergedMask) { assert(Mask.size() == MergedMask.size() && "Unexpected mask size"); for (int i = 0, e = MergedMask.size(); i != e; ++i) { int M = Mask[i]; if (M < 0) continue; assert((MergedMask[i] < 0 || MergedMask[i] == M) && "Unexpected mask element"); MergedMask[i] = M; } }; if (MatchMasks(InLaneMask, RepeatMask)) { // Merge this lane mask into the final repeat mask. MergeMasks(InLaneMask, RepeatMask); continue; } // Didn't find a match. Swap the operands and try again. std::swap(LaneSrcs[Lane][0], LaneSrcs[Lane][1]); ShuffleVectorSDNode::commuteMask(InLaneMask); if (MatchMasks(InLaneMask, RepeatMask)) { // Merge this lane mask into the final repeat mask. MergeMasks(InLaneMask, RepeatMask); continue; } // Couldn't find a match with the operands in either order. return SDValue(); } // Now handle any lanes with only one source. for (int Lane = 0; Lane != NumLanes; ++Lane) { // If this lane has already been processed, skip it. 
if (LaneSrcs[Lane][0] >= 0) continue; for (int i = 0; i != LaneSize; ++i) { int M = Mask[(Lane * LaneSize) + i]; if (M < 0) continue; // If RepeatMask isn't defined yet we can define it ourselves. if (RepeatMask[i] < 0) RepeatMask[i] = M % LaneSize; if (RepeatMask[i] < Size) { if (RepeatMask[i] != M % LaneSize) return SDValue(); LaneSrcs[Lane][0] = M / LaneSize; } else { if (RepeatMask[i] != ((M % LaneSize) + Size)) return SDValue(); LaneSrcs[Lane][1] = M / LaneSize; } } if (LaneSrcs[Lane][0] < 0 && LaneSrcs[Lane][1] < 0) return SDValue(); } SmallVector<int, 16> NewMask(Size, -1); for (int Lane = 0; Lane != NumLanes; ++Lane) { int Src = LaneSrcs[Lane][0]; for (int i = 0; i != LaneSize; ++i) { int M = -1; if (Src >= 0) M = Src * LaneSize + i; NewMask[Lane * LaneSize + i] = M; } } SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); // Ensure we didn't get back the shuffle we started with. // FIXME: This is a hack to make up for some splat handling code in // getVectorShuffle. if (isa<ShuffleVectorSDNode>(NewV1) && cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask) return SDValue(); for (int Lane = 0; Lane != NumLanes; ++Lane) { int Src = LaneSrcs[Lane][1]; for (int i = 0; i != LaneSize; ++i) { int M = -1; if (Src >= 0) M = Src * LaneSize + i; NewMask[Lane * LaneSize + i] = M; } } SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); // Ensure we didn't get back the shuffle we started with. // FIXME: This is a hack to make up for some splat handling code in // getVectorShuffle. if (isa<ShuffleVectorSDNode>(NewV2) && cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask) return SDValue(); for (int i = 0; i != Size; ++i) { NewMask[i] = RepeatMask[i % LaneSize]; if (NewMask[i] < 0) continue; NewMask[i] += (i / LaneSize) * LaneSize; } return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask); } /// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF. /// This allows for fast cases such as subvector extraction/insertion /// or shuffling smaller vector types which can lower more efficiently. static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert((VT.is256BitVector() || VT.is512BitVector()) && "Expected 256-bit or 512-bit vector"); unsigned NumElts = VT.getVectorNumElements(); unsigned HalfNumElts = NumElts / 2; MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts); bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts); bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts); if (!UndefLower && !UndefUpper) return SDValue(); // Upper half is undef and lower half is whole upper subvector. // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> if (UndefUpper && isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) { SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, DAG.getIntPtrConstant(HalfNumElts, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi, DAG.getIntPtrConstant(0, DL)); } // Lower half is undef and upper half is whole lower subvector. // e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1> if (UndefLower && isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) { SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi, DAG.getIntPtrConstant(HalfNumElts, DL)); } // If the shuffle only uses two of the four halves of the input operands, // then extract them and perform the 'half' shuffle at half width. // e.g.
vector_shuffle <X, X, X, X, u, u, u, u> or <X, X, u, u> int HalfIdx1 = -1, HalfIdx2 = -1; SmallVector<int, 8> HalfMask(HalfNumElts); unsigned Offset = UndefLower ? HalfNumElts : 0; for (unsigned i = 0; i != HalfNumElts; ++i) { int M = Mask[i + Offset]; if (M < 0) { HalfMask[i] = M; continue; } // Determine which of the 4 half vectors this element is from. // i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2. int HalfIdx = M / HalfNumElts; // Determine the element index into its half vector source. int HalfElt = M % HalfNumElts; // We can shuffle with up to 2 half vectors, set the new 'half' // shuffle mask accordingly. if (HalfIdx1 < 0 || HalfIdx1 == HalfIdx) { HalfMask[i] = HalfElt; HalfIdx1 = HalfIdx; continue; } if (HalfIdx2 < 0 || HalfIdx2 == HalfIdx) { HalfMask[i] = HalfElt + HalfNumElts; HalfIdx2 = HalfIdx; continue; } // Too many half vectors referenced. return SDValue(); } assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length"); // Only shuffle the halves of the inputs when useful. int NumLowerHalves = (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2); int NumUpperHalves = (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3); // uuuuXXXX - don't extract uppers just to insert again. if (UndefLower && NumUpperHalves != 0) return SDValue(); // XXXXuuuu - don't extract both uppers, instead shuffle and then extract. if (UndefUpper && NumUpperHalves == 2) return SDValue(); // AVX2 - XXXXuuuu - always extract lowers. if (Subtarget.hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) { // AVX2 supports efficient immediate 64-bit element cross-lane shuffles. if (VT == MVT::v4f64 || VT == MVT::v4i64) return SDValue(); // AVX2 supports variable 32-bit element cross-lane shuffles. if (VT == MVT::v8f32 || VT == MVT::v8i32) { // XXXXuuuu - don't extract lowers and uppers. if (UndefUpper && NumLowerHalves != 0 && NumUpperHalves != 0) return SDValue(); } } // AVX512 - XXXXuuuu - always extract lowers. if (VT.is512BitVector() && !(UndefUpper && NumUpperHalves == 0)) return SDValue(); auto GetHalfVector = [&](int HalfIdx) { if (HalfIdx < 0) return DAG.getUNDEF(HalfVT); SDValue V = (HalfIdx < 2 ? V1 : V2); HalfIdx = (HalfIdx % 2) * HalfNumElts; return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V, DAG.getIntPtrConstant(HalfIdx, DL)); }; SDValue Half1 = GetHalfVector(HalfIdx1); SDValue Half2 = GetHalfVector(HalfIdx2); SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, DAG.getIntPtrConstant(Offset, DL)); } /// Test whether the specified input (0 or 1) is in-place blended by the /// given mask. /// /// This returns true if the elements from a particular input are already in the /// slots required by the given mask and require no permutation. static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) { assert((Input == 0 || Input == 1) && "Only two inputs to shuffles."); int Size = Mask.size(); for (int i = 0; i < Size; ++i) if (Mask[i] >= 0 && Mask[i] / Size == Input && Mask[i] % Size != i) return false; return true; } /// Handle case where shuffle sources are coming from the same 128-bit lane and /// every lane can be represented as the same repeating mask - allowing us to /// shuffle the sources with the repeating shuffle and then permute the result /// to the destination lanes.
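/// e.g. (illustrative) the single-input v8f32 mask <6, 7, 4, 5, 2, 3, 0, 1> /// repeats <2, 3, 0, 1> in both 128-bit lanes with the lane sources swapped, /// so it can be lowered as that in-lane repeating shuffle followed by a lane /// permute.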
static SDValue lowerShuffleAsRepeatedMaskAndLanePermute( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { int NumElts = VT.getVectorNumElements(); int NumLanes = VT.getSizeInBits() / 128; int NumLaneElts = NumElts / NumLanes; // On AVX2 we may be able to just shuffle the lowest elements and then // broadcast the result. if (Subtarget.hasAVX2()) { for (unsigned BroadcastSize : {16, 32, 64}) { if (BroadcastSize <= VT.getScalarSizeInBits()) continue; int NumBroadcastElts = BroadcastSize / VT.getScalarSizeInBits(); // Attempt to match a repeating pattern every NumBroadcastElts, // accounting for UNDEFs but only references the lowest 128-bit // lane of the inputs. auto FindRepeatingBroadcastMask = [&](SmallVectorImpl &RepeatMask) { for (int i = 0; i != NumElts; i += NumBroadcastElts) for (int j = 0; j != NumBroadcastElts; ++j) { int M = Mask[i + j]; if (M < 0) continue; int &R = RepeatMask[j]; if (0 != ((M % NumElts) / NumLaneElts)) return false; if (0 <= R && R != M) return false; R = M; } return true; }; SmallVector RepeatMask((unsigned)NumElts, -1); if (!FindRepeatingBroadcastMask(RepeatMask)) continue; // Shuffle the (lowest) repeated elements in place for broadcast. SDValue RepeatShuf = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatMask); // Shuffle the actual broadcast. SmallVector BroadcastMask((unsigned)NumElts, -1); for (int i = 0; i != NumElts; i += NumBroadcastElts) for (int j = 0; j != NumBroadcastElts; ++j) BroadcastMask[i + j] = j; return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT), BroadcastMask); } } // Bail if the shuffle mask doesn't cross 128-bit lanes. if (!is128BitLaneCrossingShuffleMask(VT, Mask)) return SDValue(); // Bail if we already have a repeated lane shuffle mask. SmallVector RepeatedShuffleMask; if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedShuffleMask)) return SDValue(); // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes // (with PERMQ/PERMPD), otherwise we can only permute whole 128-bit lanes. int SubLaneScale = Subtarget.hasAVX2() && VT.is256BitVector() ? 2 : 1; int NumSubLanes = NumLanes * SubLaneScale; int NumSubLaneElts = NumLaneElts / SubLaneScale; // Check that all the sources are coming from the same lane and see if we can // form a repeating shuffle mask (local to each sub-lane). At the same time, // determine the source sub-lane for each destination sub-lane. int TopSrcSubLane = -1; SmallVector Dst2SrcSubLanes((unsigned)NumSubLanes, -1); SmallVector RepeatedSubLaneMasks[2] = { SmallVector((unsigned)NumSubLaneElts, SM_SentinelUndef), SmallVector((unsigned)NumSubLaneElts, SM_SentinelUndef)}; for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) { // Extract the sub-lane mask, check that it all comes from the same lane // and normalize the mask entries to come from the first lane. int SrcLane = -1; SmallVector SubLaneMask((unsigned)NumSubLaneElts, -1); for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) { int M = Mask[(DstSubLane * NumSubLaneElts) + Elt]; if (M < 0) continue; int Lane = (M % NumElts) / NumLaneElts; if ((0 <= SrcLane) && (SrcLane != Lane)) return SDValue(); SrcLane = Lane; int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts); SubLaneMask[Elt] = LocalM; } // Whole sub-lane is UNDEF. if (SrcLane < 0) continue; // Attempt to match against the candidate repeated sub-lane masks. 
for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) { auto MatchMasks = [NumSubLaneElts](ArrayRef M1, ArrayRef M2) { for (int i = 0; i != NumSubLaneElts; ++i) { if (M1[i] < 0 || M2[i] < 0) continue; if (M1[i] != M2[i]) return false; } return true; }; auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane]; if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask)) continue; // Merge the sub-lane mask into the matching repeated sub-lane mask. for (int i = 0; i != NumSubLaneElts; ++i) { int M = SubLaneMask[i]; if (M < 0) continue; assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) && "Unexpected mask element"); RepeatedSubLaneMask[i] = M; } // Track the top most source sub-lane - by setting the remaining to UNDEF // we can greatly simplify shuffle matching. int SrcSubLane = (SrcLane * SubLaneScale) + SubLane; TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane); Dst2SrcSubLanes[DstSubLane] = SrcSubLane; break; } // Bail if we failed to find a matching repeated sub-lane mask. if (Dst2SrcSubLanes[DstSubLane] < 0) return SDValue(); } assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes && "Unexpected source lane"); // Create a repeating shuffle mask for the entire vector. SmallVector RepeatedMask((unsigned)NumElts, -1); for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) { int Lane = SubLane / SubLaneScale; auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale]; for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) { int M = RepeatedSubLaneMask[Elt]; if (M < 0) continue; int Idx = (SubLane * NumSubLaneElts) + Elt; RepeatedMask[Idx] = M + (Lane * NumLaneElts); } } SDValue RepeatedShuffle = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask); // Shuffle each source sub-lane to its destination. SmallVector SubLaneMask((unsigned)NumElts, -1); for (int i = 0; i != NumElts; i += NumSubLaneElts) { int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts]; if (SrcSubLane < 0) continue; for (int j = 0; j != NumSubLaneElts; ++j) SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts); } return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT), SubLaneMask); } static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, unsigned &ShuffleImm, ArrayRef Mask) { int NumElts = VT.getVectorNumElements(); assert(VT.getScalarSizeInBits() == 64 && (NumElts == 2 || NumElts == 4 || NumElts == 8) && "Unexpected data type for VSHUFPD"); // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, .. // Mask for V4F64; 0/1, 4/5, 2/3, 6/7.. ShuffleImm = 0; bool ShufpdMask = true; bool CommutableMask = true; for (int i = 0; i < NumElts; ++i) { if (Mask[i] == SM_SentinelUndef) continue; if (Mask[i] < 0) return false; int Val = (i & 6) + NumElts * (i & 1); int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1); if (Mask[i] < Val || Mask[i] > Val + 1) ShufpdMask = false; if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1) CommutableMask = false; ShuffleImm |= (Mask[i] % 2) << i; } if (ShufpdMask) return true; if (CommutableMask) { std::swap(V1, V2); return true; } return false; } static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG) { assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&& "Unexpected data type for VSHUFPD"); unsigned Immediate = 0; if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask)) return SDValue(); return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, DAG.getConstant(Immediate, DL, MVT::i8)); } /// Handle lowering of 4-lane 64-bit floating point shuffles. 
/// /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 /// isn't available. static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, Zeroable, Subtarget, DAG)) return V; if (V2.isUndef()) { // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast( DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // Use low duplicate instructions for masks that match their pattern. if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2})) return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1); if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) { // Non-half-crossing single input shuffles can be lowered with an // interleaved permutation. unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) | ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3); return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1, DAG.getConstant(VPERMILPMask, DL, MVT::i8)); } // With AVX2 we have direct support for this permutation. if (Subtarget.hasAVX2()) return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return V; // Try to permute the lanes and then use a per-lane permute. if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute( DL, MVT::v4f64, V1, V2, Mask, DAG, Subtarget)) return V; // Otherwise, fall back. return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, DAG, Subtarget); } // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG)) return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // Check if the blend happens to exactly fit that of SHUFPD. if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG)) return Op; // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return V; // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single // instruction so skip this pattern. if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)))) if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return Result; // If we have VLX support, we can use VEXPAND. if (Subtarget.hasVLX()) if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, V1, V2, DAG, Subtarget)) return V; // If we have AVX2 then we always want to lower with a blend because at v4 we // can fully permute the elements.
if (Subtarget.hasAVX2()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG); // Otherwise fall back on generic lowering. return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG); } /// Handle lowering of 4-lane 64-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v4i64 shuffling.. static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!"); if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Broadcast; if (V2.isUndef()) { // When the shuffle is mirrored between the 128-bit lanes of the unit, we // can use lower latency instructions that will operate on both lanes. SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) { SmallVector PSHUFDMask; scaleShuffleMask(2, RepeatedMask, PSHUFDMask); return DAG.getBitcast( MVT::v4i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, DAG.getBitcast(MVT::v8i32, V1), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } // AVX2 provides a direct instruction for permuting a single input across // lanes. return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); } // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // If we have VLX support, we can use VALIGN or VEXPAND. if (Subtarget.hasVLX()) { if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2, DAG, Subtarget)) return V; } // Try to use PALIGNR. if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG)) return V; // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return V; // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single // instruction so skip this pattern. if (!isShuffleMaskInputInPlace(0, Mask) && !isShuffleMaskInputInPlace(1, Mask)) if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Result; // Otherwise fall back on generic blend lowering. 
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG); } /// Handle lowering of 8-lane 32-bit floating point shuffles. /// /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2 /// isn't available. static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // If the shuffle mask is repeated in each 128-bit lane, we have many more // options to efficiently lower the shuffle. SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) { assert(RepeatedMask.size() == 4 && "Repeated masks must be half the mask width!"); // Use even/odd duplicate instructions for masks that match their pattern. if (isShuffleEquivalent(V1, V2, RepeatedMask, {0, 0, 2, 2})) return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1); if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3})) return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1); if (V2.isUndef()) return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG)) return V; // Otherwise, fall back to a SHUFPS sequence. Here it is important that we // have already handled any direct blends. return lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG); } // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return V; // If we have a single input shuffle with different shuffle patterns in the // two 128-bit lanes use the variable mask to VPERMILPS. if (V2.isUndef()) { SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true); if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask)) return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1, VPermMask); if (Subtarget.hasAVX2()) return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1); // Otherwise, fall back. return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, DAG, Subtarget); } // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Result; // If we have VLX support, we can use VEXPAND. if (Subtarget.hasVLX()) if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, V1, V2, DAG, Subtarget)) return V; // For non-AVX512 if the Mask is of 16bit elements in lane then try to split // since after split we get a more efficient code using vpunpcklwd and // vpunpckhwd instrs than vblend. 
if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32)) if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return V; // If we have AVX2 then we always want to lower with a blend because at v8 we // can fully permute the elements. if (Subtarget.hasAVX2()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG); // Otherwise fall back on generic lowering. return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG); } /// Handle lowering of 8-lane 32-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v8i32 shuffling.. static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; // For non-AVX512 if the Mask is of 16bit elements in lane then try to split // since after split we get a more efficient code than vblend by using // vpunpcklwd and vpunpckhwd instrs. if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() && !Subtarget.hasAVX512()) if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // If the shuffle mask is repeated in each 128-bit lane we can use more // efficient instructions that mirror the shuffles across the two 128-bit // lanes. SmallVector RepeatedMask; bool Is128BitLaneRepeatedShuffle = is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask); if (Is128BitLaneRepeatedShuffle) { assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); if (V2.isUndef()) return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG)) return V; } // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // If we have VLX support, we can use VALIGN or EXPAND. if (Subtarget.hasVLX()) { if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2, DAG, Subtarget)) return V; } // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. 
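// For example (hypothetical v8i32 mask): {5, 4, 7, 6, 1, 0, 3, 2} applies the
// in-lane pattern {1, 0, 3, 2} in both 128-bit lanes and then swaps the lanes,
// so it can be lowered as VPSHUFD $0xB1 followed by VPERM2I128 $0x01 rather
// than as a fully general cross-lane shuffle.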
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return V; // If the shuffle patterns aren't repeated but it is a single input, directly // generate a cross-lane VPERMD instruction. if (V2.isUndef()) { SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true); return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, VPermMask, V1); } // Assume that a single SHUFPS is faster than an alternative sequence of // multiple instructions (even if the CPU has a domain penalty). // If some CPU is harmed by the domain switch, we can fix it in a later pass. if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) { SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1); SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2); SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, CastV1, CastV2, DAG); return DAG.getBitcast(MVT::v8i32, ShufPS); } // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Result; // Otherwise fall back on generic blend lowering. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG); } /// Handle lowering of 16-lane 16-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v16i16 shuffling.. static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!"); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) return Broadcast; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG)) return V; // Use dedicated pack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) return V; if (V2.isUndef()) { // There are no generalized cross-lane shuffle operations available on i16 // element types. 
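// (Plain AVX2 only has cross-lane permutes at 32/64-bit granularity, e.g.
// VPERMD/VPERMQ; a word-granularity VPERMW only arrives with AVX512BW+VL,
// which is handled separately below.)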
if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) { if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute( DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget)) return V; return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget); } SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) { // As this is a single-input shuffle, the repeated mask should be // a strictly valid v8i16 mask that we can pass through to the v8i16 // lowering to handle even the v16 case. return lowerV8I16GeneralSingleInputVectorShuffle( DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG); } } if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB( DL, MVT::v16i16, Mask, V1, V2, Zeroable, Subtarget, DAG)) return PSHUFB; // AVX512BWVL can lower to VPERMW. if (Subtarget.hasBWI() && Subtarget.hasVLX()) return lowerVectorShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG); // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) return Result; // Try to permute the lanes and then use a per-lane permute. if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute( DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget)) return V; // Otherwise fall back on generic lowering. return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG); } /// Handle lowering of 32-lane 8-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v32i8 shuffling.. static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!"); assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return Broadcast; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG)) return V; // Use dedicated pack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. 
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return V; // There are no generalized cross-lane shuffle operations available on i8 // element types. if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) { if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute( DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) return V; return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget); } if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB( DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG)) return PSHUFB; // AVX512VBMIVL can lower to VPERMB. if (Subtarget.hasVBMI() && Subtarget.hasVLX()) return lowerVectorShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG); // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return Result; // Try to permute the lanes and then use a per-lane permute. if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute( DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) return V; // Otherwise fall back on generic lowering. return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG); } /// High-level routine to lower various 256-bit x86 vector shuffles. /// /// This routine either breaks down the specific type of a 256-bit x86 vector /// shuffle or splits it into two 128-bit shuffles and fuses the results back /// together based on the available instructions. static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // If we have a single input to the zero element, insert that into V1 if we // can do so cheaply. int NumElts = VT.getVectorNumElements(); int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; }); if (NumV2Elements == 1 && Mask[0] >= NumElts) if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Insertion; // Handle special cases where the lower or upper half is UNDEF. if (SDValue V = lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) return V; // There is a really nice hard cut-over between AVX1 and AVX2 that means we // can check for those subtargets here and avoid much of the subtarget // querying in the per-vector-type lowering routines. With AVX1 we have // essentially *zero* ability to manipulate a 256-bit vector with integer // types. Since we'll use floating point types there eventually, just // immediately cast everything to a float and operate entirely in that domain. if (VT.isInteger() && !Subtarget.hasAVX2()) { int ElementBits = VT.getScalarSizeInBits(); if (ElementBits < 32) { // No floating point type available, if we can't use the bit operations // for masking/blending then decompose into 128-bit vectors. 
if (SDValue V = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG)) return V; if (SDValue V = lowerVectorShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) return V; return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); } MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits), VT.getVectorNumElements()); V1 = DAG.getBitcast(FpVT, V1); V2 = DAG.getBitcast(FpVT, V2); return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask)); } switch (VT.SimpleTy) { case MVT::v4f64: return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v4i64: return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8f32: return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8i32: return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i16: return lowerV16I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v32i8: return lowerV32I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); default: llvm_unreachable("Not a valid 256-bit x86 vector type!"); } } /// Try to lower a vector shuffle as a 128-bit shuffles. static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(VT.getScalarSizeInBits() == 64 && "Unexpected element type size for 128bit shuffle."); // To handle 256 bit vector requires VLX and most probably // function lowerV2X128VectorShuffle() is better solution. assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle."); // TODO - use Zeroable like we do for lowerV2X128VectorShuffle? SmallVector WidenedMask; if (!canWidenShuffleElements(Mask, WidenedMask)) return SDValue(); // Try to use an insert into a zero vector. if (WidenedMask[0] == 0 && (Zeroable & 0xf0) == 0xf0 && (WidenedMask[1] == 1 || (Zeroable & 0x0c) == 0x0c)) { unsigned NumElts = ((Zeroable & 0x0c) == 0x0c) ? 2 : 4; MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, getZeroVector(VT, Subtarget, DAG, DL), LoV, DAG.getIntPtrConstant(0, DL)); } // Check for patterns which can be matched with a single insert of a 256-bit // subvector. bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 0, 1, 2, 3}); if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 8, 9, 10, 11})) { MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4); SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec, DAG.getIntPtrConstant(4, DL)); } assert(WidenedMask.size() == 4); // See if this is an insertion of the lower 128-bits of V2 into V1. bool IsInsert = true; int V2Index = -1; for (int i = 0; i < 4; ++i) { assert(WidenedMask[i] >= -1); if (WidenedMask[i] < 0) continue; // Make sure all V1 subvectors are in place. if (WidenedMask[i] < 4) { if (WidenedMask[i] != i) { IsInsert = false; break; } } else { // Make sure we only have a single V2 index and its the lowest 128-bits. 
if (V2Index >= 0 || WidenedMask[i] != 4) { IsInsert = false; break; } V2Index = i; } } if (IsInsert && V2Index >= 0) { MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2, DAG.getIntPtrConstant(0, DL)); return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL); } // Try to lower to vshuf64x2/vshuf32x4. SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)}; unsigned PermMask = 0; // Insure elements came from the same Op. for (int i = 0; i < 4; ++i) { assert(WidenedMask[i] >= -1); if (WidenedMask[i] < 0) continue; SDValue Op = WidenedMask[i] >= 4 ? V2 : V1; unsigned OpIndex = i / 2; if (Ops[OpIndex].isUndef()) Ops[OpIndex] = Op; else if (Ops[OpIndex] != Op) return SDValue(); // Convert the 128-bit shuffle mask selection values into 128-bit selection // bits defined by a vshuf64x2 instruction's immediate control byte. PermMask |= (WidenedMask[i] % 4) << (i * 2); } return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1], DAG.getConstant(PermMask, DL, MVT::i8)); } /// Handle lowering of 8-lane 64-bit floating point shuffles. static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); if (V2.isUndef()) { // Use low duplicate instructions for masks that match their pattern. if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6})) return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1); if (!is128BitLaneCrossingShuffleMask(MVT::v8f64, Mask)) { // Non-half-crossing single input shuffles can be lowered with an // interleaved permutation. unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) | ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3) | ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) | ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7); return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1, DAG.getConstant(VPERMILPMask, DL, MVT::i8)); } SmallVector RepeatedMask; if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8f64, V1, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); } if (SDValue Shuf128 = lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, Zeroable, V1, V2, Subtarget, DAG)) return Shuf128; if (SDValue Unpck = lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG)) return Unpck; // Check if the blend happens to exactly fit that of SHUFPD. if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG)) return Op; if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2, DAG, Subtarget)) return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG); } /// Handle lowering of 16-lane 32-bit floating point shuffles. 
static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); // If the shuffle mask is repeated in each 128-bit lane, we have many more // options to efficiently lower the shuffle. SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v16f32, Mask, RepeatedMask)) { assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); // Use even/odd duplicate instructions for masks that match their pattern. if (isShuffleEquivalent(V1, V2, RepeatedMask, {0, 0, 2, 2})) return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1); if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3})) return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1); if (V2.isUndef()) return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v16f32, V1, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. if (SDValue Unpck = lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG)) return Unpck; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // Otherwise, fall back to a SHUFPS sequence. return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG); } // If we have a single input shuffle with different shuffle patterns in the // 128-bit lanes and don't lane cross, use variable mask VPERMILPS. if (V2.isUndef() && !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) { SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true); return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v16f32, V1, VPermMask); } // If we have AVX512F support, we can use VEXPAND. if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask, V1, V2, DAG, Subtarget)) return V; return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); } /// Handle lowering of 8-lane 64-bit integer shuffles. static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); if (V2.isUndef()) { // When the shuffle is mirrored between the 128-bit lanes of the unit, we // can use lower latency instructions that will operate on all four // 128-bit lanes. SmallVector Repeated128Mask; if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) { SmallVector PSHUFDMask; scaleShuffleMask(2, Repeated128Mask, PSHUFDMask); return DAG.getBitcast( MVT::v8i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, DAG.getBitcast(MVT::v16i32, V1), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } SmallVector Repeated256Mask; if (is256BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated256Mask)) return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8i64, V1, getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG)); } if (SDValue Shuf128 = lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, Zeroable, V1, V2, Subtarget, DAG)) return Shuf128; // Try to use shift instructions. 
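// A sketch of what the shift matcher can catch (hypothetical masks on smaller
// types for brevity): a v4i32 mask {1, Z, 3, Z} is exactly VPSRLQ $32 (each
// 64-bit element shifted right, zero-filling the upper half), and {Z, 0, 1, 2}
// within one 128-bit lane is the byte shift VPSLLDQ $4.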
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // Try to use VALIGN. if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Try to use PALIGNR. if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (SDValue Unpck = lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG)) return Unpck; // If we have AVX512F support, we can use VEXPAND. if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2, DAG, Subtarget)) return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG); } /// Handle lowering of 16-lane 32-bit integer shuffles. static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; // If the shuffle mask is repeated in each 128-bit lane we can use more // efficient instructions that mirror the shuffles across the four 128-bit // lanes. SmallVector RepeatedMask; bool Is128BitLaneRepeatedShuffle = is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask); if (Is128BitLaneRepeatedShuffle) { assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); if (V2.isUndef()) return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG)) return V; } // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // Try to use VALIGN. if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Try to use byte rotation instructions. if (Subtarget.hasBWI()) if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Assume that a single SHUFPS is faster than using a permv shuffle. // If some CPU is harmed by the domain switch, we can fix it in a later pass. if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) { SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1); SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2); SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, CastV1, CastV2, DAG); return DAG.getBitcast(MVT::v16i32, ShufPS); } // If we have AVX512F support, we can use VEXPAND. 
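// A sketch of the VEXPAND match (same idea at any element width; hypothetical
// v8i64 mask for brevity): {Z, 0, Z, 1, Z, 2, Z, 3} places consecutive low
// elements of V1 at the non-zeroed positions, which is VPEXPANDQ with the
// zeroing write-mask k = 0b10101010: every set bit receives the next
// sequential source element and every clear bit is zeroed.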
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2, DAG, Subtarget)) return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG); } /// Handle lowering of 32-lane 16-bit integer shuffles. static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG)) return V; // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (V2.isUndef()) { SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) { // As this is a single-input shuffle, the repeated mask should be // a strictly valid v8i16 mask that we can pass through to the v8i16 // lowering to handle even the v32 case. return lowerV8I16GeneralSingleInputVectorShuffle( DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG); } } if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB( DL, MVT::v32i16, Mask, V1, V2, Zeroable, Subtarget, DAG)) return PSHUFB; return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG); } /// Handle lowering of 64-lane 8-bit integer shuffles. static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!"); assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG)) return V; // Use dedicated pack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use shift instructions. 
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB( DL, MVT::v64i8, Mask, V1, V2, Zeroable, Subtarget, DAG)) return PSHUFB; // VBMI can use VPERMV/VPERMV3 byte shuffles. if (Subtarget.hasVBMI()) return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG); // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // FIXME: Implement direct support for this type! return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG); } /// High-level routine to lower various 512-bit x86 vector shuffles. /// /// This routine either breaks down the specific type of a 512-bit x86 vector /// shuffle or splits it into two 256-bit shuffles and fuses the results back /// together based on the available instructions. static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/ basic ISA!"); // If we have a single input to the zero element, insert that into V1 if we // can do so cheaply. int NumElts = Mask.size(); int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; }); if (NumV2Elements == 1 && Mask[0] >= NumElts) if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Insertion; // Handle special cases where the lower or upper half is UNDEF. if (SDValue V = lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) return V; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // Dispatch to each element type for lowering. If we don't have support for // specific element type shuffles at 512 bits, immediately split them and // lower them. Each lowering routine of a given type is allowed to assume that // the requisite ISA extensions for that element type are available. switch (VT.SimpleTy) { case MVT::v8f64: return lowerV8F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16f32: return lowerV16F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8i64: return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i32: return lowerV16I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v32i16: return lowerV32I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v64i8: return lowerV64I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); default: llvm_unreachable("Not a valid 512-bit x86 vector type!"); } } // Determine if this shuffle can be implemented with a KSHIFT instruction. // Returns the shift amount if possible or -1 if not. This is a simplified // version of matchVectorShuffleAsShift. 
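// For example, for a hypothetical v8i1 case (natively shiftable with DQI):
// Mask = {2, 3, 4, 5, 6, 7, Z, Z} with the top two elements zeroable matches
// KSHIFTR $2 (mask bits move toward bit 0, zero-filling from the top), and the
// mirrored Mask = {Z, Z, 0, 1, 2, 3, 4, 5} matches KSHIFTL $2.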
static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask, int MaskOffset, const APInt &Zeroable) { int Size = Mask.size(); auto CheckZeros = [&](int Shift, bool Left) { for (int j = 0; j < Shift; ++j) if (!Zeroable[j + (Left ? 0 : (Size - Shift))]) return false; return true; }; auto MatchShift = [&](int Shift, bool Left) { unsigned Pos = Left ? Shift : 0; unsigned Low = Left ? 0 : Shift; unsigned Len = Size - Shift; return isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset); }; for (int Shift = 1; Shift != Size; ++Shift) for (bool Left : {true, false}) if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) { Opcode = Left ? X86ISD::KSHIFTL : X86ISD::KSHIFTR; return Shift; } return -1; } // Lower vXi1 vector shuffles. // There is no dedicated instruction on AVX-512 that shuffles the masks. // The only way to shuffle bits is to sign-extend the mask vector to a SIMD // vector, shuffle, and then truncate it back. static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/o basic ISA!"); unsigned NumElts = Mask.size(); // Try to recognize shuffles that are just padding a subvector with zeros. unsigned SubvecElts = 0; for (int i = 0; i != (int)NumElts; ++i) { if (Mask[i] >= 0 && Mask[i] != i) break; ++SubvecElts; } assert(SubvecElts != NumElts && "Identity shuffle?"); // Clip to a power of 2. SubvecElts = PowerOf2Floor(SubvecElts); // Make sure the number of zeroable bits in the top at least covers the bits // not covered by the subvector. if (Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) { MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts); SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, getZeroVector(VT, Subtarget, DAG, DL), Extract, DAG.getIntPtrConstant(0, DL)); } // Try to match KSHIFTs. // TODO: Support narrower than legal shifts by widening and extracting. if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) { unsigned Offset = 0; for (SDValue V : { V1, V2 }) { unsigned Opcode; int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable); if (ShiftAmt >= 0) return DAG.getNode(Opcode, DL, VT, V, DAG.getConstant(ShiftAmt, DL, MVT::i8)); Offset += NumElts; // Increment for next iteration. } } MVT ExtVT; switch (VT.SimpleTy) { default: llvm_unreachable("Expected a vector of i1 elements"); case MVT::v2i1: ExtVT = MVT::v2i64; break; case MVT::v4i1: ExtVT = MVT::v4i32; break; case MVT::v8i1: // Take 512-bit type, more shuffles on KNL. If we have VLX use a 256-bit // shuffle. ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64; break; case MVT::v16i1: // Take 512-bit type, unless we are avoiding 512-bit types and have the // 256-bit operation available. ExtVT = Subtarget.canExtendTo512DQ() ? MVT::v16i32 : MVT::v16i16; break; case MVT::v32i1: // Take 512-bit type, unless we are avoiding 512-bit types and have the // 256-bit operation available. assert(Subtarget.hasBWI() && "Expected AVX512BW support"); ExtVT = Subtarget.canExtendTo512BW() ? MVT::v32i16 : MVT::v32i8; break; case MVT::v64i1: ExtVT = MVT::v64i8; break; } V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1); V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2); SDValue Shuffle = DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask); // Since i1 was sign-extended, we can use X86ISD::CVT2MASK.
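// (That is: after shuffling the sign-extended vector, the compare "0 > x"
// (SETGT) below is true exactly for elements whose sign bit is set, so it
// re-packs the vXi1 mask directly; when that compare isn't available, a plain
// TRUNCATE of the low bits gives the same result.)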
int NumElems = VT.getVectorNumElements(); if ((Subtarget.hasBWI() && (NumElems >= 32)) || (Subtarget.hasDQI() && (NumElems < 32))) return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT), Shuffle, ISD::SETGT); return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle); } /// Helper function that returns true if the shuffle mask should be /// commuted to improve canonicalization. static bool canonicalizeShuffleMaskWithCommute(ArrayRef Mask) { int NumElements = Mask.size(); int NumV1Elements = 0, NumV2Elements = 0; for (int M : Mask) if (M < 0) continue; else if (M < NumElements) ++NumV1Elements; else ++NumV2Elements; // Commute the shuffle as needed such that more elements come from V1 than // V2. This allows us to match the shuffle pattern strictly on how many // elements come from V1 without handling the symmetric cases. if (NumV2Elements > NumV1Elements) return true; assert(NumV1Elements > 0 && "No V1 indices"); if (NumV2Elements == 0) return false; // When the number of V1 and V2 elements are the same, try to minimize the // number of uses of V2 in the low half of the vector. When that is tied, // ensure that the sum of indices for V1 is equal to or lower than the sum // indices for V2. When those are equal, try to ensure that the number of odd // indices for V1 is lower than the number of odd indices for V2. if (NumV1Elements == NumV2Elements) { int LowV1Elements = 0, LowV2Elements = 0; for (int M : Mask.slice(0, NumElements / 2)) if (M >= NumElements) ++LowV2Elements; else if (M >= 0) ++LowV1Elements; if (LowV2Elements > LowV1Elements) return true; if (LowV2Elements == LowV1Elements) { int SumV1Indices = 0, SumV2Indices = 0; for (int i = 0, Size = Mask.size(); i < Size; ++i) if (Mask[i] >= NumElements) SumV2Indices += i; else if (Mask[i] >= 0) SumV1Indices += i; if (SumV2Indices < SumV1Indices) return true; if (SumV2Indices == SumV1Indices) { int NumV1OddIndices = 0, NumV2OddIndices = 0; for (int i = 0, Size = Mask.size(); i < Size; ++i) if (Mask[i] >= NumElements) NumV2OddIndices += i % 2; else if (Mask[i] >= 0) NumV1OddIndices += i % 2; if (NumV2OddIndices < NumV1OddIndices) return true; } } } return false; } /// Top-level lowering for x86 vector shuffles. /// /// This handles decomposition, canonicalization, and lowering of all x86 /// vector shuffles. Most of the specific lowering strategies are encapsulated /// above in helper routines. The canonicalization attempts to widen shuffles /// to involve fewer lanes of wider elements, consolidate symmetric patterns /// s.t. only one of the two inputs needs to be tested, etc. static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); MVT VT = Op.getSimpleValueType(); int NumElements = VT.getVectorNumElements(); SDLoc DL(Op); bool Is1BitVector = (VT.getVectorElementType() == MVT::i1); assert((VT.getSizeInBits() != 64 || Is1BitVector) && "Can't lower MMX shuffles"); bool V1IsUndef = V1.isUndef(); bool V2IsUndef = V2.isUndef(); if (V1IsUndef && V2IsUndef) return DAG.getUNDEF(VT); // When we create a shuffle node we put the UNDEF node to second operand, // but in some cases the first operand may be transformed to UNDEF. // In this case we should just commute the node. if (V1IsUndef) return DAG.getCommutedVectorShuffle(*SVOp); // Check for non-undef masks pointing at an undef vector and make the masks // undef as well. 
This makes it easier to match the shuffle based solely on // the mask. if (V2IsUndef) for (int M : Mask) if (M >= NumElements) { SmallVector NewMask(Mask.begin(), Mask.end()); for (int &M : NewMask) if (M >= NumElements) M = -1; return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); } // Check for illegal shuffle mask element index values. int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); (void)MaskUpperLimit; assert(llvm::all_of(Mask, [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && "Out of bounds shuffle index"); // We actually see shuffles that are entirely re-arrangements of a set of // zero inputs. This mostly happens while decomposing complex shuffles into // simple ones. Directly lower these as a buildvector of zeros. APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2); if (Zeroable.isAllOnesValue()) return getZeroVector(VT, Subtarget, DAG, DL); bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode()); // Create an alternative mask with info about zeroable elements. // Here we do not set undef elements as zeroable. SmallVector ZeroableMask(Mask.begin(), Mask.end()); if (V2IsZero) { assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!"); for (int i = 0; i != NumElements; ++i) if (Mask[i] != SM_SentinelUndef && Zeroable[i]) ZeroableMask[i] = SM_SentinelZero; } // Try to collapse shuffles into using a vector type with fewer elements but // wider element types. We cap this to not form integers or floating point // elements wider than 64 bits, but it might be interesting to form i128 // integers to handle flipping the low and high halves of AVX 256-bit vectors. SmallVector WidenedMask; if (VT.getScalarSizeInBits() < 64 && !Is1BitVector && canWidenShuffleElements(ZeroableMask, WidenedMask)) { // Shuffle mask widening should not interfere with a broadcast opportunity // by obfuscating the operands with bitcasts. // TODO: Avoid lowering directly from this top-level function: make this // a query (canLowerAsBroadcast) and defer lowering to the type-based calls. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG)) return Broadcast; MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2) : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2); int NewNumElts = NumElements / 2; MVT NewVT = MVT::getVectorVT(NewEltVT, NewNumElts); // Make sure that the new vector type is legal. For example, v2f64 isn't // legal on SSE1. if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) { if (V2IsZero) { // Modify the new Mask to take all zeros from the all-zero vector. // Choose indices that are blend-friendly. bool UsedZeroVector = false; assert(find(WidenedMask, SM_SentinelZero) != WidenedMask.end() && "V2's non-undef elements are used?!"); for (int i = 0; i != NewNumElts; ++i) if (WidenedMask[i] == SM_SentinelZero) { WidenedMask[i] = i + NewNumElts; UsedZeroVector = true; } // Ensure all elements of V2 are zero - isBuildVectorAllZeros permits // some elements to be undef. if (UsedZeroVector) V2 = getZeroVector(NewVT, Subtarget, DAG, DL); } V1 = DAG.getBitcast(NewVT, V1); V2 = DAG.getBitcast(NewVT, V2); return DAG.getBitcast( VT, DAG.getVectorShuffle(NewVT, DL, V1, V2, WidenedMask)); } } // Commute the shuffle if it will improve canonicalization. 
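// Commuting swaps V1 and V2 and rewrites each mask index M to its mirror
// (M < NumElements ? M + NumElements : M - NumElements). For a hypothetical
// v4 mask, {4, 5, 0, 1} on (V1, V2) becomes {0, 1, 4, 5} on (V2, V1): the same
// shuffle, but with most elements now coming from the first operand.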
if (canonicalizeShuffleMaskWithCommute(Mask)) return DAG.getCommutedVectorShuffle(*SVOp); if (SDValue V = lowerVectorShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget)) return V; // For each vector width, delegate to a specialized lowering routine. if (VT.is128BitVector()) return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); if (VT.is256BitVector()) return lower256BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); if (VT.is512BitVector()) return lower512BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); if (Is1BitVector) return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); llvm_unreachable("Unimplemented!"); } /// Try to lower a VSELECT instruction to a vector shuffle. static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue Cond = Op.getOperand(0); SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); MVT VT = Op.getSimpleValueType(); // Only non-legal VSELECTs reach this lowering, convert those into generic // shuffles and re-use the shuffle lowering path for blends. SmallVector<int, 64> Mask; if (createShuffleMaskFromVSELECT(Mask, Cond)) return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask); return SDValue(); } SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); // A vselect where all conditions and data are constants can be optimized into // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR(). if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) && ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) && ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) return SDValue(); // Try to lower this to a blend-style vector shuffle. This can handle all // constant condition cases. if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG)) return BlendOp; // If this VSELECT has a vector of i1 as a mask, it will be directly matched // with patterns on the mask registers on AVX-512. MVT CondVT = Cond.getSimpleValueType(); unsigned CondEltSize = Cond.getScalarValueSizeInBits(); if (CondEltSize == 1) return Op; // Variable blends are only legal from SSE4.1 onward. if (!Subtarget.hasSSE41()) return SDValue(); SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); unsigned EltSize = VT.getScalarSizeInBits(); unsigned NumElts = VT.getVectorNumElements(); // If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition // into an i1 condition so that we can use the mask-based 512-bit blend // instructions. if (VT.getSizeInBits() == 512) { // Build a mask by testing the condition against zero. MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond, DAG.getConstant(0, dl, CondVT), ISD::SETNE); // Now return a new VSELECT using the mask. return DAG.getSelect(dl, VT, Mask, LHS, RHS); } // SEXT/TRUNC cases where the mask doesn't match the destination size. if (CondEltSize != EltSize) { // If we don't have a sign splat, rely on the expansion. if (CondEltSize != DAG.ComputeNumSignBits(Cond)) return SDValue(); MVT NewCondSVT = MVT::getIntegerVT(EltSize); MVT NewCondVT = MVT::getVectorVT(NewCondSVT, NumElts); Cond = DAG.getSExtOrTrunc(Cond, dl, NewCondVT); return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS); } // Only some types will be legal on some subtargets.
If we can emit a legal // VSELECT-matching blend, return Op, but if we need to expand, return // a null value. switch (VT.SimpleTy) { default: // Most of the vector types have blends past SSE4.1. return Op; case MVT::v32i8: // The byte blends for AVX vectors were introduced only in AVX2. if (Subtarget.hasAVX2()) return Op; return SDValue(); case MVT::v8i16: case MVT::v16i16: { // Bitcast everything to the vXi8 type and use a vXi8 vselect. MVT CastVT = MVT::getVectorVT(MVT::i8, NumElts * 2); Cond = DAG.getBitcast(CastVT, Cond); LHS = DAG.getBitcast(CastVT, LHS); RHS = DAG.getBitcast(CastVT, RHS); SDValue Select = DAG.getNode(ISD::VSELECT, dl, CastVT, Cond, LHS, RHS); return DAG.getBitcast(VT, Select); } } } static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); if (!Op.getOperand(0).getSimpleValueType().is128BitVector()) return SDValue(); if (VT.getSizeInBits() == 8) { SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); } if (VT == MVT::f32) { // EXTRACTPS outputs to a GPR32 register which will require a movd to copy // the result back to FR32 register. It's only worth matching if the // result has a single use which is a store or a bitcast to i32. And in // the case of a store, it's not worth it if the index is a constant 0, // because a MOVSSmr can be used instead, which is smaller and faster. if (!Op.hasOneUse()) return SDValue(); SDNode *User = *Op.getNode()->use_begin(); if ((User->getOpcode() != ISD::STORE || isNullConstant(Op.getOperand(1))) && (User->getOpcode() != ISD::BITCAST || User->getValueType(0) != MVT::i32)) return SDValue(); SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op.getOperand(0)), Op.getOperand(1)); return DAG.getBitcast(MVT::f32, Extract); } if (VT == MVT::i32 || VT == MVT::i64) { // ExtractPS/pextrq work with a constant index. if (isa<ConstantSDNode>(Op.getOperand(1))) return Op; } return SDValue(); } /// Extract one bit from a mask vector, like v16i1 or v8i1. /// AVX-512 feature. static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue Vec = Op.getOperand(0); SDLoc dl(Vec); MVT VecVT = Vec.getSimpleValueType(); SDValue Idx = Op.getOperand(1); MVT EltVT = Op.getSimpleValueType(); assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) && "Unexpected vector type in ExtractBitFromMaskVector"); // A variable index can't be handled in mask registers, // so extend the vector to VR512/128. if (!isa<ConstantSDNode>(Idx)) { unsigned NumElts = VecVT.getVectorNumElements(); // Extending v8i1/v16i1 to 512-bit gets better performance on KNL // than extending to 128/256-bit. MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8; MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts); SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec); SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx); return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); } unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); if (IdxVal == 0) // the operation is legal return Op; // Extend to natively supported kshift. unsigned NumElems = VecVT.getVectorNumElements(); MVT WideVecVT = VecVT; if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { WideVecVT = Subtarget.hasDQI() ?
MVT::v8i1 : MVT::v16i1; Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, DAG.getUNDEF(WideVecVT), Vec, DAG.getIntPtrConstant(0, dl)); } // Use kshiftr instruction to move to the lower element. Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, DAG.getConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, DAG.getIntPtrConstant(0, dl)); } SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Vec = Op.getOperand(0); MVT VecVT = Vec.getSimpleValueType(); SDValue Idx = Op.getOperand(1); if (VecVT.getVectorElementType() == MVT::i1) return ExtractBitFromMaskVector(Op, DAG, Subtarget); if (!isa<ConstantSDNode>(Idx)) { // It's more profitable to go through memory (1 cycle throughput) // than using a VMOVD + VPERMV/PSHUFB sequence (2/3 cycles throughput). // The IACA tool was used to get the performance estimate // (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer) // // Example: extractelement <16 x i8> %a, i32 %i // // Block Throughput: 3.00 Cycles // Throughput Bottleneck: Port5 // // | Num Of | Ports pressure in cycles | | // | Uops | 0 - DV | 5 | 6 | 7 | | // --------------------------------------------- // | 1 | | 1.0 | | | CP | vmovd xmm1, edi // | 1 | | 1.0 | | | CP | vpshufb xmm0, xmm0, xmm1 // | 2 | 1.0 | 1.0 | | | CP | vpextrb eax, xmm0, 0x0 // Total Num Of Uops: 4 // // // Block Throughput: 1.00 Cycles // Throughput Bottleneck: PORT2_AGU, PORT3_AGU, Port4 // // | | Ports pressure in cycles | | // |Uops| 1 | 2 - D |3 - D | 4 | 5 | | // --------------------------------------------------------- // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0 // |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18] // |1 | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1] // Total Num Of Uops: 4 return SDValue(); } unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. if (VecVT.is256BitVector() || VecVT.is512BitVector()) { // Get the 128-bit vector. Vec = extract128BitVector(Vec, IdxVal, DAG, dl); MVT EltVT = VecVT.getVectorElementType(); unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits(); assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2 // this can be done with a mask. IdxVal &= ElemsPerChunk - 1; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, DAG.getConstant(IdxVal, dl, MVT::i32)); } assert(VecVT.is128BitVector() && "Unexpected vector length"); MVT VT = Op.getSimpleValueType(); if (VT.getSizeInBits() == 16) { // If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless // we're going to zero extend the register or fold the store (SSE41 only). if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) && !(Subtarget.hasSSE41() && MayFoldIntoStore(Op))) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Vec), Idx)); // Transform it so it matches pextrw, which produces a 32-bit result.
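// (PEXTRW can only write a 32-bit GPR, so the i16 extract is modeled as an
// i32 PEXTRW followed by a TRUNCATE back to i16; the truncate is free in
// practice since it simply reads the low 16 bits of the register.)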
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); } if (Subtarget.hasSSE41()) if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG)) return Res; // TODO: We only extract a single element from v16i8, we can probably afford // to be more aggressive here before using the default approach of spilling to // stack. if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) { // Extract either the lowest i32 or any i16, and extract the sub-byte. int DWordIdx = IdxVal / 4; if (DWordIdx == 0) { SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Vec), DAG.getIntPtrConstant(DWordIdx, dl)); int ShiftVal = (IdxVal % 4) * 8; if (ShiftVal != 0) Res = DAG.getNode(ISD::SRL, dl, MVT::i32, Res, DAG.getConstant(ShiftVal, dl, MVT::i8)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); } int WordIdx = IdxVal / 2; SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, DAG.getBitcast(MVT::v8i16, Vec), DAG.getIntPtrConstant(WordIdx, dl)); int ShiftVal = (IdxVal % 2) * 8; if (ShiftVal != 0) Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res, DAG.getConstant(ShiftVal, dl, MVT::i8)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); } if (VT.getSizeInBits() == 32) { if (IdxVal == 0) return Op; // SHUFPS the element to the lowest double word, then movss. int Mask[4] = { static_cast<int>(IdxVal), -1, -1, -1 }; Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0, dl)); } if (VT.getSizeInBits() == 64) { // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught // to match extract_elt for f64. if (IdxVal == 0) return Op; // UNPCKHPD the element to the lowest double word, then movsd. // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. int Mask[2] = { 1, -1 }; Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0, dl)); } return SDValue(); } /// Insert one bit to mask vector, like v16i1 or v8i1. /// AVX-512 feature. static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc dl(Op); SDValue Vec = Op.getOperand(0); SDValue Elt = Op.getOperand(1); SDValue Idx = Op.getOperand(2); MVT VecVT = Vec.getSimpleValueType(); if (!isa<ConstantSDNode>(Idx)) { // Non-constant index. Extend source and destination, // insert element and then truncate the result. unsigned NumElts = VecVT.getVectorNumElements(); MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8; MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts); SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec), DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx); return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp); } // Copy into a k-register, extract to v1i1 and insert_subvector.
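// With a constant index, the single bit can be treated as a one-element // subvector: SCALAR_TO_VECTOR materializes the v1i1 value, and the // INSERT_SUBVECTOR below is then handled by the vXi1 insert lowering // (typically ending up as mask-register KSHIFT/KOR sequences).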
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Elt); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, Vec, EltInVec, Op.getOperand(2)); } SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); if (EltVT == MVT::i1) return InsertBitToMaskVector(Op, DAG, Subtarget); SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); if (!isa<ConstantSDNode>(N2)) return SDValue(); auto *N2C = cast<ConstantSDNode>(N2); unsigned IdxVal = N2C->getZExtValue(); bool IsZeroElt = X86::isZeroNode(N1); bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1); // If we are inserting an element, see if we can do this more efficiently with // a blend shuffle with a rematerializable vector than a costly integer // insertion. if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && 16 <= EltVT.getSizeInBits()) { SmallVector<int, 8> BlendMask; for (unsigned i = 0; i != NumElts; ++i) BlendMask.push_back(i == IdxVal ? i + NumElts : i); SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl) : getOnesVector(VT, DAG, dl); return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask); } // If the vector is wider than 128 bits, extract the 128-bit subvector, insert // into that, and then insert the subvector back into the result. if (VT.is256BitVector() || VT.is512BitVector()) { // With a 256-bit vector, we can insert into the zero element efficiently // using a blend if we have AVX or AVX2 and the right data type. if (VT.is256BitVector() && IdxVal == 0) { // TODO: It is worthwhile to cast integer to floating point and back // and incur a domain crossing penalty if that's what we'll end up // doing anyway after extracting to a 128-bit vector. if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) || (Subtarget.hasAVX2() && EltVT == MVT::i32)) { SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); N2 = DAG.getIntPtrConstant(1, dl); return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2); } } // Get the desired 128-bit vector chunk. SDValue V = extract128BitVector(N0, IdxVal, DAG, dl); // Insert the element into the desired chunk. unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits(); assert(isPowerOf2_32(NumEltsIn128)); // Since NumEltsIn128 is a power of 2 we can use mask instead of modulo. unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1); V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1, DAG.getConstant(IdxIn128, dl, MVT::i32)); // Insert the changed part back into the bigger vector return insert128BitVector(N0, V, IdxVal, DAG, dl); } assert(VT.is128BitVector() && "Only 128-bit vector types should be left!"); // Transform it so it matches pinsr{b,w} which expects a GR32 as its second // argument. SSE41 required for pinsrb. if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) { unsigned Opc; if (VT == MVT::v8i16) { assert(Subtarget.hasSSE2() && "SSE2 required for PINSRW"); Opc = X86ISD::PINSRW; } else { assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector"); assert(Subtarget.hasSSE41() && "SSE41 required for PINSRB"); Opc = X86ISD::PINSRB; } if (N1.getValueType() != MVT::i32) N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(Opc, dl, VT, N0, N1, N2); } if (Subtarget.hasSSE41()) { if (EltVT == MVT::f32) { // Bits [7:6] of the constant are the source select.
This will always be // zero here. The DAG Combiner may combine an extract_elt index into // these bits. For example (insert (extract, 3), 2) could be matched by // putting the '3' into bits [7:6] of X86ISD::INSERTPS. // Bits [5:4] of the constant are the destination select. This is the // value of the incoming immediate. // Bits [3:0] of the constant are the zero mask. The DAG Combiner may // combine either bitwise AND or insert of float 0.0 to set these bits. bool MinSize = DAG.getMachineFunction().getFunction().optForMinSize(); if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) { // If this is an insertion of 32-bits into the low 32-bits of // a vector, we prefer to generate a blend with immediate rather // than an insertps. Blends are simpler operations in hardware and so // will always have equal or better performance than insertps. // But if optimizing for size and there's a load folding opportunity, // generate insertps because blendps does not have a 32-bit memory // operand form. N2 = DAG.getIntPtrConstant(1, dl); N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2); } N2 = DAG.getIntPtrConstant(IdxVal << 4, dl); // Create this as a scalar-to-vector. N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); } // PINSR* works with constant index. if (EltVT == MVT::i32 || EltVT == MVT::i64) return Op; } return SDValue(); } static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); MVT OpVT = Op.getSimpleValueType(); // It's always cheaper to replace an xor+movd with xorps, and it simplifies // further combines. if (X86::isZeroNode(Op.getOperand(0))) return getZeroVector(OpVT, Subtarget, DAG, dl); // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. if (!OpVT.is128BitVector()) { // Insert into a 128-bit vector. unsigned SizeFactor = OpVT.getSizeInBits() / 128; MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(), OpVT.getVectorNumElements() / SizeFactor); Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0)); // Insert the 128-bit vector. return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl); } assert(OpVT.is128BitVector() && "Expected an SSE type!"); // Pass through a v4i32 SCALAR_TO_VECTOR as that's what we use in tblgen. if (OpVT == MVT::v4i32) return Op; SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); return DAG.getBitcast( OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt)); } // Lower a node with an INSERT_SUBVECTOR opcode. This may result in a // simple superregister reference or explicit instructions to insert // the upper bits of a vector.
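// Note that only mask (vXi1) subvector inserts reach this custom hook // (see the assert below); other element types are handled by the generic // subvector patterns.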
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1); return insert1BitVector(Op, DAG, Subtarget); } static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 && "Only vXi1 extract_subvectors need custom lowering"); SDLoc dl(Op); SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); if (!isa<ConstantSDNode>(Idx)) return SDValue(); unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); if (IdxVal == 0) // the operation is legal return Op; MVT VecVT = Vec.getSimpleValueType(); unsigned NumElems = VecVT.getVectorNumElements(); // Extend to natively supported kshift. MVT WideVecVT = VecVT; if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, DAG.getUNDEF(WideVecVT), Vec, DAG.getIntPtrConstant(0, dl)); } // Shift to the LSB. Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, DAG.getConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec, DAG.getIntPtrConstant(0, dl)); } // Returns the appropriate wrapper opcode for a global reference. unsigned X86TargetLowering::getGlobalWrapperKind( const GlobalValue *GV, const unsigned char OpFlags) const { // References to absolute symbols are never PC-relative. if (GV && GV->isAbsoluteSymbolRef()) return X86ISD::Wrapper; CodeModel::Model M = getTargetMachine().getCodeModel(); if (Subtarget.isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) return X86ISD::WrapperRIP; // GOTPCREL references must always use RIP. if (OpFlags == X86II::MO_GOTPCREL) return X86ISD::WrapperRIP; return X86ISD::Wrapper; } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form an addressing mode. These wrapped nodes will be selected // into MOV32ri. SDValue X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetConstantPool( CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), OpFlag); SDLoc DL(CP); Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (OpFlag) { Result = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); } return Result; } SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); SDLoc DL(JT); Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result); // With PIC, the address is actually $g + Offset.
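// ($g is the PIC base materialized by X86ISD::GlobalBaseReg.)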
if (OpFlag) Result = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); return Result; } SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); unsigned char OpFlag = Subtarget.classifyGlobalReference(nullptr, *Mod); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag); SDLoc DL(Op); Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (OpFlag) { Result = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); } // For symbols that require a load from a stub to get the address, emit the // load. if (isGlobalStubReference(OpFlag)) Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } SDValue X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // Create the TargetBlockAddress node. unsigned char OpFlags = Subtarget.classifyBlockAddressReference(); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset(); SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags); Result = DAG.getNode(getGlobalWrapperKind(), dl, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (isGlobalRelativeToPICBase(OpFlags)) { Result = DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); } return Result; } SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl, int64_t Offset, SelectionDAG &DAG) const { // Create the TargetGlobalAddress node, folding in the constant // offset if it is legal. unsigned char OpFlags = Subtarget.classifyGlobalReference(GV); CodeModel::Model M = DAG.getTarget().getCodeModel(); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; if (OpFlags == X86II::MO_NO_FLAG && X86::isOffsetSuitableForCodeModel(Offset, M)) { // A direct static reference to a global. Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); Offset = 0; } else { Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags); } Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (isGlobalRelativeToPICBase(OpFlags)) { Result = DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); } // For globals that require a load from a stub to get the address, emit the // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it.
if (Offset != 0) Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, DAG.getConstant(Offset, dl, PtrVT)); return Result; } SDValue X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); return LowerGlobalAddress(GV, SDLoc(Op), Offset, DAG); } static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags, bool LocalDynamic = false) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR : X86ISD::TLSADDR; if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; Chain = DAG.getNode(CallType, dl, NodeTys, Ops); } else { SDValue Ops[] = { Chain, TGA }; Chain = DAG.getNode(CallType, dl, NodeTys, Ops); } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. MFI.setAdjustsStack(true); MFI.setHasCalls(true); SDValue Flag = Chain.getValue(1); return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT) { SDValue InFlag; SDLoc dl(GA); // ? function entry point might be better SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT) { return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, X86II::MO_TLSGD); } static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, bool is64Bit) { SDLoc dl(GA); // Get the start address of the TLS block for this module. X86MachineFunctionInfo *MFI = DAG.getMachineFunction() .getInfo<X86MachineFunctionInfo>(); MFI->incNumLocalDynamicTLSAccesses(); SDValue Base; if (is64Bit) { Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, X86II::MO_TLSLD, /*LocalDynamic=*/true); } else { SDValue InFlag; SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSLDM, /*LocalDynamic=*/true); } // Note: the CleanupLocalDynamicTLSPass will remove redundant computations // of Base. // Build x@dtpoff. unsigned char OperandFlags = X86II::MO_DTPOFF; unsigned WrapperKind = X86ISD::Wrapper; SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); // Add x@dtpoff with the base. return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base); } // Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
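// Roughly: local exec adds a link-time-constant offset (x@tpoff/x@ntpoff) // directly to the thread pointer, while initial exec must first load the // offset from the GOT (x@gottpoff / x@indntpoff) before adding it.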
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, TLSModel::Model model, bool is64Bit, bool isPIC) { SDLoc dl(GA); // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(), is64Bit ? 257 : 256)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl), MachinePointerInfo(Ptr)); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is // initialexec. unsigned WrapperKind = X86ISD::Wrapper; if (model == TLSModel::LocalExec) { OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF; } else if (model == TLSModel::InitialExec) { if (is64Bit) { OperandFlags = X86II::MO_GOTTPOFF; WrapperKind = X86ISD::WrapperRIP; } else { OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF; } } else { llvm_unreachable("Unexpected model"); } // emit "addl x@ntpoff,%eax" (local exec) // or "addl x@indntpoff,%eax" (initial exec) // or "addl x@gotntpoff(%ebx),%eax" (initial exec, 32-bit pic) SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); if (model == TLSModel::InitialExec) { if (isPIC && !is64Bit) { Offset = DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Offset); } Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, MachinePointerInfo::getGOT(DAG.getMachineFunction())); } // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); } SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); if (DAG.getTarget().useEmulatedTLS()) return LowerToTLSEmulatedModel(GA, DAG); const GlobalValue *GV = GA->getGlobal(); auto PtrVT = getPointerTy(DAG.getDataLayout()); bool PositionIndependent = isPositionIndependent(); if (Subtarget.isTargetELF()) { TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: if (Subtarget.is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); case TLSModel::LocalDynamic: return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), PositionIndependent); } llvm_unreachable("Unknown TLS model."); } if (Subtarget.isTargetDarwin()) { // Darwin only has one model of TLS. Lower to that. unsigned char OpFlag = 0; unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ? X86ISD::WrapperRIP : X86ISD::Wrapper; // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. bool PIC32 = PositionIndependent && !Subtarget.is64Bit(); if (PIC32) OpFlag = X86II::MO_TLVP_PIC_BASE; else OpFlag = X86II::MO_TLVP; SDLoc DL(Op); SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, GA->getValueType(0), GA->getOffset(), OpFlag); SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC32, the address is actually $g + Offset. if (PIC32) Offset = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Offset); // Lowering the machine ISD node will make sure everything is in the right // location.
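// Roughly speaking, X86ISD::TLSCALL becomes an indirect call through the // function pointer stored at the start of the TLV descriptor that Offset // points at (Darwin's tlv_get_addr protocol).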
SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); SDValue Args[] = { Chain, Offset }; Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), DAG.getIntPtrConstant(0, DL, true), Chain.getValue(1), DL); // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setAdjustsStack(true); // And our return value (tls address) is in the standard call return value // location. unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1)); } if (Subtarget.isTargetKnownWindowsMSVC() || Subtarget.isTargetWindowsItanium() || Subtarget.isTargetWindowsGNU()) { // Just use the implicit TLS architecture // Need to generate something similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage // ; from TEB // mov ecx, dword [rel _tls_index]: Load index (from C runtime) // mov rcx, qword [rdx+rcx*8] // mov eax, .tls$:tlsvar // [rax+rcx] contains the address // Windows 64bit: gs:0x58 // Windows 32bit: fs:__tls_array SDLoc dl(GA); SDValue Chain = DAG.getEntryNode(); // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly // use its literal value of 0x2C. Value *Ptr = Constant::getNullValue(Subtarget.is64Bit() ? Type::getInt8PtrTy(*DAG.getContext(), 256) : Type::getInt32PtrTy(*DAG.getContext(), 257)); SDValue TlsArray = Subtarget.is64Bit() ? DAG.getIntPtrConstant(0x58, dl) : (Subtarget.isTargetWindowsGNU() ? DAG.getIntPtrConstant(0x2C, dl) : DAG.getExternalSymbol("_tls_array", PtrVT)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr)); SDValue res; if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) { res = ThreadPointer; } else { // Load the _tls_index variable SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT); if (Subtarget.is64Bit()) IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX, MachinePointerInfo(), MVT::i32); else IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo()); auto &DL = DAG.getDataLayout(); SDValue Scale = DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, MVT::i8); IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale); res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX); } res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo()); // Get the offset of start of .tls section SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), X86II::MO_SECREL); SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset); } llvm_unreachable("TLS not implemented for this target."); } /// Lower SRA_PARTS and friends, which return two i32 values /// and take a 2 x i32 value to shift plus a shift amount. /// TODO: Can this be moved to general expansion code? 
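/// As a concrete sketch for SHL_PARTS with 32-bit parts: when Amt < 32 the /// result is Hi = fshl(Hi, Lo, Amt), Lo = Lo << Amt; when Amt >= 32 the wide /// shift degenerates to Hi = Lo << (Amt & 31), Lo = 0. The lowering below /// computes both candidates and selects between them on the (Amt & 32) bit.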
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); MVT VT = Op.getSimpleValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's optimized away // during isel. SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits - 1, dl, MVT::i8)); SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi, DAG.getConstant(VTBits - 1, dl, MVT::i8)) : DAG.getConstant(0, dl, VT); SDValue Tmp2, Tmp3; if (Op.getOpcode() == ISD::SHL_PARTS) { Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt); } else { Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt); Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt); } // If the shift amount is larger or equal than the width of a part we can't // rely on the results of shld/shrd. Insert a test and select the appropriate // values for large shift amounts. SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, dl, MVT::i8)); SDValue Cond = DAG.getSetCC(dl, MVT::i8, AndNode, DAG.getConstant(0, dl, MVT::i8), ISD::SETNE); SDValue Hi, Lo; if (Op.getOpcode() == ISD::SHL_PARTS) { Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2); Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); } else { Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2); Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); } return DAG.getMergeValues({ Lo, Hi }, dl); } static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) && "Unexpected funnel shift opcode!"); SDLoc DL(Op); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue Amt = Op.getOperand(2); bool IsFSHR = Op.getOpcode() == ISD::FSHR; if (VT.isVector()) { assert(Subtarget.hasVBMI2() && "Expected VBMI2"); if (IsFSHR) std::swap(Op0, Op1); APInt APIntShiftAmt; if (isConstantSplat(Amt, APIntShiftAmt)) { uint64_t ShiftAmt = APIntShiftAmt.getZExtValue(); return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8)); } return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, Op0, Op1, Amt); } assert((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && "Unexpected funnel shift type!"); // Expand slow SHLD/SHRD cases if we are not optimizing for size. bool OptForSize = DAG.getMachineFunction().getFunction().optForSize(); if (!OptForSize && Subtarget.isSHLDSlow()) return SDValue(); if (IsFSHR) std::swap(Op0, Op1); // i16 needs to modulo the shift amount, but i32/i64 have implicit modulo. if (VT == MVT::i16) Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, DAG.getConstant(15, DL, Amt.getValueType())); unsigned SHDOp = (IsFSHR ? X86ISD::SHRD : X86ISD::SHLD); return DAG.getNode(SHDOp, DL, VT, Op0, Op1, Amt); } // Try to use a packed vector operation to handle i64 on 32-bit targets when // AVX512DQ is enabled. 
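// AVX512DQ provides 64-bit integer to FP conversions (VCVTQQ2PS/VCVTQQ2PD // and their unsigned variants), but only on vector registers, so the scalar // i64 is placed in element 0 of a vector, converted there, and the scalar // result is extracted back out.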
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert((Op.getOpcode() == ISD::SINT_TO_FP || Op.getOpcode() == ISD::UINT_TO_FP) && "Unexpected opcode!"); SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); MVT VT = Op.getSimpleValueType(); if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() || (VT != MVT::f32 && VT != MVT::f64)) return SDValue(); // Pack the i64 into a vector, do the operation and extract. // Using 256-bit to ensure result is 128-bits for f32 case. unsigned NumElts = Subtarget.hasVLX() ? 4 : 8; MVT VecInVT = MVT::getVectorVT(MVT::i64, NumElts); MVT VecVT = MVT::getVectorVT(VT, NumElts); SDLoc dl(Op); SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src); SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec, DAG.getIntPtrConstant(0, dl)); } SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); if (SrcVT.isVector()) { if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) { return DAG.getNode(X86ISD::CVTSI2P, dl, VT, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, DAG.getUNDEF(SrcVT))); } return SDValue(); } assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); // These are really Legal; return the operand so the caller accepts it as // Legal. if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(VT)) return Op; if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && Subtarget.is64Bit()) return Op; if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget)) return V; SDValue ValueToStore = Op.getOperand(0); if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && !Subtarget.is64Bit()) // Bitcasting to f64 here allows us to do a single 64-bit store from // an SSE register, avoiding the store forwarding penalty that would come // with two 32-bit stores. ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Chain = DAG.getStore( DAG.getEntryNode(), dl, ValueToStore, StackSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI)); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const { // Build the FILD SDLoc DL(Op); SDVTList Tys; bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); if (useSSE) Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue); else Tys = DAG.getVTList(Op.getValueType(), MVT::Other); unsigned ByteSize = SrcVT.getSizeInBits()/8; FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot); MachineMemOperand *MMO; if (FI) { int SSFI = FI->getIndex(); MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), MachineMemOperand::MOLoad, ByteSize, ByteSize); } else { MMO = cast<LoadSDNode>(StackSlot)->getMemOperand(); StackSlot = StackSlot.getOperand(1); } SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) }; SDValue Result = DAG.getMemIntrinsicNode(useSSE ?
X86ISD::FILD_FLAG : X86ISD::FILD, DL, Tys, Ops, SrcVT, MMO); if (useSSE) { Chain = Result.getValue(1); SDValue InFlag = Result.getValue(2); // FIXME: Currently the FST is glued to the FILD_FLAG. This // shouldn't be necessary except that RFP cannot be live across // multiple blocks. When stackifier is fixed, they can be uncoupled. MachineFunction &MF = DAG.getMachineFunction(); unsigned SSFISize = Op.getValueSizeInBits()/8; int SSFI = MF.getFrameInfo().CreateStackObject(SSFISize, SSFISize, false); auto PtrVT = getPointerTy(MF.getDataLayout()); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); Tys = DAG.getVTList(MVT::Other); SDValue Ops[] = { Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag }; MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), MachineMemOperand::MOStore, SSFISize, SSFISize); Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, Ops, Op.getValueType(), MMO); Result = DAG.getLoad( Op.getValueType(), DL, Chain, StackSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI)); } return Result; } /// 64-bit unsigned integer to double expansion. static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // This algorithm is not obvious. Here is what we're trying to output: /* movq %rax, %xmm0 punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U } subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 } #ifdef __SSE3__ haddpd %xmm0, %xmm0 #else pshufd $0x4e, %xmm0, %xmm1 addpd %xmm1, %xmm0 #endif */ SDLoc dl(Op); LLVMContext *Context = DAG.getContext(); // Build some magic constants. static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; Constant *C0 = ConstantDataVector::get(*Context, CV0); auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16); SmallVector<Constant*, 2> CV1; CV1.push_back( ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), APInt(64, 0x4330000000000000ULL)))); CV1.push_back( ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), APInt(64, 0x4530000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16); // Load the 64-bit value into an XMM register. SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0)); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), /* Alignment = */ 16); SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), /* Alignment = */ 16); SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1); // TODO: Are there any fast-math-flags to propagate here? SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); SDValue Result; if (Subtarget.hasSSE3()) { // FIXME: The 'haddpd' instruction may be slower than 'shuffle + addsd'. Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); } else { SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1}); Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub); } return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result, DAG.getIntPtrConstant(0, dl)); } /// 32-bit unsigned integer to float expansion.
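/// The trick below relies on the IEEE-754 representation: OR'ing a 32-bit /// value v into the low mantissa bits of the double with bit pattern /// 0x4330000000000000 (i.e. 0x1.0p52) yields a double exactly equal to /// 2^52 + v, so subtracting the 0x1.0p52 bias recovers v exactly.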
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc dl(Op); // FP constant to bias correct the final result. SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); // Load the 32-bit value into an XMM register. SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(0)); // Zero out the upper parts of the register. Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG); Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, DAG.getBitcast(MVT::v2f64, Load), DAG.getIntPtrConstant(0, dl)); // Or the load with the bias. SDValue Or = DAG.getNode( ISD::OR, dl, MVT::v2i64, DAG.getBitcast(MVT::v2i64, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)), DAG.getBitcast(MVT::v2i64, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias))); Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl)); // Subtract the bias. // TODO: Are there any fast-math-flags to propagate here? SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType()); } static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL) { if (Op.getSimpleValueType() != MVT::v2f64) return SDValue(); SDValue N0 = Op.getOperand(0); assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type"); // Legalize to v4i32 type. N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0, DAG.getUNDEF(MVT::v2i32)); if (Subtarget.hasAVX512()) return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0); // Same implementation as VectorLegalizer::ExpandUINT_TO_FLOAT, // but using v2i32 to v2f64 with X86ISD::CVTSI2P. SDValue HalfWord = DAG.getConstant(16, DL, MVT::v4i32); SDValue HalfWordMask = DAG.getConstant(0x0000FFFF, DL, MVT::v4i32); // Two to the power of half-word-size. SDValue TWOHW = DAG.getConstantFP(1 << 16, DL, MVT::v2f64); // Clear upper part of LO, lower HI. SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord); SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask); SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI); fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW); SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO); // Add the two halves. return DAG.getNode(ISD::FADD, DL, MVT::v2f64, fHI, fLO); } static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // The algorithm is the following: // #ifdef __SSE4_1__ // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), // (uint4) 0x53000000, 0xaa); // #else // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000; // uint4 hi = (v >> 16) | (uint4) 0x53000000; // #endif // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); // return (float4) lo + fhi; // We shouldn't use it when unsafe-fp-math is enabled though: we might later // reassociate the two FADDs, and if we do that, the algorithm fails // spectacularly (PR24512). // FIXME: If we ever have some kind of Machine FMF, this should be marked // as non-fast and always be enabled. Why isn't SDAG FMF enough? Because // there's also the MachineCombiner reassociations happening on Machine IR. 
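// Why this works: as IEEE-754 singles, lo = 2^23 + (v & 0xffff) and // hi = 2^39 + (v >> 16) * 2^16, so fhi = hi - (2^39 + 2^23) equals // (v >> 16) * 2^16 - 2^23, and lo + fhi = v (up to the final rounding), // provided the two FADDs are evaluated in exactly this order.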
if (DAG.getTarget().Options.UnsafeFPMath) return SDValue(); SDLoc DL(Op); SDValue V = Op->getOperand(0); MVT VecIntVT = V.getSimpleValueType(); bool Is128 = VecIntVT == MVT::v4i32; MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; // If we convert to something other than the supported type, e.g., to v4f64, // abort early. if (VecFloatVT != Op->getSimpleValueType(0)) return SDValue(); assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) && "Unsupported custom type"); // In the #ifdef/#else code, we have in common: // - The vector of constants: // -- 0x4b000000 // -- 0x53000000 // - A shift: // -- v >> 16 // Create the splat vector for 0x4b000000. SDValue VecCstLow = DAG.getConstant(0x4b000000, DL, VecIntVT); // Create the splat vector for 0x53000000. SDValue VecCstHigh = DAG.getConstant(0x53000000, DL, VecIntVT); // Create the right shift. SDValue VecCstShift = DAG.getConstant(16, DL, VecIntVT); SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift); SDValue Low, High; if (Subtarget.hasSSE41()) { MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow); SDValue VecBitcast = DAG.getBitcast(VecI16VT, V); // Low will be bitcasted right away, so do not bother bitcasting back to its // original type. Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), // (uint4) 0x53000000, 0xaa); SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh); SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift); // High will be bitcasted right away, so do not bother bitcasting back to // its original type. High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); } else { SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT); // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000; SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask); Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow); // uint4 hi = (v >> 16) | (uint4) 0x53000000; High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh); } // Create the vector constant for -(0x1.0p39f + 0x1.0p23f). SDValue VecCstFAdd = DAG.getConstantFP( APFloat(APFloat::IEEEsingle(), APInt(32, 0xD3000080)), DL, VecFloatVT); // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); // TODO: Are there any fast-math-flags to propagate here?
SDValue FHigh = DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd); // return (float4) lo + fhi; SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); } static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue N0 = Op.getOperand(0); MVT SrcVT = N0.getSimpleValueType(); SDLoc dl(Op); switch (SrcVT.SimpleTy) { default: llvm_unreachable("Custom UINT_TO_FP is not supported!"); case MVT::v2i32: return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl); case MVT::v4i32: case MVT::v8i32: assert(!Subtarget.hasAVX512()); return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget); } } SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Op.getSimpleValueType().isVector()) return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); MVT SrcVT = N0.getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) && (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) { // Conversions from unsigned i32 to f32/f64 are legal, // using VCVTUSI2SS/SD. Same for i64 in 64-bit mode. return Op; } if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget)) return V; if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG, Subtarget); if (SrcVT == MVT::i32 && X86ScalarSSEf64) return LowerUINT_TO_FP_i32(Op, DAG, Subtarget); if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32) return SDValue(); // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); if (SrcVT == MVT::i32) { SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, MachinePointerInfo()); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32), OffsetSlot, MachinePointerInfo()); SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); return Fild; } assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); SDValue ValueToStore = Op.getOperand(0); if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) // Bitcasting to f64 here allows us to do a single 64-bit store from // an SSE register, avoiding the store forwarding penalty that would come // with two 32-bit stores. ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore, StackSlot, MachinePointerInfo()); // For i64 source, we need to add the appropriate power of 2 if the input // was negative. This is the same as the optimization in // DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here, // we must be careful to do the computation in x87 extended precision, not // in SSE. (The generic code can't know it's OK to do this, or how to.) int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), MachineMemOperand::MOLoad, 8, 8); SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MMO); APInt FF(32, 0x5F800000ULL); // Check whether the sign bit is set.
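// (0x5F800000 is 2^64 as an IEEE-754 single. When the i64 input is negative, // its value as an unsigned integer is Input + 2^64, so that fudge factor is // added back in x87 extended precision after the signed FILD.)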
SDValue SignSet = DAG.getSetCC( dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64), Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. SDValue FudgePtr = DAG.getConstantPool( ConstantInt::get(*DAG.getContext(), FF.zext(64)), PtrVT); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. SDValue Zero = DAG.getIntPtrConstant(0, dl); SDValue Four = DAG.getIntPtrConstant(4, dl); SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four); FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset); // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? SDValue Fudge = DAG.getExtLoad( ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, /* Alignment = */ 4); // Extend everything to 80 bits to force it to be done on x87. // TODO: Are there any fast-math-flags to propagate here? SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0, dl)); } // If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation // is legal, or has an fp128 or f16 source (which needs to be promoted to f32), // just return an <SDValue(), SDValue()> pair. // Otherwise it is assumed to be a conversion from one of f32, f64 or f80 // to i16, i32 or i64, and we lower it to a legal sequence. // If lowered to the final integer result we return a <Result, SDValue()> pair. // Otherwise we lower it to a sequence ending with a FIST, return a // <FIST, StackSlot> pair, and the caller is responsible for loading // the final integer result from StackSlot. std::pair<SDValue, SDValue> X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const { SDLoc DL(Op); EVT DstTy = Op.getValueType(); EVT TheVT = Op.getOperand(0).getValueType(); auto PtrVT = getPointerTy(DAG.getDataLayout()); if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) { // f16 must be promoted before using the lowering in this routine. // fp128 does not use this lowering. return std::make_pair(SDValue(), SDValue()); } // If using FIST to compute an unsigned i64, we'll need some fixup // to handle values above the maximum signed i64. A FIST is always // used for the 32-bit subtarget, but also for f80 on a 64-bit target. bool UnsignedFixup = !IsSigned && DstTy == MVT::i64 && (!Subtarget.is64Bit() || !isScalarFPTypeInSSEReg(TheVT)); if (!IsSigned && DstTy != MVT::i64 && !Subtarget.hasAVX512()) { // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST. // The low 32 bits of the fist result will have the correct uint32 result. assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); DstTy = MVT::i64; } assert(DstTy.getSimpleVT() <= MVT::i64 && DstTy.getSimpleVT() >= MVT::i16 && "Unknown FP_TO_INT to lower!"); // These are really Legal. if (DstTy == MVT::i32 && isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); if (Subtarget.is64Bit() && DstTy == MVT::i64 && isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); // We lower FP->int64 into FISTP64 followed by a load from a temporary // stack slot.
MachineFunction &MF = DAG.getMachineFunction(); unsigned MemSize = DstTy.getSizeInBits()/8; int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); unsigned Opc; switch (DstTy.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; } SDValue Chain = DAG.getEntryNode(); SDValue Value = Op.getOperand(0); SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment. if (UnsignedFixup) { // // Conversion to unsigned i64 is implemented with a select, // depending on whether the source value fits in the range // of a signed i64. Let Thresh be the FP equivalent of // 0x8000000000000000ULL. // // Adjust i32 = (Value < Thresh) ? 0 : 0x80000000; // FistSrc = (Value < Thresh) ? Value : (Value - Thresh); // Fist-to-mem64 FistSrc // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent // to XOR'ing the high 32 bits with Adjust. // // Being a power of 2, Thresh is exactly representable in all FP formats. // For X87 we'd like to use the smallest FP type for this constant, but // for DAG type consistency we have to match the FP operand type. APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000)); LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; bool LosesInfo = false; if (TheVT == MVT::f64) // The rounding mode is irrelevant as the conversion should be exact. Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &LosesInfo); else if (TheVT == MVT::f80) Status = Thresh.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, &LosesInfo); assert(Status == APFloat::opOK && !LosesInfo && "FP conversion should have been exact"); SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT); SDValue Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TheVT), Value, ThreshVal, ISD::SETLT); Adjust = DAG.getSelect(DL, MVT::i32, Cmp, DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(0x80000000, DL, MVT::i32)); SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal); Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TheVT), Value, ThreshVal, ISD::SETLT); Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub); } // FIXME This causes a redundant load/store if the SSE-class value is already // in memory, such as if it is on the callstack. if (isScalarFPTypeInSSEReg(TheVT)) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, DL, Value, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI)); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(TheVT) }; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), MachineMemOperand::MOLoad, MemSize, MemSize); Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, PtrVT); } MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), MachineMemOperand::MOStore, MemSize, MemSize); if (UnsignedFixup) { // Insert the FIST, load its result as two i32's, // and XOR the high i32 with Adjust. 
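// E.g. for Value = 2^63 + 5: Value >= Thresh, so FistSrc = Value - 2^63 = 5 // and Adjust = 0x80000000; XOR'ing the high i32 of the FIST result with // Adjust adds 2^63 back, giving 0x8000000000000005.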
SDValue FistOps[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), FistOps, DstTy, MMO); SDValue Low32 = DAG.getLoad(MVT::i32, DL, FIST, StackSlot, MachinePointerInfo()); SDValue HighAddr = DAG.getMemBasePlusOffset(StackSlot, 4, DL); SDValue High32 = DAG.getLoad(MVT::i32, DL, FIST, HighAddr, MachinePointerInfo()); High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust); if (Subtarget.is64Bit()) { // Join High32 and Low32 into a 64-bit result. // (High32 << 32) | Low32 Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32); High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32); High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32, DAG.getConstant(32, DL, MVT::i8)); SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32); return std::make_pair(Result, SDValue()); } SDValue ResultOps[] = { Low32, High32 }; SDValue pair = IsReplace ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps) : DAG.getMergeValues(ResultOps, DL); return std::make_pair(pair, SDValue()); } else { // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), Ops, DstTy, MMO); return std::make_pair(FIST, StackSlot); } } static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); assert(VT.isVector() && InVT.isVector() && "Expected vector type"); assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && "Expected same number of elements"); assert((VT.getVectorElementType() == MVT::i16 || VT.getVectorElementType() == MVT::i32 || VT.getVectorElementType() == MVT::i64) && "Unexpected element type"); assert((InVT.getVectorElementType() == MVT::i8 || InVT.getVectorElementType() == MVT::i16 || InVT.getVectorElementType() == MVT::i32) && "Unexpected element type"); // Custom legalize v8i8->v8i64 on CPUs without avx512bw. if (InVT == MVT::v8i8) { if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64) return SDValue(); In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8)); // FIXME: This should be ANY_EXTEND_VECTOR_INREG for ANY_EXTEND input. return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, VT, In); } if (Subtarget.hasInt256()) return Op; // Optimize vectors in AVX mode: // // v8i16 -> v8i32 // Use vpmovzwd for 4 lower elements v8i16 -> v4i32. // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. // Concat upper and lower parts. // // v4i32 -> v4i64 // Use vpmovzdq for 4 lower elements v4i32 -> v2i64. // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. // Concat upper and lower parts. // MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements() / 2); SDValue OpLo = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, HalfVT, In); SDValue ZeroVec = DAG.getConstant(0, dl, InVT); SDValue Undef = DAG.getUNDEF(InVT); bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND; SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); OpHi = DAG.getBitcast(HalfVT, OpHi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } // Helper to split and extend a v16i1 mask to v16i8 or v16i16.
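// Used when 512-bit ops are unavailable or to be avoided: each v8i1 half is // extended to v8i16 on its own, the halves are concatenated to v16i16, and a // final truncate produces the requested v16i8 where needed.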
static SDValue SplitAndExtendv16i1(unsigned ExtOpc, MVT VT, SDValue In, const SDLoc &dl, SelectionDAG &DAG) { assert((VT == MVT::v16i8 || VT == MVT::v16i16) && "Unexpected VT."); SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In, DAG.getIntPtrConstant(0, dl)); SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In, DAG.getIntPtrConstant(8, dl)); Lo = DAG.getNode(ExtOpc, dl, MVT::v8i16, Lo); Hi = DAG.getNode(ExtOpc, dl, MVT::v8i16, Hi); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i16, Lo, Hi); return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); } static SDValue LowerZERO_EXTEND_Mask(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!"); SDLoc DL(Op); unsigned NumElts = VT.getVectorNumElements(); // For all vectors but vXi8, we can just emit a sign_extend and a shift. This // avoids a constant pool load. if (VT.getVectorElementType() != MVT::i8) { SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In); return DAG.getNode(ISD::SRL, DL, VT, Extend, DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); } // Extend VT if BWI is not supported. MVT ExtVT = VT; if (!Subtarget.hasBWI()) { // If v16i32 is to be avoided, we'll need to split and concatenate. if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) return SplitAndExtendv16i1(ISD::ZERO_EXTEND, VT, In, DL, DAG); ExtVT = MVT::getVectorVT(MVT::i32, NumElts); } // Widen to 512-bits if VLX is not supported. MVT WideVT = ExtVT; if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) { NumElts *= 512 / ExtVT.getSizeInBits(); InVT = MVT::getVectorVT(MVT::i1, NumElts); In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT), In, DAG.getIntPtrConstant(0, DL)); WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts); } SDValue One = DAG.getConstant(1, DL, WideVT); SDValue Zero = DAG.getConstant(0, DL, WideVT); SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero); // Truncate if we had to extend above. if (VT != ExtVT) { WideVT = MVT::getVectorVT(MVT::i8, NumElts); SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal); } // Extract back to 128/256-bit if we widened. if (WideVT != VT) SelectedVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SelectedVal, DAG.getIntPtrConstant(0, DL)); return SelectedVal; } static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue In = Op.getOperand(0); MVT SVT = In.getSimpleValueType(); if (SVT.getVectorElementType() == MVT::i1) return LowerZERO_EXTEND_Mask(Op, Subtarget, DAG); assert(Subtarget.hasAVX() && "Expected AVX support"); return LowerAVXExtend(Op, DAG, Subtarget); } /// Helper to recursively truncate vector elements in half with PACKSS/PACKUS. /// It makes use of the fact that vectors with enough leading sign/zero bits /// prevent the PACKSS/PACKUS from saturating the results. /// AVX2 (Int256) sub-targets require extra shuffling as the PACK*S operates /// within each 128-bit lane. static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) && "Unexpected PACK opcode"); assert(DstVT.isVector() && "VT not a vector?"); // Requires SSE2 but AVX512 has fast vector truncate.
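// E.g. PACKSSDW saturates each i32 to [-32768, 32767] and PACKUSDW clamps // to [0, 65535] while narrowing to i16; with enough known sign/zero bits the // clamp can never fire, so the PACK acts as a plain element truncation.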
  if (!Subtarget.hasSSE2())
    return SDValue();

  EVT SrcVT = In.getValueType();

  // No truncation required, we might get here due to recursive calls.
  if (SrcVT == DstVT)
    return In;

  // We only support vector truncation to 64bits or greater from a
  // 128bits or greater source.
  unsigned DstSizeInBits = DstVT.getSizeInBits();
  unsigned SrcSizeInBits = SrcVT.getSizeInBits();
  if ((DstSizeInBits % 64) != 0 || (SrcSizeInBits % 128) != 0)
    return SDValue();

  unsigned NumElems = SrcVT.getVectorNumElements();
  if (!isPowerOf2_32(NumElems))
    return SDValue();

  LLVMContext &Ctx = *DAG.getContext();
  assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
  assert(SrcSizeInBits > DstSizeInBits && "Illegal truncation");

  EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);

  // Pack to the largest type possible:
  // vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB.
  EVT InVT = MVT::i16, OutVT = MVT::i8;
  if (SrcVT.getScalarSizeInBits() > 16 &&
      (Opcode == X86ISD::PACKSS || Subtarget.hasSSE41())) {
    InVT = MVT::i32;
    OutVT = MVT::i16;
  }

  // 128bit -> 64bit truncate - PACK 128-bit src in the lower subvector.
  if (SrcVT.is128BitVector()) {
    InVT = EVT::getVectorVT(Ctx, InVT, 128 / InVT.getSizeInBits());
    OutVT = EVT::getVectorVT(Ctx, OutVT, 128 / OutVT.getSizeInBits());
    In = DAG.getBitcast(InVT, In);
    SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, In);
    Res = extractSubVector(Res, 0, DAG, DL, 64);
    return DAG.getBitcast(DstVT, Res);
  }

  // Extract lower/upper subvectors.
  unsigned NumSubElts = NumElems / 2;
  SDValue Lo = extractSubVector(In, 0 * NumSubElts, DAG, DL, SrcSizeInBits / 2);
  SDValue Hi = extractSubVector(In, 1 * NumSubElts, DAG, DL, SrcSizeInBits / 2);

  unsigned SubSizeInBits = SrcSizeInBits / 2;
  InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
  OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());

  // 256bit -> 128bit truncate - PACK lower/upper 128-bit subvectors.
  if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
    Lo = DAG.getBitcast(InVT, Lo);
    Hi = DAG.getBitcast(InVT, Hi);
    SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi);
    return DAG.getBitcast(DstVT, Res);
  }

  // AVX2: 512bit -> 256bit truncate - PACK lower/upper 256-bit subvectors.
  // AVX2: 512bit -> 128bit truncate - PACK(PACK, PACK).
  if (SrcVT.is512BitVector() && Subtarget.hasInt256()) {
    Lo = DAG.getBitcast(InVT, Lo);
    Hi = DAG.getBitcast(InVT, Hi);
    SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi);

    // 256-bit PACK(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)),
    // so we need to shuffle to get ((LO0,HI0),(LO1,HI1)).
    Res = DAG.getBitcast(MVT::v4i64, Res);
    Res = DAG.getVectorShuffle(MVT::v4i64, DL, Res, Res, {0, 2, 1, 3});

    if (DstVT.is256BitVector())
      return DAG.getBitcast(DstVT, Res);

    // If 512bit -> 128bit truncate another stage.
    EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
    Res = DAG.getBitcast(PackedVT, Res);
    return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
  }

  // Recursively pack lower/upper subvectors, concat result and pack again.
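  // (e.g. v16i32 -> v16i8: PACK each v8i32 half down to v8i16, concatenate the
  // halves to v16i16, then recurse to PACK v16i16 down to v16i8.)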
  assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater");
  EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumSubElts);
  Lo = truncateVectorWithPACK(Opcode, PackedVT, Lo, DL, DAG, Subtarget);
  Hi = truncateVectorWithPACK(Opcode, PackedVT, Hi, DL, DAG, Subtarget);

  PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
  return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
}

static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue In = Op.getOperand(0);
  MVT InVT = In.getSimpleValueType();

  assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type.");

  // Shift LSB to MSB and use VPMOVB/W2M or TESTD/Q.
  unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
  if (InVT.getScalarSizeInBits() <= 16) {
    if (Subtarget.hasBWI()) {
      // legal, will go to VPMOVB2M, VPMOVW2M
      if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
        // We need to shift to get the lsb into sign position.
        // Shift packed bytes not supported natively, bitcast to word
        MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits() / 16);
        In = DAG.getNode(ISD::SHL, DL, ExtVT,
                         DAG.getBitcast(ExtVT, In),
                         DAG.getConstant(ShiftInx, DL, ExtVT));
        In = DAG.getBitcast(InVT, In);
      }
      return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),
                          In, ISD::SETGT);
    }
    // Use TESTD/Q, extended vector to packed dword/qword.
    assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
           "Unexpected vector type.");
    unsigned NumElts = InVT.getVectorNumElements();
    assert((NumElts == 8 || NumElts == 16) && "Unexpected number of elements");
    // We need to change to a wider element type that we have support for.
    // For 8 element vectors this is easy, we either extend to v8i32 or v8i64.
    // For 16 element vectors we extend to v16i32 unless we are explicitly
    // trying to avoid 512-bit vectors. If we are avoiding 512-bit vectors
    // we need to split into two 8 element vectors which we can extend to v8i32,
    // truncate and concat the results. There's an additional complication if
    // the original type is v16i8. In that case we can't split the v16i8 so
    // first we pre-extend it to v16i16 which we can split to v8i16, then extend
    // to v8i32, truncate that to v8i1 and concat the two halves.
    if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) {
      if (InVT == MVT::v16i8) {
        // First we need to sign extend up to 256-bits so we can split that.
        InVT = MVT::v16i16;
        In = DAG.getNode(ISD::SIGN_EXTEND, DL, InVT, In);
      }
      SDValue Lo = extract128BitVector(In, 0, DAG, DL);
      SDValue Hi = extract128BitVector(In, 8, DAG, DL);
      // We're split now, just emit two truncates and a concat. The two
      // truncates will trigger legalization to come back to this function.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Lo);
      Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Hi);
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
    }
    // We either have 8 elements or we're allowed to use 512-bit vectors.
    // If we have VLX, we want to use the narrowest vector that can get the
    // job done so we use vXi32.
    MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512 / NumElts);
    MVT ExtVT = MVT::getVectorVT(EltVT, NumElts);
    In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
    InVT = ExtVT;
    ShiftInx = InVT.getScalarSizeInBits() - 1;
  }

  if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
    // We need to shift to get the lsb into sign position.
    In = DAG.getNode(ISD::SHL, DL, InVT, In,
                     DAG.getConstant(ShiftInx, DL, InVT));
  }
  // If we have DQI, emit a pattern that will be iseled as vpmovq2m/vpmovd2m.
  if (Subtarget.hasDQI())
    return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);
  return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE);
}

SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue In = Op.getOperand(0);
  MVT InVT = In.getSimpleValueType();
  unsigned InNumEltBits = InVT.getScalarSizeInBits();

  assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
         "Invalid TRUNCATE operation");

  // If called by the legalizer just return.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
    return SDValue();

  if (VT.getVectorElementType() == MVT::i1)
    return LowerTruncateVecI1(Op, DAG, Subtarget);

  // vpmovqb/w/d, vpmovdb/w, vpmovwb
  if (Subtarget.hasAVX512()) {
    // word to byte only under BWI. Otherwise we have to promote to v16i32
    // and then truncate that. But we should only do that if we haven't been
    // asked to avoid 512-bit vectors. The actual promotion to v16i32 will be
    // handled by isel patterns.
    if (InVT != MVT::v16i16 || Subtarget.hasBWI() ||
        Subtarget.canExtendTo512DQ())
      return Op;
  }

  unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16);
  unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;

  // Truncate with PACKUS if we are truncating a vector with leading zero bits
  // that extend all the way to the packed/truncated value.
  // Pre-SSE41 we can only use PACKUSWB.
  KnownBits Known = DAG.computeKnownBits(In);
  if ((InNumEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros())
    if (SDValue V =
            truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget))
      return V;

  // Truncate with PACKSS if we are truncating a vector with sign-bits that
  // extend all the way to the packed/truncated value.
  if ((InNumEltBits - NumPackedSignBits) < DAG.ComputeNumSignBits(In))
    if (SDValue V =
            truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget))
      return V;

  if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
    // On AVX2, v4i64 -> v4i32 becomes VPERMD.
    if (Subtarget.hasInt256()) {
      static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
      In = DAG.getBitcast(MVT::v8i32, In);
      In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
                         DAG.getIntPtrConstant(0, DL));
    }

    SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
                               DAG.getIntPtrConstant(0, DL));
    SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
                               DAG.getIntPtrConstant(2, DL));
    OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
    OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
    static const int ShufMask[] = {0, 2, 4, 6};
    return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
  }

  if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
    // On AVX2, v8i32 -> v8i16 becomes PSHUFB.
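    // (Within each 128-bit lane, the PSHUFB mask below gathers the low word of
    // each dword; the v4i64 shuffle then merges the two lanes' low qwords.)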
    if (Subtarget.hasInt256()) {
      In = DAG.getBitcast(MVT::v32i8, In);

      // The PSHUFB mask:
      static const int ShufMask1[] = { 0,  1,  4,  5,  8,  9, 12, 13,
                                      -1, -1, -1, -1, -1, -1, -1, -1,
                                      16, 17, 20, 21, 24, 25, 28, 29,
                                      -1, -1, -1, -1, -1, -1, -1, -1 };
      In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
      In = DAG.getBitcast(MVT::v4i64, In);

      static const int ShufMask2[] = {0, 2, -1, -1};
      In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
      In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
                       DAG.getIntPtrConstant(0, DL));
      return DAG.getBitcast(VT, In);
    }

    SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
                               DAG.getIntPtrConstant(0, DL));
    SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
                               DAG.getIntPtrConstant(4, DL));

    OpLo = DAG.getBitcast(MVT::v16i8, OpLo);
    OpHi = DAG.getBitcast(MVT::v16i8, OpHi);

    // The PSHUFB mask:
    static const int ShufMask1[] = {0,  1,  4,  5,  8,  9, 12, 13,
                                    -1, -1, -1, -1, -1, -1, -1, -1};

    OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1);
    OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1);

    OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
    OpHi = DAG.getBitcast(MVT::v4i32, OpHi);

    // The MOVLHPS Mask:
    static const int ShufMask2[] = {0, 1, 4, 5};
    SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
    return DAG.getBitcast(MVT::v8i16, res);
  }

  if (VT == MVT::v16i8 && InVT == MVT::v16i16) {
    // Use an AND to zero upper bits for PACKUS.
    In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(255, DL, InVT));

    SDValue InLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
                               DAG.getIntPtrConstant(0, DL));
    SDValue InHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
                               DAG.getIntPtrConstant(8, DL));
    return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi);
  }

  // Handle truncation of V256 to V128 using shuffles.
  assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");

  assert(Subtarget.hasAVX() && "256-bit vector without AVX!");

  unsigned NumElems = VT.getVectorNumElements();
  MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2);

  SmallVector<int, 16> MaskVec(NumElems * 2, -1);
  // Prepare truncation shuffle mask
  for (unsigned i = 0; i != NumElems; ++i)
    MaskVec[i] = i * 2;
  In = DAG.getBitcast(NVT, In);
  SDValue V = DAG.getVectorShuffle(NVT, DL, In, In, MaskVec);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
                     DAG.getIntPtrConstant(0, DL));
}

SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op,
                                          SelectionDAG &DAG) const {
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
  MVT VT = Op.getSimpleValueType();

  if (VT.isVector()) {
    SDValue Src = Op.getOperand(0);
    SDLoc dl(Op);

    if (VT == MVT::v2i1 && Src.getSimpleValueType() == MVT::v2f64) {
      MVT ResVT = MVT::v4i32;
      MVT TruncVT = MVT::v4i1;
      unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
      if (!IsSigned && !Subtarget.hasVLX()) {
        // Widen to 512-bits.
        ResVT = MVT::v8i32;
        TruncVT = MVT::v8i1;
        Opc = ISD::FP_TO_UINT;
        Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
                          DAG.getUNDEF(MVT::v8f64), Src,
                          DAG.getIntPtrConstant(0, dl));
      }
      SDValue Res = DAG.getNode(Opc, dl, ResVT, Src);
      Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
                         DAG.getIntPtrConstant(0, dl));
    }

    assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
    if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
      return DAG.getNode(IsSigned ?
                             X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
                         DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
                                     DAG.getUNDEF(MVT::v2f32)));
    }

    return SDValue();
  }

  assert(!VT.isVector());

  std::pair<SDValue, SDValue> Vals =
      FP_TO_INTHelper(Op, DAG, IsSigned, /*IsReplace=*/false);
  SDValue FIST = Vals.first, StackSlot = Vals.second;
  // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
  if (!FIST.getNode())
    return Op;

  if (StackSlot.getNode())
    // Load the result.
    return DAG.getLoad(VT, SDLoc(Op), FIST, StackSlot, MachinePointerInfo());

  // The node is the result.
  return FIST;
}

static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue In = Op.getOperand(0);
  MVT SVT = In.getSimpleValueType();

  assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");

  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
                     DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
                                 In, DAG.getUNDEF(SVT)));
}

/// Horizontal vector math instructions may be slower than normal math with
/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
/// implementation, and likely shuffle complexity of the alternate sequence.
static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  bool IsOptimizingSize = DAG.getMachineFunction().getFunction().optForSize();
  bool HasFastHOps = Subtarget.hasFastHorizontalOps();
  return !IsSingleSource || IsOptimizingSize || HasFastHOps;
}

/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  // If both operands have other uses, this is probably not profitable.
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  if (!LHS.hasOneUse() && !RHS.hasOneUse())
    return Op;

  // FP horizontal add/sub were added with SSE3. Integer with SSSE3.
  bool IsFP = Op.getSimpleValueType().isFloatingPoint();
  if (IsFP && !Subtarget.hasSSE3())
    return Op;
  if (!IsFP && !Subtarget.hasSSSE3())
    return Op;

  // Defer forming the minimal horizontal op if the vector source has more than
  // the 2 extract element uses that we're matching here. In that case, we might
  // form a horizontal op that includes more than 1 add/sub op.
  if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      LHS.getOperand(0) != RHS.getOperand(0) ||
      !LHS.getOperand(0)->hasNUsesOfValue(2, 0))
    return Op;

  if (!isa<ConstantSDNode>(LHS.getOperand(1)) ||
      !isa<ConstantSDNode>(RHS.getOperand(1)) ||
      !shouldUseHorizontalOp(true, DAG, Subtarget))
    return Op;

  // Allow commuted 'hadd' ops.
  // TODO: Allow commuted (f)sub by negating the result of (F)HSUB?
  unsigned HOpcode;
  switch (Op.getOpcode()) {
  case ISD::ADD:  HOpcode = X86ISD::HADD;  break;
  case ISD::SUB:  HOpcode = X86ISD::HSUB;  break;
  case ISD::FADD: HOpcode = X86ISD::FHADD; break;
  case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
  default:
    llvm_unreachable("Trying to lower unsupported opcode to horizontal op");
  }
  unsigned LExtIndex = LHS.getConstantOperandVal(1);
  unsigned RExtIndex = RHS.getConstantOperandVal(1);
  if (LExtIndex == 1 && RExtIndex == 0 &&
      (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
    std::swap(LExtIndex, RExtIndex);

  // TODO: This can be extended to handle other adjacent extract pairs.
  if (LExtIndex != 0 || RExtIndex != 1)
    return Op;

  SDValue X = LHS.getOperand(0);
  EVT VecVT = X.getValueType();
  unsigned BitWidth = VecVT.getSizeInBits();
  assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) &&
         "Not expecting illegal vector widths here");

  // Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit
  // equivalent, so extract the 256/512-bit source op to 128-bit.
  // This is free: ymm/zmm -> xmm.
  SDLoc DL(Op);
  if (BitWidth == 256 || BitWidth == 512)
    X = extract128BitVector(X, 0, DAG, DL);

  // add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0
  // add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0
  // sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0
  SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
                     DAG.getIntPtrConstant(0, DL));
}

/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
                             const X86Subtarget &Subtarget) {
  assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
         "Only expecting float/double");
  return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
}

/// The only differences between FABS and FNEG are the mask and the logic op.
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
  assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
         "Wrong opcode for lowering FABS or FNEG.");

  bool IsFABS = (Op.getOpcode() == ISD::FABS);

  // If this is a FABS and it has an FNEG user, bail out to fold the combination
  // into an FNABS. We'll lower the FABS after that if it is still in use.
  if (IsFABS)
    for (SDNode *User : Op->uses())
      if (User->getOpcode() == ISD::FNEG)
        return Op;

  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();

  bool IsF128 = (VT == MVT::f128);
  assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
          VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
          VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
         "Unexpected type in LowerFABSorFNEG");

  // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
  // decide if we should generate a 16-byte constant mask when we only need 4 or
  // 8 bytes for the scalar case.

  // There are no scalar bitwise logical SSE/AVX instructions, so we
  // generate a 16-byte vector constant and logic op even for the scalar case.
  // Using a 16-byte mask allows folding the load of the mask with
  // the logic op, so it can save (~4 bytes) on code size.
  bool IsFakeVector = !VT.isVector() && !IsF128;
  MVT LogicVT = VT;
  if (IsFakeVector)
    LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;

  unsigned EltBits = VT.getScalarSizeInBits();
  // For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
  APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) :
                           APInt::getSignMask(EltBits);

  const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
  SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);

  SDValue Op0 = Op.getOperand(0);
  bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
  unsigned LogicOp = IsFABS  ? X86ISD::FAND :
                     IsFNABS ? X86ISD::FOR  :
                               X86ISD::FXOR;
  SDValue Operand = IsFNABS ?
      Op0.getOperand(0) : Op0;

  if (VT.isVector() || IsF128)
    return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);

  // For the scalar case extend to a 128-bit vector, perform the logic op,
  // and extract the scalar result back out.
  Operand = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Operand);
  SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode,
                     DAG.getIntPtrConstant(0, dl));
}

static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
  SDValue Mag = Op.getOperand(0);
  SDValue Sign = Op.getOperand(1);
  SDLoc dl(Op);

  // If the sign operand is smaller, extend it first.
  MVT VT = Op.getSimpleValueType();
  if (Sign.getSimpleValueType().bitsLT(VT))
    Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign);

  // And if it is bigger, shrink it first.
  if (Sign.getSimpleValueType().bitsGT(VT))
    Sign = DAG.getNode(ISD::FP_ROUND, dl, VT, Sign,
                       DAG.getIntPtrConstant(1, dl));

  // At this point the operands and the result should have the same
  // type, and that won't be f80 since that is not custom lowered.
  bool IsF128 = (VT == MVT::f128);
  assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
          VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
          VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
         "Unexpected type in LowerFCOPYSIGN");

  const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);

  // Perform all scalar logic operations as 16-byte vectors because there are no
  // scalar FP logic instructions in SSE.
  // TODO: This isn't necessary. If we used scalar types, we might avoid some
  // unnecessary splats, but we might miss load folding opportunities. Should
  // this decision be based on OptimizeForSize?
  bool IsFakeVector = !VT.isVector() && !IsF128;
  MVT LogicVT = VT;
  if (IsFakeVector)
    LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;

  // The mask constants are automatically splatted for vector types.
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue SignMask = DAG.getConstantFP(
      APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
  SDValue MagMask = DAG.getConstantFP(
      APFloat(Sem, APInt::getSignedMaxValue(EltSizeInBits)), dl, LogicVT);

  // First, clear all bits but the sign bit from the second operand (sign).
  if (IsFakeVector)
    Sign = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Sign);
  SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Sign, SignMask);

  // Next, clear the sign bit from the first operand (magnitude).
  // TODO: If we had general constant folding for FP logic ops, this check
  // wouldn't be necessary.
  SDValue MagBits;
  if (ConstantFPSDNode *Op0CN = isConstOrConstSplatFP(Mag)) {
    APFloat APF = Op0CN->getValueAPF();
    APF.clearSign();
    MagBits = DAG.getConstantFP(APF, dl, LogicVT);
  } else {
    // If the magnitude operand wasn't a constant, we need to AND out the sign.
    if (IsFakeVector)
      Mag = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Mag);
    MagBits = DAG.getNode(X86ISD::FAND, dl, LogicVT, Mag, MagMask);
  }

  // OR the magnitude value with the sign bit.
  SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit);
  return !IsFakeVector ? Or :
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or,
                  DAG.getIntPtrConstant(0, dl));
}

static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
  SDValue N0 = Op.getOperand(0);
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();

  MVT OpVT = N0.getSimpleValueType();
  assert((OpVT == MVT::f32 || OpVT == MVT::f64) &&
         "Unexpected type for FGETSIGN");

  // Lower ISD::FGETSIGN to (AND (X86ISD::MOVMSK ...) 1).
  MVT VecVT = (OpVT == MVT::f32 ?
                   MVT::v4f32 : MVT::v2f64);
  SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, N0);
  Res = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32, Res);
  Res = DAG.getZExtOrTrunc(Res, dl, VT);
  Res = DAG.getNode(ISD::AND, dl, VT, Res, DAG.getConstant(1, dl, VT));
  return Res;
}

/// Helper for creating a X86ISD::SETCC node.
static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
                        SelectionDAG &DAG) {
  return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                     DAG.getConstant(Cond, dl, MVT::i8), EFLAGS);
}

// Check whether an OR'd tree is PTEST-able.
static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
                                      const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG, SDValue &X86CC) {
  assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");

  if (!Subtarget.hasSSE41())
    return SDValue();

  if (!Op->hasOneUse())
    return SDValue();

  SDNode *N = Op.getNode();
  SDLoc DL(N);

  SmallVector<SDValue, 8> Opnds;
  DenseMap<SDValue, unsigned> VecInMap;
  SmallVector<SDValue, 8> VecIns;
  EVT VT = MVT::Other;

  // Recognize a special case where a vector is casted into wide integer to
  // test all 0s.
  Opnds.push_back(N->getOperand(0));
  Opnds.push_back(N->getOperand(1));

  for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
    SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot;
    // BFS traverse all OR'd operands.
    if (I->getOpcode() == ISD::OR) {
      Opnds.push_back(I->getOperand(0));
      Opnds.push_back(I->getOperand(1));
      // Re-evaluate the number of nodes to be traversed.
      e += 2; // 2 more nodes (LHS and RHS) are pushed.
      continue;
    }

    // Quit if a non-EXTRACT_VECTOR_ELT
    if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();

    // Quit if without a constant index.
    SDValue Idx = I->getOperand(1);
    if (!isa<ConstantSDNode>(Idx))
      return SDValue();

    SDValue ExtractedFromVec = I->getOperand(0);
    DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
    if (M == VecInMap.end()) {
      VT = ExtractedFromVec.getValueType();
      // Quit if not 128/256-bit vector.
      if (!VT.is128BitVector() && !VT.is256BitVector())
        return SDValue();
      // Quit if not the same type.
      if (VecInMap.begin() != VecInMap.end() &&
          VT != VecInMap.begin()->first.getValueType())
        return SDValue();
      M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
      VecIns.push_back(ExtractedFromVec);
    }
    M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
  }

  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Not extracted from 128-/256-bit vector.");

  unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;

  for (DenseMap<SDValue, unsigned>::const_iterator
         I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
    // Quit if not all elements are used.
    if (I->second != FullMask)
      return SDValue();
  }

  MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;

  // Cast all vectors into TestVT for PTEST.
  for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
    VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]);

  // If more than one full vector is evaluated, OR them first before PTEST.
  for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1;
       Slot += 2, e += 1) {
    // Each iteration will OR 2 nodes and append the result until there is
    // only 1 node left, i.e. the final OR'd value of all vectors.
    SDValue LHS = VecIns[Slot];
    SDValue RHS = VecIns[Slot + 1];
    VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
  }

  X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE,
                          DL, MVT::i8);
  return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(),
                     VecIns.back());
}

/// return true if \c Op has a use that doesn't just read flags.
static bool hasNonFlagsUse(SDValue Op) {
  for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE;
       ++UI) {
    SDNode *User = *UI;
    unsigned UOpNo = UI.getOperandNo();
    if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
      // Look past the truncate.
      UOpNo = User->use_begin().getOperandNo();
      User = *User->use_begin();
    }

    if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC &&
        !(User->getOpcode() == ISD::SELECT && UOpNo == 0))
      return true;
  }
  return false;
}

/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
                        SelectionDAG &DAG, const X86Subtarget &Subtarget) {
  // CF and OF aren't always set the way we want. Determine which
  // of these we need.
  bool NeedCF = false;
  bool NeedOF = false;
  switch (X86CC) {
  default: break;
  case X86::COND_A: case X86::COND_AE:
  case X86::COND_B: case X86::COND_BE:
    NeedCF = true;
    break;
  case X86::COND_G: case X86::COND_GE:
  case X86::COND_L: case X86::COND_LE:
  case X86::COND_O: case X86::COND_NO: {
    // Check if we really need to set the
    // Overflow flag. If NoSignedWrap is present
    // that is not actually needed.
    switch (Op->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::SHL:
      if (Op.getNode()->getFlags().hasNoSignedWrap())
        break;
      LLVM_FALLTHROUGH;
    default:
      NeedOF = true;
      break;
    }
    break;
  }
  }
  // See if we can use the EFLAGS value from the operand instead of
  // doing a separate TEST. TEST always sets OF and CF to 0, so unless
  // we prove that the arithmetic won't overflow, we can't use OF or CF.
  if (Op.getResNo() != 0 || NeedOF || NeedCF) {
    // Emit a CMP with 0, which is the TEST pattern.
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, Op.getValueType()));
  }
  unsigned Opcode = 0;
  unsigned NumOperands = 0;

  SDValue ArithOp = Op;

  // NOTICE: In the code below we use ArithOp to hold the arithmetic operation
  // which may be the result of a CAST.  We use the variable 'Op', which is the
  // non-casted variable when we check for possible users.
  switch (ArithOp.getOpcode()) {
  case ISD::AND:
    // If the primary 'and' result isn't used, don't bother using X86ISD::AND,
    // because a TEST instruction will be better.
    if (!hasNonFlagsUse(Op))
      break;

    LLVM_FALLTHROUGH;
  case ISD::ADD:
  case ISD::SUB:
  case ISD::OR:
  case ISD::XOR:
    // Transform to an x86-specific ALU node with flags if there is a chance of
    // using an RMW op or only the flags are used. Otherwise, leave
    // the node alone and emit a 'test' instruction.
    for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
         UE = Op.getNode()->use_end(); UI != UE; ++UI)
      if (UI->getOpcode() != ISD::CopyToReg &&
          UI->getOpcode() != ISD::SETCC &&
          UI->getOpcode() != ISD::STORE)
        goto default_case;

    // Otherwise use a regular EFLAGS-setting instruction.
    switch (ArithOp.getOpcode()) {
    default: llvm_unreachable("unexpected operator!");
    case ISD::ADD: Opcode = X86ISD::ADD; break;
    case ISD::SUB: Opcode = X86ISD::SUB; break;
    case ISD::XOR: Opcode = X86ISD::XOR; break;
    case ISD::AND: Opcode = X86ISD::AND; break;
    case ISD::OR:  Opcode = X86ISD::OR;  break;
    }

    NumOperands = 2;
    break;

  case X86ISD::ADD:
  case X86ISD::SUB:
  case X86ISD::OR:
  case X86ISD::XOR:
  case X86ISD::AND:
    return SDValue(Op.getNode(), 1);

  default:
  default_case:
    break;
  }

  if (Opcode == 0) {
    // Emit a CMP with 0, which is the TEST pattern.
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, Op.getValueType()));
  }
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
  SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands);

  SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), New);
  return SDValue(New.getNode(), 1);
}

/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
                                   const SDLoc &dl, SelectionDAG &DAG) const {
  if (isNullConstant(Op1))
    return EmitTest(Op0, X86CC, dl, DAG, Subtarget);

  if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
       Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
    // Only promote the compare up to I32 if it is a 16 bit operation
    // with an immediate.  16 bit immediates are to be avoided.
    if (Op0.getValueType() == MVT::i16 &&
        ((isa<ConstantSDNode>(Op0) &&
          !cast<ConstantSDNode>(Op0)->getAPIntValue().isSignedIntN(8)) ||
         (isa<ConstantSDNode>(Op1) &&
          !cast<ConstantSDNode>(Op1)->getAPIntValue().isSignedIntN(8))) &&
        !DAG.getMachineFunction().getFunction().optForMinSize() &&
        !Subtarget.isAtom()) {
      unsigned ExtendOp =
          isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
      Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0);
      Op1 = DAG.getNode(ExtendOp, dl, MVT::i32, Op1);
    }
    // Use SUB instead of CMP to enable CSE between SUB and CMP.
    SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
    SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
    return SDValue(Sub.getNode(), 1);
  }
  assert(Op0.getValueType().isFloatingPoint() && "Unexpected VT!");
  return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}

/// Convert a comparison if required by the subtarget.
SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
                                                 SelectionDAG &DAG) const {
  // If the subtarget does not support the FUCOMI instruction, floating-point
  // comparisons have to be converted.
  if (Subtarget.hasCMov() ||
      Cmp.getOpcode() != X86ISD::CMP ||
      !Cmp.getOperand(0).getValueType().isFloatingPoint() ||
      !Cmp.getOperand(1).getValueType().isFloatingPoint())
    return Cmp;

  // The instruction selector will select an FUCOM instruction instead of
  // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
  // build an SDNode sequence that transfers the result from FPSW into EFLAGS:
  // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8))))
  SDLoc dl(Cmp);
  SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
  SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
  SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
                            DAG.getConstant(8, dl, MVT::i8));
  SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);

  // Some 64-bit targets lack SAHF support, but they do support FCOMI.
  assert(Subtarget.hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
  return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}

/// Check if replacement of SQRT with RSQRT should be disabled.
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();

  // We never want to use both SQRT and RSQRT instructions for the same input.
  if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
    return false;

  if (VT.isVector())
    return Subtarget.hasFastVectorFSQRT();
  return Subtarget.hasFastScalarFSQRT();
}

/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
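/// (One Newton-Raphson step for rsqrt computes
/// Est' = Est * (1.5 - 0.5 * Op * Est * Est), roughly doubling the number of
/// correct bits per step.)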
SDValue X86TargetLowering::getSqrtEstimate(SDValue Op, SelectionDAG &DAG,
                                           int Enabled, int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  EVT VT = Op.getValueType();

  // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
  // It is likely not profitable to do this for f64 because a double-precision
  // rsqrt estimate with refinement on x86 prior to FMA requires at least 16
  // instructions: convert to single, rsqrtss, convert back to double, refine
  // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
  // along with FMA, this could be a throughput win.
  // TODO: SQRT requires SSE2 to prevent the introduction of an illegal v4i32
  // after legalize types.
  if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
      (VT == MVT::v4f32 && Subtarget.hasSSE1() && Reciprocal) ||
      (VT == MVT::v4f32 && Subtarget.hasSSE2() && !Reciprocal) ||
      (VT == MVT::v8f32 && Subtarget.hasAVX()) ||
      (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = 1;

    UseOneConstNR = false;
    // There is no 512-bit FRSQRT, but there is RSQRT14.
    unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT;
    return DAG.getNode(Opcode, SDLoc(Op), VT, Op);
  }
  return SDValue();
}

/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  EVT VT = Op.getValueType();

  // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
  // It is likely not profitable to do this for f64 because a double-precision
  // reciprocal estimate with refinement on x86 prior to FMA requires
  // 15 instructions: convert to single, rcpss, convert back to double, refine
  // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
  // along with FMA, this could be a throughput win.

  if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
      (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
      (VT == MVT::v8f32 && Subtarget.hasAVX()) ||
      (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) {
    // Enable estimate codegen with 1 refinement step for vector division.
    // Scalar division estimates are disabled because they break too much
    // real-world code. These defaults are intended to match GCC behavior.
    if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)
      return SDValue();

    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = 1;

    // There is no 512-bit FRCP, but there is RCP14.
    unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RCP14 : X86ISD::FRCP;
    return DAG.getNode(Opcode, SDLoc(Op), VT, Op);
  }
  return SDValue();
}

/// If we have at least two divisions that use the same divisor, convert to
/// multiplication by a reciprocal. This may need to be adjusted for a given
/// CPU if a division's cost is not at least twice the cost of a multiplication.
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
/// original divisions.
unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
  return 2;
}

/// Result of 'and' is compared against zero. Change to a BT node if possible.
/// Returns the BT node and the condition code needed to use it.
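/// For example, with CC == SETEQ, ((X >> N) & 1) == 0 lowers to
/// (X86ISD::BT X, N) used with X86::COND_AE (i.e. CF == 0).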
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SDValue &X86CC) {
  assert(And.getOpcode() == ISD::AND && "Expected AND node!");
  SDValue Op0 = And.getOperand(0);
  SDValue Op1 = And.getOperand(1);
  if (Op0.getOpcode() == ISD::TRUNCATE)
    Op0 = Op0.getOperand(0);
  if (Op1.getOpcode() == ISD::TRUNCATE)
    Op1 = Op1.getOperand(0);

  SDValue Src, BitNo;
  if (Op1.getOpcode() == ISD::SHL)
    std::swap(Op0, Op1);
  if (Op0.getOpcode() == ISD::SHL) {
    if (isOneConstant(Op0.getOperand(0))) {
      // If we looked past a truncate, check that it's only truncating away
      // known zeros.
      unsigned BitWidth = Op0.getValueSizeInBits();
      unsigned AndBitWidth = And.getValueSizeInBits();
      if (BitWidth > AndBitWidth) {
        KnownBits Known = DAG.computeKnownBits(Op0);
        if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
          return SDValue();
      }
      Src = Op1;
      BitNo = Op0.getOperand(1);
    }
  } else if (Op1.getOpcode() == ISD::Constant) {
    ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
    uint64_t AndRHSVal = AndRHS->getZExtValue();
    SDValue AndLHS = Op0;

    if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
      Src = AndLHS.getOperand(0);
      BitNo = AndLHS.getOperand(1);
    } else {
      // Use BT if the immediate can't be encoded in a TEST instruction or we
      // are optimizing for size and the immediate won't fit in a byte.
      bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
      if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
          isPowerOf2_64(AndRHSVal)) {
        Src = AndLHS;
        BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl,
                                Src.getValueType());
      }
    }
  }

  // No patterns found, give up.
  if (!Src.getNode())
    return SDValue();

  // If Src is i8, promote it to i32 with any_extend.  There is no i8 BT
  // instruction.  Since the shift amount is in-range-or-undefined, we know
  // that doing a bittest on the i32 value is ok.  We extend to i32 because
  // the encoding for the i16 version is larger than the i32 version.
  // Also promote i16 to i32 for performance / code size reason.
  if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
    Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);

  // See if we can use the 32-bit instruction instead of the 64-bit one for a
  // shorter encoding. Since the former takes the modulo 32 of BitNo and the
  // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
  // known to be zero.
  if (Src.getValueType() == MVT::i64 &&
      DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
    Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);

  // If the operand types disagree, extend the shift amount to match.  Since
  // BT ignores high bits (like shifts) we can use anyextend.
  if (Src.getValueType() != BitNo.getValueType())
    BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);

  X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
                          dl, MVT::i8);
  return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
}

/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
/// CMPs.
static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
                                   SDValue &Op1) {
  unsigned SSECC;
  bool Swap = false;

  // SSE Condition code mapping:
  //  0 - EQ
  //  1 - LT
  //  2 - LE
  //  3 - UNORD
  //  4 - NEQ
  //  5 - NLT
  //  6 - NLE
  //  7 - ORD
  switch (SetCCOpcode) {
  default: llvm_unreachable("Unexpected SETCC condition");
  case ISD::SETOEQ:
  case ISD::SETEQ:  SSECC = 0; break;
  case ISD::SETOGT:
  case ISD::SETGT:  Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETLT:
  case ISD::SETOLT: SSECC = 1; break;
  case ISD::SETOGE:
  case ISD::SETGE:  Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETLE:
  case ISD::SETOLE: SSECC = 2; break;
  case ISD::SETUO:  SSECC = 3; break;
  case ISD::SETUNE:
  case ISD::SETNE:  SSECC = 4; break;
  case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETUGE: SSECC = 5; break;
  case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETUGT: SSECC = 6; break;
  case ISD::SETO:   SSECC = 7; break;
  case ISD::SETUEQ: SSECC = 8; break;
  case ISD::SETONE: SSECC = 12; break;
  }
  if (Swap)
    std::swap(Op0, Op1);

  return SSECC;
}

/// Break a 256-bit integer VSETCC into two new 128-bit ones and then
/// concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();

  assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
         "Unsupported value type for operation");

  unsigned NumElems = VT.getVectorNumElements();
  SDLoc dl(Op);
  SDValue CC = Op.getOperand(2);

  // Extract the LHS vectors
  SDValue LHS = Op.getOperand(0);
  SDValue LHS1 = extract128BitVector(LHS, 0, DAG, dl);
  SDValue LHS2 = extract128BitVector(LHS, NumElems / 2, DAG, dl);

  // Extract the RHS vectors
  SDValue RHS = Op.getOperand(1);
  SDValue RHS1 = extract128BitVector(RHS, 0, DAG, dl);
  SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl);

  // Issue the operation on the smaller types and concatenate the result back
  MVT EltVT = VT.getVectorElementType();
  MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                     DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
                     DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}

static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);

  assert(VT.getVectorElementType() == MVT::i1 &&
         "Cannot set masked compare for this operation");

  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();

  // If this is a seteq make sure any build vectors of all zeros are on the RHS.
  // This helps with vptestm matching.
  // TODO: Should we just canonicalize the setcc during DAG combine?
  if ((SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE) &&
      ISD::isBuildVectorAllZeros(Op0.getNode()))
    std::swap(Op0, Op1);

  // Prefer SETGT over SETLT.
  if (SetCCOpcode == ISD::SETLT) {
    SetCCOpcode = ISD::getSetCCSwappedOperands(SetCCOpcode);
    std::swap(Op0, Op1);
  }

  return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode);
}

/// Given a simple buildvector constant, return a new vector constant with each
/// element decremented. If decrementing would result in underflow or this
/// is not a simple vector constant, return an empty value.
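/// For example, <4 x i32> <1, 2, 3, 4> becomes <0, 1, 2, 3>, while any vector
/// containing a zero element yields an empty SDValue.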
static SDValue decrementVectorConstant(SDValue V, SelectionDAG &DAG) {
  auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
  if (!BV)
    return SDValue();

  MVT VT = V.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  SmallVector<SDValue, 8> NewVecC;
  SDLoc DL(V);
  for (unsigned i = 0; i < NumElts; ++i) {
    auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
    if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT)
      return SDValue();

    // Avoid underflow.
    if (Elt->getAPIntValue().isNullValue())
      return SDValue();

    NewVecC.push_back(DAG.getConstant(Elt->getAPIntValue() - 1, DL, EltVT));
  }

  return DAG.getBuildVector(VT, DL, NewVecC);
}

/// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
///   Op0 u<= Op1:
///     t = psubus Op0, Op1
///     pcmpeq t, <0..0>
static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
                                    ISD::CondCode Cond, const SDLoc &dl,
                                    const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
  if (!Subtarget.hasSSE2())
    return SDValue();

  MVT VET = VT.getVectorElementType();
  if (VET != MVT::i8 && VET != MVT::i16)
    return SDValue();

  switch (Cond) {
  default:
    return SDValue();
  case ISD::SETULT: {
    // If the comparison is against a constant we can turn this into a
    // setule.  With psubus, setule does not require a swap.  This is
    // beneficial because the constant in the register is no longer
    // destroyed as the destination so it can be hoisted out of a loop.
    // Only do this pre-AVX since vpcmp* is no longer destructive.
    if (Subtarget.hasAVX())
      return SDValue();
    SDValue ULEOp1 = decrementVectorConstant(Op1, DAG);
    if (!ULEOp1)
      return SDValue();
    Op1 = ULEOp1;
    break;
  }
  // Psubus is better than flip-sign because it requires no inversion.
  case ISD::SETUGE:
    std::swap(Op0, Op1);
    break;
  case ISD::SETULE:
    break;
  }

  SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);
  return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
                     DAG.getConstant(0, dl, VT));
}

static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  MVT VT = Op.getSimpleValueType();
  ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
  bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
  SDLoc dl(Op);

  if (isFP) {
#ifndef NDEBUG
    MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
    assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif

    unsigned Opc;
    if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
      assert(VT.getVectorNumElements() <= 16);
      Opc = X86ISD::CMPM;
    } else {
      Opc = X86ISD::CMPP;
      // The SSE/AVX packed FP comparison nodes are defined with a
      // floating-point vector result that matches the operand type. This allows
      // them to work with an SSE1 target (integer vector types are not legal).
      VT = Op0.getSimpleValueType();
    }

    // In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
    // emit two comparisons and a logic op to tie them together.
    SDValue Cmp;
    unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
    if (SSECC >= 8 && !Subtarget.hasAVX()) {
      // LLVM predicate is SETUEQ or SETONE.
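      // SETUEQ decomposes as (UNORD || EQ) and SETONE as (ORD && NEQ), hence
      // the pair of compares joined with FOR/FAND below.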
      unsigned CC0, CC1;
      unsigned CombineOpc;
      if (Cond == ISD::SETUEQ) {
        CC0 = 3; // UNORD
        CC1 = 0; // EQ
        CombineOpc = X86ISD::FOR;
      } else {
        assert(Cond == ISD::SETONE);
        CC0 = 7; // ORD
        CC1 = 4; // NEQ
        CombineOpc = X86ISD::FAND;
      }

      SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
                                 DAG.getConstant(CC0, dl, MVT::i8));
      SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
                                 DAG.getConstant(CC1, dl, MVT::i8));
      Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
    } else {
      // Handle all other FP comparisons here.
      Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
                        DAG.getConstant(SSECC, dl, MVT::i8));
    }

    // If this is SSE/AVX CMPP, bitcast the result back to integer to match the
    // result type of SETCC. The bitcast is expected to be optimized away
    // during combining/isel.
    if (Opc == X86ISD::CMPP)
      Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);

    return Cmp;
  }

  MVT VTOp0 = Op0.getSimpleValueType();
  assert(VTOp0 == Op1.getSimpleValueType() &&
         "Expected operands with same type!");
  assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
         "Invalid number of packed elements for source and destination!");

  // This is being called by type legalization because v2i32 is marked custom
  // for result type legalization for v2f32.
  if (VTOp0 == MVT::v2i32)
    return SDValue();

  // The non-AVX512 code below works under the assumption that source and
  // destination types are the same.
  assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
         "Value types for source and destination must be the same!");

  // Break 256-bit integer vector compare into smaller ones.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return Lower256IntVSETCC(Op, DAG);

  // The result is boolean, but operands are int/float
  if (VT.getVectorElementType() == MVT::i1) {
    // In AVX-512 architecture setcc returns mask with i1 elements,
    // But there is no compare instruction for i8 and i16 elements in KNL.
    assert((VTOp0.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
           "Unexpected operand type");
    return LowerIntVSETCC_AVX512(Op, DAG);
  }

  // Lower using XOP integer comparisons.
  if (VT.is128BitVector() && Subtarget.hasXOP()) {
    // Translate compare code to XOP PCOM compare mode.
    unsigned CmpMode = 0;
    switch (Cond) {
    default: llvm_unreachable("Unexpected SETCC condition");
    case ISD::SETULT:
    case ISD::SETLT: CmpMode = 0x00; break;
    case ISD::SETULE:
    case ISD::SETLE: CmpMode = 0x01; break;
    case ISD::SETUGT:
    case ISD::SETGT: CmpMode = 0x02; break;
    case ISD::SETUGE:
    case ISD::SETGE: CmpMode = 0x03; break;
    case ISD::SETEQ: CmpMode = 0x04; break;
    case ISD::SETNE: CmpMode = 0x05; break;
    }

    // Are we comparing unsigned or signed integers?
    unsigned Opc =
        ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;

    return DAG.getNode(Opc, dl, VT, Op0, Op1,
                       DAG.getConstant(CmpMode, dl, MVT::i8));
  }

  // (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2.
  // Revert part of the simplifySetCCWithAnd combine, to avoid an invert.
  if (Cond == ISD::SETNE && ISD::isBuildVectorAllZeros(Op1.getNode())) {
    SDValue BC0 = peekThroughBitcasts(Op0);
    if (BC0.getOpcode() == ISD::AND) {
      APInt UndefElts;
      SmallVector<APInt, 64> EltBits;
      if (getTargetConstantBitsFromNode(BC0.getOperand(1),
                                        VT.getScalarSizeInBits(), UndefElts,
                                        EltBits, false, false)) {
        if (llvm::all_of(EltBits, [](APInt &V) { return V.isPowerOf2(); })) {
          Cond = ISD::SETEQ;
          Op1 = DAG.getBitcast(VT, BC0.getOperand(1));
        }
      }
    }
  }

  // If this is a SETNE against the signed minimum value, change it to SETGT.
  // If this is a SETNE against the signed maximum value, change it to SETLT
  // which will be swapped to SETGT.
  // Otherwise we use PCMPEQ+invert.
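  // (e.g. x != INT_MIN is equivalent to x > INT_MIN, since no signed value is
  // smaller than the signed minimum.)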
  APInt ConstValue;
  if (Cond == ISD::SETNE &&
      ISD::isConstantSplatVector(Op1.getNode(), ConstValue)) {
    if (ConstValue.isMinSignedValue())
      Cond = ISD::SETGT;
    else if (ConstValue.isMaxSignedValue())
      Cond = ISD::SETLT;
  }

  // If both operands are known non-negative, then an unsigned compare is the
  // same as a signed compare and there's no need to flip signbits.
  // TODO: We could check for more general simplifications here since we're
  // computing known bits.
  bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
                   !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));

  // Special case: Use min/max operations for unsigned compares.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (ISD::isUnsignedIntSetCC(Cond) &&
      (FlipSigns || ISD::isTrueWhenEqual(Cond)) &&
      TLI.isOperationLegal(ISD::UMIN, VT)) {
    // If we have a constant operand, increment/decrement it and change the
    // condition to avoid an invert.
    // TODO: This could be extended to handle a non-splat constant by checking
    // that each element of the constant is not the max/null value.
    APInt C;
    if (Cond == ISD::SETUGT && isConstantSplat(Op1, C) && !C.isMaxValue()) {
      // X > C --> X >= (C+1) --> X == umax(X, C+1)
      Op1 = DAG.getConstant(C + 1, dl, VT);
      Cond = ISD::SETUGE;
    }
    if (Cond == ISD::SETULT && isConstantSplat(Op1, C) && !C.isNullValue()) {
      // X < C --> X <= (C-1) --> X == umin(X, C-1)
      Op1 = DAG.getConstant(C - 1, dl, VT);
      Cond = ISD::SETULE;
    }

    bool Invert = false;
    unsigned Opc;
    switch (Cond) {
    default: llvm_unreachable("Unexpected condition code");
    case ISD::SETUGT: Invert = true; LLVM_FALLTHROUGH;
    case ISD::SETULE: Opc = ISD::UMIN; break;
    case ISD::SETULT: Invert = true; LLVM_FALLTHROUGH;
    case ISD::SETUGE: Opc = ISD::UMAX; break;
    }

    SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
    Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);

    // If the logical-not of the result is required, perform that now.
    if (Invert)
      Result = DAG.getNOT(dl, Result, VT);

    return Result;
  }

  // Try to use SUBUS and PCMPEQ.
  if (SDValue V = LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))
    return V;

  // We are handling one of the integer comparisons here. Since SSE only has
  // GT and EQ comparisons for integer, swapping operands and multiple
  // operations may be required for some comparisons.
  unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
                                                            : X86ISD::PCMPGT;
  bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
              Cond == ISD::SETGE || Cond == ISD::SETUGE;
  bool Invert = Cond == ISD::SETNE ||
                (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));

  if (Swap)
    std::swap(Op0, Op1);

  // Check that the operation in question is available (most are plain SSE2,
  // but PCMPGTQ and PCMPEQQ have different requirements).
  if (VT == MVT::v2i64) {
    if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
      assert(Subtarget.hasSSE2() && "Don't know how to lower!");

      // Since SSE has no unsigned integer comparisons, we need to flip the sign
      // bits of the inputs before performing those operations. The lower
      // compare is always unsigned.
      SDValue SB;
      if (FlipSigns) {
        SB = DAG.getConstant(0x8000000080000000ULL, dl, MVT::v2i64);
      } else {
        SB = DAG.getConstant(0x0000000080000000ULL, dl, MVT::v2i64);
      }
      Op0 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op0, SB);
      Op1 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op1, SB);

      // Cast everything to the right type.
      Op0 = DAG.getBitcast(MVT::v4i32, Op0);
      Op1 = DAG.getBitcast(MVT::v4i32, Op1);

      // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
      SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
      SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);

      // Create masks for only the low parts/high parts of the 64 bit integers.
      static const int MaskHi[] = { 1, 1, 3, 3 };
      static const int MaskLo[] = { 0, 0, 2, 2 };
      SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
      SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
      SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);

      SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
      Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);

      if (Invert)
        Result = DAG.getNOT(dl, Result, MVT::v4i32);

      return DAG.getBitcast(VT, Result);
    }

    if (Opc == X86ISD::PCMPEQ && !Subtarget.hasSSE41()) {
      // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
      // pcmpeqd + pshufd + pand.
      assert(Subtarget.hasSSE2() && !FlipSigns && "Don't know how to lower!");

      // First cast everything to the right type.
      Op0 = DAG.getBitcast(MVT::v4i32, Op0);
      Op1 = DAG.getBitcast(MVT::v4i32, Op1);

      // Do the compare.
      SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);

      // Make sure the lower and upper halves are both all-ones.
      static const int Mask[] = { 1, 0, 3, 2 };
      SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
      Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);

      if (Invert)
        Result = DAG.getNOT(dl, Result, MVT::v4i32);

      return DAG.getBitcast(VT, Result);
    }
  }

  // Since SSE has no unsigned integer comparisons, we need to flip the sign
  // bits of the inputs before performing those operations.
  if (FlipSigns) {
    MVT EltVT = VT.getVectorElementType();
    SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()),
                                 dl, VT);
    Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
    Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
  }

  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);

  // If the logical-not of the result is required, perform that now.
  if (Invert)
    Result = DAG.getNOT(dl, Result, VT);

  return Result;
}

// Try to select this as a KORTEST+SETCC if possible.
static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const X86Subtarget &Subtarget,
                           SDValue &X86CC) {
  // Only support equality comparisons.
  if (CC != ISD::SETEQ && CC != ISD::SETNE)
    return SDValue();

  // Must be a bitcast from vXi1.
  if (Op0.getOpcode() != ISD::BITCAST)
    return SDValue();

  Op0 = Op0.getOperand(0);
  MVT VT = Op0.getSimpleValueType();
  if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) &&
      !(Subtarget.hasDQI() && VT == MVT::v8i1) &&
      !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
    return SDValue();

  X86::CondCode X86Cond;
  if (isNullConstant(Op1)) {
    X86Cond = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
  } else if (isAllOnesConstant(Op1)) {
    // C flag is set for all ones.
    X86Cond = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
  } else
    return SDValue();

  // If the input is an OR, we can combine its operands into the KORTEST.
  SDValue LHS = Op0;
  SDValue RHS = Op0;
  if (Op0.getOpcode() == ISD::OR && Op0.hasOneUse()) {
    LHS = Op0.getOperand(0);
    RHS = Op0.getOperand(1);
  }

  X86CC = DAG.getConstant(X86Cond, dl, MVT::i8);
  return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
}

/// Emit flags for the given setcc condition and operands. Also returns the
/// corresponding X86 condition code constant in X86CC.
SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                                             ISD::CondCode CC, const SDLoc &dl,
                                             SelectionDAG &DAG,
                                             SDValue &X86CC) const {
  // Optimize to BT if possible.
  // Lower (X & (1 << N)) == 0 to BT(X, N).
  // Lower ((X >>u N) & 1) != 0 to BT(X, N).
  // Lower ((X >>s N) & 1) != 0 to BT(X, N).
  if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC))
      return BT;
  }

  // Try to use PTEST for a tree ORs equality compared with 0.
  // TODO: We could do AND tree with all 1s as well by using the C flag.
  if (Op0.getOpcode() == ISD::OR && isNullConstant(Op1) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    if (SDValue PTEST = LowerVectorAllZeroTest(Op0, CC, Subtarget, DAG, X86CC))
      return PTEST;
  }

  // Try to lower using KORTEST.
  if (SDValue KORTEST = EmitKORTEST(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
    return KORTEST;

  // Look for X == 0, X == 1, X != 0, or X != 1.  We can simplify some forms of
  // these.
  if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    // If the input is a setcc, then reuse the input setcc or use a new one with
    // the inverted condition.
    if (Op0.getOpcode() == X86ISD::SETCC) {
      bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);

      X86CC = Op0.getOperand(0);
      if (Invert) {
        X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
        CCode = X86::GetOppositeBranchCondition(CCode);
        X86CC = DAG.getConstant(CCode, dl, MVT::i8);
      }

      return Op0.getOperand(1);
    }
  }

  bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
  X86::CondCode CondCode = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
  if (CondCode == X86::COND_INVALID)
    return SDValue();

  SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG);
  EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
  X86CC = DAG.getConstant(CondCode, dl, MVT::i8);
  return EFLAGS;
}

SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();

  if (VT.isVector())
    return LowerVSETCC(Op, Subtarget, DAG);

  assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDLoc dl(Op);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  SDValue X86CC;
  SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
  if (!EFLAGS)
    return SDValue();

  return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
}

SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue Carry = Op.getOperand(2);
  SDValue Cond = Op.getOperand(3);
  SDLoc DL(Op);

  assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
  X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get());

  // Recreate the carry if needed.
  EVT CarryVT = Carry.getValueType();
  APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
  Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
                      Carry, DAG.getConstant(NegOne, DL, CarryVT));

  SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
  SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
  return getSETCC(CC, Cmp.getValue(1), DL, DAG);
}

// This function returns three things: the arithmetic computation itself
// (Value), an EFLAGS result (Overflow), and a condition code (Cond).  The
// flag and the condition code define the case in which the arithmetic
// computation overflows.
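// For example, (uaddo x, y) yields Value = (X86ISD::ADD x, y), whose second
// result is the EFLAGS produced by the add, with Cond = X86::COND_B (carry
// set on unsigned overflow).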
static std::pair<SDValue, SDValue>
getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
  assert(Op.getResNo() == 0 && "Unexpected result number!");
  SDValue Value, Overflow;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  unsigned BaseOp = 0;
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown ovf instruction!");
  case ISD::SADDO:
    BaseOp = X86ISD::ADD;
    Cond = X86::COND_O;
    break;
  case ISD::UADDO:
    BaseOp = X86ISD::ADD;
    Cond = X86::COND_B;
    break;
  case ISD::SSUBO:
    BaseOp = X86ISD::SUB;
    Cond = X86::COND_O;
    break;
  case ISD::USUBO:
    BaseOp = X86ISD::SUB;
    Cond = X86::COND_B;
    break;
  case ISD::SMULO:
    BaseOp = X86ISD::SMUL;
    Cond = X86::COND_O;
    break;
  case ISD::UMULO:
    BaseOp = X86ISD::UMUL;
    Cond = X86::COND_O;
    break;
  }

  if (BaseOp) {
    // Also sets EFLAGS.
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
    Value = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
    Overflow = Value.getValue(1);
  }

  return std::make_pair(Value, Overflow);
}

static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
  // Lower the "add/sub/mul with overflow" instruction into a regular
  // instruction plus a "setcc" instruction that checks the overflow flag. The
  // "brcond" lowering looks for this combo and may remove the "setcc"
  // instruction if the "setcc" has only one use.
  SDLoc DL(Op);
  X86::CondCode Cond;
  SDValue Value, Overflow;
  std::tie(Value, Overflow) = getX86XALUOOp(Cond, Op, DAG);

  SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG);
  return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC);
}

/// Return true if opcode is an X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
  unsigned Opc = Op.getOpcode();
  if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
      Opc == X86ISD::SAHF)
    return true;
  if (Op.getResNo() == 1 &&
      (Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
       Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
       Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND))
    return true;

  return false;
}

static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
  if (V.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue VOp0 = V.getOperand(0);
  unsigned InBits = VOp0.getValueSizeInBits();
  unsigned Bits = V.getValueSizeInBits();
  return DAG.MaskedValueIsZero(VOp0,
                               APInt::getHighBitsSet(InBits, InBits - Bits));
}

SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  bool AddTest = true;
  SDValue Cond = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op1.getSimpleValueType();
  SDValue CC;

  // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE
  // ops are available or VBLENDV if AVX is available.
  // Otherwise FP cmovs get lowered into a less efficient branch sequence
  // later.
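  // As a scalar model of the mask trick used below (illustrative sketch
  // only, with <cstdint>/<cstring>; the real lowering stays in the DAG):
  //
  //   static double selectViaMask(bool Pred, double X, double Y) {
  //     uint64_t M = Pred ? ~0ULL : 0ULL;          // what CMPSD produces
  //     uint64_t XB, YB;
  //     std::memcpy(&XB, &X, 8);
  //     std::memcpy(&YB, &Y, 8);
  //     uint64_t R = (M & XB) | (~M & YB);         // FAND/FANDN/FOR below
  //     double Res;
  //     std::memcpy(&Res, &R, 8);
  //     return Res;
  //   }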
if (Cond.getOpcode() == ISD::SETCC && ((Subtarget.hasSSE2() && VT == MVT::f64) || (Subtarget.hasSSE1() && VT == MVT::f32)) && VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1); unsigned SSECC = translateX86FSETCC( cast(Cond.getOperand(2))->get(), CondOp0, CondOp1); if (Subtarget.hasAVX512()) { SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); assert(!VT.isVector() && "Not a scalar type?"); return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); } if (SSECC < 8 || Subtarget.hasAVX()) { SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); // If we have AVX, we can use a variable vector select (VBLENDV) instead // of 3 logic instructions for size savings and potentially speed. // Unfortunately, there is no scalar form of VBLENDV. // If either operand is a +0.0 constant, don't try this. We can expect to // optimize away at least one of the logic instructions later in that // case, so that sequence would be faster than a variable blend. // BLENDV was introduced with SSE 4.1, but the 2 register form implicitly // uses XMM0 as the selection register. That may need just as many // instructions as the AND/ANDN/OR sequence due to register moves, so // don't bother. if (Subtarget.hasAVX() && !isNullFPConstant(Op1) && !isNullFPConstant(Op2)) { // Convert to vectors, do a VSELECT, and convert back to scalar. // All of the conversions should be optimized away. MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1); SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2); SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; VCmp = DAG.getBitcast(VCmpVT, VCmp); SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VSel, DAG.getIntPtrConstant(0, DL)); } SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And); } } // AVX512 fallback is to lower selects of scalar floats to masked moves. if ((VT == MVT::f64 || VT == MVT::f32) && Subtarget.hasAVX512()) { SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond); return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); } // For v64i1 without 64-bit support we need to split and rejoin. 
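  // The split-select-rejoin below is the vector analogue of doing a 64-bit
  // mask select in two 32-bit halves (hypothetical scalar sketch):
  //
  //   static uint64_t select64ViaHalves(bool C, uint64_t T, uint64_t F) {
  //     uint32_t Lo = C ? uint32_t(T) : uint32_t(F);          // v32i1 select
  //     uint32_t Hi = C ? uint32_t(T >> 32) : uint32_t(F >> 32);
  //     return (uint64_t(Hi) << 32) | Lo;                     // CONCAT_VECTORS
  //   }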
if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { assert(Subtarget.hasBWI() && "Expected BWI to be legal"); SDValue Op1Lo = extractSubVector(Op1, 0, DAG, DL, 32); SDValue Op2Lo = extractSubVector(Op2, 0, DAG, DL, 32); SDValue Op1Hi = extractSubVector(Op1, 32, DAG, DL, 32); SDValue Op2Hi = extractSubVector(Op2, 32, DAG, DL, 32); SDValue Lo = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Lo, Op2Lo); SDValue Hi = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Hi, Op2Hi); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); } if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { SDValue Op1Scalar; if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode())) Op1Scalar = ConvertI1VectorToInteger(Op1, DAG); else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0)) Op1Scalar = Op1.getOperand(0); SDValue Op2Scalar; if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode())) Op2Scalar = ConvertI1VectorToInteger(Op2, DAG); else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0)) Op2Scalar = Op2.getOperand(0); if (Op1Scalar.getNode() && Op2Scalar.getNode()) { SDValue newSelect = DAG.getSelect(DL, Op1Scalar.getValueType(), Cond, Op1Scalar, Op2Scalar); if (newSelect.getValueSizeInBits() == VT.getSizeInBits()) return DAG.getBitcast(VT, newSelect); SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec, DAG.getIntPtrConstant(0, DL)); } } if (Cond.getOpcode() == ISD::SETCC) { if (SDValue NewCond = LowerSETCC(Cond, DAG)) { Cond = NewCond; // If the condition was updated, it's possible that the operands of the // select were also updated (for example, EmitTest has a RAUW). Refresh // the local references to the select operands in case they got stale. Op1 = Op.getOperand(1); Op2 = Op.getOperand(2); } } // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y // (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y // (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y if (Cond.getOpcode() == X86ISD::SETCC && Cond.getOperand(1).getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1).getOperand(1))) { SDValue Cmp = Cond.getOperand(1); unsigned CondCode = cast(Cond.getOperand(0))->getZExtValue(); if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2; SDValue CmpOp0 = Cmp.getOperand(0); // Apply further optimizations for special cases // (select (x != 0), -1, 0) -> neg & sbb // (select (x == 0), 0, -1) -> neg & sbb if (isNullConstant(Y) && (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType()); SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); Zero = DAG.getConstant(0, DL, Op.getValueType()); return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp); } Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); Cmp = ConvertCmpIfNecessary(Cmp, DAG); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SDValue Zero = DAG.getConstant(0, DL, Op.getValueType()); SDValue Res = // Res = 0 or -1. 
DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) Res = DAG.getNOT(DL, Res, Res.getValueType()); if (!isNullConstant(Op2)) Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); return Res; } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E && Cmp.getOperand(0).getOpcode() == ISD::AND && isOneConstant(Cmp.getOperand(0).getOperand(1))) { SDValue CmpOp0 = Cmp.getOperand(0); SDValue Src1, Src2; // true if Op2 is XOR or OR operator and one of its operands // is equal to Op1 // ( a , a op b) || ( b , a op b) auto isOrXorPattern = [&]() { if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) && (Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) { Src1 = Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0); Src2 = Op1; return true; } return false; }; if (isOrXorPattern()) { SDValue Neg; unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits(); // we need mask of all zeros or ones with same size of the other // operands. if (CmpSz > VT.getSizeInBits()) Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0); else if (CmpSz < VT.getSizeInBits()) Neg = DAG.getNode(ISD::AND, DL, VT, DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)), DAG.getConstant(1, DL, VT)); else Neg = CmpOp0; SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Neg); // -(and (x, 0x1)) SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); // And Op y } } } // Look past (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && isOneConstant(Cond.getOperand(1))) Cond = Cond.getOperand(0); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. unsigned CondOpcode = Cond.getOpcode(); if (CondOpcode == X86ISD::SETCC || CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); MVT VT = Op.getSimpleValueType(); bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && !isScalarFPTypeInSSEReg(VT)) // FPStack? IllegalFPCMov = !hasFPCMov(cast(CC)->getSExtValue()); if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || Opc == X86ISD::BT) { // FIXME Cond = Cmp; AddTest = false; } } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) { SDValue Value; X86::CondCode X86Cond; std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG); CC = DAG.getConstant(X86Cond, DL, MVT::i8); AddTest = false; } if (AddTest) { // Look past the truncate if the high bits are known zero. if (isTruncWithZeroHighBitsInput(Cond, DAG)) Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { SDValue BTCC; if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) { CC = BTCC; Cond = BT; AddTest = false; } } } if (AddTest) { CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8); Cond = EmitCmp(Cond, DAG.getConstant(0, DL, Cond.getValueType()), X86::COND_NE, DL, DAG); } // a < b ? -1 : 0 -> RES = ~setcc_carry // a < b ? 0 : -1 -> RES = setcc_carry // a >= b ? -1 : 0 -> RES = setcc_carry // a >= b ? 
0 : -1 -> RES = ~setcc_carry if (Cond.getOpcode() == X86ISD::SUB) { Cond = ConvertCmpIfNecessary(Cond, DAG); unsigned CondCode = cast(CC)->getZExtValue(); if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (isNullConstant(Op1) || isNullConstant(Op2))) { SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, DL, MVT::i8), Cond); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) return DAG.getNOT(DL, Res, Res.getValueType()); return Res; } } // X86 doesn't have an i8 cmov. If both operands are the result of a truncate // widen the cmov and push the truncate through. This avoids introducing a new // branch during isel and doesn't add any extensions. if (Op.getValueType() == MVT::i8 && Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) { SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0); if (T1.getValueType() == T2.getValueType() && // Blacklist CopyFromReg to avoid partial register stalls. T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){ SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1, CC, Cond); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); } } // Promote i16 cmovs if it won't prevent folding a load. if (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) && !MayFoldLoad(Op2)) { Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); SDValue Ops[] = { Op2, Op1, CC, Cond }; SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); } // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. SDValue Ops[] = { Op2, Op1, CC, Cond }; return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops); } static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!"); MVT VTElt = VT.getVectorElementType(); SDLoc dl(Op); unsigned NumElts = VT.getVectorNumElements(); // Extend VT if the scalar type is i8/i16 and BWI is not supported. MVT ExtVT = VT; if (!Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16) { // If v16i32 is to be avoided, we'll need to split and concatenate. if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) return SplitAndExtendv16i1(Op.getOpcode(), VT, In, dl, DAG); ExtVT = MVT::getVectorVT(MVT::i32, NumElts); } // Widen to 512-bits if VLX is not supported. MVT WideVT = ExtVT; if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) { NumElts *= 512 / ExtVT.getSizeInBits(); InVT = MVT::getVectorVT(MVT::i1, NumElts); In = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, InVT, DAG.getUNDEF(InVT), In, DAG.getIntPtrConstant(0, dl)); WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts); } SDValue V; MVT WideEltVT = WideVT.getVectorElementType(); if ((Subtarget.hasDQI() && WideEltVT.getSizeInBits() >= 32) || (Subtarget.hasBWI() && WideEltVT.getSizeInBits() <= 16)) { V = DAG.getNode(Op.getOpcode(), dl, WideVT, In); } else { SDValue NegOne = DAG.getConstant(-1, dl, WideVT); SDValue Zero = DAG.getConstant(0, dl, WideVT); V = DAG.getSelect(dl, WideVT, In, NegOne, Zero); } // Truncate if we had to extend i16/i8 above. 
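  // (Sketch of the round trip taken for i8/i16 elements without BWI, per
  // lane: i1 --select--> i32 0/-1 --truncate--> i8/i16 0/-1. The truncate
  // below only drops redundant copies of the sign bit, so no information is
  // lost.)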
if (VT != ExtVT) { WideVT = MVT::getVectorVT(VTElt, NumElts); V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V); } // Extract back to 128/256-bit if we widened. if (WideVT != VT) V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, V, DAG.getIntPtrConstant(0, dl)); return V; } static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); if (InVT.getVectorElementType() == MVT::i1) return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG); assert(Subtarget.hasAVX() && "Expected AVX support"); return LowerAVXExtend(Op, DAG, Subtarget); } // Lowering for SIGN_EXTEND_VECTOR_INREG and ZERO_EXTEND_VECTOR_INREG. // For sign extend this needs to handle all vector sizes and SSE4.1 and // non-SSE4.1 targets. For zero extend this should only handle inputs of // MVT::v64i8 when BWI is not supported, but AVX512 is. static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue In = Op->getOperand(0); MVT VT = Op->getSimpleValueType(0); MVT InVT = In.getSimpleValueType(); MVT SVT = VT.getVectorElementType(); MVT InSVT = InVT.getVectorElementType(); assert(SVT.getSizeInBits() > InSVT.getSizeInBits()); if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16) return SDValue(); if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8) return SDValue(); if (!(VT.is128BitVector() && Subtarget.hasSSE2()) && !(VT.is256BitVector() && Subtarget.hasAVX()) && !(VT.is512BitVector() && Subtarget.hasAVX512())) return SDValue(); SDLoc dl(Op); unsigned Opc = Op.getOpcode(); unsigned NumElts = VT.getVectorNumElements(); // For 256-bit vectors, we only need the lower (128-bit) half of the input. // For 512-bit vectors, we need 128-bits or 256-bits. if (InVT.getSizeInBits() > 128) { // Input needs to be at least the same number of elements as output, and // at least 128-bits. int InSize = InSVT.getSizeInBits() * NumElts; In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128)); InVT = In.getSimpleValueType(); } // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results, // so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* instructions still // need to be handled here for 256/512-bit results. if (Subtarget.hasInt256()) { assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension"); if (InVT.getVectorNumElements() != NumElts) return DAG.getNode(Op.getOpcode(), dl, VT, In); // FIXME: Apparently we create inreg operations that could be regular // extends. unsigned ExtOpc = Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; return DAG.getNode(ExtOpc, dl, VT, In); } // pre-AVX2 256-bit extensions need to be split into 128-bit instructions. if (Subtarget.hasAVX()) { assert(VT.is256BitVector() && "256-bit vector expected"); int HalfNumElts = NumElts / 2; MVT HalfVT = MVT::getVectorVT(SVT, HalfNumElts); unsigned NumSrcElts = InVT.getVectorNumElements(); SmallVector HiMask(NumSrcElts, SM_SentinelUndef); for (int i = 0; i != HalfNumElts; ++i) HiMask[i] = HalfNumElts + i; SDValue Lo = DAG.getNode(Opc, dl, HalfVT, In); SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, DAG.getUNDEF(InVT), HiMask); Hi = DAG.getNode(Opc, dl, HalfVT, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); } // We should only get here for sign extend. 
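// The SSE2 fallback below is the classic shift trick; per lane it amounts to
// (sketch, i8 -> i32 case):
//
//   static int32_t sext8to32(uint8_t V) {
//     int32_t Widened = int32_t(uint32_t(V) << 24); // shuffle into the MSBs
//     return Widened >> 24;                         // VSRAI by 24
//   }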
  assert(Opc == ISD::SIGN_EXTEND_VECTOR_INREG && "Unexpected opcode!");
  assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs");

  // pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
  SDValue Curr = In;
  SDValue SignExt = Curr;

  // As SRAI is only available on i16/i32 types, we expand only up to i32
  // and handle i64 separately.
  if (InVT != MVT::v4i32) {
    MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;

    unsigned DestWidth = DestVT.getScalarSizeInBits();
    unsigned Scale = DestWidth / InSVT.getSizeInBits();

    unsigned InNumElts = InVT.getVectorNumElements();
    unsigned DestElts = DestVT.getVectorNumElements();

    // Build a shuffle mask that takes each input element and places it in the
    // MSBs of the new element size.
    SmallVector<int, 16> Mask(InNumElts, SM_SentinelUndef);
    for (unsigned i = 0; i != DestElts; ++i)
      Mask[i * Scale + (Scale - 1)] = i;

    Curr = DAG.getVectorShuffle(InVT, dl, In, In, Mask);
    Curr = DAG.getBitcast(DestVT, Curr);

    unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
    SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
                          DAG.getConstant(SignExtShift, dl, MVT::i8));
  }

  if (VT == MVT::v2i64) {
    assert(Curr.getValueType() == MVT::v4i32 && "Unexpected input VT");
    SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
    SDValue Sign = DAG.getSetCC(dl, MVT::v4i32, Zero, Curr, ISD::SETGT);
    SignExt = DAG.getVectorShuffle(MVT::v4i32, dl, SignExt, Sign,
                                   {0, 4, 1, 5});
    SignExt = DAG.getBitcast(VT, SignExt);
  }

  return SignExt;
}

static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
  MVT VT = Op->getSimpleValueType(0);
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  SDLoc dl(Op);

  if (InVT.getVectorElementType() == MVT::i1)
    return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG);

  assert(VT.isVector() && InVT.isVector() && "Expected vector type");
  assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
         "Expected same number of elements");
  assert((VT.getVectorElementType() == MVT::i16 ||
          VT.getVectorElementType() == MVT::i32 ||
          VT.getVectorElementType() == MVT::i64) &&
         "Unexpected element type");
  assert((InVT.getVectorElementType() == MVT::i8 ||
          InVT.getVectorElementType() == MVT::i16 ||
          InVT.getVectorElementType() == MVT::i32) &&
         "Unexpected element type");

  // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
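  // (The concat-with-undef trick below is safe because
  // SIGN_EXTEND_VECTOR_INREG reads only as many low input lanes as the
  // result has lanes; e.g. sext_invec v16i8 <a,b,...,h,u,...,u> to v8i64
  // yields <sext(a), ..., sext(h)> regardless of the undef upper half.)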
if (InVT == MVT::v8i8) { if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64) return SDValue(); In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8)); return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, VT, In); } if (Subtarget.hasInt256()) return Op; // Optimize vectors in AVX mode // Sign extend v8i16 to v8i32 and // v4i32 to v4i64 // // Divide input vector into two parts // for v4i32 the high shuffle mask will be {2, 3, -1, -1} // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 // concat the vectors to original VT MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements() / 2); SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In); unsigned NumElems = InVT.getVectorNumElements(); SmallVector ShufMask(NumElems, -1); for (unsigned i = 0; i != NumElems/2; ++i) ShufMask[i] = i + NumElems/2; SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask); OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { StoreSDNode *St = cast(Op.getNode()); SDLoc dl(St); SDValue StoredVal = St->getValue(); // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores. if (StoredVal.getValueType().isVector() && StoredVal.getValueType().getVectorElementType() == MVT::i1) { assert(StoredVal.getValueType().getVectorNumElements() <= 8 && "Unexpected VT"); assert(!St->isTruncatingStore() && "Expected non-truncating store"); assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && "Expected AVX512F without AVX512DQI"); StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, DAG.getUNDEF(MVT::v16i1), StoredVal, DAG.getIntPtrConstant(0, dl)); StoredVal = DAG.getBitcast(MVT::i16, StoredVal); StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal); return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); } if (St->isTruncatingStore()) return SDValue(); MVT StoreVT = StoredVal.getSimpleValueType(); assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 && "Unexpected VT"); if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) != TargetLowering::TypeWidenVector) return SDValue(); // Widen the vector, cast to a v2x64 type, extract the single 64-bit element // and store it. MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(), StoreVT.getVectorNumElements() * 2); StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal, DAG.getUNDEF(StoreVT)); MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64; MVT CastVT = MVT::getVectorVT(StVT, 2); StoredVal = DAG.getBitcast(CastVT, StoredVal); StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal, DAG.getIntPtrConstant(0, dl)); return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); } // Lower vector extended loads using a shuffle. If SSSE3 is not available we // may emit an illegal shuffle but the expansion is still better than scalar // code. We generate sext/sext_invec for SEXTLOADs if it's available, otherwise // we'll emit a shuffle and a arithmetic shift. // FIXME: Is the expansion actually better than scalar code? It doesn't seem so. 
// TODO: It is possible to support ZExt by zeroing the undef values during // the shuffle phase or after the shuffle. static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT RegVT = Op.getSimpleValueType(); assert(RegVT.isVector() && "We only custom lower vector loads."); assert(RegVT.isInteger() && "We only custom lower integer vector loads."); LoadSDNode *Ld = cast(Op.getNode()); SDLoc dl(Ld); EVT MemVT = Ld->getMemoryVT(); // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads. if (RegVT.getVectorElementType() == MVT::i1) { assert(EVT(RegVT) == MemVT && "Expected non-extending load"); assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT"); assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && "Expected AVX512F without AVX512DQI"); SDValue NewLd = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); // Replace chain users with the new chain. assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!"); SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd); Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT, DAG.getBitcast(MVT::v16i1, Val), DAG.getIntPtrConstant(0, dl)); return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl); } // Nothing useful we can do without SSE2 shuffles. assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2."); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned RegSz = RegVT.getSizeInBits(); ISD::LoadExtType Ext = Ld->getExtensionType(); assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD) && "Only anyext and sext are currently implemented."); assert(MemVT != RegVT && "Cannot extend to the same type"); assert(MemVT.isVector() && "Must load a vector from memory"); unsigned NumElems = RegVT.getVectorNumElements(); unsigned MemSz = MemVT.getSizeInBits(); assert(RegSz > MemSz && "Register size must be greater than the mem size"); if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) { // The only way in which we have a legal 256-bit vector result but not the // integer 256-bit operations needed to directly lower a sextload is if we // have AVX1 but not AVX2. In that case, we can always emit a sextload to // a 128-bit vector and a normal sign_extend to 256-bits that should get // correctly legalized. We do this late to allow the canonical form of // sextload to persist throughout the rest of the DAG combiner -- it wants // to fold together any extensions it can, and so will fuse a sign_extend // of an sextload into a sextload targeting a wider value. SDValue Load; if (MemSz == 128) { // Just switch this to a normal load. assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, " "it must be a legal 128-bit vector " "type!"); Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); } else { assert(MemSz < 128 && "Can't extend a type wider than 128 bits to a 256 bit vector!"); // Do an sext load to a 128-bit vector type. We want to use the same // number of elements, but elements half as wide. This will end up being // recursively lowered by this routine, but will succeed as we definitely // have all the necessary features if we're using AVX1. 
EVT HalfEltVT = EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2); EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems); Load = DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), MemVT, Ld->getAlignment(), Ld->getMemOperand()->getFlags()); } // Replace chain users with the new chain. assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); // Finally, do a normal sign-extend to the desired register. SDValue SExt = DAG.getSExtOrTrunc(Load, dl, RegVT); return DAG.getMergeValues({SExt, Load.getValue(1)}, dl); } // All sizes must be a power of two. assert(isPowerOf2_32(RegSz * MemSz * NumElems) && "Non-power-of-two elements are not custom lowered!"); // Attempt to load the original value using scalar loads. // Find the largest scalar type that divides the total loaded size. MVT SclrLoadTy = MVT::i8; for (MVT Tp : MVT::integer_valuetypes()) { if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) { SclrLoadTy = Tp; } } // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64. if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 && (64 <= MemSz)) SclrLoadTy = MVT::f64; // Calculate the number of scalar loads that we need to perform // in order to load our vector from memory. unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits(); assert((Ext != ISD::SEXTLOAD || NumLoads == 1) && "Can only lower sext loads with a single scalar load!"); unsigned loadRegSize = RegSz; if (Ext == ISD::SEXTLOAD && RegSz >= 256) loadRegSize = 128; // If we don't have BWI we won't be able to create the shuffle needed for // v8i8->v8i64. if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && MemVT == MVT::v8i8) loadRegSize = 128; // Represent our vector as a sequence of elements which are the // largest scalar that we can load. EVT LoadUnitVecVT = EVT::getVectorVT( *DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits()); // Represent the data using the same element type that is stored in // memory. In practice, we ''widen'' MemVT. EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), loadRegSize / MemVT.getScalarSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && "Invalid vector type"); // We can't shuffle using an illegal type. assert(TLI.isTypeLegal(WideVecVT) && "We only lower types that form legal widened vector types"); SmallVector Chains; SDValue Ptr = Ld->getBasePtr(); unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8; SDValue Increment = DAG.getConstant(OffsetInc, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue Res = DAG.getUNDEF(LoadUnitVecVT); unsigned Offset = 0; for (unsigned i = 0; i < NumLoads; ++i) { unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset); // Perform a single load. SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, Ld->getPointerInfo().getWithOffset(Offset), NewAlign, Ld->getMemOperand()->getFlags()); Chains.push_back(ScalarLoad.getValue(1)); // Create the first element type using SCALAR_TO_VECTOR in order to avoid // another round of DAGCombining. 
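    // E.g. a 16-byte register vector assembled from two i64 scalar loads
    // (sketch of what this loop builds):
    //
    //   static void loadAsTwoScalars(const uint64_t *P, uint64_t Out[2]) {
    //     Out[0] = P[0]; // i == 0: SCALAR_TO_VECTOR seeds lane 0
    //     Out[1] = P[1]; // i == 1: INSERT_VECTOR_ELT fills lane 1
    //   }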
if (i == 0) Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad); else Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res, ScalarLoad, DAG.getIntPtrConstant(i, dl)); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); Offset += OffsetInc; } SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); // Bitcast the loaded value to a vector of the original element type, in // the size of the target vector type. SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res); unsigned SizeRatio = RegSz / MemSz; if (Ext == ISD::SEXTLOAD) { SDValue Sext = getExtendInVec(/*Signed*/true, dl, RegVT, SlicedVec, DAG); return DAG.getMergeValues({Sext, TF}, dl); } if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && MemVT == MVT::v8i8) { SDValue Sext = getExtendInVec(/*Signed*/false, dl, RegVT, SlicedVec, DAG); return DAG.getMergeValues({Sext, TF}, dl); } // Redistribute the loaded elements into the different locations. SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i * SizeRatio] = i; SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), ShuffleVec); // Bitcast to the requested type. Shuff = DAG.getBitcast(RegVT, Shuff); return DAG.getMergeValues({Shuff, TF}, dl); } /// Return true if node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes /// each of which has no other use apart from the AND / OR. static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) { Opc = Op.getOpcode(); if (Opc != ISD::OR && Opc != ISD::AND) return false; return (Op.getOperand(0).getOpcode() == X86ISD::SETCC && Op.getOperand(0).hasOneUse() && Op.getOperand(1).getOpcode() == X86ISD::SETCC && Op.getOperand(1).hasOneUse()); } /// Return true if node is an ISD::XOR of a X86ISD::SETCC and 1 and that the /// SETCC node has a single use. static bool isXor1OfSetCC(SDValue Op) { if (Op.getOpcode() != ISD::XOR) return false; if (isOneConstant(Op.getOperand(1))) return Op.getOperand(0).getOpcode() == X86ISD::SETCC && Op.getOperand(0).hasOneUse(); return false; } SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Chain = Op.getOperand(0); SDValue Cond = Op.getOperand(1); SDValue Dest = Op.getOperand(2); SDLoc dl(Op); SDValue CC; bool Inverted = false; if (Cond.getOpcode() == ISD::SETCC) { // Check for setcc([su]{add,sub,mul}o == 0). if (cast(Cond.getOperand(2))->get() == ISD::SETEQ && isNullConstant(Cond.getOperand(1)) && Cond.getOperand(0).getResNo() == 1 && (Cond.getOperand(0).getOpcode() == ISD::SADDO || Cond.getOperand(0).getOpcode() == ISD::UADDO || Cond.getOperand(0).getOpcode() == ISD::SSUBO || Cond.getOperand(0).getOpcode() == ISD::USUBO || Cond.getOperand(0).getOpcode() == ISD::SMULO || Cond.getOperand(0).getOpcode() == ISD::UMULO)) { Inverted = true; Cond = Cond.getOperand(0); } else { if (SDValue NewCond = LowerSETCC(Cond, DAG)) Cond = NewCond; } } #if 0 // FIXME: LowerXALUO doesn't handle these!! else if (Cond.getOpcode() == X86ISD::ADD || Cond.getOpcode() == X86ISD::SUB || Cond.getOpcode() == X86ISD::SMUL || Cond.getOpcode() == X86ISD::UMUL) Cond = LowerXALUO(Cond, DAG); #endif // Look pass (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && isOneConstant(Cond.getOperand(1))) Cond = Cond.getOperand(0); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. 
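  // E.g. (sketch of the rewrite performed here):
  //   t1 = X86ISD::CMP a, b
  //   t2 = X86ISD::SETCC COND_E, t1
  //   brcond t2, dest
  // becomes
  //   X86ISD::BRCOND dest, COND_E, t1
  // and the materialized i8 setcc result disappears if t2 had no other uses.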
unsigned CondOpcode = Cond.getOpcode(); if (CondOpcode == X86ISD::SETCC || CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); // FIXME: WHY THE SPECIAL CASING OF LogicalCmp?? if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) { Cond = Cmp; addTest = false; } else { switch (cast(CC)->getZExtValue()) { default: break; case X86::COND_O: case X86::COND_B: // These can only come from an arithmetic instruction with overflow, // e.g. SADDO, UADDO. Cond = Cond.getOperand(1); addTest = false; break; } } } CondOpcode = Cond.getOpcode(); if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) { SDValue Value; X86::CondCode X86Cond; std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG); if (Inverted) X86Cond = X86::GetOppositeBranchCondition(X86Cond); CC = DAG.getConstant(X86Cond, dl, MVT::i8); addTest = false; } else { unsigned CondOpc; if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) { SDValue Cmp = Cond.getOperand(0).getOperand(1); if (CondOpc == ISD::OR) { // Also, recognize the pattern generated by an FCMP_UNE. We can emit // two branches instead of an explicit OR instruction with a // separate test. if (Cmp == Cond.getOperand(1).getOperand(1) && isX86LogicalCmp(Cmp)) { CC = Cond.getOperand(0).getOperand(0); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); CC = Cond.getOperand(1).getOperand(0); Cond = Cmp; addTest = false; } } else { // ISD::AND // Also, recognize the pattern generated by an FCMP_OEQ. We can emit // two branches instead of an explicit AND instruction with a // separate test. However, we only do this if this block doesn't // have a fall-through edge, because this requires an explicit // jmp when the condition is false. if (Cmp == Cond.getOperand(1).getOperand(1) && isX86LogicalCmp(Cmp) && Op.getNode()->hasOneUse()) { X86::CondCode CCode = (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order // to implement FCMP_OEQ. if (User->getOpcode() == ISD::BR) { SDValue FalseBB = User->getOperand(1); SDNode *NewBR = DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); (void)NewBR; Dest = FalseBB; Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); X86::CondCode CCode = (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); Cond = Cmp; addTest = false; } } } } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) { // Recognize for xorb (setcc), 1 patterns. The xor inverts the condition. // It should be transformed during dag combiner except when the condition // is set by a arithmetics with overflow node. X86::CondCode CCode = (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); addTest = false; } else if (Cond.getOpcode() == ISD::SETCC && cast(Cond.getOperand(2))->get() == ISD::SETOEQ) { // For FCMP_OEQ, we can emit // two branches instead of an explicit AND instruction with a // separate test. 
However, we only do this if this block doesn't // have a fall-through edge, because this requires an explicit // jmp when the condition is false. if (Op.getNode()->hasOneUse()) { SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order // to implement FCMP_OEQ. if (User->getOpcode() == ISD::BR) { SDValue FalseBB = User->getOperand(1); SDNode *NewBR = DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); (void)NewBR; Dest = FalseBB; SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; } } } else if (Cond.getOpcode() == ISD::SETCC && cast(Cond.getOperand(2))->get() == ISD::SETUNE) { // For FCMP_UNE, we can emit // two branches instead of an explicit OR instruction with a // separate test. SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; } } if (addTest) { // Look pass the truncate if the high bits are known zero. if (isTruncWithZeroHighBitsInput(Cond, DAG)) Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { SDValue BTCC; if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, dl, DAG, BTCC)) { CC = BTCC; Cond = BT; addTest = false; } } } if (addTest) { X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE; CC = DAG.getConstant(X86Cond, dl, MVT::i8); Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()), X86Cond, dl, DAG); } Cond = ConvertCmpIfNecessary(Cond, DAG); return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cond); } // Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. // Calls to _alloca are needed to probe the stack when allocating more than 4k // bytes in one go. Touching the stack at 4K increments is necessary to ensure // that the guard pages used by the OS virtual memory manager are allocated in // correct sequence. SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool SplitStack = MF.shouldSplitStack(); bool EmitStackProbe = !getStackProbeSymbolName(MF).empty(); bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) || SplitStack || EmitStackProbe; SDLoc dl(Op); // Get the inputs. SDNode *Node = Op.getNode(); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); unsigned Align = cast(Op.getOperand(2))->getZExtValue(); EVT VT = Node->getValueType(0); // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. 
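// (Outline of why the probe is needed, illustrative only -- not _alloca's
// actual code: the OS commits stack pages lazily via a single guard page, so
// every page between the old and new stack pointer must be touched in order,
// roughly:
//   for (char *P = OldSP - 4096; P >= NewSP; P -= 4096)
//     *(volatile char *)P = 0;
// A single jump far past the guard page would fault instead of growing the
// stack.)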
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); bool Is64Bit = Subtarget.is64Bit(); MVT SPTy = getPointerTy(DAG.getDataLayout()); SDValue Result; if (!Lower) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Result = DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain } else if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); if (Is64Bit) { // The 64 bit implementation of segmented stacks needs to clobber both r10 // r11. This makes it impossible to use it along with nested parameters. const Function &F = MF.getFunction(); for (const auto &A : F.args()) { if (A.hasNestAttr()) report_fatal_error("Cannot use segmented stacks with functions that " "have nested arguments."); } } const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, DAG.getRegister(Vreg, SPTy)); } else { SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size); MF.getInfo()->setHasWinAlloca(true); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned SPReg = RegInfo->getStackRegister(); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy); Chain = SP.getValue(1); if (Align) { SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP); } Result = SP; } Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); SDValue Ops[2] = {Result, Chain}; return DAG.getMergeValues(Ops, dl); } SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); auto PtrVT = getPointerTy(MF.getDataLayout()); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); const Value *SV = cast(Op.getOperand(2))->getValue(); SDLoc DL(Op); if (!Subtarget.is64Bit() || Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv())) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV)); } // __va_list_tag: // gp_offset (0 - 6 * 8) // fp_offset (48 - 48 + 8 * 16) // overflow_arg_area (point to parameters coming in memory). 
  //   reg_save_area
  SmallVector<SDValue, 8> MemOps;
  SDValue FIN = Op.getOperand(1);
  // Store gp_offset
  SDValue Store = DAG.getStore(
      Op.getOperand(0), DL,
      DAG.getConstant(FuncInfo->getVarArgsGPOffset(), DL, MVT::i32), FIN,
      MachinePointerInfo(SV));
  MemOps.push_back(Store);

  // Store fp_offset
  FIN = DAG.getMemBasePlusOffset(FIN, 4, DL);
  Store = DAG.getStore(
      Op.getOperand(0), DL,
      DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN,
      MachinePointerInfo(SV, 4));
  MemOps.push_back(Store);

  // Store ptr to overflow_arg_area
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
  SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
                       MachinePointerInfo(SV, 8));
  MemOps.push_back(Store);

  // Store ptr to reg_save_area.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                    DAG.getIntPtrConstant(
                        Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
  SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
  Store = DAG.getStore(
      Op.getOperand(0), DL, RSFIN, FIN,
      MachinePointerInfo(SV, Subtarget.isTarget64BitLP64() ? 16 : 12));
  MemOps.push_back(Store);

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}

SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() &&
         "LowerVAARG only handles 64-bit va_arg!");
  assert(Op.getNumOperands() == 4);

  MachineFunction &MF = DAG.getMachineFunction();
  if (Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()))
    // The Win64 ABI uses char* instead of a structure.
    return DAG.expandVAArg(Op.getNode());

  SDValue Chain = Op.getOperand(0);
  SDValue SrcPtr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  unsigned Align = Op.getConstantOperandVal(3);
  SDLoc dl(Op);

  EVT ArgVT = Op.getNode()->getValueType(0);
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
  uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
  uint8_t ArgMode;

  // Decide which area this value should be read from.
  // TODO: Implement the AMD64 ABI in its entirety. This simple
  // selection mechanism works only for the basic types.
  if (ArgVT == MVT::f80) {
    llvm_unreachable("va_arg for f80 not yet implemented");
  } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
    ArgMode = 2;  // Argument passed in XMM register. Use fp_offset.
  } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
    ArgMode = 1;  // Argument passed in GPR64 register(s). Use gp_offset.
  } else {
    llvm_unreachable("Unhandled argument type in LowerVAARG");
  }

  if (ArgMode == 2) {
    // Sanity Check: Make sure using fp_offset makes sense.
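    // For reference, the structure these offsets index into (SysV AMD64 ABI;
    // sketch, matching the layout stored by LowerVASTART above):
    //
    //   struct va_list_tag {
    //     unsigned gp_offset;       // 0..48, advances 8 per consumed GPR arg
    //     unsigned fp_offset;       // 48..176, advances 16 per XMM arg
    //     void *overflow_arg_area;  // stack-passed arguments
    //     void *reg_save_area;      // spilled register arguments
    //   };
    //
    // Using fp_offset presumes an XMM save area exists at all, hence the
    // check below.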
assert(!Subtarget.useSoftFloat() && !(MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) && Subtarget.hasSSE1()); } // Insert VAARG_64 node into the DAG // VAARG_64 returns two values: Variable Argument Address, Chain SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32), DAG.getConstant(ArgMode, dl, MVT::i8), DAG.getConstant(Align, dl, MVT::i32)}; SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other); SDValue VAARG = DAG.getMemIntrinsicNode( X86ISD::VAARG_64, dl, VTs, InstOps, MVT::i64, MachinePointerInfo(SV), /*Align=*/0, MachineMemOperand::MOLoad | MachineMemOperand::MOStore); Chain = VAARG.getValue(1); // Load the next argument and return it return DAG.getLoad(ArgVT, dl, Chain, VAARG, MachinePointerInfo()); } static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows, // where a va_list is still an i8*. assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!"); if (Subtarget.isCallingConvWin64( DAG.getMachineFunction().getFunction().getCallingConv())) // Probably a Win64 va_copy. return DAG.expandVACopy(Op.getNode()); SDValue Chain = Op.getOperand(0); SDValue DstPtr = Op.getOperand(1); SDValue SrcPtr = Op.getOperand(2); const Value *DstSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24, DL), 8, /*isVolatile*/false, false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } // Helper to get immediate/variable SSE shift opcode from other shift opcodes. static unsigned getTargetVShiftUniformOpcode(unsigned Opc, bool IsVariable) { switch (Opc) { case ISD::SHL: case X86ISD::VSHL: case X86ISD::VSHLI: return IsVariable ? X86ISD::VSHL : X86ISD::VSHLI; case ISD::SRL: case X86ISD::VSRL: case X86ISD::VSRLI: return IsVariable ? X86ISD::VSRL : X86ISD::VSRLI; case ISD::SRA: case X86ISD::VSRA: case X86ISD::VSRAI: return IsVariable ? X86ISD::VSRA : X86ISD::VSRAI; } llvm_unreachable("Unknown target vector shift node"); } /// Handle vector element shifts where the shift amount is a constant. /// Takes immediate version of shift as input. static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, SDValue SrcOp, uint64_t ShiftAmt, SelectionDAG &DAG) { MVT ElementType = VT.getVectorElementType(); // Bitcast the source vector to the output type, this is mainly necessary for // vXi8/vXi64 shifts. if (VT != SrcOp.getSimpleValueType()) SrcOp = DAG.getBitcast(VT, SrcOp); // Fold this packed shift into its first operand if ShiftAmt is 0. if (ShiftAmt == 0) return SrcOp; // Check for ShiftAmt >= element width if (ShiftAmt >= ElementType.getSizeInBits()) { if (Opc == X86ISD::VSRAI) ShiftAmt = ElementType.getSizeInBits() - 1; else return DAG.getConstant(0, dl, VT); } assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) && "Unknown target vector shift-by-constant node"); // Fold this packed vector shift into a build vector if SrcOp is a // vector of Constants or UNDEFs. 
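  // E.g. VSRAI <i16 -4, i16 6, i16 undef, ...>, 1 folds here to
  // build_vector <i16 -2, i16 3, undef, ...>: the shift is applied per lane
  // with APInt arithmetic and undef lanes pass through untouched, mirroring
  // the three opcode cases handled below.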
if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) { SmallVector Elts; unsigned NumElts = SrcOp->getNumOperands(); ConstantSDNode *ND; switch(Opc) { default: llvm_unreachable("Unknown opcode!"); case X86ISD::VSHLI: for (unsigned i=0; i!=NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { Elts.push_back(CurrentOp); continue; } ND = cast(CurrentOp); const APInt &C = ND->getAPIntValue(); Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType)); } break; case X86ISD::VSRLI: for (unsigned i=0; i!=NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { Elts.push_back(CurrentOp); continue; } ND = cast(CurrentOp); const APInt &C = ND->getAPIntValue(); Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType)); } break; case X86ISD::VSRAI: for (unsigned i=0; i!=NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { Elts.push_back(CurrentOp); continue; } ND = cast(CurrentOp); const APInt &C = ND->getAPIntValue(); Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType)); } break; } return DAG.getBuildVector(VT, dl, Elts); } return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, dl, MVT::i8)); } /// Handle vector element shifts where the shift amount may or may not be a /// constant. Takes immediate version of shift as input. static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, SDValue SrcOp, SDValue ShAmt, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT SVT = ShAmt.getSimpleValueType(); assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!"); // Catch shift-by-constant. if (ConstantSDNode *CShAmt = dyn_cast(ShAmt)) return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp, CShAmt->getZExtValue(), DAG); // Change opcode to non-immediate version. Opc = getTargetVShiftUniformOpcode(Opc, true); // Need to build a vector containing shift amount. // SSE/AVX packed shifts only use the lower 64-bit of the shift count. // +====================+============+=======================================+ // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as | // +====================+============+=======================================+ // | i64 | Yes, No | Use ShAmt as lowest elt | // | i32 | Yes | zero-extend in-reg | // | (i32 zext(i16/i8)) | Yes | zero-extend in-reg | // | (i32 zext(i16/i8)) | No | byte-shift-in-reg | // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) | // +====================+============+=======================================+ if (SVT == MVT::i64) ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt); else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND && ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 || ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) { ShAmt = ShAmt.getOperand(0); MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? 
MVT::v16i8 : MVT::v8i16; ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt); if (Subtarget.hasSSE41()) ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), MVT::v2i64, ShAmt); else { SDValue ByteShift = DAG.getConstant( (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8); ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt); ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, ByteShift); ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, ByteShift); } } else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt); ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), MVT::v2i64, ShAmt); } else { SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)}; ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps); } // The return type has to be a 128-bit type with the same element // type as the input type. MVT EltVT = VT.getVectorElementType(); MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits()); ShAmt = DAG.getBitcast(ShVT, ShAmt); return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } /// Return Mask with the necessary casting or extending /// for \p Mask according to \p MaskVT when lowering masking intrinsics static SDValue getMaskNode(SDValue Mask, MVT MaskVT, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl) { if (isAllOnesConstant(Mask)) return DAG.getConstant(1, dl, MaskVT); if (X86::isZeroNode(Mask)) return DAG.getConstant(0, dl, MaskVT); assert(MaskVT.bitsLE(Mask.getSimpleValueType()) && "Unexpected mask size!"); if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) { assert(MaskVT == MVT::v64i1 && "Expected v64i1 mask!"); assert(Subtarget.hasBWI() && "Expected AVX512BW target!"); // In case 32bit mode, bitcast i64 is illegal, extend/split it. SDValue Lo, Hi; Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, DAG.getConstant(0, dl, MVT::i32)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, DAG.getConstant(1, dl, MVT::i32)); Lo = DAG.getBitcast(MVT::v32i1, Lo); Hi = DAG.getBitcast(MVT::v32i1, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); } else { MVT BitcastVT = MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); } } /// Return (and \p Op, \p Mask) for compare instructions or /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the /// necessary casting or extending for \p Mask when lowering masking intrinsics static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); unsigned OpcodeSelect = ISD::VSELECT; SDLoc dl(Op); if (isAllOnesConstant(Mask)) return Op; SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); if (PreservedSrc.isUndef()) PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); } /// Creates an SDNode for a predicated scalar operation. /// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc). 
/// The mask is coming as MVT::i8 and it should be transformed /// to MVT::v1i1 while lowering masking intrinsics. /// The main difference between ScalarMaskingNode and VectorMaskingNode is using /// "X86select" instead of "vselect". We just can't create the "vselect" node /// for a scalar instruction. static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget &Subtarget, SelectionDAG &DAG) { if (auto *MaskConst = dyn_cast(Mask)) if (MaskConst->getZExtValue() & 0x1) return Op; MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); assert(Mask.getValueType() == MVT::i8 && "Unexpect type"); SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1, DAG.getBitcast(MVT::v8i1, Mask), DAG.getIntPtrConstant(0, dl)); if (Op.getOpcode() == X86ISD::FSETCCM || Op.getOpcode() == X86ISD::FSETCCM_RND || Op.getOpcode() == X86ISD::VFPCLASSS) return DAG.getNode(ISD::AND, dl, VT, Op, IMask); if (PreservedSrc.isUndef()) PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc); } static int getSEHRegistrationNodeSize(const Function *Fn) { if (!Fn->hasPersonalityFn()) report_fatal_error( "querying registration node size for function without personality"); // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See // WinEHStatePass for the full struct definition. switch (classifyEHPersonality(Fn->getPersonalityFn())) { case EHPersonality::MSVC_X86SEH: return 24; case EHPersonality::MSVC_CXX: return 16; default: break; } report_fatal_error( "can only recover FP for 32-bit MSVC EH personality functions"); } /// When the MSVC runtime transfers control to us, either to an outlined /// function or when returning to a parent frame after catching an exception, we /// recover the parent frame pointer by doing arithmetic on the incoming EBP. /// Here's the math: /// RegNodeBase = EntryEBP - RegNodeSize /// ParentFP = RegNodeBase - ParentFrameOffset /// Subtracting RegNodeSize takes us to the offset of the registration node, and /// subtracting the offset (negative on x86) takes us back to the parent FP. static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, SDValue EntryEBP) { MachineFunction &MF = DAG.getMachineFunction(); SDLoc dl; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); // It's possible that the parent function no longer has a personality function // if the exceptional code was optimized away, in which case we just return // the incoming EBP. if (!Fn->hasPersonalityFn()) return EntryEBP; // Get an MCSymbol that will ultimately resolve to the frame offset of the EH // registration, or the .set_setframe offset. MCSymbol *OffsetSym = MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol( GlobalValue::dropLLVMManglingEscape(Fn->getName())); SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT); SDValue ParentFrameOffset = DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal); // Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after // prologue to RBP in the parent function. 
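  // On 32-bit, the math above works out as, e.g. (made-up numbers):
  //   EntryEBP = 0x1000, RegNodeSize = 24 (SEH), ParentFrameOffset = -64:
  //   RegNodeBase = 0x1000 - 24    = 0x0FE8
  //   ParentFP    = 0x0FE8 - (-64) = 0x1028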
// Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after // prologue to RBP in the parent function. const X86Subtarget &Subtarget = static_cast<const X86Subtarget &>(DAG.getSubtarget()); if (Subtarget.is64Bit()) return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset); int RegNodeSize = getSEHRegistrationNodeSize(Fn); // RegNodeBase = EntryEBP - RegNodeSize // ParentFP = RegNodeBase - ParentFrameOffset SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP, DAG.getConstant(RegNodeSize, dl, PtrVT)); return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset); } SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { // Helper to detect if the operand is CUR_DIRECTION rounding mode. auto isRoundModeCurDirection = [](SDValue Rnd) { if (!isa<ConstantSDNode>(Rnd)) return false; unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue(); return Round == X86::STATIC_ROUNDING::CUR_DIRECTION; }; SDLoc dl(Op); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); MVT VT = Op.getSimpleValueType(); const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); if (IntrData) { switch(IntrData->Type) { case INTR_TYPE_1OP: { // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(2); if (!isRoundModeCurDirection(Rnd)) { return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Op.getOperand(1), Rnd); } } return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1)); } case INTR_TYPE_2OP: { SDValue Src2 = Op.getOperand(2); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(3); if (!isRoundModeCurDirection(Rnd)) { return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Op.getOperand(1), Src2, Rnd); } } return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Src2); } case INTR_TYPE_3OP: case INTR_TYPE_3OP_IMM8: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); if (IntrData->Type == INTR_TYPE_3OP_IMM8) Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(4); if (!isRoundModeCurDirection(Rnd)) { return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src1, Src2, Src3, Rnd); } } return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src1, Src2, Src3); } case INTR_TYPE_4OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); case INTR_TYPE_1OP_MASK_RM: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue RoundingMode; // We always add rounding mode to the Node. // If the rounding mode is not specified, we add the // "current direction" mode.
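// For reference (illustrative, not from the original file): the rounding
// immediates follow the MXCSR.RC/_MM_FROUND convention used by
// X86::STATIC_ROUNDING: TO_NEAREST_INT = 0, TO_NEG_INF = 1, TO_POS_INF = 2,
// TO_ZERO = 3, CUR_DIRECTION = 4. For example, a caller requesting
// "current direction" passes 4:
//   __m512 r = _mm512_sqrt_round_ps(x, _MM_FROUND_CUR_DIRECTION /* 4 */);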
if (Op.getNumOperands() == 4) RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); else RoundingMode = Op.getOperand(4); assert(IntrData->Opc1 == 0 && "Unexpected second opcode!"); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, RoundingMode), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_1OP_MASK: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); // We add rounding mode to the Node when // - RM Opcode is specified and // - RM is not "current direction". unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(4); if (!isRoundModeCurDirection(Rnd)) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src, Rnd), Mask, PassThru, Subtarget, DAG); } } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue passThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; // There are 2 kinds of intrinsics in this group: // (1) With suppress-all-exceptions (sae) or rounding mode- 6 operands // (2) With rounding mode and sae - 7 operands. bool HasRounding = IntrWithRoundingModeOpcode != 0; if (Op.getNumOperands() == (5U + HasRounding)) { if (HasRounding) { SDValue Rnd = Op.getOperand(5); if (!isRoundModeCurDirection(Rnd)) return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2, Rnd), Mask, passThru, Subtarget, DAG); } return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2), Mask, passThru, Subtarget, DAG); } assert(Op.getNumOperands() == (6U + HasRounding) && "Unexpected intrinsic form"); SDValue RoundingMode = Op.getOperand(5); if (HasRounding) { SDValue Sae = Op.getOperand(6); if (!isRoundModeCurDirection(Sae)) return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2, RoundingMode, Sae), Mask, passThru, Subtarget, DAG); } return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, RoundingMode), Mask, passThru, Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK_RM: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src0 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); // There are 2 kinds of intrinsics in this group: // (1) With suppress-all-exceptions (sae) or rounding mode- 6 operands // (2) With rounding mode and sae - 7 operands. if (Op.getNumOperands() == 6) { SDValue Sae = Op.getOperand(5); return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Sae), Mask, Src0, Subtarget, DAG); } assert(Op.getNumOperands() == 7 && "Unexpected intrinsic form"); SDValue RoundingMode = Op.getOperand(5); SDValue Sae = Op.getOperand(6); return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, RoundingMode, Sae), Mask, Src0, Subtarget, DAG); } case INTR_TYPE_2OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. 
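// A minimal scalar model (illustrative only) of the merge-masking that
// getVectorMaskingNode/getScalarMaskingNode produce, one mask bit per
// result lane:
//   for (unsigned I = 0; I != NumElts; ++I)
//     Res[I] = ((Mask >> I) & 1) ? OpRes[I] : PassThru[I];
// When PassThru is undef the helpers substitute a zero vector, turning
// merge-masking into zero-masking.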
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(5); if (!isRoundModeCurDirection(Rnd)) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src1, Src2, Rnd), Mask, PassThru, Subtarget, DAG); } } // TODO: Intrinsics should have fast-math-flags to propagate. return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_2OP_MASK_RM: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); // We specify 2 possible modes for intrinsics, with/without rounding // modes. // First, we check if the intrinsic have rounding mode (6 operands), // if not, we set rounding mode to "current". SDValue Rnd; if (Op.getNumOperands() == 6) Rnd = Op.getOperand(5); else Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Rnd), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_3OP_SCALAR_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue PassThru = Op.getOperand(4); SDValue Mask = Op.getOperand(5); unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(6); if (!isRoundModeCurDirection(Rnd)) return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2, Src3, Rnd), Mask, PassThru, Subtarget, DAG); } return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_3OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue PassThru = Op.getOperand(4); SDValue Mask = Op.getOperand(5); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(6); if (!isRoundModeCurDirection(Rnd)) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src1, Src2, Src3, Rnd), Mask, PassThru, Subtarget, DAG); } } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } case VPERM_2OP : { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); // Swap Src1 and Src2 in the node creation return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1); } case IFMA_OP: // NOTE: We need to swizzle the operands to pass the multiply operands // first. return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case CVTPD2PS: // ISD::FP_ROUND has a second argument that indicates if the truncation // does not change the value. Set it to 0 since it can change. return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1), DAG.getIntPtrConstant(0, dl)); case CVTPD2PS_RND_MASK: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); // We add rounding mode to the Node when // - RM Opcode is specified and // - RM is not "current direction". 
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(4); if (!isRoundModeCurDirection(Rnd)) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src, Rnd), Mask, PassThru, Subtarget, DAG); } } assert(IntrData->Opc0 == ISD::FP_ROUND && "Unexpected opcode!"); // ISD::FP_ROUND has a second argument that indicates if the truncation // does not change the value. Set it to 0 since it can change. return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, DAG.getIntPtrConstant(0, dl)), Mask, PassThru, Subtarget, DAG); } case FPCLASSS: { SDValue Src1 = Op.getOperand(1); SDValue Imm = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm); SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(), Subtarget, DAG); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, DAG.getConstant(0, dl, MVT::v8i1), FPclassMask, DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(MVT::i8, Ins); } case CMP_MASK_CC: { MVT MaskVT = Op.getSimpleValueType(); SDValue Cmp; SDValue CC = Op.getOperand(3); CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. if (IntrData->Opc1 != 0) { SDValue Rnd = Op.getOperand(4); if (!isRoundModeCurDirection(Rnd)) Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1), Op.getOperand(2), CC, Rnd); } //default rounding mode if (!Cmp.getNode()) Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), Op.getOperand(2), CC); return Cmp; } case CMP_MASK_SCALAR_CC: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3)); SDValue Mask = Op.getOperand(4); SDValue Cmp; if (IntrData->Opc1 != 0) { SDValue Rnd = Op.getOperand(5); if (!isRoundModeCurDirection(Rnd)) Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Rnd); } //default rounding mode if(!Cmp.getNode()) Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC); SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(), Subtarget, DAG); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. 
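// Illustrative bit-level view of the pattern used here: inserting the v1i1
// mask result M into a zeroed v8i1 and bitcasting yields i8 0b0000000M,
// a genuine zero extension; extracting the element instead would leave
// bits 7..1 of the i8 undefined.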
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, DAG.getConstant(0, dl, MVT::v8i1), CmpMask, DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(MVT::i8, Ins); } case COMI: { // Comparison intrinsics ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS, LHS); SDValue SetCC; switch (CC) { case ISD::SETEQ: { // (ZF = 1 and PF = 0) SetCC = getSETCC(X86::COND_E, Comi, dl, DAG); SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG); SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP); break; } case ISD::SETNE: { // (ZF = 0 or PF = 1) SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG); SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG); SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP); break; } case ISD::SETGT: // (CF = 0 and ZF = 0) SetCC = getSETCC(X86::COND_A, Comi, dl, DAG); break; case ISD::SETLT: { // The condition is opposite to GT. Swap the operands. SetCC = getSETCC(X86::COND_A, InvComi, dl, DAG); break; } case ISD::SETGE: // CF = 0 SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG); break; case ISD::SETLE: // The condition is opposite to GE. Swap the operands. SetCC = getSETCC(X86::COND_AE, InvComi, dl, DAG); break; default: llvm_unreachable("Unexpected illegal condition!"); } return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } case COMI_RM: { // Comparison intrinsics with Sae SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); unsigned CondVal = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); SDValue Sae = Op.getOperand(4); SDValue FCmp; if (isRoundModeCurDirection(Sae)) FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, DAG.getConstant(CondVal, dl, MVT::i8)); else FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS, DAG.getConstant(CondVal, dl, MVT::i8), Sae); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, DAG.getConstant(0, dl, MVT::v16i1), FCmp, DAG.getIntPtrConstant(0, dl)); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, DAG.getBitcast(MVT::i16, Ins)); } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), Op.getOperand(1), Op.getOperand(2), Subtarget, DAG); case COMPRESS_EXPAND_IN_REG: { SDValue Mask = Op.getOperand(3); SDValue DataToCompress = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); if (isAllOnesConstant(Mask)) // return data as is return Op.getOperand(1); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress), Mask, PassThru, Subtarget, DAG); } case FIXUPIMMS: case FIXUPIMMS_MASKZ: case FIXUPIMM: case FIXUPIMM_MASKZ:{ SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue Imm = Op.getOperand(4); SDValue Mask = Op.getOperand(5); SDValue Passthru = (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMMS ) ? Src1 : getZeroVector(VT, Subtarget, DAG, dl); // We specify 2 possible modes for intrinsics, with/without rounding // modes. // First, we check if the intrinsic has rounding mode (7 operands), // if not, we set rounding mode to "current".
SDValue Rnd; if (Op.getNumOperands() == 7) Rnd = Op.getOperand(6); else Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); if (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMM_MASKZ) return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3, Imm, Rnd), Mask, Passthru, Subtarget, DAG); else // Scalar - FIXUPIMMS, FIXUPIMMS_MASKZ return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3, Imm, Rnd), Mask, Passthru, Subtarget, DAG); } case ROUNDP: { assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, Op.getOperand(2), DAG.getConstant(0xf, dl, MVT::i32)); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), RoundingMode); } case ROUNDS: { assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, Op.getOperand(3), DAG.getConstant(0xf, dl, MVT::i32)); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), RoundingMode); } // ADC/ADCX/SBB case ADX: { SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32); SDValue Res; // If the carry in is zero, then we should just use ADD/SUB instead of // ADC/SBB. if (isNullConstant(Op.getOperand(1))) { Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2), Op.getOperand(3)); } else { SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1), DAG.getConstant(-1, dl, MVT::i8)); Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2), Op.getOperand(3), GenCF.getValue(1)); } SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG); SDValue Results[] = { SetCC, Res }; return DAG.getMergeValues(Results, dl); } case CVTPD2PS_MASK: case CVTPD2I_MASK: case TRUNCATE_TO_REG: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); if (isAllOnesConstant(Mask)) return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); MVT SrcVT = Src.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru, Mask); } case CVTPS2PH_MASK: { SDValue Src = Op.getOperand(1); SDValue Rnd = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); if (isAllOnesConstant(Mask)) return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src, Rnd); MVT SrcVT = Src.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd, PassThru, Mask); } default: break; } } switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. 
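// For reference (illustrative): PTEST sets ZF = ((LHS & RHS) == 0) and
// CF = ((~LHS & RHS) == 0), so e.g.
//   int z = _mm_testz_si128(a, b); // 1 iff (a & b) is all zeros -> COND_E
//   int c = _mm_testc_si128(a, b); // 1 iff (~a & b) is all zeros -> COND_B
// The lowering below therefore just emits PTEST/TESTP/KTEST and a setcc on
// the matching condition.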
case Intrinsic::x86_avx512_ktestc_b: case Intrinsic::x86_avx512_ktestc_w: case Intrinsic::x86_avx512_ktestc_d: case Intrinsic::x86_avx512_ktestc_q: case Intrinsic::x86_avx512_ktestz_b: case Intrinsic::x86_avx512_ktestz_w: case Intrinsic::x86_avx512_ktestz_d: case Intrinsic::x86_avx512_ktestz_q: case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_sse41_ptestc: case Intrinsic::x86_sse41_ptestnzc: case Intrinsic::x86_avx_ptestz_256: case Intrinsic::x86_avx_ptestc_256: case Intrinsic::x86_avx_ptestnzc_256: case Intrinsic::x86_avx_vtestz_ps: case Intrinsic::x86_avx_vtestc_ps: case Intrinsic::x86_avx_vtestnzc_ps: case Intrinsic::x86_avx_vtestz_pd: case Intrinsic::x86_avx_vtestc_pd: case Intrinsic::x86_avx_vtestnzc_pd: case Intrinsic::x86_avx_vtestz_ps_256: case Intrinsic::x86_avx_vtestc_ps_256: case Intrinsic::x86_avx_vtestnzc_ps_256: case Intrinsic::x86_avx_vtestz_pd_256: case Intrinsic::x86_avx_vtestc_pd_256: case Intrinsic::x86_avx_vtestnzc_pd_256: { unsigned TestOpc = X86ISD::PTEST; X86::CondCode X86CC; switch (IntNo) { default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); case Intrinsic::x86_avx512_ktestc_b: case Intrinsic::x86_avx512_ktestc_w: case Intrinsic::x86_avx512_ktestc_d: case Intrinsic::x86_avx512_ktestc_q: // CF = 1 TestOpc = X86ISD::KTEST; X86CC = X86::COND_B; break; case Intrinsic::x86_avx512_ktestz_b: case Intrinsic::x86_avx512_ktestz_w: case Intrinsic::x86_avx512_ktestz_d: case Intrinsic::x86_avx512_ktestz_q: TestOpc = X86ISD::KTEST; X86CC = X86::COND_E; break; case Intrinsic::x86_avx_vtestz_ps: case Intrinsic::x86_avx_vtestz_pd: case Intrinsic::x86_avx_vtestz_ps_256: case Intrinsic::x86_avx_vtestz_pd_256: TestOpc = X86ISD::TESTP; LLVM_FALLTHROUGH; case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_avx_ptestz_256: // ZF = 1 X86CC = X86::COND_E; break; case Intrinsic::x86_avx_vtestc_ps: case Intrinsic::x86_avx_vtestc_pd: case Intrinsic::x86_avx_vtestc_ps_256: case Intrinsic::x86_avx_vtestc_pd_256: TestOpc = X86ISD::TESTP; LLVM_FALLTHROUGH; case Intrinsic::x86_sse41_ptestc: case Intrinsic::x86_avx_ptestc_256: // CF = 1 X86CC = X86::COND_B; break; case Intrinsic::x86_avx_vtestnzc_ps: case Intrinsic::x86_avx_vtestnzc_pd: case Intrinsic::x86_avx_vtestnzc_ps_256: case Intrinsic::x86_avx_vtestnzc_pd_256: TestOpc = X86ISD::TESTP; LLVM_FALLTHROUGH; case Intrinsic::x86_sse41_ptestnzc: case Intrinsic::x86_avx_ptestnzc_256: // ZF and CF = 0 X86CC = X86::COND_A; break; } SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS); SDValue SetCC = getSETCC(X86CC, Test, dl, DAG); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } case Intrinsic::x86_sse42_pcmpistria128: case Intrinsic::x86_sse42_pcmpestria128: case Intrinsic::x86_sse42_pcmpistric128: case Intrinsic::x86_sse42_pcmpestric128: case Intrinsic::x86_sse42_pcmpistrio128: case Intrinsic::x86_sse42_pcmpestrio128: case Intrinsic::x86_sse42_pcmpistris128: case Intrinsic::x86_sse42_pcmpestris128: case Intrinsic::x86_sse42_pcmpistriz128: case Intrinsic::x86_sse42_pcmpestriz128: { unsigned Opcode; X86::CondCode X86CC; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
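// For reference (illustrative, not from the original file): the condition
// codes chosen below read the EFLAGS that PCMPISTRI/PCMPESTRI define:
// COND_A is CF==0 && ZF==0, COND_B is CF==1, COND_O is OF==1, COND_S is
// SF==1 and COND_E is ZF==1. So, roughly, _mm_cmpistra(a, b, imm) returns
// 1 only when no match was found (CF==0) and the second string had no null
// terminator (ZF==0).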
case Intrinsic::x86_sse42_pcmpistria128: Opcode = X86ISD::PCMPISTR; X86CC = X86::COND_A; break; case Intrinsic::x86_sse42_pcmpestria128: Opcode = X86ISD::PCMPESTR; X86CC = X86::COND_A; break; case Intrinsic::x86_sse42_pcmpistric128: Opcode = X86ISD::PCMPISTR; X86CC = X86::COND_B; break; case Intrinsic::x86_sse42_pcmpestric128: Opcode = X86ISD::PCMPESTR; X86CC = X86::COND_B; break; case Intrinsic::x86_sse42_pcmpistrio128: Opcode = X86ISD::PCMPISTR; X86CC = X86::COND_O; break; case Intrinsic::x86_sse42_pcmpestrio128: Opcode = X86ISD::PCMPESTR; X86CC = X86::COND_O; break; case Intrinsic::x86_sse42_pcmpistris128: Opcode = X86ISD::PCMPISTR; X86CC = X86::COND_S; break; case Intrinsic::x86_sse42_pcmpestris128: Opcode = X86ISD::PCMPESTR; X86CC = X86::COND_S; break; case Intrinsic::x86_sse42_pcmpistriz128: Opcode = X86ISD::PCMPISTR; X86CC = X86::COND_E; break; case Intrinsic::x86_sse42_pcmpestriz128: Opcode = X86ISD::PCMPESTR; X86CC = X86::COND_E; break; } SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps).getValue(2); SDValue SetCC = getSETCC(X86CC, PCMP, dl, DAG); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } case Intrinsic::x86_sse42_pcmpistri128: case Intrinsic::x86_sse42_pcmpestri128: { unsigned Opcode; if (IntNo == Intrinsic::x86_sse42_pcmpistri128) Opcode = X86ISD::PCMPISTR; else Opcode = X86ISD::PCMPESTR; SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps); } case Intrinsic::x86_sse42_pcmpistrm128: case Intrinsic::x86_sse42_pcmpestrm128: { unsigned Opcode; if (IntNo == Intrinsic::x86_sse42_pcmpistrm128) Opcode = X86ISD::PCMPISTR; else Opcode = X86ISD::PCMPESTR; SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps).getValue(1); } case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); auto &Context = MF.getMMI().getContext(); MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") + Twine(MF.getFunctionNumber())); return DAG.getNode(getGlobalWrapperKind(), dl, VT, DAG.getMCSymbol(S, PtrVT)); } case Intrinsic::x86_seh_lsda: { // Compute the symbol for the LSDA. We know it'll get emitted later. MachineFunction &MF = DAG.getMachineFunction(); SDValue Op1 = Op.getOperand(1); auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal()); MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol( GlobalValue::dropLLVMManglingEscape(Fn->getName())); // Generate a simple absolute symbol reference. This intrinsic is only // supported on 32-bit Windows, which isn't PIC. SDValue Result = DAG.getMCSymbol(LSDASym, VT); return DAG.getNode(X86ISD::Wrapper, dl, VT, Result); } case Intrinsic::eh_recoverfp: { SDValue FnOp = Op.getOperand(1); SDValue IncomingFPOp = Op.getOperand(2); GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp); auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr); if (!Fn) report_fatal_error( "llvm.eh.recoverfp must take a function as the first argument"); return recoverFramePointer(DAG, Fn, IncomingFPOp); } case Intrinsic::localaddress: { // Returns one of the stack, base, or frame pointer registers, depending on // which is used to reference local variables.
MachineFunction &MF = DAG.getMachineFunction(); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned Reg; if (RegInfo->hasBasePointer(MF)) Reg = RegInfo->getBaseRegister(); else // This function handles the SP or FP case. Reg = RegInfo->getPtrSizedFrameRegister(MF); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } } } static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); auto *C = dyn_cast<ConstantSDNode>(ScaleOp); // Scale must be constant. if (!C) return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); EVT MaskVT = Mask.getValueType(); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); // If source is undef or we know it won't be used, use a zero vector // to break register dependency. // TODO: use undef instead and let BreakFalseDeps deal with it? if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; return DAG.getMergeValues(RetOps, dl); } static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); auto *C = dyn_cast<ConstantSDNode>(ScaleOp); // Scale must be constant. if (!C) return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), VT.getVectorNumElements()); MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); // We support two versions of the gather intrinsics. One with scalar mask and // one with vXi1 mask. Convert scalar to vXi1 if necessary. if (Mask.getValueType() != MaskVT) Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); // If source is undef or we know it won't be used, use a zero vector // to break register dependency. // TODO: use undef instead and let BreakFalseDeps deal with it? if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); SDValue Ops[] = {Src, Mask, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; return DAG.getMergeValues(RetOps, dl); } static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); auto *C = dyn_cast<ConstantSDNode>(ScaleOp); // Scale must be constant.
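// A scalar model (illustrative only) of the masked gather these helpers
// emit, for element type T:
//   for (unsigned I = 0; I != NumElts; ++I)
//     Dst[I] = Mask[I] ? *(const T *)(Base + Index[I] * Scale) : Src[I];
// Scatter is the store dual: Mask[I] guards
//   *(T *)(Base + Index[I] * Scale) = Src[I];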
if (!C) return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), Src.getSimpleValueType().getVectorNumElements()); MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); // We support two versions of the scatter intrinsics. One with scalar mask and // one with vXi1 mask. Convert scalar to vXi1 if necessary. if (Mask.getValueType() != MaskVT) Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Src, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); return SDValue(Res, 1); } static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); auto *C = dyn_cast<ConstantSDNode>(ScaleOp); // Scale must be constant. if (!C) return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops); return SDValue(Res, 0); } /// Handles the lowering of the builtin intrinsic that returns the value /// of the extended control register. static void getExtendedControlRegister(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, SmallVectorImpl<SDValue> &Results) { assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue LO, HI; // The ECX register is used to select the index of the XCR register to // return. SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX, N->getOperand(2)); SDNode *N1 = DAG.getMachineNode(X86::XGETBV, DL, Tys, Chain); Chain = SDValue(N1, 0); // Reads the content of XCR and returns it in registers EDX:EAX. if (Subtarget.is64Bit()) { LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, LO.getValue(2)); } else { LO = DAG.getCopyFromReg(Chain, DL, X86::EAX, MVT::i32, SDValue(N1, 1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32, LO.getValue(2)); } Chain = HI.getValue(1); if (Subtarget.is64Bit()) { // Merge the two 32-bit values into a 64-bit one. SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, DAG.getConstant(32, DL, MVT::i8)); Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp)); Results.push_back(Chain); return; } // Use a buildpair to merge the two 32-bit values into a 64-bit one. SDValue Ops[] = { LO, HI }; SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops); Results.push_back(Pair); Results.push_back(Chain); } /// Handles the lowering of builtin intrinsics that read performance monitor /// counters (x86_rdpmc).
static void getReadPerformanceCounter(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, SmallVectorImpl<SDValue> &Results) { assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue LO, HI; // The ECX register is used to select the index of the performance counter // to read. SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX, N->getOperand(2)); SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain); // Reads the content of a 64-bit performance counter and returns it in the // registers EDX:EAX. if (Subtarget.is64Bit()) { LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, LO.getValue(2)); } else { LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32, LO.getValue(2)); } Chain = HI.getValue(1); if (Subtarget.is64Bit()) { // The EAX register is loaded with the low-order 32 bits. The EDX register // is loaded with the supported high-order bits of the counter. SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, DAG.getConstant(32, DL, MVT::i8)); Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp)); Results.push_back(Chain); return; } // Use a buildpair to merge the two 32-bit values into a 64-bit one. SDValue Ops[] = { LO, HI }; SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops); Results.push_back(Pair); Results.push_back(Chain); } /// Handles the lowering of builtin intrinsics that read the time stamp counter /// (x86_rdtsc and x86_rdtscp). This function is also used to custom lower /// READCYCLECOUNTER nodes. static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode, SelectionDAG &DAG, const X86Subtarget &Subtarget, SmallVectorImpl<SDValue> &Results) { SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0)); SDValue LO, HI; // The processor's time-stamp counter (a 64-bit MSR) is stored into the // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR // and the EAX register is loaded with the low-order 32 bits. if (Subtarget.is64Bit()) { LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, LO.getValue(2)); } else { LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32, LO.getValue(2)); } SDValue Chain = HI.getValue(1); SDValue TSC; if (Subtarget.is64Bit()) { // The EDX register is loaded with the high-order 32 bits of the MSR, and // the EAX register is loaded with the low-order 32 bits. TSC = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, DAG.getConstant(32, DL, MVT::i8)); TSC = DAG.getNode(ISD::OR, DL, MVT::i64, LO, TSC); } else { // Use a buildpair to merge the two 32-bit values into a 64-bit one. TSC = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, { LO, HI }); } if (Opcode == X86ISD::RDTSCP_DAG) { assert(N->getNumOperands() == 2 && "Unexpected number of operands!"); // Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into // the ECX register. Add 'ecx' explicitly to the chain.
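// (Illustrative: on 64-bit targets the two halves read above are combined
// exactly like the C expression
//   uint64_t TSC = ((uint64_t)EDX << 32) | EAX;
// while 32-bit targets return the EDX:EAX pair via ISD::BUILD_PAIR.)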
SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, HI.getValue(2)); Results.push_back(TSC); Results.push_back(ecx); Results.push_back(ecx.getValue(1)); return; } Results.push_back(TSC); Results.push_back(Chain); } static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SmallVector<SDValue, 2> Results; SDLoc DL(Op); getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget, Results); return DAG.getMergeValues(Results, DL); } static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); SDValue RegNode = Op.getOperand(2); WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); if (!EHInfo) report_fatal_error("EH registrations only live in functions using WinEH"); // Cast the operand to an alloca, and remember the frame index. auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode); if (!FINode) report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca"); EHInfo->EHRegNodeFrameIndex = FINode->getIndex(); // Return the chain operand without making any DAG nodes. return Chain; } static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); SDValue EHGuard = Op.getOperand(2); WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); if (!EHInfo) report_fatal_error("EHGuard only lives in functions using WinEH"); // Cast the operand to an alloca, and remember the frame index. auto *FINode = dyn_cast<FrameIndexSDNode>(EHGuard); if (!FINode) report_fatal_error("llvm.x86.seh.ehguard expects a static alloca"); EHInfo->EHGuardFrameIndex = FINode->getIndex(); // Return the chain operand without making any DAG nodes. return Chain; } /// Emit Truncating Store with signed or unsigned saturation. static SDValue EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO, SelectionDAG &DAG) { SDVTList VTs = DAG.getVTList(MVT::Other); SDValue Undef = DAG.getUNDEF(Ptr.getValueType()); SDValue Ops[] = { Chain, Val, Ptr, Undef }; return SignedSat ? DAG.getTargetMemSDNode<TruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) : DAG.getTargetMemSDNode<TruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO); } /// Emit Masked Truncating Store with signed or unsigned saturation. static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, SelectionDAG &DAG) { SDVTList VTs = DAG.getVTList(MVT::Other); SDValue Ops[] = { Chain, Val, Ptr, Mask }; return SignedSat ? DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) : DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO); } static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { switch (IntNo) { case llvm::Intrinsic::x86_seh_ehregnode: return MarkEHRegistrationNode(Op, DAG); case llvm::Intrinsic::x86_seh_ehguard: return MarkEHGuard(Op, DAG); case llvm::Intrinsic::x86_flags_read_u32: case llvm::Intrinsic::x86_flags_read_u64: case llvm::Intrinsic::x86_flags_write_u32: case llvm::Intrinsic::x86_flags_write_u64: { // We need a frame pointer because this will get lowered to a PUSH/POP // sequence.
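// A scalar model (illustrative) of the saturation performed by the
// EmitTruncSStore/EmitMaskedTruncSStore helpers above when storing an
// i16 value V (held in an int) as an i8:
//   int8_t  S = (int8_t)std::min(std::max(V, -128), 127); // X86ISD::VTRUNCS
//   uint8_t U = (uint8_t)std::min(std::max(V, 0), 255);   // X86ISD::VTRUNCUS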
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setHasCopyImplyingStackAdjustment(true); // Don't do anything here, we will expand these intrinsics out later // during ExpandISelPseudos in EmitInstrWithCustomInserter. return SDValue(); } case Intrinsic::x86_lwpins32: case Intrinsic::x86_lwpins64: case Intrinsic::x86_umwait: case Intrinsic::x86_tpause: { SDLoc dl(Op); SDValue Chain = Op->getOperand(0); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); unsigned Opcode; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); case Intrinsic::x86_umwait: Opcode = X86ISD::UMWAIT; break; case Intrinsic::x86_tpause: Opcode = X86ISD::TPAUSE; break; case Intrinsic::x86_lwpins32: case Intrinsic::x86_lwpins64: Opcode = X86ISD::LWPINS; break; } SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2), Op->getOperand(3), Op->getOperand(4)); SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG); SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC); return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, Operation.getValue(1)); } } return SDValue(); } SDLoc dl(Op); switch(IntrData->Type) { default: llvm_unreachable("Unknown Intrinsic Type"); case RDSEED: case RDRAND: { // Emit the node with the right value type. SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32, MVT::Other); SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0)); // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. // Otherwise return the value from Rand, which is always 0, cast to i32. SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), DAG.getConstant(1, dl, Op->getValueType(1)), DAG.getConstant(X86::COND_B, dl, MVT::i8), SDValue(Result.getNode(), 1) }; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops); // Return { result, isValid, chain }. return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, SDValue(Result.getNode(), 2)); } case GATHER_AVX2: { SDValue Chain = Op.getOperand(0); SDValue Src = Op.getOperand(2); SDValue Base = Op.getOperand(3); SDValue Index = Op.getOperand(4); SDValue Mask = Op.getOperand(5); SDValue Scale = Op.getOperand(6); return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, Subtarget); } case GATHER: { //gather(v1, mask, index, base, scale); SDValue Chain = Op.getOperand(0); SDValue Src = Op.getOperand(2); SDValue Base = Op.getOperand(3); SDValue Index = Op.getOperand(4); SDValue Mask = Op.getOperand(5); SDValue Scale = Op.getOperand(6); return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, Subtarget); } case SCATTER: { //scatter(base, mask, index, v1, scale); SDValue Chain = Op.getOperand(0); SDValue Base = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue Index = Op.getOperand(4); SDValue Src = Op.getOperand(5); SDValue Scale = Op.getOperand(6); return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, Subtarget); } case PREFETCH: { SDValue Hint = Op.getOperand(6); unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue(); assert((HintVal == 2 || HintVal == 3) && "Wrong prefetch hint in intrinsic: should be 2 or 3"); unsigned Opcode = (HintVal == 2 ?
IntrData->Opc1 : IntrData->Opc0); SDValue Chain = Op.getOperand(0); SDValue Mask = Op.getOperand(2); SDValue Index = Op.getOperand(3); SDValue Base = Op.getOperand(4); SDValue Scale = Op.getOperand(5); return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain, Subtarget); } // Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP). case RDTSC: { SmallVector<SDValue, 2> Results; getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget, Results); return DAG.getMergeValues(Results, dl); } // Read Performance Monitoring Counters. case RDPMC: { SmallVector<SDValue, 2> Results; getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results); return DAG.getMergeValues(Results, dl); } // Get Extended Control Register. case XGETBV: { SmallVector<SDValue, 2> Results; getExtendedControlRegister(Op.getNode(), dl, DAG, Subtarget, Results); return DAG.getMergeValues(Results, dl); } // XTEST intrinsics. case XTEST: { SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0)); SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG); SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC); return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Ret, SDValue(InTrans.getNode(), 1)); } case TRUNCATE_TO_MEM_VI8: case TRUNCATE_TO_MEM_VI16: case TRUNCATE_TO_MEM_VI32: { SDValue Mask = Op.getOperand(4); SDValue DataToTruncate = Op.getOperand(3); SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op); assert(MemIntr && "Expected MemIntrinsicSDNode!"); EVT MemVT = MemIntr->getMemoryVT(); uint16_t TruncationOp = IntrData->Opc0; switch (TruncationOp) { case X86ISD::VTRUNC: { if (isAllOnesConstant(Mask)) // return just a truncate store return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT, MemIntr->getMemOperand()); MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT, MemIntr->getMemOperand(), true /* truncating */); } case X86ISD::VTRUNCUS: case X86ISD::VTRUNCS: { bool IsSigned = (TruncationOp == X86ISD::VTRUNCS); if (isAllOnesConstant(Mask)) return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT, MemIntr->getMemOperand(), DAG); MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, VMask, MemVT, MemIntr->getMemOperand(), DAG); } default: llvm_unreachable("Unsupported truncstore intrinsic"); } } } } SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo()); } // Just load the return address.
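// (Illustrative model, not from the original file: for a hypothetical
// __builtin_return_address(D) with D > 0, the code above walks the saved
// frame-pointer chain via LowerFRAMEADDR and then loads one slot above it:
//   void *RA = *(void **)((char *)FrameAddr + SlotSize);
// since the return address sits directly above the saved frame pointer.)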
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, MachinePointerInfo()); } SDValue X86TargetLowering::LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const { DAG.getMachineFunction().getFrameInfo().setReturnAddressIsTaken(true); return getReturnAddressFrameIndex(DAG); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); EVT VT = Op.getValueType(); MFI.setFrameAddressIsTaken(true); if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { // Depth > 0 makes no sense on targets which use Windows unwind codes. It // is not possible to crawl up the stack without looking at the unwind codes // simultaneously. int FrameAddrIndex = FuncInfo->getFAIndex(); if (!FrameAddrIndex) { // Set up a frame object for the return address. unsigned SlotSize = RegInfo->getSlotSize(); FrameAddrIndex = MF.getFrameInfo().CreateFixedObject( SlotSize, /*Offset=*/0, /*IsImmutable=*/false); FuncInfo->setFAIndex(FrameAddrIndex); } return DAG.getFrameIndex(FrameAddrIndex, VT); } unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); assert(((FrameReg == X86::RBP && VT == MVT::i64) || (FrameReg == X86::EBP && VT == MVT::i32)) && "Invalid Frame Register!"); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); return FrameAddr; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); const MachineFunction &MF = DAG.getMachineFunction(); unsigned Reg = StringSwitch<unsigned>(RegName) .Case("esp", X86::ESP) .Case("rsp", X86::RSP) .Case("ebp", X86::EBP) .Case("rbp", X86::RBP) .Default(0); if (Reg == X86::EBP || Reg == X86::RBP) { if (!TFI.hasFP(MF)) report_fatal_error("register " + StringRef(RegName) + " is allocatable: function has no frame pointer"); #ifndef NDEBUG else { const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); assert((FrameReg == X86::EBP || FrameReg == X86::RBP) && "Invalid Frame Register!"); } #endif } if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); } unsigned X86TargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR) return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX; return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX; } unsigned X86TargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { // Funclet personalities don't use selectors (the runtime does the selection).
assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))); return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX; } bool X86TargetLowering::needsFixedCatchObjects() const { return Subtarget.isTargetWin64(); } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); SDLoc dl (Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || (FrameReg == X86::EBP && PtrVT == MVT::i32)) && "Invalid Frame Register!"); SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX; SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, DAG.getIntPtrConstant(RegInfo->getSlotSize(), dl)); StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo()); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain, DAG.getRegister(StoreAddrReg, PtrVT)); } SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); // If the subtarget is not 64bit, we may need the global base reg // after isel expand pseudo, i.e., after CGBR pass ran. // Therefore, ask for the GlobalBaseReg now, so that the pass // inserts the code for us in case we need it. // Otherwise, we will end up in a situation where we will // reference a virtual register that is not defined! if (!Subtarget.is64Bit()) { const X86InstrInfo *TII = Subtarget.getInstrInfo(); (void)TII->getGlobalBaseReg(&DAG.getMachineFunction()); } return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL, DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1)); } SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), Op.getOperand(1)); } SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other, Op.getOperand(0)); } static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) { return Op.getOperand(0); } SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Root = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl (Op); const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); if (Subtarget.is64Bit()) { SDValue OutChains[6]; // Large code-model. const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7; const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7; const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix // Load the pointer to the nested function into R11.
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), Addr, MachinePointerInfo(TrmpAddr)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, dl, MVT::i64)); OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 2), /* Alignment = */ 2); // Load the 'nest' parameter value into R10. // R10 is specified in X86CallingConv.td OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, dl, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), Addr, MachinePointerInfo(TrmpAddr, 10)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, dl, MVT::i64)); OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 12), /* Alignment = */ 2); // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, dl, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), Addr, MachinePointerInfo(TrmpAddr, 20)); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, dl, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, dl, MVT::i8), Addr, MachinePointerInfo(TrmpAddr, 22)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } else { const Function *Func = cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); CallingConv::ID CC = Func->getCallingConv(); unsigned NestReg; switch (CC) { default: llvm_unreachable("Unsupported calling convention"); case CallingConv::C: case CallingConv::X86_StdCall: { // Pass 'nest' parameter in ECX. // Must be kept in sync with X86CallingConv.td NestReg = X86::ECX; // Check that ECX wasn't needed by an 'inreg' parameter. FunctionType *FTy = Func->getFunctionType(); const AttributeList &Attrs = Func->getAttributes(); if (!Attrs.isEmpty() && !Func->isVarArg()) { unsigned InRegCount = 0; unsigned Idx = 1; for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I, ++Idx) if (Attrs.hasAttribute(Idx, Attribute::InReg)) { auto &DL = DAG.getDataLayout(); // FIXME: should only count parameters that are lowered to integers. InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32; } if (InRegCount > 2) { report_fatal_error("Nest register in use - reduce number of inreg" " parameters!"); } } break; } case CallingConv::X86_FastCall: case CallingConv::X86_ThisCall: case CallingConv::Fast: // Pass 'nest' parameter in EAX. // Must be kept in sync with X86CallingConv.td NestReg = X86::EAX; break; } SDValue OutChains[4]; SDValue Addr, Disp; Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(10, dl, MVT::i32)); Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr); // This is storing the opcode for MOV32ri. const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte. const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri | N86Reg, dl, MVT::i8), Trmp, MachinePointerInfo(TrmpAddr)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, dl, MVT::i32)); OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 1), /* Alignment = */ 1); const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
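// Resulting 10-byte x86-32 trampoline (illustrative layout of the stores
// below):
//   offset 0:   0xB8+reg  mov <NestReg>, imm32   ; imm32 = Nest (bytes 1..4)
//   offset 5:   0xE9      jmp rel32              ; rel32 = FPtr - (Trmp + 10)
//                                                ;         (bytes 6..9)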
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, dl, MVT::i32)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8), Addr, MachinePointerInfo(TrmpAddr, 5), /* Alignment = */ 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, dl, MVT::i32)); OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, MachinePointerInfo(TrmpAddr, 6), /* Alignment = */ 1); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } } SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const {
/*
  The rounding mode is in bits 11:10 of FPSR, and has the following settings:
    00 Round to nearest
    01 Round to -inf
    10 Round to +inf
    11 Round to 0

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
*/
MachineFunction &MF = DAG.getMachineFunction(); const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); unsigned StackAlignment = TFI.getStackAlignment(); MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); // Save FP Control Word to stack slot int SSFI = MF.getFrameInfo().CreateStackObject(2, StackAlignment, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout())); MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), MachineMemOperand::MOStore, 2, 2); SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO); // Load FP Control Word from stack slot SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo()); // Transform as necessary SDValue CWD1 = DAG.getNode(ISD::SRL, DL, MVT::i16, DAG.getNode(ISD::AND, DL, MVT::i16, CWD, DAG.getConstant(0x800, DL, MVT::i16)), DAG.getConstant(11, DL, MVT::i8)); SDValue CWD2 = DAG.getNode(ISD::SRL, DL, MVT::i16, DAG.getNode(ISD::AND, DL, MVT::i16, CWD, DAG.getConstant(0x400, DL, MVT::i16)), DAG.getConstant(9, DL, MVT::i8)); SDValue RetVal = DAG.getNode(ISD::AND, DL, MVT::i16, DAG.getNode(ISD::ADD, DL, MVT::i16, DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2), DAG.getConstant(1, DL, MVT::i16)), DAG.getConstant(3, DL, MVT::i16)); return DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); } // Split a unary integer op into 2 half-sized ops. static SDValue LowerVectorIntUnary(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); unsigned NumElems = VT.getVectorNumElements(); unsigned SizeInBits = VT.getSizeInBits(); MVT EltVT = VT.getVectorElementType(); SDValue Src = Op.getOperand(0); assert(EltVT == Src.getSimpleValueType().getVectorElementType() && "Src and Op should have the same element type!"); // Extract the Lo/Hi vectors SDLoc dl(Op); SDValue Lo = extractSubVector(Src, 0, DAG, dl, SizeInBits / 2); SDValue Hi = extractSubVector(Src, NumElems / 2, DAG, dl, SizeInBits / 2); MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, Lo), DAG.getNode(Op.getOpcode(), dl, NewVT, Hi)); }
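// Worked check of the conversion in LowerFLT_ROUNDS_ above (illustrative):
// for FPSR RC bits 11:10 == 01 (round to -inf), FPSR & 0x800 == 0 and
// FPSR & 0x400 == 0x400, so ((0 >> 11) | (0x400 >> 9)) == 2 and
// (2 + 1) & 3 == 3, which is FLT_ROUNDS' encoding of round to -inf. The
// other three modes check out the same way.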
static SDValue Lower256IntUnary(SDValue Op, SelectionDAG &DAG) {
  assert(Op.getSimpleValueType().is256BitVector() &&
         Op.getSimpleValueType().isInteger() &&
         "Only handle AVX 256-bit vector integer operation");
  return LowerVectorIntUnary(Op, DAG);
}

// Decompose 512-bit ops into smaller 256-bit ops.
static SDValue Lower512IntUnary(SDValue Op, SelectionDAG &DAG) {
  assert(Op.getSimpleValueType().is512BitVector() &&
         Op.getSimpleValueType().isInteger() &&
         "Only handle AVX 512-bit vector integer operation");
  return LowerVectorIntUnary(Op, DAG);
}

/// Lower a vector CTLZ using native supported vector CTLZ instruction.
//
// i8/i16 vector implemented using dword LZCNT vector instruction
// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
// split the vector, perform the operation on its Lo and Hi parts and
// concatenate the results.
static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  assert(Op.getOpcode() == ISD::CTLZ);
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();

  assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
         "Unsupported element type");

  // Split the vector; its Lo and Hi parts will be handled in the next
  // iteration.
  if (NumElems > 16 ||
      (NumElems == 16 && !Subtarget.canExtendTo512DQ()))
    return LowerVectorIntUnary(Op, DAG);

  MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
  assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
         "Unsupported value type for operation");

  // Use native supported vector instruction vplzcntd.
  Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0));
  SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op);
  SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode);
  SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT);

  return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);
}

// Lower CTLZ using a PSHUFB lookup table implementation.
static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  int NumElts = VT.getVectorNumElements();
  int NumBytes = NumElts * (VT.getScalarSizeInBits() / 8);
  MVT CurrVT = MVT::getVectorVT(MVT::i8, NumBytes);

  // Per-nibble leading zero PSHUFB lookup table.
  const int LUT[16] = {/* 0 */ 4, /* 1 */ 3, /* 2 */ 2, /* 3 */ 2,
                       /* 4 */ 1, /* 5 */ 1, /* 6 */ 1, /* 7 */ 1,
                       /* 8 */ 0, /* 9 */ 0, /* a */ 0, /* b */ 0,
                       /* c */ 0, /* d */ 0, /* e */ 0, /* f */ 0};

  SmallVector<SDValue, 64> LUTVec;
  for (int i = 0; i < NumBytes; ++i)
    LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
  SDValue InRegLUT = DAG.getBuildVector(CurrVT, DL, LUTVec);

  // Begin by bitcasting the input to byte vector, then split those bytes
  // into lo/hi nibbles and use the PSHUFB LUT to perform CTLZ on each of them.
  // If the hi input nibble is zero then we add both results together, otherwise
  // we just take the hi result (by masking the lo result to zero before the
  // add).
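  // Worked example for a single byte, e.g. 0x1A:
  //   lo nibble = 0xA -> LUT[0xA] = 0
  //   hi nibble = 0x1 -> LUT[0x1] = 3
  //   hi nibble is non-zero, so the lo result is masked to 0: 3 + 0 = 3,
  //   and indeed ctlz(i8 0x1A) == 3. For 0x0A the hi nibble is zero, so
  //   4 (from LUT[0]) + 0 (from LUT[0xA]) == 4 == ctlz(i8 0x0A).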
SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0)); SDValue Zero = DAG.getConstant(0, DL, CurrVT); SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT); SDValue Lo = Op0; SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift); SDValue HiZ; if (CurrVT.is512BitVector()) { MVT MaskVT = MVT::getVectorVT(MVT::i1, CurrVT.getVectorNumElements()); HiZ = DAG.getSetCC(DL, MaskVT, Hi, Zero, ISD::SETEQ); HiZ = DAG.getNode(ISD::SIGN_EXTEND, DL, CurrVT, HiZ); } else { HiZ = DAG.getSetCC(DL, CurrVT, Hi, Zero, ISD::SETEQ); } Lo = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Lo); Hi = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Hi); Lo = DAG.getNode(ISD::AND, DL, CurrVT, Lo, HiZ); SDValue Res = DAG.getNode(ISD::ADD, DL, CurrVT, Lo, Hi); // Merge result back from vXi8 back to VT, working on the lo/hi halves // of the current vector width in the same way we did for the nibbles. // If the upper half of the input element is zero then add the halves' // leading zero counts together, otherwise just use the upper half's. // Double the width of the result until we are at target width. while (CurrVT != VT) { int CurrScalarSizeInBits = CurrVT.getScalarSizeInBits(); int CurrNumElts = CurrVT.getVectorNumElements(); MVT NextSVT = MVT::getIntegerVT(CurrScalarSizeInBits * 2); MVT NextVT = MVT::getVectorVT(NextSVT, CurrNumElts / 2); SDValue Shift = DAG.getConstant(CurrScalarSizeInBits, DL, NextVT); // Check if the upper half of the input element is zero. if (CurrVT.is512BitVector()) { MVT MaskVT = MVT::getVectorVT(MVT::i1, CurrVT.getVectorNumElements()); HiZ = DAG.getSetCC(DL, MaskVT, DAG.getBitcast(CurrVT, Op0), DAG.getBitcast(CurrVT, Zero), ISD::SETEQ); HiZ = DAG.getNode(ISD::SIGN_EXTEND, DL, CurrVT, HiZ); } else { HiZ = DAG.getSetCC(DL, CurrVT, DAG.getBitcast(CurrVT, Op0), DAG.getBitcast(CurrVT, Zero), ISD::SETEQ); } HiZ = DAG.getBitcast(NextVT, HiZ); // Move the upper/lower halves to the lower bits as we'll be extending to // NextVT. Mask the lower result to zero if HiZ is true and add the results // together. SDValue ResNext = Res = DAG.getBitcast(NextVT, Res); SDValue R0 = DAG.getNode(ISD::SRL, DL, NextVT, ResNext, Shift); SDValue R1 = DAG.getNode(ISD::SRL, DL, NextVT, HiZ, Shift); R1 = DAG.getNode(ISD::AND, DL, NextVT, ResNext, R1); Res = DAG.getNode(ISD::ADD, DL, NextVT, R0, R1); CurrVT = NextVT; } return Res; } static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); if (Subtarget.hasCDI() && // vXi8 vectors need to be promoted to 512-bits for vXi32. (Subtarget.canExtendTo512DQ() || VT.getVectorElementType() != MVT::i8)) return LowerVectorCTLZ_AVX512CDI(Op, DAG, Subtarget); // Decompose 256-bit ops into smaller 128-bit ops. if (VT.is256BitVector() && !Subtarget.hasInt256()) return Lower256IntUnary(Op, DAG); // Decompose 512-bit ops into smaller 256-bit ops. if (VT.is512BitVector() && !Subtarget.hasBWI()) return Lower512IntUnary(Op, DAG); assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB"); return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG); } static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); SDLoc dl(Op); unsigned Opc = Op.getOpcode(); if (VT.isVector()) return LowerVectorCTLZ(Op, dl, Subtarget, DAG); Op = Op.getOperand(0); if (VT == MVT::i8) { // Zero extend to i32 since there is not an i8 bsr. 
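    // E.g. for i8 0x10: after zero-extension BSR sees 0x00000010 and
    // returns 4; the XOR with NumBits-1 (7) below then yields 3, which is
    // ctlz(i8 0x10). When the source is zero, the CMOV substitutes
    // 2*NumBits-1 (15), and 15 ^ 7 == 8 == NumBits, as expected for CTLZ.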
OpVT = MVT::i32; Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op); } // Issue a bsr (scan bits in reverse) which also sets EFLAGS. SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op); if (Opc == ISD::CTLZ) { // If src is zero (i.e. bsr sets ZF), returns NumBits. SDValue Ops[] = { Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), DAG.getConstant(X86::COND_E, dl, MVT::i8), Op.getValue(1) }; Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops); } // Finally xor with NumBits-1. Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits - 1, dl, OpVT)); if (VT == MVT::i8) Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op); return Op; } static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); unsigned NumBits = VT.getScalarSizeInBits(); SDValue N0 = Op.getOperand(0); SDLoc dl(Op); // Decompose 256-bit ops into smaller 128-bit ops. if (VT.is256BitVector() && !Subtarget.hasInt256()) return Lower256IntUnary(Op, DAG); assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ && "Only scalar CTTZ requires custom lowering"); // Issue a bsf (scan bits forward) which also sets EFLAGS. SDVTList VTs = DAG.getVTList(VT, MVT::i32); Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0); // If src is zero (i.e. bsf sets ZF), returns NumBits. SDValue Ops[] = { Op, DAG.getConstant(NumBits, dl, VT), DAG.getConstant(X86::COND_E, dl, MVT::i8), Op.getValue(1) }; return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); } /// Break a 256-bit integer operation into two new 128-bit ones and then /// concatenate the result back. static SDValue split256IntArith(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); assert(VT.is256BitVector() && VT.isInteger() && "Unsupported value type for operation"); unsigned NumElems = VT.getVectorNumElements(); SDLoc dl(Op); // Extract the LHS vectors SDValue LHS = Op.getOperand(0); SDValue LHS1 = extract128BitVector(LHS, 0, DAG, dl); SDValue LHS2 = extract128BitVector(LHS, NumElems / 2, DAG, dl); // Extract the RHS vectors SDValue RHS = Op.getOperand(1); SDValue RHS1 = extract128BitVector(RHS, 0, DAG, dl); SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl); MVT EltVT = VT.getVectorElementType(); MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1), DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2)); } /// Break a 512-bit integer operation into two new 256-bit ones and then /// concatenate the result back. 
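/// E.g. a v16i32 add on a target without 512-bit units becomes:
///   add (v16i32 A), (v16i32 B)
///     -> concat_vectors (add (v8i32 lo(A)), (v8i32 lo(B))),
///                       (add (v8i32 hi(A)), (v8i32 hi(B)))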
static SDValue split512IntArith(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); assert(VT.is512BitVector() && VT.isInteger() && "Unsupported value type for operation"); unsigned NumElems = VT.getVectorNumElements(); SDLoc dl(Op); // Extract the LHS vectors SDValue LHS = Op.getOperand(0); SDValue LHS1 = extract256BitVector(LHS, 0, DAG, dl); SDValue LHS2 = extract256BitVector(LHS, NumElems / 2, DAG, dl); // Extract the RHS vectors SDValue RHS = Op.getOperand(1); SDValue RHS1 = extract256BitVector(RHS, 0, DAG, dl); SDValue RHS2 = extract256BitVector(RHS, NumElems / 2, DAG, dl); MVT EltVT = VT.getVectorElementType(); MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1), DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2)); } static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); if (VT == MVT::i16 || VT == MVT::i32) return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); if (VT.getScalarType() == MVT::i1) return DAG.getNode(ISD::XOR, SDLoc(Op), VT, Op.getOperand(0), Op.getOperand(1)); assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return split256IntArith(Op, DAG); } static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); if (VT.getScalarType() == MVT::i1) { SDLoc dl(Op); switch (Op.getOpcode()) { default: llvm_unreachable("Expected saturated arithmetic opcode"); case ISD::UADDSAT: case ISD::SADDSAT: return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1)); case ISD::USUBSAT: case ISD::SSUBSAT: return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), DAG.getNOT(dl, Op.getOperand(1), VT)); } } assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return split256IntArith(Op, DAG); } static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) { // Since X86 does not have CMOV for 8-bit integer, we don't convert // 8-bit integer abs to NEG and CMOV. SDLoc DL(Op); SDValue N0 = Op.getOperand(0); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), DAG.getConstant(0, DL, VT), N0); SDValue Ops[] = {N0, Neg, DAG.getConstant(X86::COND_GE, DL, MVT::i8), SDValue(Neg.getNode(), 1)}; return DAG.getNode(X86ISD::CMOV, DL, VT, Ops); } // ABS(vXi64 X) --> VPBLENDVPD(X, 0-X, X). if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) { SDLoc DL(Op); SDValue Src = Op.getOperand(0); SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src); return DAG.getNode(X86ISD::BLENDV, DL, VT, Src, Sub, Src); } if (VT.is256BitVector() && !Subtarget.hasInt256()) { assert(VT.isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntUnary(Op, DAG); } // Default to expand. return SDValue(); } static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); // For AVX1 cases, split to use legal ops (everything but v4i64). 
  if (VT.getScalarType() != MVT::i64 && VT.is256BitVector())
    return split256IntArith(Op, DAG);

  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();
  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);

  // For pre-SSE41, we can perform UMIN/UMAX v8i16 by flipping the signbit,
  // using the SMIN/SMAX instructions and flipping the signbit back.
  if (VT == MVT::v8i16) {
    assert((Opcode == ISD::UMIN || Opcode == ISD::UMAX) &&
           "Unexpected MIN/MAX opcode");
    SDValue Sign = DAG.getConstant(APInt::getSignedMinValue(16), DL, VT);
    N0 = DAG.getNode(ISD::XOR, DL, VT, N0, Sign);
    N1 = DAG.getNode(ISD::XOR, DL, VT, N1, Sign);
    Opcode = (Opcode == ISD::UMIN ? ISD::SMIN : ISD::SMAX);
    SDValue Result = DAG.getNode(Opcode, DL, VT, N0, N1);
    return DAG.getNode(ISD::XOR, DL, VT, Result, Sign);
  }

  // Else, expand to a compare/select.
  ISD::CondCode CC;
  switch (Opcode) {
  case ISD::SMIN: CC = ISD::CondCode::SETLT;  break;
  case ISD::SMAX: CC = ISD::CondCode::SETGT;  break;
  case ISD::UMIN: CC = ISD::CondCode::SETULT; break;
  case ISD::UMAX: CC = ISD::CondCode::SETUGT; break;
  default: llvm_unreachable("Unknown MINMAX opcode");
  }

  SDValue Cond = DAG.getSetCC(DL, VT, N0, N1, CC);
  return DAG.getSelect(DL, VT, Cond, N0, N1);
}

static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
                        SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();

  if (VT.getScalarType() == MVT::i1)
    return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));

  // Decompose 256-bit ops into 128-bit ops.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return split256IntArith(Op, DAG);

  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);

  // Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16
  // vector pairs, multiply and truncate.
  if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
    unsigned NumElts = VT.getVectorNumElements();

    if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
        (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
      MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
      return DAG.getNode(
          ISD::TRUNCATE, dl, VT,
          DAG.getNode(ISD::MUL, dl, ExVT,
                      DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, A),
                      DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, B)));
    }

    MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);

    // Extract the lo/hi parts to any extend to i16.
    // We're going to mask off the low byte of each result element of the
    // pmullw, so it doesn't matter what's in the high byte of each 16-bit
    // element.
    SDValue Undef = DAG.getUNDEF(VT);
    SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));
    SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));

    SDValue BLo, BHi;
    if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
      // If the LHS is a constant, manually unpackl/unpackh.
      SmallVector<SDValue, 32> LoOps, HiOps;
      for (unsigned i = 0; i != NumElts; i += 16) {
        for (unsigned j = 0; j != 8; ++j) {
          LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl,
                                               MVT::i16));
          HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl,
                                               MVT::i16));
        }
      }

      BLo = DAG.getBuildVector(ExVT, dl, LoOps);
      BHi = DAG.getBuildVector(ExVT, dl, HiOps);
    } else {
      BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
      BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
    }

    // Multiply, mask the lower 8bits of the lo/hi results and pack.
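    // E.g. for one pair of bytes: 0x85 * 0x03 in a 16-bit lane gives
    // 0x018F; masking with 255 keeps 0x8F, which is the correct wrapped
    // i8 product (133 * 3 == 399 == 0x18F, truncated to 0x8F). PACKUS then
    // narrows the masked 16-bit lanes back to bytes losslessly, since every
    // lane is already in the 0-255 range.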
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo); SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi); RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT)); RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT)); return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); } // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. if (VT == MVT::v4i32) { assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() && "Should not custom lower when pmulld is available!"); // Extract the odd parts. static const int UnpackMask[] = { 1, -1, 3, -1 }; SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask); SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask); // Multiply the even parts. SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, DAG.getBitcast(MVT::v2i64, A), DAG.getBitcast(MVT::v2i64, B)); // Now multiply odd parts. SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, DAG.getBitcast(MVT::v2i64, Aodds), DAG.getBitcast(MVT::v2i64, Bodds)); Evens = DAG.getBitcast(VT, Evens); Odds = DAG.getBitcast(VT, Odds); // Merge the two vectors back together with a shuffle. This expands into 2 // shuffles. static const int ShufMask[] = { 0, 4, 2, 6 }; return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask); } assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) && "Only know how to lower V2I64/V4I64/V8I64 multiply"); assert(!Subtarget.hasDQI() && "DQI should use MULLQ"); // Ahi = psrlqi(a, 32); // Bhi = psrlqi(b, 32); // // AloBlo = pmuludq(a, b); // AloBhi = pmuludq(a, Bhi); // AhiBlo = pmuludq(Ahi, b); // // Hi = psllqi(AloBhi + AhiBlo, 32); // return AloBlo + Hi; KnownBits AKnown = DAG.computeKnownBits(A); KnownBits BKnown = DAG.computeKnownBits(B); APInt LowerBitsMask = APInt::getLowBitsSet(64, 32); bool ALoIsZero = LowerBitsMask.isSubsetOf(AKnown.Zero); bool BLoIsZero = LowerBitsMask.isSubsetOf(BKnown.Zero); APInt UpperBitsMask = APInt::getHighBitsSet(64, 32); bool AHiIsZero = UpperBitsMask.isSubsetOf(AKnown.Zero); bool BHiIsZero = UpperBitsMask.isSubsetOf(BKnown.Zero); SDValue Zero = DAG.getConstant(0, dl, VT); // Only multiply lo/hi halves that aren't known to be zero. SDValue AloBlo = Zero; if (!ALoIsZero && !BLoIsZero) AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B); SDValue AloBhi = Zero; if (!ALoIsZero && !BHiIsZero) { SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG); AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi); } SDValue AhiBlo = Zero; if (!AHiIsZero && !BLoIsZero) { SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG); AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B); } SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo); Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG); return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi); } static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); bool IsSigned = Op->getOpcode() == ISD::MULHS; unsigned NumElts = VT.getVectorNumElements(); SDValue A = Op.getOperand(0); SDValue B = Op.getOperand(1); // Decompose 256-bit ops into 128-bit ops. 
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return split256IntArith(Op, DAG);

  if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
    assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
           (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
           (VT == MVT::v16i32 && Subtarget.hasAVX512()));

    // PMULxD operations multiply each even value (starting at 0) of LHS with
    // the related value of RHS and produce a widened result.
    // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
    // => <2 x i64> <ae|cg>
    //
    // In other words, to have all the results, we need to perform two PMULxD:
    // 1. one with the even values.
    // 2. one with the odd values.
    // To achieve #2, we need to place the odd values at an even position.
    //
    // Place the odd value at an even position (basically, shift all values 1
    // step to the left):
    const int Mask[] = {1, -1,  3, -1,  5, -1,  7, -1,
                        9, -1, 11, -1, 13, -1, 15, -1};
    // <a|b|c|d> => <b|undef|d|undef>
    SDValue Odd0 = DAG.getVectorShuffle(VT, dl, A, A,
                                        makeArrayRef(&Mask[0], NumElts));
    // <e|f|g|h> => <f|undef|h|undef>
    SDValue Odd1 = DAG.getVectorShuffle(VT, dl, B, B,
                                        makeArrayRef(&Mask[0], NumElts));

    // Emit two multiplies, one for the lower 2 ints and one for the higher 2
    // ints.
    MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
    unsigned Opcode =
        (IsSigned && Subtarget.hasSSE41()) ? X86ISD::PMULDQ : X86ISD::PMULUDQ;

    // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
    // => <2 x i64> <ae|cg>
    SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
                                                  DAG.getBitcast(MulVT, A),
                                                  DAG.getBitcast(MulVT, B)));
    // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
    // => <2 x i64> <bf|dh>
    SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
                                                  DAG.getBitcast(MulVT, Odd0),
                                                  DAG.getBitcast(MulVT, Odd1)));

    // Shuffle it back into the right order.
    SmallVector<int, 16> ShufMask(NumElts);
    for (int i = 0; i != (int)NumElts; ++i)
      ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;

    SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);

    // If we have a signed multiply but no PMULDQ fix up the result of an
    // unsigned multiply.
    if (IsSigned && !Subtarget.hasSSE41()) {
      SDValue Zero = DAG.getConstant(0, dl, VT);
      SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
                               DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B);
      SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
                               DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A);

      SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
      Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);
    }

    return Res;
  }

  // Only i8 vectors should need custom lowering after this.
  assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
          (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
         "Unsupported vector type");

  // Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
  // logical shift down the upper half and pack back to i8.
  // With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack
  // and then ashr/lshr the upper bits down to the lower bits before multiply.
  unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

  if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
      (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
    MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
    SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
    SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);
    Mul = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
  }

  // For signed 512-bit vectors, split into 256-bit vectors to allow the
  // sign-extension to occur.
  if (VT == MVT::v64i8 && IsSigned)
    return split512IntArith(Op, DAG);

  // Signed AVX2 implementation - extend xmm subvectors to ymm.
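  // E.g. for MULHS of two i8 lanes holding -3 (0xFD) and 5: sign-extension
  // gives 0xFFFD * 0x0005 = 0xFFF1 (-15) in the i16 lane; the logical shift
  // right by 8 leaves 0xFF, which is exactly the high byte of the 16-bit
  // product that MULHS must return.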
if (VT == MVT::v32i8 && IsSigned) { SDValue Lo = DAG.getIntPtrConstant(0, dl); SDValue Hi = DAG.getIntPtrConstant(NumElts / 2, dl); MVT ExVT = MVT::v16i16; SDValue ALo = extract128BitVector(A, 0, DAG, dl); SDValue BLo = extract128BitVector(B, 0, DAG, dl); SDValue AHi = extract128BitVector(A, NumElts / 2, DAG, dl); SDValue BHi = extract128BitVector(B, NumElts / 2, DAG, dl); ALo = DAG.getNode(ExAVX, dl, ExVT, ALo); BLo = DAG.getNode(ExAVX, dl, ExVT, BLo); AHi = DAG.getNode(ExAVX, dl, ExVT, AHi); BHi = DAG.getNode(ExAVX, dl, ExVT, BHi); Lo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo); Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi); Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Lo, 8, DAG); Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Hi, 8, DAG); // Bitcast back to VT and then pack all the even elements from Lo and Hi. // Shuffle lowering should turn this into PACKUS+PERMQ Lo = DAG.getBitcast(VT, Lo); Hi = DAG.getBitcast(VT, Hi); return DAG.getVectorShuffle(VT, dl, Lo, Hi, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62}); } // For signed v16i8 and all unsigned vXi8 we will unpack the low and high // half of each 128 bit lane to widen to a vXi16 type. Do the multiplies, // shift the results and pack the half lane results back together. MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2); static const int PSHUFDMask[] = { 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}; // Extract the lo parts and zero/sign extend to i16. // Only use SSE4.1 instructions for signed v16i8 where using unpack requires // shifts to sign extend. Using unpack for unsigned only requires an xor to // create zeros and a copy due to tied registers contraints pre-avx. But using // zero_extend_vector_inreg would require an additional pshufd for the high // part. SDValue ALo, AHi; if (IsSigned && VT == MVT::v16i8 && Subtarget.hasSSE41()) { ALo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, A); AHi = DAG.getVectorShuffle(VT, dl, A, A, PSHUFDMask); AHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, AHi); } else if (IsSigned) { ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), A)); AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), A)); ALo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, ALo, 8, DAG); AHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, AHi, 8, DAG); } else { ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, DAG.getConstant(0, dl, VT))); AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, DAG.getConstant(0, dl, VT))); } SDValue BLo, BHi; if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) { // If the LHS is a constant, manually unpackl/unpackh and extend. 
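    // For reference, on v16i8 getUnpackl interleaves bytes 0..7 of the two
    // inputs as <a0,b0,a1,b1,...,a7,b7> and getUnpackh does the same for
    // bytes 8..15, so the constant path below simply gathers operands 0..7
    // (lo) and 8..15 (hi) of each 128-bit lane and extends them directly.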
    SmallVector<SDValue, 32> LoOps, HiOps;
    for (unsigned i = 0; i != NumElts; i += 16) {
      for (unsigned j = 0; j != 8; ++j) {
        SDValue LoOp = B.getOperand(i + j);
        SDValue HiOp = B.getOperand(i + j + 8);

        if (IsSigned) {
          LoOp = DAG.getSExtOrTrunc(LoOp, dl, MVT::i16);
          HiOp = DAG.getSExtOrTrunc(HiOp, dl, MVT::i16);
        } else {
          LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16);
          HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16);
        }

        LoOps.push_back(LoOp);
        HiOps.push_back(HiOp);
      }
    }

    BLo = DAG.getBuildVector(ExVT, dl, LoOps);
    BHi = DAG.getBuildVector(ExVT, dl, HiOps);
  } else if (IsSigned && VT == MVT::v16i8 && Subtarget.hasSSE41()) {
    BLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, B);
    BHi = DAG.getVectorShuffle(VT, dl, B, B, PSHUFDMask);
    BHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, BHi);
  } else if (IsSigned) {
    BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), B));
    BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), B));
    BLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BLo, 8, DAG);
    BHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BHi, 8, DAG);
  } else {
    BLo = DAG.getBitcast(ExVT,
                         getUnpackl(DAG, dl, VT, B, DAG.getConstant(0, dl, VT)));
    BHi = DAG.getBitcast(ExVT,
                         getUnpackh(DAG, dl, VT, B, DAG.getConstant(0, dl, VT)));
  }

  // Multiply, lshr the upper 8bits to the lower 8bits of the lo/hi results and
  // pack back to vXi8.
  SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
  SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
  RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RLo, 8, DAG);
  RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG);

  // Bitcast back to VT and then pack all the even elements from Lo and Hi.
  return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}

SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op,
                                             SelectionDAG &DAG) const {
  assert(Subtarget.isTargetWin64() && "Unexpected target");
  EVT VT = Op.getValueType();
  assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
         "Unexpected return type for lowering");

  RTLIB::Libcall LC;
  bool isSigned;
  switch (Op->getOpcode()) {
  default: llvm_unreachable("Unexpected request for libcall!");
  case ISD::SDIV:    isSigned = true;  LC = RTLIB::SDIV_I128;    break;
  case ISD::UDIV:    isSigned = false; LC = RTLIB::UDIV_I128;    break;
  case ISD::SREM:    isSigned = true;  LC = RTLIB::SREM_I128;    break;
  case ISD::UREM:    isSigned = false; LC = RTLIB::UREM_I128;    break;
  case ISD::SDIVREM: isSigned = true;  LC = RTLIB::SDIVREM_I128; break;
  case ISD::UDIVREM: isSigned = false; LC = RTLIB::UDIVREM_I128; break;
  }

  SDLoc dl(Op);
  SDValue InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
    EVT ArgVT = Op->getOperand(i).getValueType();
    assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
           "Unexpected argument type for lowering");
    SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
    Entry.Node = StackPtr;
    InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr,
                           MachinePointerInfo(), /* Alignment = */ 16);
    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
    Entry.Ty = PointerType::get(ArgTy, 0);
    Entry.IsSExt = false;
    Entry.IsZExt = false;
    Args.push_back(Entry);
  }

  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(
          getLibcallCallingConv(LC),
          static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()),
          Callee, std::move(Args))
      .setInRegister()
      .setSExtResult(isSigned)
      .setZExtResult(!isSigned);

  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
  return DAG.getBitcast(VT, CallInfo.first);
}

// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
                                        unsigned Opcode) {
  if (VT.getScalarSizeInBits() < 16)
    return false;

  if (VT.is512BitVector() && Subtarget.hasAVX512() &&
      (VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
    return true;

  bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) ||
                (VT.is256BitVector() && Subtarget.hasInt256());

  bool AShift = LShift && (Subtarget.hasAVX512() ||
                           (VT != MVT::v2i64 && VT != MVT::v4i64));
  return (Opcode == ISD::SRA) ? AShift : LShift;
}

// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
static bool SupportedVectorShiftWithBaseAmnt(MVT VT,
                                             const X86Subtarget &Subtarget,
                                             unsigned Opcode) {
  return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
}

// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
                                    unsigned Opcode) {
  if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16)
    return false;

  // vXi16 supported only on AVX-512, BWI
  if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
    return false;

  if (Subtarget.hasAVX512())
    return true;

  bool LShift = VT.is128BitVector() || VT.is256BitVector();
  bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
  return (Opcode == ISD::SRA) ? AShift : LShift;
}

static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false);

  auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
    assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
    MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
    SDValue Ex = DAG.getBitcast(ExVT, R);

    // ashr(R, 63) === cmp_slt(R, 0)
    if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
      assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
             "Unsupported PCMPGT op");
      return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R);
    }

    if (ShiftAmt >= 32) {
      // Splat sign to upper i32 dst, and SRA upper i32 src to lower i32.
      SDValue Upper =
          getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG);
      SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
                                                 ShiftAmt - 32, DAG);
      if (VT == MVT::v2i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3});
      if (VT == MVT::v4i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
                                  {9, 1, 11, 3, 13, 5, 15, 7});
    } else {
      // SRA upper i32, SRL whole i64 and select lower i32.
      SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
                                                 ShiftAmt, DAG);
      SDValue Lower =
          getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG);
      Lower = DAG.getBitcast(ExVT, Lower);
      if (VT == MVT::v2i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3});
      if (VT == MVT::v4i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
                                  {8, 1, 10, 3, 12, 5, 14, 7});
    }
    return DAG.getBitcast(VT, Ex);
  };

  // Optimize shl/srl/sra with constant shift amount.
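  // E.g. a uniform (shl (v4i32 X), <5,5,5,5>) can be emitted directly as a
  // single X86ISD::VSHLI (pslld $5) once the splat amount has been
  // extracted below; non-uniform or oversized amounts fall through to the
  // more expensive expansions that follow.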
APInt APIntShiftAmt; if (!isConstantSplat(Amt, APIntShiftAmt)) return SDValue(); uint64_t ShiftAmt = APIntShiftAmt.getZExtValue(); if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); // i64 SRA needs to be performed as partial shifts. if (((!Subtarget.hasXOP() && VT == MVT::v2i64) || (Subtarget.hasInt256() && VT == MVT::v4i64)) && Op.getOpcode() == ISD::SRA) return ArithmeticShiftRight64(ShiftAmt); if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) || VT == MVT::v64i8) { unsigned NumElts = VT.getVectorNumElements(); MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); // Simple i8 add case if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) return DAG.getNode(ISD::ADD, dl, VT, R, R); // ashr(R, 7) === cmp_slt(R, 0) if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) { SDValue Zeros = DAG.getConstant(0, dl, VT); if (VT.is512BitVector()) { assert(VT == MVT::v64i8 && "Unexpected element type!"); SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT); return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP); } return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R); } // XOP can shift v16i8 directly instead of as shift v8i16 + mask. if (VT == MVT::v16i8 && Subtarget.hasXOP()) return SDValue(); if (Op.getOpcode() == ISD::SHL) { // Make a large shift. SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R, ShiftAmt, DAG); SHL = DAG.getBitcast(VT, SHL); // Zero out the rightmost bits. return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT)); } if (Op.getOpcode() == ISD::SRL) { // Make a large shift. SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R, ShiftAmt, DAG); SRL = DAG.getBitcast(VT, SRL); // Zero out the leftmost bits. return DAG.getNode(ISD::AND, dl, VT, SRL, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT)); } if (Op.getOpcode() == ISD::SRA) { // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask) SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT); Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); return Res; } llvm_unreachable("Unknown shift opcode."); } return SDValue(); } // If V is a splat value, return the source vector and splat index; static SDValue IsSplatVector(SDValue V, int &SplatIdx, SelectionDAG &DAG) { V = peekThroughEXTRACT_SUBVECTORs(V); EVT VT = V.getValueType(); unsigned Opcode = V.getOpcode(); switch (Opcode) { default: { APInt UndefElts; APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); if (DAG.isSplatValue(V, DemandedElts, UndefElts)) { // Handle case where all demanded elements are UNDEF. if (DemandedElts.isSubsetOf(UndefElts)) { SplatIdx = 0; return DAG.getUNDEF(VT); } SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); return V; } break; } case ISD::VECTOR_SHUFFLE: { // Check if this is a shuffle node doing a splat. // TODO - remove this and rely purely on SelectionDAG::isSplatValue, // getTargetVShiftNode currently struggles without the splat source. 
    auto *SVN = cast<ShuffleVectorSDNode>(V);
    if (!SVN->isSplat())
      break;
    int Idx = SVN->getSplatIndex();
    int NumElts = V.getValueType().getVectorNumElements();
    SplatIdx = Idx % NumElts;
    return V.getOperand(Idx / NumElts);
  }
  }

  return SDValue();
}

static SDValue GetSplatValue(SDValue V, const SDLoc &dl,
                             SelectionDAG &DAG) {
  int SplatIdx;
  if (SDValue SrcVector = IsSplatVector(V, SplatIdx, DAG))
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                       SrcVector.getValueType().getScalarType(), SrcVector,
                       DAG.getIntPtrConstant(SplatIdx, dl));
  return SDValue();
}

static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
  unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);

  if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) {
    if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
      MVT EltVT = VT.getVectorElementType();
      assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
      if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
      else if (EltVT.bitsLT(MVT::i32))
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);

      return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
    }

    // vXi8 shifts - shift as v8i16 + mask result.
    if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
         (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
         VT == MVT::v64i8) &&
        !Subtarget.hasXOP()) {
      unsigned NumElts = VT.getVectorNumElements();
      MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
      if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
        unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
        unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);

        // Create the mask using vXi16 shifts. For shift-rights we need to move
        // the upper byte down before splatting the vXi8 mask.
        SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
        BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
                                      BaseShAmt, Subtarget, DAG);
        if (Opcode != ISD::SHL)
          BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
                                               8, DAG);
        BitMask = DAG.getBitcast(VT, BitMask);
        BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask,
                                       SmallVector<int, 64>(NumElts, 0));

        SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
                                          DAG.getBitcast(ExtVT, R), BaseShAmt,
                                          Subtarget, DAG);
        Res = DAG.getBitcast(VT, Res);
        Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);

        if (Opcode == ISD::SRA) {
          // ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask)
          // SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW.
          SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
          SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask,
                                         BaseShAmt, Subtarget, DAG);
          SignMask = DAG.getBitcast(VT, SignMask);
          Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
          Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
        }
        return Res;
      }
    }
  }

  // Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
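  // E.g. on 32-bit targets a v2i64 shift amount is typically built as
  // (bitcast (build_vector (v4i32 <lo0, hi0, lo1, hi1>))); if each i64 lane
  // repeats the same group of narrower values the amount is really uniform,
  // so the shift can still use the single base-amount form checked below.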
  if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
      Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    Amt = Amt.getOperand(0);
    unsigned Ratio = 64 / Amt.getScalarValueSizeInBits();
    std::vector<SDValue> Vals(Ratio);
    for (unsigned i = 0; i != Ratio; ++i)
      Vals[i] = Amt.getOperand(i);

    for (unsigned i = Ratio, e = Amt.getNumOperands(); i != e; i += Ratio) {
      for (unsigned j = 0; j != Ratio; ++j)
        if (Vals[j] != Amt.getOperand(i + j))
          return SDValue();
    }

    if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
      return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
  }
  return SDValue();
}

// Convert a shift/rotate left amount to a multiplication scale factor.
static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  MVT VT = Amt.getSimpleValueType();
  if (!(VT == MVT::v8i16 || VT == MVT::v4i32 ||
        (Subtarget.hasInt256() && VT == MVT::v16i16) ||
        (!Subtarget.hasAVX512() && VT == MVT::v16i8)))
    return SDValue();

  if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
    SmallVector<SDValue, 8> Elts;
    MVT SVT = VT.getVectorElementType();
    unsigned SVTBits = SVT.getSizeInBits();
    APInt One(SVTBits, 1);
    unsigned NumElems = VT.getVectorNumElements();

    for (unsigned i = 0; i != NumElems; ++i) {
      SDValue Op = Amt->getOperand(i);
      if (Op->isUndef()) {
        Elts.push_back(Op);
        continue;
      }

      ConstantSDNode *ND = cast<ConstantSDNode>(Op);
      APInt C(SVTBits, ND->getAPIntValue().getZExtValue());
      uint64_t ShAmt = C.getZExtValue();
      if (ShAmt >= SVTBits) {
        Elts.push_back(DAG.getUNDEF(SVT));
        continue;
      }
      Elts.push_back(DAG.getConstant(One.shl(ShAmt), dl, SVT));
    }
    return DAG.getBuildVector(VT, dl, Elts);
  }

  // If the target doesn't support variable shifts, use either FP conversion
  // or integer multiplication to avoid shifting each element individually.
  if (VT == MVT::v4i32) {
    Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT));
    Amt = DAG.getNode(ISD::ADD, dl, VT, Amt,
                      DAG.getConstant(0x3f800000U, dl, VT));
    Amt = DAG.getBitcast(MVT::v4f32, Amt);
    return DAG.getNode(ISD::FP_TO_SINT, dl, VT, Amt);
  }

  // AVX2 can more effectively perform this as a zext/trunc to/from v8i32.
  if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) {
    SDValue Z = DAG.getConstant(0, dl, VT);
    SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z));
    SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z));
    Lo = convertShiftLeftToScale(Lo, dl, Subtarget, DAG);
    Hi = convertShiftLeftToScale(Hi, dl, Subtarget, DAG);
    if (Subtarget.hasSSE41())
      return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);

    return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, Lo),
                                DAG.getBitcast(VT, Hi),
                                {0, 2, 4, 6, 8, 10, 12, 14});
  }

  return SDValue();
}

static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());

  unsigned Opc = Op.getOpcode();
  unsigned X86OpcV = getTargetVShiftUniformOpcode(Opc, true);
  unsigned X86OpcI = getTargetVShiftUniformOpcode(Opc, false);

  assert(VT.isVector() && "Custom lowering only for vector shifts!");
  assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");

  if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget))
    return V;

  if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
    return V;

  if (SupportedVectorVarShift(VT, Subtarget, Opc))
    return Op;

  // XOP has 128-bit variable logical/arithmetic shifts.
  // +ve/-ve Amt = shift left/right.
  if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
                             VT == MVT::v8i16 || VT == MVT::v16i8)) {
    if (Opc == ISD::SRL || Opc == ISD::SRA) {
      SDValue Zero = DAG.getConstant(0, dl, VT);
      Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
    }
    if (Opc == ISD::SHL || Opc == ISD::SRL)
      return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
    if (Opc == ISD::SRA)
      return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
  }

  // v2i64 vector logical shifts can efficiently avoid scalarization - do the
  // shifts per-lane and then shuffle the partial results back together.
  if (VT == MVT::v2i64 && Opc != ISD::SRA) {
    // Splat the shift amounts so the scalar shifts above will catch it.
    SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
    SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
    SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
    SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
    return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
  }

  // i64 vector arithmetic shift can be emulated with the transform:
  // M = lshr(SIGN_MASK, Amt)
  // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
  if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
      Opc == ISD::SRA) {
    SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
    SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
    R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
    R = DAG.getNode(ISD::XOR, dl, VT, R, M);
    R = DAG.getNode(ISD::SUB, dl, VT, R, M);
    return R;
  }

  // If possible, lower this shift as a sequence of two shifts by
  // constant plus a BLENDing shuffle instead of scalarizing it.
  // Example:
  //   (v4i32 (srl A, (build_vector <X, Y, Y, Y>)))
  //
  // Could be rewritten as:
  //   (v4i32 (MOVSS (srl A, <Y,Y,Y,Y>), (srl A, <X,X,X,X>)))
  //
  // The advantage is that the two shifts from the example would be
  // lowered as X86ISD::VSRLI nodes in parallel before blending.
  if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32 ||
                      (VT == MVT::v16i16 && Subtarget.hasInt256()))) {
    SDValue Amt1, Amt2;
    unsigned NumElts = VT.getVectorNumElements();
    SmallVector<int, 16> ShuffleMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue A = Amt->getOperand(i);
      if (A.isUndef()) {
        ShuffleMask.push_back(SM_SentinelUndef);
        continue;
      }
      if (!Amt1 || Amt1 == A) {
        ShuffleMask.push_back(i);
        Amt1 = A;
        continue;
      }
      if (!Amt2 || Amt2 == A) {
        ShuffleMask.push_back(i + NumElts);
        Amt2 = A;
        continue;
      }
      break;
    }

    // Only perform this blend if we can perform it without loading a mask.
    if (ShuffleMask.size() == NumElts && Amt1 && Amt2 &&
        (VT != MVT::v16i16 ||
         is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) &&
        (VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL ||
         canWidenShuffleElements(ShuffleMask))) {
      auto *Cst1 = dyn_cast<ConstantSDNode>(Amt1);
      auto *Cst2 = dyn_cast<ConstantSDNode>(Amt2);
      if (Cst1 && Cst2 && Cst1->getAPIntValue().ult(EltSizeInBits) &&
          Cst2->getAPIntValue().ult(EltSizeInBits)) {
        SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
                                                    Cst1->getZExtValue(), DAG);
        SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
                                                    Cst2->getZExtValue(), DAG);
        return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
      }
    }
  }

  // If possible, lower this packed shift into a vector multiply instead of
  // expanding it into a sequence of scalar shifts.
  if (Opc == ISD::SHL)
    if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
      return DAG.getNode(ISD::MUL, dl, VT, R, Scale);

  // Constant ISD::SRL can be performed efficiently on vXi16 vectors as we
  // can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt).
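  // E.g. for a lane shifted right by 5: MULHU computes
  //   (x * 2^(16-5)) >> 16 == x >> 5,
  // so multiplying by the scale factor built from (16 - Amt) implements the
  // logical shift. A zero shift amount would need a multiply by 2^16, which
  // does not fit in 16 bits - hence the explicit select on Amt == 0 below.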
if (Opc == ISD::SRL && ConstantAmt && (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256()))) { SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT); SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt); if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) { SDValue Zero = DAG.getConstant(0, dl, VT); SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ); SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale); return DAG.getSelect(dl, VT, ZAmt, R, Res); } } // Constant ISD::SRA can be performed efficiently on vXi16 vectors as we // can replace with ISD::MULHS, creating scale factor from (NumEltBits - Amt). // TODO: Special case handling for shift by 0/1, really we can afford either // of these cases in pre-SSE41/XOP/AVX512 but not both. if (Opc == ISD::SRA && ConstantAmt && (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256())) && ((Subtarget.hasSSE41() && !Subtarget.hasXOP() && !Subtarget.hasAVX512()) || DAG.isKnownNeverZero(Amt))) { SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT); SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt); if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) { SDValue Amt0 = DAG.getSetCC(dl, VT, Amt, DAG.getConstant(0, dl, VT), ISD::SETEQ); SDValue Amt1 = DAG.getSetCC(dl, VT, Amt, DAG.getConstant(1, dl, VT), ISD::SETEQ); SDValue Sra1 = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, 1, DAG); SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale); Res = DAG.getSelect(dl, VT, Amt0, R, Res); return DAG.getSelect(dl, VT, Amt1, Sra1, Res); } } // v4i32 Non Uniform Shifts. // If the shift amount is constant we can shift each lane using the SSE2 // immediate shifts, else we need to zero-extend each lane to the lower i64 // and shift using the SSE2 variable shifts. // The separate results can then be blended together. if (VT == MVT::v4i32) { SDValue Amt0, Amt1, Amt2, Amt3; if (ConstantAmt) { Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0}); Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1}); Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2}); Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3}); } else { // The SSE2 shifts use the lower i64 as the same shift amount for // all lanes and the upper i64 is ignored. On AVX we're better off // just zero-extending, but for SSE just duplicating the top 16-bits is // cheaper and has the same effect for out of range values. if (Subtarget.hasAVX()) { SDValue Z = DAG.getConstant(0, dl, VT); Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1}); Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1}); Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1}); Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1}); } else { SDValue Amt01 = DAG.getBitcast(MVT::v8i16, Amt); SDValue Amt23 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, {4, 5, 6, 7, -1, -1, -1, -1}); Amt0 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, {0, 1, 1, 1, -1, -1, -1, -1}); Amt1 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, {2, 3, 3, 3, -1, -1, -1, -1}); Amt2 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23, {0, 1, 1, 1, -1, -1, -1, -1}); Amt3 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23, {2, 3, 3, 3, -1, -1, -1, -1}); } } unsigned ShOpc = ConstantAmt ? 
Opc : X86OpcV; SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0)); SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1)); SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2)); SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3)); // Merge the shifted lane results optimally with/without PBLENDW. // TODO - ideally shuffle combining would handle this. if (Subtarget.hasSSE41()) { SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1}); SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7}); return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7}); } SDValue R01 = DAG.getVectorShuffle(VT, dl, R0, R1, {0, -1, -1, 5}); SDValue R23 = DAG.getVectorShuffle(VT, dl, R2, R3, {2, -1, -1, 7}); return DAG.getVectorShuffle(VT, dl, R01, R23, {0, 3, 4, 7}); } // It's worth extending once and using the vXi16/vXi32 shifts for smaller // types, but without AVX512 the extra overheads to get from vXi8 to vXi32 // make the existing SSE solution better. // NOTE: We honor prefered vector width before promoting to 512-bits. if ((Subtarget.hasInt256() && VT == MVT::v8i16) || (Subtarget.canExtendTo512DQ() && VT == MVT::v16i16) || (Subtarget.canExtendTo512DQ() && VT == MVT::v16i8) || (Subtarget.canExtendTo512BW() && VT == MVT::v32i8) || (Subtarget.hasBWI() && Subtarget.hasVLX() && VT == MVT::v16i8)) { assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) && "Unexpected vector type"); MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32; MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements()); unsigned ExtOpc = Opc == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; R = DAG.getNode(ExtOpc, dl, ExtVT, R); Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Opc, dl, ExtVT, R, Amt)); } // Constant ISD::SRA/SRL can be performed efficiently on vXi8 vectors as we // extend to vXi16 to perform a MUL scale effectively as a MUL_LOHI. if (ConstantAmt && (Opc == ISD::SRA || Opc == ISD::SRL) && (VT == MVT::v16i8 || VT == MVT::v64i8 || (VT == MVT::v32i8 && Subtarget.hasInt256())) && !Subtarget.hasXOP()) { int NumElts = VT.getVectorNumElements(); SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8); // Extend constant shift amount to vXi16 (it doesn't matter if the type // isn't legal). MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts); Amt = DAG.getZExtOrTrunc(Amt, dl, ExVT); Amt = DAG.getNode(ISD::SUB, dl, ExVT, DAG.getConstant(8, dl, ExVT), Amt); Amt = DAG.getNode(ISD::SHL, dl, ExVT, DAG.getConstant(1, dl, ExVT), Amt); assert(ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()) && "Constant build vector expected"); if (VT == MVT::v16i8 && Subtarget.hasInt256()) { R = Opc == ISD::SRA ? 
DAG.getSExtOrTrunc(R, dl, ExVT)
                          : DAG.getZExtOrTrunc(R, dl, ExVT);
      R = DAG.getNode(ISD::MUL, dl, ExVT, R, Amt);
      R = DAG.getNode(X86ISD::VSRLI, dl, ExVT, R, Cst8);
      return DAG.getZExtOrTrunc(R, dl, VT);
    }

    SmallVector<SDValue, 16> LoAmt, HiAmt;
    for (int i = 0; i != NumElts; i += 16) {
      for (int j = 0; j != 8; ++j) {
        LoAmt.push_back(Amt.getOperand(i + j));
        HiAmt.push_back(Amt.getOperand(i + j + 8));
      }
    }

    MVT VT16 = MVT::getVectorVT(MVT::i16, NumElts / 2);
    SDValue LoA = DAG.getBuildVector(VT16, dl, LoAmt);
    SDValue HiA = DAG.getBuildVector(VT16, dl, HiAmt);

    SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R));
    SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R));
    LoR = DAG.getNode(X86OpcI, dl, VT16, LoR, Cst8);
    HiR = DAG.getNode(X86OpcI, dl, VT16, HiR, Cst8);
    LoR = DAG.getNode(ISD::MUL, dl, VT16, LoR, LoA);
    HiR = DAG.getNode(ISD::MUL, dl, VT16, HiR, HiA);
    LoR = DAG.getNode(X86ISD::VSRLI, dl, VT16, LoR, Cst8);
    HiR = DAG.getNode(X86ISD::VSRLI, dl, VT16, HiR, Cst8);
    return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR);
  }

  if (VT == MVT::v16i8 ||
      (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
      (VT == MVT::v64i8 && Subtarget.hasBWI())) {
    MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);

    auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
      if (VT.is512BitVector()) {
        // On AVX512BW targets we make use of the fact that VSELECT lowers
        // to a masked blend which selects bytes based just on the sign bit
        // extracted to a mask.
        MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
        V0 = DAG.getBitcast(VT, V0);
        V1 = DAG.getBitcast(VT, V1);
        Sel = DAG.getBitcast(VT, Sel);
        Sel = DAG.getSetCC(dl, MaskVT, DAG.getConstant(0, dl, VT), Sel,
                           ISD::SETGT);
        return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
      } else if (Subtarget.hasSSE41()) {
        // On SSE41 targets we make use of the fact that VSELECT lowers
        // to PBLENDVB which selects bytes based just on the sign bit.
        V0 = DAG.getBitcast(VT, V0);
        V1 = DAG.getBitcast(VT, V1);
        Sel = DAG.getBitcast(VT, Sel);
        return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
      }
      // On pre-SSE41 targets we test for the sign bit by comparing to
      // zero - a negative value will set all bits of the lanes to true
      // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
      SDValue Z = DAG.getConstant(0, dl, SelVT);
      SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
      return DAG.getSelect(dl, SelVT, C, V0, V1);
    };

    // Turn 'a' into a mask suitable for VSELECT: a = a << 5;
    // We can safely do this using i16 shifts as we're only interested in
    // the 3 lower bits of each byte.
    Amt = DAG.getBitcast(ExtVT, Amt);
    Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, Amt, 5, DAG);
    Amt = DAG.getBitcast(VT, Amt);

    if (Opc == ISD::SHL || Opc == ISD::SRL) {
      // r = VSELECT(r, shift(r, 4), a);
      SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT));
      R = SignBitSelect(VT, Amt, M, R);

      // a += a
      Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);

      // r = VSELECT(r, shift(r, 2), a);
      M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT));
      R = SignBitSelect(VT, Amt, M, R);

      // a += a
      Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);

      // return VSELECT(r, shift(r, 1), a);
      M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT));
      R = SignBitSelect(VT, Amt, M, R);
      return R;
    }

    if (Opc == ISD::SRA) {
      // For SRA we need to unpack each byte to the higher byte of a i16 vector
      // so we can correctly sign extend. We don't care what happens to the
      // lower byte.
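      // E.g. with byte 0x90 in the high half an i16 lane reads 0x90xx, so
      // its sign bit is the byte's sign bit and the VSRAI steps below sign
      // extend correctly; the final logical shift right by 8 then returns
      // the finished byte to the low half with a zero upper byte, which is
      // what PACKUS needs to repack losslessly.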
SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt); SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt); SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R); SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R); ALo = DAG.getBitcast(ExtVT, ALo); AHi = DAG.getBitcast(ExtVT, AHi); RLo = DAG.getBitcast(ExtVT, RLo); RHi = DAG.getBitcast(ExtVT, RHi); // r = VSELECT(r, shift(r, 4), a); SDValue MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 4, DAG); SDValue MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 4, DAG); RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); // a += a ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo); AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi); // r = VSELECT(r, shift(r, 2), a); MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 2, DAG); MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 2, DAG); RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); // a += a ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo); AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi); // r = VSELECT(r, shift(r, 1), a); MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 1, DAG); MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 1, DAG); RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); // Logical shift the result back to the lower byte, leaving a zero upper // byte meaning that we can safely pack with PACKUSWB. RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RLo, 8, DAG); RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RHi, 8, DAG); return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); } } if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) { MVT ExtVT = MVT::v8i32; SDValue Z = DAG.getConstant(0, dl, VT); SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z); SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z); SDValue RLo = getUnpackl(DAG, dl, VT, Z, R); SDValue RHi = getUnpackh(DAG, dl, VT, Z, R); ALo = DAG.getBitcast(ExtVT, ALo); AHi = DAG.getBitcast(ExtVT, AHi); RLo = DAG.getBitcast(ExtVT, RLo); RHi = DAG.getBitcast(ExtVT, RHi); SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo); SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi); Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Lo, 16, DAG); Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Hi, 16, DAG); return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); } if (VT == MVT::v8i16) { // If we have a constant shift amount, the non-SSE41 path is best as // avoiding bitcasts make it easier to constant fold and reduce to PBLENDW. bool UseSSE41 = Subtarget.hasSSE41() && !ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) { // On SSE41 targets we make use of the fact that VSELECT lowers // to PBLENDVB which selects bytes based just on the sign bit. if (UseSSE41) { MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2); V0 = DAG.getBitcast(ExtVT, V0); V1 = DAG.getBitcast(ExtVT, V1); Sel = DAG.getBitcast(ExtVT, Sel); return DAG.getBitcast(VT, DAG.getSelect(dl, ExtVT, Sel, V0, V1)); } // On pre-SSE41 targets we splat the sign bit - a negative value will // set all bits of the lanes to true and VSELECT uses that in // its OR(AND(V0,C),AND(V1,~C)) lowering. 
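      // E.g. VSRAI by 15 turns each i16 lane into 0x0000 or 0xFFFF depending
      // on its sign bit, giving a full lane mask C; the select then reduces
      // to (V0 & C) | (V1 & ~C) without needing PBLENDVB.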
      SDValue C =
          getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Sel, 15, DAG);
      return DAG.getSelect(dl, VT, C, V0, V1);
    };

    // Turn 'a' into a mask suitable for VSELECT: a = a << 12;
    if (UseSSE41) {
      // On SSE41 targets we need to replicate the shift mask in both
      // bytes for PBLENDVB.
      Amt = DAG.getNode(
          ISD::OR, dl, VT,
          getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 4, DAG),
          getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG));
    } else {
      Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG);
    }

    // r = VSELECT(r, shift(r, 8), a);
    SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG);
    R = SignBitSelect(Amt, M, R);

    // a += a
    Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);

    // r = VSELECT(r, shift(r, 4), a);
    M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 4, DAG);
    R = SignBitSelect(Amt, M, R);

    // a += a
    Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);

    // r = VSELECT(r, shift(r, 2), a);
    M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 2, DAG);
    R = SignBitSelect(Amt, M, R);

    // a += a
    Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);

    // return VSELECT(r, shift(r, 1), a);
    M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 1, DAG);
    R = SignBitSelect(Amt, M, R);
    return R;
  }

  // Decompose 256-bit shifts into 128-bit shifts.
  if (VT.is256BitVector())
    return split256IntArith(Op, DAG);

  return SDValue();
}

static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Custom lowering only for vector rotates!");

  SDLoc DL(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  int NumElts = VT.getVectorNumElements();

  // Check for constant splat rotation amount.
  APInt UndefElts;
  SmallVector<APInt, 64> EltBits;
  int CstSplatIndex = -1;
  if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits))
    for (int i = 0; i != NumElts; ++i)
      if (!UndefElts[i]) {
        if (CstSplatIndex < 0 || EltBits[i] == EltBits[CstSplatIndex]) {
          CstSplatIndex = i;
          continue;
        }
        CstSplatIndex = -1;
        break;
      }

  // AVX512 implicitly uses modulo rotation amounts.
  if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
    // Attempt to rotate by immediate.
    if (0 <= CstSplatIndex) {
      unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
      uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
      return DAG.getNode(Op, DL, VT, R,
                         DAG.getConstant(RotateAmt, DL, MVT::i8));
    }

    // Else, fall-back on VPROLV/VPRORV.
    return Op;
  }

  assert((Opcode == ISD::ROTL) && "Only ROTL supported");

  // XOP has 128-bit vector variable + immediate rotates.
  // +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
  // XOP implicitly uses modulo rotation amounts.
  if (Subtarget.hasXOP()) {
    if (VT.is256BitVector())
      return split256IntArith(Op, DAG);
    assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");

    // Attempt to rotate by immediate.
    if (0 <= CstSplatIndex) {
      uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
      return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
                         DAG.getConstant(RotateAmt, DL, MVT::i8));
    }

    // Use general rotate by variable (per-element).
    return Op;
  }

  // Split 256-bit integers on pre-AVX2 targets.
  if (VT.is256BitVector() && !Subtarget.hasAVX2())
    return split256IntArith(Op, DAG);

  assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
          ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
           Subtarget.hasAVX2())) &&
         "Only vXi32/vXi16/vXi8 vector rotates supported");

  // Rotate by a uniform constant - expand back to shifts.
  if (0 <= CstSplatIndex)
    return SDValue();

  bool IsSplatAmt = DAG.isSplatValue(Amt);

  // v16i8/v32i8: Split rotation into rot4/rot2/rot1 stages and select by
  // the amount bit.
  if (EltSizeInBits == 8 && !IsSplatAmt) {
    if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()))
      return SDValue();

    // We don't need ModuloAmt here as we just peek at individual bits.
    MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);

    auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
      if (Subtarget.hasSSE41()) {
        // On SSE41 targets we make use of the fact that VSELECT lowers
        // to PBLENDVB which selects bytes based just on the sign bit.
        V0 = DAG.getBitcast(VT, V0);
        V1 = DAG.getBitcast(VT, V1);
        Sel = DAG.getBitcast(VT, Sel);
        return DAG.getBitcast(SelVT, DAG.getSelect(DL, VT, Sel, V0, V1));
      }
      // On pre-SSE41 targets we test for the sign bit by comparing to
      // zero - a negative value will set all bits of the lanes to true
      // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
      SDValue Z = DAG.getConstant(0, DL, SelVT);
      SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel);
      return DAG.getSelect(DL, SelVT, C, V0, V1);
    };

    // Turn 'a' into a mask suitable for VSELECT: a = a << 5;
    // We can safely do this using i16 shifts as we're only interested in
    // the 3 lower bits of each byte.
    Amt = DAG.getBitcast(ExtVT, Amt);
    Amt = DAG.getNode(ISD::SHL, DL, ExtVT, Amt, DAG.getConstant(5, DL, ExtVT));
    Amt = DAG.getBitcast(VT, Amt);

    // r = VSELECT(r, rot(r, 4), a);
    SDValue M;
    M = DAG.getNode(
        ISD::OR, DL, VT,
        DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(4, DL, VT)),
        DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(4, DL, VT)));
    R = SignBitSelect(VT, Amt, M, R);

    // a += a
    Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt);

    // r = VSELECT(r, rot(r, 2), a);
    M = DAG.getNode(
        ISD::OR, DL, VT,
        DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(2, DL, VT)),
        DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(6, DL, VT)));
    R = SignBitSelect(VT, Amt, M, R);

    // a += a
    Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt);

    // return VSELECT(r, rot(r, 1), a);
    M = DAG.getNode(
        ISD::OR, DL, VT,
        DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(1, DL, VT)),
        DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(7, DL, VT)));
    return SignBitSelect(VT, Amt, M, R);
  }

  // ISD::ROT* uses modulo rotate amounts.
  Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
                    DAG.getConstant(EltSizeInBits - 1, DL, VT));

  bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
  bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
                        SupportedVectorVarShift(VT, Subtarget, ISD::SRL);

  // Fallback for splats + all supported variable shifts.
  // Fallback for non-constant AVX2 vXi16 as well.
  if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
    SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
    AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
    SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
    SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
    return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
  }

  // As with shifts, convert the rotation amount to a multiplication factor.
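  // For example, with 16-bit elements and a rotate amount of 3, the scale is
  // 1 << 3 == 8: the low half of x * 8 (MUL) is the left-shifted part and
  // the high half (MULHU) holds the 3 bits that wrapped around, so OR'ing
  // the two products yields ROTL(x, 3).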
  SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG);
  assert(Scale && "Failed to convert ROTL amount to scale");

  // v8i16/v16i16: perform unsigned multiply hi/lo and OR the results.
  if (EltSizeInBits == 16) {
    SDValue Lo = DAG.getNode(ISD::MUL, DL, VT, R, Scale);
    SDValue Hi = DAG.getNode(ISD::MULHU, DL, VT, R, Scale);
    return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
  }

  // v4i32: make use of the PMULUDQ instruction to multiply 2 lanes of v4i32
  // to v2i64 results at a time. The upper 32-bits contain the wrapped bits
  // that can then be OR'd with the lower 32-bits.
  assert(VT == MVT::v4i32 && "Only v4i32 vector rotate expected");
  static const int OddMask[] = {1, -1, 3, -1};
  SDValue R13 = DAG.getVectorShuffle(VT, DL, R, R, OddMask);
  SDValue Scale13 = DAG.getVectorShuffle(VT, DL, Scale, Scale, OddMask);

  SDValue Res02 =
      DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64,
                  DAG.getBitcast(MVT::v2i64, R),
                  DAG.getBitcast(MVT::v2i64, Scale));
  SDValue Res13 =
      DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64,
                  DAG.getBitcast(MVT::v2i64, R13),
                  DAG.getBitcast(MVT::v2i64, Scale13));
  Res02 = DAG.getBitcast(VT, Res02);
  Res13 = DAG.getBitcast(VT, Res13);

  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getVectorShuffle(VT, DL, Res02, Res13, {0, 4, 2, 6}),
                     DAG.getVectorShuffle(VT, DL, Res02, Res13, {1, 5, 3, 7}));
}

/// Returns true if the operand type is exactly twice the native width, and
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
  unsigned OpWidth = MemType->getPrimitiveSizeInBits();

  if (OpWidth == 64)
    return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
  else if (OpWidth == 128)
    return Subtarget.hasCmpxchg16b();
  else
    return false;
}

bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  return needsCmpXchgNb(SI->getValueOperand()->getType());
}

// Note: this turns large loads into lock cmpxchg8b/16b.
// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
  auto PTy = cast<PointerType>(LI->getPointerOperandType());
  return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
                                               : AtomicExpansionKind::None;
}

TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
  Type *MemType = AI->getType();

  // If the operand is too big, we must see if cmpxchg8/16b is available
  // and default to library calls otherwise.
  if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
    return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
                                   : AtomicExpansionKind::None;
  }

  AtomicRMWInst::BinOp Op = AI->getOperation();
  switch (Op) {
  default:
    llvm_unreachable("Unknown atomic operation");
  case AtomicRMWInst::Xchg:
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
    // It's better to use xadd, xsub or xchg for these in all cases.
    return AtomicExpansionKind::None;
  case AtomicRMWInst::Or:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Xor:
    // If the atomicrmw's result isn't actually used, we can just add a "lock"
    // prefix to a normal instruction for these operations.
    return !AI->use_empty() ?
        AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
  case AtomicRMWInst::Nand:
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
    // These always require a non-trivial set of data operations on x86. We
    // must use a cmpxchg loop.
    return AtomicExpansionKind::CmpXChg;
  }
}

LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
  Type *MemType = AI->getType();
  // Accesses larger than the native width are turned into cmpxchg/libcalls,
  // so there is no benefit in turning such RMWs into loads, and it is
  // actually harmful as it introduces an mfence.
  if (MemType->getPrimitiveSizeInBits() > NativeWidth)
    return nullptr;

  auto Builder = IRBuilder<>(AI);
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  auto SSID = AI->getSyncScopeID();
  // We must restrict the ordering to avoid generating loads with Release or
  // ReleaseAcquire orderings.
  auto Order =
      AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
  auto Ptr = AI->getPointerOperand();

  // Before the load we need a fence. Here is an example lifted from
  // http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a
  // fence is required:
  // Thread 0:
  //   x.store(1, relaxed);
  //   r1 = y.fetch_add(0, release);
  // Thread 1:
  //   y.fetch_add(42, acquire);
  //   r2 = x.load(relaxed);
  // r1 = r2 = 0 is impossible, but becomes possible if the idempotent rmw is
  // lowered to just a load without a fence. A mfence flushes the store
  // buffer, making the optimization clearly correct.
  // FIXME: it is required if isReleaseOrStronger(Order) but it is not clear
  // otherwise, we might be able to be more aggressive on relaxed idempotent
  // rmw. In practice, they do not look useful, so we don't try to be
  // especially clever.
  if (SSID == SyncScope::SingleThread)
    // FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are
    // at the IR level, so we must wrap it in an intrinsic.
    return nullptr;

  if (!Subtarget.hasMFence())
    // FIXME: it might make sense to use a locked operation here but on a
    // different cache-line to prevent cache-line bouncing. In practice it
    // is probably a small win, and x86 processors without mfence are rare
    // enough that we do not bother.
    return nullptr;

  Function *MFence =
      llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence);
  Builder.CreateCall(MFence, {});

  // Finally we can emit the atomic load.
  LoadInst *Loaded =
      Builder.CreateAlignedLoad(Ptr, AI->getType()->getPrimitiveSizeInBits());
  Loaded->setAtomic(Order, SSID);
  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return Loaded;
}

static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  SDLoc dl(Op);
  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  // The only fence that needs an instruction is a sequentially-consistent
  // cross-thread fence.
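  // Without MFENCE the code below falls back to an idempotent LOCK-prefixed
  // RMW, in effect "lock orl $0, (%esp)": any LOCK'ed instruction acts as a
  // full barrier on x86, and the top of the stack is almost certainly
  // already in cache, keeping the no-op store cheap.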
  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
      FenceSSID == SyncScope::System) {
    if (Subtarget.hasMFence())
      return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));

    SDValue Chain = Op.getOperand(0);
    SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
    SDValue Ops[] = {
        DAG.getRegister(X86::ESP, MVT::i32),    // Base
        DAG.getTargetConstant(1, dl, MVT::i8),  // Scale
        DAG.getRegister(0, MVT::i32),           // Index
        DAG.getTargetConstant(0, dl, MVT::i32), // Disp
        DAG.getRegister(0, MVT::i32),           // Segment.
        Zero, Chain};
    SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, dl, MVT::Other, Ops);
    return SDValue(Res, 0);
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
}

static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG) {
  MVT T = Op.getSimpleValueType();
  SDLoc DL(Op);
  unsigned Reg = 0;
  unsigned size = 0;
  switch (T.SimpleTy) {
  default: llvm_unreachable("Invalid value type!");
  case MVT::i8:  Reg = X86::AL;  size = 1; break;
  case MVT::i16: Reg = X86::AX;  size = 2; break;
  case MVT::i32: Reg = X86::EAX; size = 4; break;
  case MVT::i64:
    assert(Subtarget.is64Bit() && "Node not type legal!");
    Reg = X86::RAX; size = 8;
    break;
  }
  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
                                  Op.getOperand(2), SDValue());
  SDValue Ops[] = { cpIn.getValue(0),
                    Op.getOperand(1),
                    Op.getOperand(3),
                    DAG.getTargetConstant(size, DL, MVT::i8),
                    cpIn.getValue(1) };
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
  SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
                                           Ops, T, MMO);

  SDValue cpOut =
      DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
  SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
                                      MVT::i32, cpOut.getValue(2));
  SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);

  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1));
  return SDValue();
}

// Create MOVMSKB, taking into account whether we need to split for AVX1.
static SDValue getPMOVMSKB(const SDLoc &DL, SDValue V, SelectionDAG &DAG,
                           const X86Subtarget &Subtarget) {
  MVT InVT = V.getSimpleValueType();

  if (InVT == MVT::v32i8 && !Subtarget.hasInt256()) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
    Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo);
    Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi);
    Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi,
                     DAG.getConstant(16, DL, MVT::i8));
    return DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi);
  }

  return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
}

static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT DstVT = Op.getSimpleValueType();

  // Legalize (v64i1 (bitcast i64 (X))) by splitting the i64, bitcasting
  // each half to v32i1 and concatenating the result.
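  // (This path is only reachable in 32-bit mode, where the i64 source is
  // already split in two; the EXTRACT_ELEMENT nodes below pick up the two
  // i32 halves.)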
  if (SrcVT == MVT::i64 && DstVT == MVT::v64i1) {
    assert(!Subtarget.is64Bit() && "Expected 32-bit mode");
    assert(Subtarget.hasBWI() && "Expected BWI target");
    SDLoc dl(Op);
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
                             DAG.getIntPtrConstant(0, dl));
    Lo = DAG.getBitcast(MVT::v32i1, Lo);
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
                             DAG.getIntPtrConstant(1, dl));
    Hi = DAG.getBitcast(MVT::v32i1, Hi);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
  }

  // Custom splitting for BWI types when AVX512F is available but BWI isn't.
  if ((SrcVT == MVT::v32i16 || SrcVT == MVT::v64i8) && DstVT.isVector() &&
      DAG.getTargetLoweringInfo().isTypeLegal(DstVT)) {
    SDLoc dl(Op);
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl);
    EVT CastVT = MVT::getVectorVT(DstVT.getVectorElementType(),
                                  DstVT.getVectorNumElements() / 2);
    Lo = DAG.getBitcast(CastVT, Lo);
    Hi = DAG.getBitcast(CastVT, Hi);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi);
  }

  // Use MOVMSK for vector to scalar conversion to prevent scalarization.
  if ((SrcVT == MVT::v16i1 || SrcVT == MVT::v32i1) && DstVT.isScalarInteger()) {
    assert(!Subtarget.hasAVX512() && "Should use K-registers with AVX512");
    MVT SExtVT = SrcVT == MVT::v16i1 ? MVT::v16i8 : MVT::v32i8;
    SDLoc DL(Op);
    SDValue V = DAG.getSExtOrTrunc(Src, DL, SExtVT);
    V = getPMOVMSKB(DL, V, DAG, Subtarget);
    return DAG.getZExtOrTrunc(V, DL, DstVT);
  }

  if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
      SrcVT == MVT::i64) {
    assert(Subtarget.hasSSE2() && "Requires at least SSE2!");

    if (DstVT != MVT::f64 && DstVT != MVT::i64 &&
        !(DstVT == MVT::x86mmx && SrcVT.isVector()))
      // This conversion needs to be expanded.
      return SDValue();

    SDLoc dl(Op);
    if (SrcVT.isVector()) {
      // Widen the input vector in the case of MVT::v2i32.
      // Example: from MVT::v2i32 to MVT::v4i32.
      MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
                                   SrcVT.getVectorNumElements() * 2);
      Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
                        DAG.getUNDEF(SrcVT));
    } else {
      assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
             "Unexpected source type in LowerBITCAST");
      Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
    }

    MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
    Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src);

    if (DstVT == MVT::x86mmx)
      return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src,
                       DAG.getIntPtrConstant(0, dl));
  }

  assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() &&
         Subtarget.hasMMX() && "Unexpected custom BITCAST");
  assert((DstVT == MVT::i64 ||
          (DstVT.isVector() && DstVT.getSizeInBits() == 64)) &&
         "Unexpected custom BITCAST");
  // i64 <=> MMX conversions are Legal.
  if (SrcVT == MVT::i64 && DstVT.isVector())
    return Op;
  if (DstVT == MVT::i64 && SrcVT.isVector())
    return Op;
  // MMX <=> MMX conversions are Legal.
  if (SrcVT.isVector() && DstVT.isVector())
    return Op;
  // All other conversions need to be expanded.
  return SDValue();
}

/// Compute the horizontal sum of bytes in V for the elements of VT.
///
/// Requires V to be a byte vector and VT to be an integer vector type with
/// wider elements than V's type. The width of the elements of VT determines
/// how many bytes of V are summed horizontally to produce each element of the
/// result.
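/// For example, with VT == v2i64 a single PSADBW against zero sums each
/// group of eight bytes of V into the corresponding i64 element - exactly
/// the byte-sum this helper returns.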
static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
                                      const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
  SDLoc DL(V);
  MVT ByteVecVT = V.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  assert(ByteVecVT.getVectorElementType() == MVT::i8 &&
         "Expected value to have byte element type.");
  assert(EltVT != MVT::i8 &&
         "Horizontal byte sum only makes sense for wider elements!");
  unsigned VecSize = VT.getSizeInBits();
  assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!");

  // The PSADBW instruction horizontally adds all bytes and leaves the result
  // in i64 chunks, thus directly computing the pop count for v2i64 and v4i64.
  if (EltVT == MVT::i64) {
    SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT);
    MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
    V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
    return DAG.getBitcast(VT, V);
  }

  if (EltVT == MVT::i32) {
    // We unpack the low half and high half into i32s interleaved with zeros
    // so that we can use PSADBW to horizontally sum them. The most useful
    // part of this is that it lines up the results of two PSADBW instructions
    // to be two v2i64 vectors which concatenated are the 4 population counts.
    // We can then use PACKUSWB to shrink and concatenate them into a v4i32
    // again.
    SDValue Zeros = DAG.getConstant(0, DL, VT);
    SDValue V32 = DAG.getBitcast(VT, V);
    SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros);
    SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);

    // Do the horizontal sums into two v2i64s.
    Zeros = DAG.getConstant(0, DL, ByteVecVT);
    MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
    Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
                      DAG.getBitcast(ByteVecVT, Low), Zeros);
    High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
                       DAG.getBitcast(ByteVecVT, High), Zeros);

    // Merge them together.
    MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16);
    V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT,
                    DAG.getBitcast(ShortVecVT, Low),
                    DAG.getBitcast(ShortVecVT, High));

    return DAG.getBitcast(VT, V);
  }

  // The only element type left is i16.
  assert(EltVT == MVT::i16 && "Unknown how to handle type");

  // To obtain pop count for each i16 element starting from the pop count for
  // i8 elements, shift the i16s left by 8, sum as i8s, and then shift as i16s
  // right by 8. It is important to shift as i16s as i8 vector shift isn't
  // directly supported.
  SDValue ShifterV = DAG.getConstant(8, DL, VT);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
  V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl),
                  DAG.getBitcast(ByteVecVT, V));
  return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
}

static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  int NumElts = VT.getVectorNumElements();
  (void)EltVT;
  assert(EltVT == MVT::i8 && "Only vXi8 vector CTPOP lowering supported.");

  // Implement a lookup table in register by using an algorithm based on:
  // http://wm.ite.pl/articles/sse-popcount.html
  //
  // The general idea is that every lower byte nibble in the input vector is
  // an index into an in-register pre-computed pop count table. We then split
  // up the input vector in two new ones: (1) a vector with only the
  // shifted-right higher nibbles for each byte and (2) a vector with the
  // lower nibbles (and masked out higher ones) for each byte. PSHUFB is used
  // separately with both to index the in-register table.
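  // For example, the input byte 0xE2 has high nibble 0xE (LUT entry 3) and
  // low nibble 0x2 (LUT entry 1), matching popcount(0xE2) == 4.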
  // Next, both are added and the result is an i8 vector where each element
  // contains the pop count for the input byte.
  const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
                       /* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
                       /* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
                       /* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4};

  SmallVector<SDValue, 64> LUTVec;
  for (int i = 0; i < NumElts; ++i)
    LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
  SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec);
  SDValue M0F = DAG.getConstant(0x0F, DL, VT);

  // High nibbles
  SDValue FourV = DAG.getConstant(4, DL, VT);
  SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV);

  // Low nibbles
  SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F);

  // The input vector is used as the shuffle mask that indexes elements into
  // the LUT. After counting low and high nibbles, add the vector to obtain
  // the final pop count per i8 element.
  SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles);
  SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles);

  return DAG.getNode(ISD::ADD, DL, VT, HiPopCnt, LoPopCnt);
}

// Please ensure that any codegen change from LowerVectorCTPOP is reflected
// in updated cost models in X86TTIImpl::getIntrinsicInstrCost.
static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
         "Unknown CTPOP type to handle");
  SDLoc DL(Op.getNode());
  SDValue Op0 = Op.getOperand(0);

  // TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions.
  if (Subtarget.hasVPOPCNTDQ()) {
    unsigned NumElems = VT.getVectorNumElements();
    assert((VT.getVectorElementType() == MVT::i8 ||
            VT.getVectorElementType() == MVT::i16) && "Unexpected type");
    if (NumElems < 16 || (NumElems == 16 && Subtarget.canExtendTo512DQ())) {
      MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
      Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0);
      Op = DAG.getNode(ISD::CTPOP, DL, NewVT, Op);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }
  }

  // Decompose 256-bit ops into smaller 128-bit ops.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return Lower256IntUnary(Op, DAG);

  // Decompose 512-bit ops into smaller 256-bit ops.
  if (VT.is512BitVector() && !Subtarget.hasBWI())
    return Lower512IntUnary(Op, DAG);

  // For element types greater than i8, do vXi8 pop counts and a bytesum.
  if (VT.getScalarType() != MVT::i8) {
    MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
    SDValue ByteOp = DAG.getBitcast(ByteVT, Op0);
    SDValue PopCnt8 = DAG.getNode(ISD::CTPOP, DL, ByteVT, ByteOp);
    return LowerHorizontalByteSum(PopCnt8, VT, Subtarget, DAG);
  }

  // We can't use the fast LUT approach, so fall back on LegalizeDAG.
  if (!Subtarget.hasSSSE3())
    return SDValue();

  return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}

static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG) {
  assert(Op.getSimpleValueType().isVector() &&
         "We only do custom lowering for vector population count.");
  return LowerVectorCTPOP(Op, Subtarget, DAG);
}

static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  SDValue In = Op.getOperand(0);
  SDLoc DL(Op);

  // For scalars, it's still beneficial to transfer to/from the SIMD unit to
  // perform the BITREVERSE.
  if (!VT.isVector()) {
    MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
    SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
    Res = DAG.getNode(ISD::BITREVERSE, DL, VecVT, Res);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Res,
                       DAG.getIntPtrConstant(0, DL));
  }

  int NumElts = VT.getVectorNumElements();
  int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;

  // Decompose 256-bit ops into smaller 128-bit ops.
  if (VT.is256BitVector())
    return Lower256IntUnary(Op, DAG);

  assert(VT.is128BitVector() &&
         "Only 128-bit vector bitreverse lowering supported.");

  // VPPERM reverses the bits of a byte with the permute Op (2 << 5), and we
  // perform the BSWAP in the shuffle.
  // It's best to shuffle using the second operand as this will implicitly
  // allow memory folding for multiple vectors.
  SmallVector<SDValue, 16> MaskElts;
  for (int i = 0; i != NumElts; ++i) {
    for (int j = ScalarSizeInBytes - 1; j >= 0; --j) {
      int SourceByte = 16 + (i * ScalarSizeInBytes) + j;
      int PermuteByte = SourceByte | (2 << 5);
      MaskElts.push_back(DAG.getConstant(PermuteByte, DL, MVT::i8));
    }
  }

  SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, MaskElts);
  SDValue Res = DAG.getBitcast(MVT::v16i8, In);
  Res = DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, DAG.getUNDEF(MVT::v16i8),
                    Res, Mask);
  return DAG.getBitcast(VT, Res);
}

static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
                               SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();

  if (Subtarget.hasXOP() && !VT.is512BitVector())
    return LowerBITREVERSE_XOP(Op, DAG);

  assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");

  SDValue In = Op.getOperand(0);
  SDLoc DL(Op);

  unsigned NumElts = VT.getVectorNumElements();
  assert(VT.getScalarType() == MVT::i8 &&
         "Only byte vector BITREVERSE supported");

  // Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return Lower256IntUnary(Op, DAG);

  // Perform BITREVERSE using PSHUFB lookups. Each byte is split into
  // two nibbles and a PSHUFB lookup to find the bitreverse of each
  // 0-15 value (moved to the other nibble).
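  // For example, bitreverse(0x2D) == 0xB4: the low nibble 0xD indexes
  // LoLUT[0xD] == 0xB0, the high nibble 0x2 indexes HiLUT[0x2] == 0x04, and
  // OR'ing the two lookups gives 0xB4.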
  SDValue NibbleMask = DAG.getConstant(0xF, DL, VT);
  SDValue Lo = DAG.getNode(ISD::AND, DL, VT, In, NibbleMask);
  SDValue Hi = DAG.getNode(ISD::SRL, DL, VT, In, DAG.getConstant(4, DL, VT));

  const int LoLUT[16] = {
      /* 0 */ 0x00, /* 1 */ 0x80, /* 2 */ 0x40, /* 3 */ 0xC0,
      /* 4 */ 0x20, /* 5 */ 0xA0, /* 6 */ 0x60, /* 7 */ 0xE0,
      /* 8 */ 0x10, /* 9 */ 0x90, /* a */ 0x50, /* b */ 0xD0,
      /* c */ 0x30, /* d */ 0xB0, /* e */ 0x70, /* f */ 0xF0};
  const int HiLUT[16] = {
      /* 0 */ 0x00, /* 1 */ 0x08, /* 2 */ 0x04, /* 3 */ 0x0C,
      /* 4 */ 0x02, /* 5 */ 0x0A, /* 6 */ 0x06, /* 7 */ 0x0E,
      /* 8 */ 0x01, /* 9 */ 0x09, /* a */ 0x05, /* b */ 0x0D,
      /* c */ 0x03, /* d */ 0x0B, /* e */ 0x07, /* f */ 0x0F};

  SmallVector<SDValue, 64> LoMaskElts, HiMaskElts;
  for (unsigned i = 0; i < NumElts; ++i) {
    LoMaskElts.push_back(DAG.getConstant(LoLUT[i % 16], DL, MVT::i8));
    HiMaskElts.push_back(DAG.getConstant(HiLUT[i % 16], DL, MVT::i8));
  }

  SDValue LoMask = DAG.getBuildVector(VT, DL, LoMaskElts);
  SDValue HiMask = DAG.getBuildVector(VT, DL, HiMaskElts);
  Lo = DAG.getNode(X86ISD::PSHUFB, DL, VT, LoMask, Lo);
  Hi = DAG.getNode(X86ISD::PSHUFB, DL, VT, HiMask, Hi);
  return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
}

static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  unsigned NewOpc = 0;
  switch (N->getOpcode()) {
  case ISD::ATOMIC_LOAD_ADD: NewOpc = X86ISD::LADD; break;
  case ISD::ATOMIC_LOAD_SUB: NewOpc = X86ISD::LSUB; break;
  case ISD::ATOMIC_LOAD_OR:  NewOpc = X86ISD::LOR;  break;
  case ISD::ATOMIC_LOAD_XOR: NewOpc = X86ISD::LXOR; break;
  case ISD::ATOMIC_LOAD_AND: NewOpc = X86ISD::LAND; break;
  default: llvm_unreachable("Unknown ATOMIC_LOAD_ opcode");
  }

  MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();

  return DAG.getMemIntrinsicNode(
      NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
      {N->getOperand(0), N->getOperand(1), N->getOperand(2)},
      /*MemVT=*/N->getSimpleValueType(0), MMO);
}

/// Lower atomic_load_ops into LOCK-prefixed operations.
static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
  SDValue Chain = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  unsigned Opc = N->getOpcode();
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // We can lower atomic_load_add into LXADD. However, any other atomicrmw op
  // can only be lowered when the result is unused. They should have already
  // been transformed into a cmpxchg loop in AtomicExpand.
  if (N->hasAnyUseOfValue(0)) {
    // Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able
    // to select LXADD if LOCK_SUB can't be selected.
    if (Opc == ISD::ATOMIC_LOAD_SUB) {
      AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
      RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
                           AN->getMemOperand());
    }
    assert(Opc == ISD::ATOMIC_LOAD_ADD &&
           "Used AtomicRMW ops other than Add should have been expanded!");
    return N;
  }

  SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
  // RAUW the chain, but don't worry about the result, as it's unused.
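  // For example, a dead-result atomicrmw or selected through X86ISD::LOR
  // becomes a single LOCK-prefixed or on the memory operand rather than a
  // cmpxchg loop.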
  assert(!N->hasAnyUseOfValue(0));
  DAG.ReplaceAllUsesOfValueWith(N.getValue(1), LockOp.getValue(1));
  return SDValue();
}

static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
  SDLoc dl(Node);
  EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();

  // Convert seq_cst store -> xchg
  // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
  // FIXME: On 32-bit, store -> fist or movq would be more efficient
  //        (The only way to get a 16-byte store is cmpxchg16b)
  // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
  if (cast<AtomicSDNode>(Node)->getOrdering() ==
          AtomicOrdering::SequentiallyConsistent ||
      !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
    SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
                                 cast<AtomicSDNode>(Node)->getMemoryVT(),
                                 Node->getOperand(0), Node->getOperand(1),
                                 Node->getOperand(2),
                                 cast<AtomicSDNode>(Node)->getMemOperand());
    return Swap.getValue(1);
  }
  // Other atomic stores have a simple pattern.
  return Op;
}

static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
  SDNode *N = Op.getNode();
  MVT VT = N->getSimpleValueType(0);

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDLoc DL(N);

  // Set the carry flag.
  SDValue Carry = Op.getOperand(2);
  EVT CarryVT = Carry.getValueType();
  APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
  Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
                      Carry, DAG.getConstant(NegOne, DL, CarryVT));

  unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB;
  SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0),
                            Op.getOperand(1), Carry.getValue(1));

  SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}

static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
  assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());

  // For MacOSX, we want to call an alternative entry point: __sincos_stret,
  // which returns the values as { float, float } (in XMM0) or
  // { double, double } (which is returned in XMM0, XMM1).
  SDLoc dl(Op);
  SDValue Arg = Op.getOperand(0);
  EVT ArgVT = Arg.getValueType();
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Node = Arg;
  Entry.Ty = ArgTy;
  Entry.IsSExt = false;
  Entry.IsZExt = false;
  Args.push_back(Entry);

  bool isF64 = ArgVT == MVT::f64;
  // Only optimize x86_64 for now. i386 is a bit messy. For f32,
  // the small struct {f32, f32} is returned in (eax, edx). For f64,
  // the results are returned via SRet in memory.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
  const char *LibcallName = TLI.getLibcallName(LC);
  SDValue Callee =
      DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));

  Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
                      : (Type *)VectorType::get(ArgTy, 4);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);

  if (isF64)
    // Returned in xmm0 and xmm1.
    return CallResult.first;

  // Returned in bits 0:31 and 32:63 of xmm0.
  SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
                               CallResult.first, DAG.getIntPtrConstant(0, dl));
  SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
                               CallResult.first, DAG.getIntPtrConstant(1, dl));
  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
}

/// Widen a vector input to a vector of NVT. The
/// input vector must have the same element type as NVT.
static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
                            bool FillWithZeroes = false) {
  // Check if InOp already has the right width.
  MVT InVT = InOp.getSimpleValueType();
  if (InVT == NVT)
    return InOp;

  if (InOp.isUndef())
    return DAG.getUNDEF(NVT);

  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
         "input and widen element type must match");

  unsigned InNumElts = InVT.getVectorNumElements();
  unsigned WidenNumElts = NVT.getVectorNumElements();
  assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
         "Unexpected request for vector widening");

  SDLoc dl(InOp);
  if (InOp.getOpcode() == ISD::CONCAT_VECTORS && InOp.getNumOperands() == 2) {
    SDValue N1 = InOp.getOperand(1);
    if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) ||
        N1.isUndef()) {
      InOp = InOp.getOperand(0);
      InVT = InOp.getSimpleValueType();
      InNumElts = InVT.getVectorNumElements();
    }
  }

  if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) ||
      ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) {
    SmallVector<SDValue, 16> Ops;
    for (unsigned i = 0; i < InNumElts; ++i)
      Ops.push_back(InOp.getOperand(i));

    EVT EltVT = InOp.getOperand(0).getValueType();

    SDValue FillVal =
        FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : DAG.getUNDEF(EltVT);
    for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i)
      Ops.push_back(FillVal);
    return DAG.getBuildVector(NVT, dl, Ops);
  }
  SDValue FillVal =
      FillWithZeroes ? DAG.getConstant(0, dl, NVT) : DAG.getUNDEF(NVT);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, InOp,
                     DAG.getIntPtrConstant(0, dl));
}

static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG) {
  assert(Subtarget.hasAVX512() &&
         "MGATHER/MSCATTER are supported on AVX-512 arch only");

  MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
  SDValue Src = N->getValue();
  MVT VT = Src.getSimpleValueType();
  assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
  SDLoc dl(Op);

  SDValue Scale = N->getScale();
  SDValue Index = N->getIndex();
  SDValue Mask = N->getMask();
  SDValue Chain = N->getChain();
  SDValue BasePtr = N->getBasePtr();

  if (VT == MVT::v2f32) {
    assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
    // If the index is v2i64 and we have VLX we can use xmm for data and
    // index.
    if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
      Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
                        DAG.getUNDEF(MVT::v2f32));
      SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other);
      SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
      SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
          VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
      DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
      return SDValue(NewScatter.getNode(), 1);
    }
    return SDValue();
  }

  if (VT == MVT::v2i32) {
    assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
    Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
                      DAG.getUNDEF(MVT::v2i32));
    // If the index is v2i64 and we have VLX we can use xmm for data and
    // index.
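    // (With VLX the 128-bit scatter forms are available, so the v2i32 data
    // only needs widening to an xmm-sized v4i32 rather than to 512 bits.)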
    if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
      SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other);
      SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
      SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
          VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
      DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
      return SDValue(NewScatter.getNode(), 1);
    }
    // Custom widen all the operands to avoid promotion.
    EVT NewIndexVT = EVT::getVectorVT(
        *DAG.getContext(), Index.getValueType().getVectorElementType(), 4);
    Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
                        DAG.getUNDEF(Index.getValueType()));
    Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
                       DAG.getConstant(0, dl, MVT::v2i1));
    SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
    return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(),
                                dl, Ops, N->getMemOperand());
  }

  MVT IndexVT = Index.getSimpleValueType();
  MVT MaskVT = Mask.getSimpleValueType();

  // If the index is v2i32, we're being called by type legalization and we
  // should just let the default handling take care of it.
  if (IndexVT == MVT::v2i32)
    return SDValue();

  // If we don't have VLX and neither the passthru or index is 512-bits, we
  // need to widen until one is.
  if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
      !Index.getSimpleValueType().is512BitVector()) {
    // Determine how much we need to widen by to get a 512-bit type.
    unsigned Factor = std::min(512 / VT.getSizeInBits(),
                               512 / IndexVT.getSizeInBits());
    unsigned NumElts = VT.getVectorNumElements() * Factor;

    VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
    MaskVT = MVT::getVectorVT(MVT::i1, NumElts);

    Src = ExtendToType(Src, VT, DAG);
    Index = ExtendToType(Index, IndexVT, DAG);
    Mask = ExtendToType(Mask, MaskVT, DAG, true);
  }

  SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
  SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
  SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
      VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
  DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
  return SDValue(NewScatter.getNode(), 1);
}

static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG) {
  MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
  MVT VT = Op.getSimpleValueType();
  MVT ScalarVT = VT.getScalarType();
  SDValue Mask = N->getMask();
  SDLoc dl(Op);

  assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
         "Expanding masked load is supported on AVX-512 target only!");

  assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) &&
         "Expanding masked load is supported for 32 and 64-bit types only!");

  assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() &&
         !VT.is512BitVector() && "Cannot lower masked load op.");

  assert((ScalarVT.getSizeInBits() >= 32 ||
          (Subtarget.hasBWI() &&
              (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
         "Unsupported masked load op.");

  // This operation is legal for targets with VLX, but without
  // VLX the vector should be widened to 512 bit.
  unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
  MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
  SDValue PassThru = ExtendToType(N->getPassThru(), WideDataVT, DAG);

  // Mask element has to be i1.
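  // The mask is widened with FillWithZeroes == true below: the extra lanes
  // must be inactive so the widened load cannot touch memory beyond the
  // original access.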
  assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
         "Unexpected mask type");

  MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);

  Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
  SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
                                      N->getBasePtr(), Mask, PassThru,
                                      N->getMemoryVT(), N->getMemOperand(),
                                      N->getExtensionType(),
                                      N->isExpandingLoad());

  SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
                                NewLoad.getValue(0),
                                DAG.getIntPtrConstant(0, dl));
  SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
  return DAG.getMergeValues(RetOps, dl);
}

static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
  SDValue DataToStore = N->getValue();
  MVT VT = DataToStore.getSimpleValueType();
  MVT ScalarVT = VT.getScalarType();
  SDValue Mask = N->getMask();
  SDLoc dl(Op);

  assert((!N->isCompressingStore() || Subtarget.hasAVX512()) &&
         "Compressing masked store is supported on AVX-512 target only!");

  assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) &&
         "Compressing masked store is supported for 32 and 64-bit types only!");

  assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() &&
         !VT.is512BitVector() && "Cannot lower masked store op.");

  assert((ScalarVT.getSizeInBits() >= 32 ||
          (Subtarget.hasBWI() &&
              (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
         "Unsupported masked store op.");

  // This operation is legal for targets with VLX, but without
  // VLX the vector should be widened to 512 bit.
  unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
  MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);

  // Mask element has to be i1.
  assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
         "Unexpected mask type");

  MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);

  DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
  Mask = ExtendToType(Mask, WideMaskVT, DAG, true);

  return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
                            Mask, N->getMemoryVT(), N->getMemOperand(),
                            N->isTruncatingStore(), N->isCompressingStore());
}

static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
  assert(Subtarget.hasAVX2() &&
         "MGATHER/MSCATTER are supported on AVX-512/AVX-2 arch only");

  MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Index = N->getIndex();
  SDValue Mask = N->getMask();
  SDValue PassThru = N->getPassThru();
  MVT IndexVT = Index.getSimpleValueType();
  MVT MaskVT = Mask.getSimpleValueType();

  assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");

  // If the index is v2i32, we're being called by type legalization.
  if (IndexVT == MVT::v2i32)
    return SDValue();

  // If we don't have VLX and neither the passthru or index is 512-bits, we
  // need to widen until one is.
  MVT OrigVT = VT;
  if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
      !IndexVT.is512BitVector()) {
    // Determine how much we need to widen by to get a 512-bit type.
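    // For example, gathering v4i32 data with a v4i64 index widens by
    // Factor = min(512/128, 512/256) == 2, giving a v8i64 index that reaches
    // the required 512 bits.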
    unsigned Factor = std::min(512 / VT.getSizeInBits(),
                               512 / IndexVT.getSizeInBits());

    unsigned NumElts = VT.getVectorNumElements() * Factor;

    VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
    MaskVT = MVT::getVectorVT(MVT::i1, NumElts);

    PassThru = ExtendToType(PassThru, VT, DAG);
    Index = ExtendToType(Index, IndexVT, DAG);
    Mask = ExtendToType(Mask, MaskVT, DAG, true);
  }

  SDValue Ops[] = { N->getChain(), PassThru,
                    Mask, N->getBasePtr(), Index, N->getScale() };
  SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
      DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(),
      N->getMemOperand());
  SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT,
                                NewGather, DAG.getIntPtrConstant(0, dl));
  return DAG.getMergeValues({Extract, NewGather.getValue(2)}, dl);
}

SDValue X86TargetLowering::LowerGC_TRANSITION_START(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // TODO: Eventually, the lowering of these nodes should be informed by or
  // deferred to the GC strategy for the function in which they appear. For
  // now, however, they must be lowered to something. Since they are
  // logically no-ops in the case of a null GC strategy (or a GC strategy
  // which does not require special handling for these nodes), lower them as
  // literal NOOPs for the time being.
  SmallVector<SDValue, 2> Ops;

  Ops.push_back(Op.getOperand(0));
  if (Op->getGluedNode())
    Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));

  SDLoc OpDL(Op);
  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);

  return NOOP;
}

SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // TODO: Eventually, the lowering of these nodes should be informed by or
  // deferred to the GC strategy for the function in which they appear. For
  // now, however, they must be lowered to something. Since they are
  // logically no-ops in the case of a null GC strategy (or a GC strategy
  // which does not require special handling for these nodes), lower them as
  // literal NOOPs for the time being.
  SmallVector<SDValue, 2> Ops;

  Ops.push_back(Op.getOperand(0));
  if (Op->getGluedNode())
    Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));

  SDLoc OpDL(Op);
  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);

  return NOOP;
}

/// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Should not custom lower this!");
  case ISD::ATOMIC_FENCE:       return LowerATOMIC_FENCE(Op, Subtarget, DAG);
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    return LowerCMP_SWAP(Op, Subtarget, DAG);
  case ISD::CTPOP:              return LowerCTPOP(Op, Subtarget, DAG);
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_SUB:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_AND:    return lowerAtomicArith(Op, DAG, Subtarget);
  case ISD::ATOMIC_STORE:       return LowerATOMIC_STORE(Op, DAG);
  case ISD::BITREVERSE:         return LowerBITREVERSE(Op, Subtarget, DAG);
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::CONCAT_VECTORS:     return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
  case ISD::VECTOR_SHUFFLE:     return lowerVectorShuffle(Op, Subtarget, DAG);
  case ISD::VSELECT:            return LowerVSELECT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_SUBVECTOR:   return LowerINSERT_SUBVECTOR(Op, Subtarget, DAG);
  case ISD::EXTRACT_SUBVECTOR:  return LowerEXTRACT_SUBVECTOR(Op, Subtarget, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, Subtarget, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:          return LowerShiftParts(Op, DAG);
  case ISD::FSHL:
  case ISD::FSHR:               return LowerFunnelShift(Op, Subtarget, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::ZERO_EXTEND:        return LowerZERO_EXTEND(Op, Subtarget, DAG);
  case ISD::SIGN_EXTEND:        return LowerSIGN_EXTEND(Op, Subtarget, DAG);
  case ISD::ANY_EXTEND:         return LowerANY_EXTEND(Op, Subtarget, DAG);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:         return LowerFP_TO_INT(Op, DAG);
  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
  case ISD::LOAD:               return LowerLoad(Op, Subtarget, DAG);
  case ISD::STORE:              return LowerStore(Op, Subtarget, DAG);
  case ISD::FADD:
  case ISD::FSUB:               return lowerFaddFsub(Op, DAG, Subtarget);
  case ISD::FABS:
  case ISD::FNEG:               return LowerFABSorFNEG(Op, DAG);
  case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
  case ISD::FGETSIGN:           return LowerFGETSIGN(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::SETCCCARRY:         return LowerSETCCCARRY(Op, DAG);
  case ISD::SELECT:             return LowerSELECT(Op, DAG);
  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, Subtarget, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::ADDROFRETURNADDR:   return LowerADDROFRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  case ISD::FRAME_TO_ARGS_OFFSET:
    return
        LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::EH_SJLJ_SETUP_DISPATCH:
    return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:    return LowerCTLZ(Op, Subtarget, DAG);
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:    return LowerCTTZ(Op, Subtarget, DAG);
  case ISD::MUL:                return LowerMUL(Op, Subtarget, DAG);
  case ISD::MULHS:
  case ISD::MULHU:              return LowerMULH(Op, Subtarget, DAG);
  case ISD::ROTL:
  case ISD::ROTR:               return LowerRotate(Op, Subtarget, DAG);
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SHL:                return LowerShift(Op, Subtarget, DAG);
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:              return LowerXALUO(Op, DAG);
  case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, Subtarget, DAG);
  case ISD::BITCAST:            return LowerBITCAST(Op, Subtarget, DAG);
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:           return LowerADDSUBCARRY(Op, DAG);
  case ISD::ADD:
  case ISD::SUB:                return lowerAddSub(Op, DAG, Subtarget);
  case ISD::UADDSAT:
  case ISD::SADDSAT:
  case ISD::USUBSAT:
  case ISD::SSUBSAT:            return LowerADDSAT_SUBSAT(Op, DAG);
  case ISD::SMAX:
  case ISD::SMIN:
  case ISD::UMAX:
  case ISD::UMIN:               return LowerMINMAX(Op, DAG);
  case ISD::ABS:                return LowerABS(Op, Subtarget, DAG);
  case ISD::FSINCOS:            return LowerFSINCOS(Op, Subtarget, DAG);
  case ISD::MLOAD:              return LowerMLOAD(Op, Subtarget, DAG);
  case ISD::MSTORE:             return LowerMSTORE(Op, Subtarget, DAG);
  case ISD::MGATHER:            return LowerMGATHER(Op, Subtarget, DAG);
  case ISD::MSCATTER:           return LowerMSCATTER(Op, Subtarget, DAG);
  case ISD::GC_TRANSITION_START:
    return LowerGC_TRANSITION_START(Op, DAG);
  case ISD::GC_TRANSITION_END:  return LowerGC_TRANSITION_END(Op, DAG);
  }
}

/// Places new result values for the node in Results (their number
/// and types must exactly match those of the original return values of
/// the node), or leaves Results empty, which indicates that the node is not
/// to be custom lowered after all.
void X86TargetLowering::LowerOperationWrapper(SDNode *N,
                                              SmallVectorImpl<SDValue> &Results,
                                              SelectionDAG &DAG) const {
  SDValue Res = LowerOperation(SDValue(N, 0), DAG);

  if (!Res.getNode())
    return;

  assert((N->getNumValues() <= Res->getNumValues()) &&
         "Lowering returned the wrong number of results!");

  // Places new result values based on N result number.
  // In some cases (LowerSINT_TO_FP for example) Res has more result values
  // than original node, chain should be dropped (last value).
  for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
    Results.push_back(Res.getValue(I));
}

/// Replace a node with an illegal result type with a new node built out of
/// custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::MUL: {
    EVT VT = N->getValueType(0);
    assert(VT.isVector() && "Unexpected VT");
    if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger &&
        VT.getVectorNumElements() == 2) {
      // Promote to a pattern that will be turned into PMULUDQ.
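      // For example, a v2i32 multiply is any_extended to v2i64, multiplied
      // with PMULUDQ (which forms a full 64-bit product from the low 32 bits
      // of each i64 lane), and truncated back to v2i32; the low 32 bits of
      // the product are all the narrow type needs.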
      SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
                               N->getOperand(0));
      SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
                               N->getOperand(1));
      SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul));
    } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
               VT.getVectorElementType() == MVT::i8) {
      // Pre-promote these to vXi16 to avoid op legalization thinking all 16
      // elements are needed.
      MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
      SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
      SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
      SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
      Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
      unsigned NumConcats = 16 / VT.getVectorNumElements();
      SmallVector<SDValue, 16> ConcatOps(NumConcats, DAG.getUNDEF(VT));
      ConcatOps[0] = Res;
      Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
      Results.push_back(Res);
    }
    return;
  }
  case ISD::UADDSAT:
  case ISD::SADDSAT:
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case X86ISD::VPMADDWD:
  case X86ISD::AVG: {
    // Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
    // X86ISD::AVG/VPMADDWD by widening.
    assert(Subtarget.hasSSE2() && "Requires at least SSE2!");

    EVT VT = N->getValueType(0);
    EVT InVT = N->getOperand(0).getValueType();
    assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 &&
           "Expected a VT that divides into 128 bits.");
    unsigned NumConcat = 128 / InVT.getSizeInBits();

    EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
                                    InVT.getVectorElementType(),
                                    NumConcat * InVT.getVectorNumElements());
    EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
                                  VT.getVectorElementType(),
                                  NumConcat * VT.getVectorNumElements());

    SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
    Ops[0] = N->getOperand(0);
    SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
    Ops[0] = N->getOperand(1);
    SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);

    SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);
    if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
                        DAG.getIntPtrConstant(0, dl));
    Results.push_back(Res);
    return;
  }
  case ISD::SETCC: {
    // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when
    // setCC result type is v2i1 because type legalization will end up with
    // a v4i1 setcc plus an extend.
    assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type");
    if (N->getOperand(0).getValueType() != MVT::v2f32 ||
        getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector)
      return;
    SDValue UNDEF = DAG.getUNDEF(MVT::v2f32);
    SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
                              N->getOperand(0), UNDEF);
    SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
                              N->getOperand(1), UNDEF);
    SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS,
                              N->getOperand(2));
    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
                      DAG.getIntPtrConstant(0, dl));
    Results.push_back(Res);
    return;
  }
  // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
  case X86ISD::FMIN:
  case X86ISD::FMAXC:
  case X86ISD::FMAX: {
    EVT VT = N->getValueType(0);
    assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX.");
    SDValue UNDEF = DAG.getUNDEF(VT);
    SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
                              N->getOperand(0), UNDEF);
    SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
                              N->getOperand(1), UNDEF);
    Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS));
    return;
  }
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM: {
    EVT VT = N->getValueType(0);
    if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector) {
      // If this RHS is a constant splat vector we can widen this and let
      // division/remainder by constant optimize it.
      // TODO: Can we do something for non-splat?
      APInt SplatVal;
      if (ISD::isConstantSplatVector(N->getOperand(1).getNode(), SplatVal)) {
        unsigned NumConcats = 128 / VT.getSizeInBits();
        SmallVector<SDValue, 4> Ops0(NumConcats, DAG.getUNDEF(VT));
        Ops0[0] = N->getOperand(0);
        EVT ResVT = getTypeToTransformTo(*DAG.getContext(), VT);
        SDValue N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Ops0);
        SDValue N1 = DAG.getConstant(SplatVal, dl, ResVT);
        SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1);
        Results.push_back(Res);
      }
      return;
    }
    if (VT == MVT::v2i32) {
      // Legalize v2i32 div/rem by unrolling. Otherwise we promote to the
      // v2i64 and unroll later. But then we create i64 scalar ops which
      // might be slow in 64-bit mode or require a libcall in 32-bit mode.
      Results.push_back(DAG.UnrollVectorOp(N));
      return;
    }
    if (VT.isVector())
      return;
    LLVM_FALLTHROUGH;
  }
  case ISD::SDIVREM:
  case ISD::UDIVREM: {
    SDValue V = LowerWin64_i128OP(SDValue(N, 0), DAG);
    Results.push_back(V);
    return;
  }
  case ISD::TRUNCATE: {
    MVT VT = N->getSimpleValueType(0);
    if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
      return;

    // The generic legalizer will try to widen the input type to the same
    // number of elements as the widened result type. But this isn't always
    // the best thing so do some custom legalization to avoid some cases.
    MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
    SDValue In = N->getOperand(0);
    EVT InVT = In.getValueType();

    unsigned InBits = InVT.getSizeInBits();
    if (128 % InBits == 0) {
      // 128 bit and smaller inputs should avoid truncate altogether and
      // just use a build_vector that will become a shuffle.
      // TODO: Widen and use a shuffle directly?
      MVT InEltVT = InVT.getSimpleVT().getVectorElementType();
      EVT EltVT = VT.getVectorElementType();
      unsigned WidenNumElts = WidenVT.getVectorNumElements();
      SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
      // Use the original element count so we don't do more scalar opts than
      // necessary.
      unsigned MinElts = VT.getVectorNumElements();
      for (unsigned i = 0; i < MinElts; ++i) {
        SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In,
                                  DAG.getIntPtrConstant(i, dl));
        Ops[i] = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Val);
      }
      Results.push_back(DAG.getBuildVector(WidenVT, dl, Ops));
      return;
    }
    // With AVX512 there are some cases that can use a target specific
    // truncate node to go from 256/512 to less than 128 with zeros in the
    // upper elements of the 128 bit result.
    if (Subtarget.hasAVX512() && isTypeLegal(InVT)) {
      // We can use VTRUNC directly for 256 bits with VLX, or for any 512-bit
      // input.
      if ((InBits == 256 && Subtarget.hasVLX()) || InBits == 512) {
        Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In));
        return;
      }
      // There's one case we can widen to 512 bits and use VTRUNC.
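      // Sketch of that case, e.g. for (v4i8 trunc (v4i64 x)):
      //   t0: v8i64 = concat_vectors x, undef
      //   t1: v16i8 = X86ISD::VTRUNC t0
      // The low 4 bytes of t1 hold the result and, per the comment above,
      // the remaining elements of the 128-bit result are zero.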
if (InVT == MVT::v4i64 && VT == MVT::v4i8 && isTypeLegal(MVT::v8i64)) { In = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i64, In, DAG.getUNDEF(MVT::v4i64)); Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In)); return; } } return; } case ISD::SIGN_EXTEND_VECTOR_INREG: { if (ExperimentalVectorWideningLegalization) return; EVT VT = N->getValueType(0); SDValue In = N->getOperand(0); EVT InVT = In.getValueType(); if (!Subtarget.hasSSE41() && VT == MVT::v4i64 && (InVT == MVT::v16i16 || InVT == MVT::v32i8)) { // Custom split this so we can extend i8/i16->i32 invec. This is better // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting // we allow the sra from the extend to i32 to be shared by the split. EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements() / 2); MVT ExtendVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements()); In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT, In, DAG.getIntPtrConstant(0, dl)); In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In); // Fill a vector with sign bits for each element. SDValue Zero = DAG.getConstant(0, dl, ExtendVT); SDValue SignBits = DAG.getSetCC(dl, ExtendVT, Zero, In, ISD::SETGT); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Create an unpackl and unpackh to interleave the sign bits then bitcast // to vXi64. SDValue Lo = getUnpackl(DAG, dl, ExtendVT, In, SignBits); Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); SDValue Hi = getUnpackh(DAG, dl, ExtendVT, In, SignBits); Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); Results.push_back(Res); return; } return; } case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: { if (!ExperimentalVectorWideningLegalization) return; EVT VT = N->getValueType(0); SDValue In = N->getOperand(0); EVT InVT = In.getValueType(); if (!Subtarget.hasSSE41() && VT == MVT::v4i64 && (InVT == MVT::v4i16 || InVT == MVT::v4i8)) { // Custom split this so we can extend i8/i16->i32 invec. This is better // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting // we allow the sra from the extend to i32 to be shared by the split. In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In); // Fill a vector with sign bits for each element. SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32); SDValue SignBits = DAG.getSetCC(dl, MVT::v4i32, Zero, In, ISD::SETGT); // Create an unpackl and unpackh to interleave the sign bits then bitcast // to v2i64. SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, {0, 4, 1, 5}); Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo); SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, {2, 6, 3, 7}); Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); Results.push_back(Res); return; } if ((VT == MVT::v16i32 || VT == MVT::v8i64) && InVT.is128BitVector()) { // Perform custom splitting instead of the two stage extend we would get // by default. EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); assert(isTypeLegal(LoVT) && "Split VT not legal?"); bool IsSigned = N->getOpcode() == ISD::SIGN_EXTEND; SDValue Lo = getExtendInVec(IsSigned, dl, LoVT, In, DAG); // We need to shift the input over by half the number of elements. 
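      // e.g. for a v4i32 input <a, b, c, d>: Lo extends <a, b> in place, and
      // Hi first moves <c, d> into the low half using the shuffle mask
      // <2, 3, undef, undef> built below, then extends that.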
unsigned NumElts = InVT.getVectorNumElements();
      unsigned HalfNumElts = NumElts / 2;
      SmallVector<int, 16> ShufMask(NumElts, SM_SentinelUndef);
      for (unsigned i = 0; i != HalfNumElts; ++i)
        ShufMask[i] = i + HalfNumElts;

      SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
      Hi = getExtendInVec(IsSigned, dl, HiVT, Hi, DAG);

      SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
      Results.push_back(Res);
    }
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
    EVT VT = N->getValueType(0);
    SDValue Src = N->getOperand(0);
    EVT SrcVT = Src.getValueType();

    // Promote these manually to avoid over-promotion to v2i64. Type
    // legalization will revisit the v2i32 operation for more cleanup.
    if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
        getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
      // AVX512DQ provides instructions that produce a v2i64 result.
      if (Subtarget.hasDQI())
        return;

      SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
      Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
                                                          : ISD::AssertSext,
                        dl, MVT::v2i32, Res,
                        DAG.getValueType(VT.getVectorElementType()));
      Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
      Results.push_back(Res);
      return;
    }

    if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
      if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
        return;

      // Try to create a 128 bit vector, but don't exceed a 32 bit element.
      unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
      MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth),
                                       VT.getVectorNumElements());
      SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);

      // Preserve what we know about the size of the original result. Except
      // when the result is v2i32 since we can't widen the assert.
      if (PromoteVT != MVT::v2i32)
        Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
                                                            : ISD::AssertSext,
                          dl, PromoteVT, Res,
                          DAG.getValueType(VT.getVectorElementType()));

      // Truncate back to the original width.
      Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);

      // Now widen to 128 bits.
      unsigned NumConcats = 128 / VT.getSizeInBits();
      MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
                                      VT.getVectorNumElements() * NumConcats);
      SmallVector<SDValue, 4> ConcatOps(NumConcats, DAG.getUNDEF(VT));
      ConcatOps[0] = Res;
      Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
      Results.push_back(Res);
      return;
    }

    if (VT == MVT::v2i32) {
      assert((IsSigned || Subtarget.hasAVX512()) &&
             "Can only handle signed conversion without AVX512");
      assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
      bool Widenv2i32 =
          getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector;
      if (Src.getValueType() == MVT::v2f64) {
        unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
        if (!IsSigned && !Subtarget.hasVLX()) {
          // If v2i32 is widened, we can defer to the generic legalizer.
          if (Widenv2i32)
            return;
          // Custom widen by doubling to a legal vector width. Isel will
          // further widen to v8f64.
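          // e.g. (v2i32 fp_to_uint (v2f64 x)) becomes
          // (v4i32 fp_to_uint (v4f64 concat_vectors(x, undef))); without
          // VLX, isel then widens that once more to operate on v8f64.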
Opc = ISD::FP_TO_UINT;
          Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, Src,
                            DAG.getUNDEF(MVT::v2f64));
        }
        SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
        if (!Widenv2i32)
          Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
                            DAG.getIntPtrConstant(0, dl));
        Results.push_back(Res);
        return;
      }
      if (SrcVT == MVT::v2f32 &&
          getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
        SDValue Idx = DAG.getIntPtrConstant(0, dl);
        SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
                                  DAG.getUNDEF(MVT::v2f32));
        Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
                          MVT::v4i32, Res);
        Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
        Results.push_back(Res);
        return;
      }

      // The FP_TO_INTHelper below only handles f32/f64/f80 scalar inputs,
      // so early out here.
      return;
    }

    if (Subtarget.hasDQI() && VT == MVT::i64 &&
        (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
      assert(!Subtarget.is64Bit() && "i64 should be legal");
      unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
      // Using a 256-bit input here to guarantee 128-bit input for f32 case.
      // TODO: Use 128-bit vectors for f64 case?
      // TODO: Use 128-bit vectors for f32 by using CVTTP2SI/CVTTP2UI.
      MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts);
      MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), NumElts);

      SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
      SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
                                DAG.getConstantFP(0.0, dl, VecInVT), Src,
                                ZeroIdx);
      Res = DAG.getNode(N->getOpcode(), SDLoc(N), VecVT, Res);
      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx);
      Results.push_back(Res);
      return;
    }

    std::pair<SDValue, SDValue> Vals =
        FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/true);
    SDValue FIST = Vals.first, StackSlot = Vals.second;
    if (FIST.getNode()) {
      // Return a load from the stack slot.
      if (StackSlot.getNode())
        Results.push_back(
            DAG.getLoad(VT, dl, FIST, StackSlot, MachinePointerInfo()));
      else
        Results.push_back(FIST);
    }
    return;
  }
  case ISD::SINT_TO_FP: {
    assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
    SDValue Src = N->getOperand(0);
    if (N->getValueType(0) != MVT::v2f32 || Src.getValueType() != MVT::v2i64)
      return;
    Results.push_back(DAG.getNode(X86ISD::CVTSI2P, dl, MVT::v4f32, Src));
    return;
  }
  case ISD::UINT_TO_FP: {
    assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
    EVT VT = N->getValueType(0);
    if (VT != MVT::v2f32)
      return;
    SDValue Src = N->getOperand(0);
    EVT SrcVT = Src.getValueType();
    if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
      Results.push_back(DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v4f32, Src));
      return;
    }
    if (SrcVT != MVT::v2i32)
      return;
    SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
    SDValue VBias =
        DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::v2f64);
    SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
                             DAG.getBitcast(MVT::v2i64, VBias));
    Or = DAG.getBitcast(MVT::v2f64, Or);
    // TODO: Are there any fast-math-flags to propagate here?
    SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
    Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
    return;
  }
  case ISD::FP_ROUND: {
    if (!isTypeLegal(N->getOperand(0).getValueType()))
      return;
    SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
    Results.push_back(V);
    return;
  }
  case ISD::FP_EXTEND: {
    // Right now, only MVT::v2f32 has OperationAction for FP_EXTEND.
    // No other ValueType for FP_EXTEND should reach this point.
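    // (A note on the v2i32 UINT_TO_FP expansion above: VBias is the bit
    // pattern 0x4330000000000000, i.e. the double 2^52. OR-ing the
    // zero-extended 32-bit integer into its low mantissa bits yields exactly
    // the double 2^52 + x, since any uint32 fits in the 52-bit mantissa, and
    // the fsub then recovers x as a double. E.g. for x = 5 the OR produces
    // the bits 0x4330000000000005 == 2^52 + 5, and (2^52 + 5) - 2^52 == 5.0.)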
    assert(N->getValueType(0) == MVT::v2f32 &&
           "Do not know how to legalize this Node");
    return;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      llvm_unreachable("Do not know how to custom type "
                       "legalize this intrinsic operation!");
    case Intrinsic::x86_rdtsc:
      return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
                                     Results);
    case Intrinsic::x86_rdtscp:
      return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
                                     Results);
    case Intrinsic::x86_rdpmc:
      return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
    case Intrinsic::x86_xgetbv:
      return getExtendedControlRegister(N, dl, DAG, Subtarget, Results);
    }
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG))
      Results.push_back(V);
    return;
  }
  case ISD::READCYCLECOUNTER: {
    return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
                                   Results);
  }
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
    EVT T = N->getValueType(0);
    assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
    bool Regs64bit = T == MVT::i128;
    MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
    SDValue cpInL, cpInH;
    cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
                        DAG.getConstant(0, dl, HalfT));
    cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
                        DAG.getConstant(1, dl, HalfT));
    cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
                             Regs64bit ? X86::RAX : X86::EAX, cpInL, SDValue());
    cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
                             Regs64bit ? X86::RDX : X86::EDX, cpInH,
                             cpInL.getValue(1));
    SDValue swapInL, swapInH;
    swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
                          DAG.getConstant(0, dl, HalfT));
    swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
                          DAG.getConstant(1, dl, HalfT));
    swapInH = DAG.getCopyToReg(cpInH.getValue(0), dl,
                               Regs64bit ? X86::RCX : X86::ECX, swapInH,
                               cpInH.getValue(1));
    // If the current function needs the base pointer, RBX, we shouldn't use
    // cmpxchg directly. The lowering of that instruction clobbers that
    // register, and since RBX will be a reserved register the register
    // allocator will not make sure its value is properly saved and restored
    // around this live-range.
    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
    SDValue Result;
    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
    unsigned BasePtr = TRI->getBaseRegister();
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
        (BasePtr == X86::RBX || BasePtr == X86::EBX)) {
      // ISel prefers the LCMPXCHG64 variant.
      // If that assert breaks, that means it is not the case anymore,
      // and we need to teach LCMPXCHG8_SAVE_EBX_DAG how to save RBX,
      // not just EBX. This is a matter of accepting i64 input for that
      // pseudo, and restoring into the register of the right width
      // in the expand pseudo. Everything else should just work.
      assert(((Regs64bit == (BasePtr == X86::RBX)) || BasePtr == X86::EBX) &&
             "Saving only half of the RBX");
      unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_SAVE_RBX_DAG
                                  : X86ISD::LCMPXCHG8_SAVE_EBX_DAG;
      SDValue RBXSave = DAG.getCopyFromReg(swapInH.getValue(0), dl,
                                           Regs64bit ? X86::RBX : X86::EBX,
                                           HalfT, swapInH.getValue(1));
      SDValue Ops[] = {/*Chain*/ RBXSave.getValue(1), N->getOperand(1), swapInL,
                       RBXSave,
                       /*Glue*/ RBXSave.getValue(2)};
      Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
    } else {
      unsigned Opcode = Regs64bit ?
X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG; swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl, Regs64bit ? X86::RBX : X86::EBX, swapInL, swapInH.getValue(1)); SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1), swapInL.getValue(1)}; Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO); } SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, Regs64bit ? X86::RAX : X86::EAX, HalfT, Result.getValue(1)); SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, Regs64bit ? X86::RDX : X86::EDX, HalfT, cpOutL.getValue(2)); SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS, MVT::i32, cpOutH.getValue(2)); SDValue Success = getSETCC(X86::COND_E, EFLAGS, dl, DAG); Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1)); Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF)); Results.push_back(Success); Results.push_back(EFLAGS.getValue(1)); return; } case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: case ISD::ATOMIC_LOAD_MIN: case ISD::ATOMIC_LOAD_MAX: case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_LOAD: { // Delegate to generic TypeLegalization. Situations we can really handle // should have already been dealt with by AtomicExpandPass.cpp. break; } case ISD::BITCAST: { assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); EVT DstVT = N->getValueType(0); EVT SrcVT = N->getOperand(0).getValueType(); // If this is a bitcast from a v64i1 k-register to a i64 on a 32-bit target // we can split using the k-register rather than memory. if (SrcVT == MVT::v64i1 && DstVT == MVT::i64 && Subtarget.hasBWI()) { assert(!Subtarget.is64Bit() && "Expected 32-bit mode"); SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); Lo = DAG.getBitcast(MVT::i32, Lo); Hi = DAG.getBitcast(MVT::i32, Hi); SDValue Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); Results.push_back(Res); return; } // Custom splitting for BWI types when AVX512F is available but BWI isn't. if ((DstVT == MVT::v32i16 || DstVT == MVT::v64i8) && SrcVT.isVector() && isTypeLegal(SrcVT)) { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); MVT CastVT = (DstVT == MVT::v32i16) ? 
MVT::v16i16 : MVT::v32i8;
      Lo = DAG.getBitcast(CastVT, Lo);
      Hi = DAG.getBitcast(CastVT, Hi);
      SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi);
      Results.push_back(Res);
      return;
    }

    if (SrcVT != MVT::f64 ||
        (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8) ||
        getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector)
      return;

    unsigned NumElts = DstVT.getVectorNumElements();
    EVT SVT = DstVT.getVectorElementType();
    EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
    SDValue Res;
    Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0));
    Res = DAG.getBitcast(WiderVT, Res);
    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
                      DAG.getIntPtrConstant(0, dl));
    Results.push_back(Res);
    return;
  }
  case ISD::MGATHER: {
    EVT VT = N->getValueType(0);
    if (VT == MVT::v2f32 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
      auto *Gather = cast<MaskedGatherSDNode>(N);
      SDValue Index = Gather->getIndex();
      if (Index.getValueType() != MVT::v2i64)
        return;
      SDValue Mask = Gather->getMask();
      assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
      SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
                                     Gather->getPassThru(),
                                     DAG.getUNDEF(MVT::v2f32));
      if (!Subtarget.hasVLX()) {
        // We need to widen the mask, but the instruction will only use 2
        // of its elements. So we can use undef.
        Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
                           DAG.getUNDEF(MVT::v2i1));
        Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
      }
      SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
                        Gather->getBasePtr(), Index, Gather->getScale() };
      SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
          DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl,
          Gather->getMemoryVT(), Gather->getMemOperand());
      Results.push_back(Res);
      Results.push_back(Res.getValue(2));
      return;
    }
    if (VT == MVT::v2i32) {
      auto *Gather = cast<MaskedGatherSDNode>(N);
      SDValue Index = Gather->getIndex();
      SDValue Mask = Gather->getMask();
      assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
      SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
                                     Gather->getPassThru(),
                                     DAG.getUNDEF(MVT::v2i32));
      // If the index is v2i64 we can use it directly.
      if (Index.getValueType() == MVT::v2i64 &&
          (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
        if (!Subtarget.hasVLX()) {
          // We need to widen the mask, but the instruction will only use 2
          // of its elements. So we can use undef.
          Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
                             DAG.getUNDEF(MVT::v2i1));
          Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
        }
        SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
                          Gather->getBasePtr(), Index, Gather->getScale() };
        SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
            DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops,
            dl, Gather->getMemoryVT(), Gather->getMemOperand());
        SDValue Chain = Res.getValue(2);
        if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
          Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
                            DAG.getIntPtrConstant(0, dl));
        Results.push_back(Res);
        Results.push_back(Chain);
        return;
      }
      if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
        EVT IndexVT = Index.getValueType();
        EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
                                          IndexVT.getScalarType(), 4);
        // Otherwise we need to custom widen everything to avoid promotion.
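        // The mask is padded with zeroes rather than undef: the extra lanes
        // of the widened gather must stay inactive so that they neither
        // touch memory nor clobber the pass-through elements.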
        Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
                            DAG.getUNDEF(IndexVT));
        Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
                           DAG.getConstant(0, dl, MVT::v2i1));
        SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
                          Gather->getBasePtr(), Index, Gather->getScale() };
        SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
                                          Gather->getMemoryVT(), dl, Ops,
                                          Gather->getMemOperand());
        SDValue Chain = Res.getValue(1);
        if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
          Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
                            DAG.getIntPtrConstant(0, dl));
        Results.push_back(Res);
        Results.push_back(Chain);
        return;
      }
    }
    return;
  }
  case ISD::LOAD: {
    // Use an f64/i64 load and a scalar_to_vector for v2f32/v2i32 loads. This
    // avoids scalarizing in 32-bit mode. In 64-bit mode this avoids an
    // int->fp cast since type legalization will try to use an i64 load.
    MVT VT = N->getSimpleValueType(0);
    assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT");
    if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
      return;
    if (!ISD::isNON_EXTLoad(N))
      return;
    auto *Ld = cast<LoadSDNode>(N);
    MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
    SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
                              Ld->getPointerInfo(), Ld->getAlignment(),
                              Ld->getMemOperand()->getFlags());
    SDValue Chain = Res.getValue(1);
    MVT WideVT = MVT::getVectorVT(LdVT, 2);
    Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
    MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
                                  VT.getVectorNumElements() * 2);
    Res = DAG.getBitcast(CastVT, Res);
    Results.push_back(Res);
    Results.push_back(Chain);
    return;
  }
  }
}

const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((X86ISD::NodeType)Opcode) {
  case X86ISD::FIRST_NUMBER: break;
  case X86ISD::BSF: return "X86ISD::BSF";
  case X86ISD::BSR: return "X86ISD::BSR";
  case X86ISD::SHLD: return "X86ISD::SHLD";
  case X86ISD::SHRD: return "X86ISD::SHRD";
  case X86ISD::FAND: return "X86ISD::FAND";
  case X86ISD::FANDN: return "X86ISD::FANDN";
  case X86ISD::FOR: return "X86ISD::FOR";
  case X86ISD::FXOR: return "X86ISD::FXOR";
  case X86ISD::FILD: return "X86ISD::FILD";
  case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD: return "X86ISD::FLD";
  case X86ISD::FST: return "X86ISD::FST";
  case X86ISD::CALL: return "X86ISD::CALL";
  case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
  case X86ISD::RDTSCP_DAG: return "X86ISD::RDTSCP_DAG";
  case X86ISD::RDPMC_DAG: return "X86ISD::RDPMC_DAG";
  case X86ISD::BT: return "X86ISD::BT";
  case X86ISD::CMP: return "X86ISD::CMP";
  case X86ISD::COMI: return "X86ISD::COMI";
  case X86ISD::UCOMI: return "X86ISD::UCOMI";
  case X86ISD::CMPM: return "X86ISD::CMPM";
  case X86ISD::CMPM_RND: return "X86ISD::CMPM_RND";
  case X86ISD::SETCC: return "X86ISD::SETCC";
  case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
  case X86ISD::FSETCC: return "X86ISD::FSETCC";
  case X86ISD::FSETCCM: return "X86ISD::FSETCCM";
  case X86ISD::FSETCCM_RND: return "X86ISD::FSETCCM_RND";
  case X86ISD::CMOV: return "X86ISD::CMOV";
  case X86ISD::BRCOND: return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
  case X86ISD::IRET: return "X86ISD::IRET";
  case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
  case
X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper: return "X86ISD::Wrapper";
  case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
  case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q";
  case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W";
  case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D";
  case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
  case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
  case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
  case X86ISD::PINSRB: return "X86ISD::PINSRB";
  case X86ISD::PINSRW: return "X86ISD::PINSRW";
  case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
  case X86ISD::ANDNP: return "X86ISD::ANDNP";
  case X86ISD::BLENDI: return "X86ISD::BLENDI";
  case X86ISD::BLENDV: return "X86ISD::BLENDV";
  case X86ISD::HADD: return "X86ISD::HADD";
  case X86ISD::HSUB: return "X86ISD::HSUB";
  case X86ISD::FHADD: return "X86ISD::FHADD";
  case X86ISD::FHSUB: return "X86ISD::FHSUB";
  case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
  case X86ISD::FMAX: return "X86ISD::FMAX";
  case X86ISD::FMAXS: return "X86ISD::FMAXS";
  case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
  case X86ISD::FMAXS_RND: return "X86ISD::FMAXS_RND";
  case X86ISD::FMIN: return "X86ISD::FMIN";
  case X86ISD::FMINS: return "X86ISD::FMINS";
  case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND";
  case X86ISD::FMINS_RND: return "X86ISD::FMINS_RND";
  case X86ISD::FMAXC: return "X86ISD::FMAXC";
  case X86ISD::FMINC: return "X86ISD::FMINC";
  case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
  case X86ISD::FRCP: return "X86ISD::FRCP";
  case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
  case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
  case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
  case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
  case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
  case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP";
  case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP";
  case X86ISD::EH_SJLJ_SETUP_DISPATCH:
    return "X86ISD::EH_SJLJ_SETUP_DISPATCH";
  case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
  case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
  case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
  case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r";
  case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
  case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
  case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG";
  case X86ISD::LCMPXCHG8_SAVE_EBX_DAG:
    return "X86ISD::LCMPXCHG8_SAVE_EBX_DAG";
  case X86ISD::LCMPXCHG16_SAVE_RBX_DAG:
    return "X86ISD::LCMPXCHG16_SAVE_RBX_DAG";
  case X86ISD::LADD: return "X86ISD::LADD";
  case X86ISD::LSUB: return "X86ISD::LSUB";
  case X86ISD::LOR: return "X86ISD::LOR";
  case X86ISD::LXOR: return "X86ISD::LXOR";
  case X86ISD::LAND: return "X86ISD::LAND";
  case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
  case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
  case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
  case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
  case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
  case X86ISD::VMTRUNC: return "X86ISD::VMTRUNC";
  case X86ISD::VMTRUNCS: return "X86ISD::VMTRUNCS";
  case X86ISD::VMTRUNCUS: return "X86ISD::VMTRUNCUS";
  case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
  case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
  case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
  case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
  case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
  case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
  case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
  case X86ISD::VFPROUND: return
"X86ISD::VFPROUND"; case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND"; case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND"; case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; case X86ISD::VSRL: return "X86ISD::VSRL"; case X86ISD::VSRA: return "X86ISD::VSRA"; case X86ISD::VSHLI: return "X86ISD::VSHLI"; case X86ISD::VSRLI: return "X86ISD::VSRLI"; case X86ISD::VSRAI: return "X86ISD::VSRAI"; case X86ISD::VSHLV: return "X86ISD::VSHLV"; case X86ISD::VSRLV: return "X86ISD::VSRLV"; case X86ISD::VSRAV: return "X86ISD::VSRAV"; case X86ISD::VROTLI: return "X86ISD::VROTLI"; case X86ISD::VROTRI: return "X86ISD::VROTRI"; case X86ISD::VPPERM: return "X86ISD::VPPERM"; case X86ISD::CMPP: return "X86ISD::CMPP"; case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; case X86ISD::PHMINPOS: return "X86ISD::PHMINPOS"; case X86ISD::ADD: return "X86ISD::ADD"; case X86ISD::SUB: return "X86ISD::SUB"; case X86ISD::ADC: return "X86ISD::ADC"; case X86ISD::SBB: return "X86ISD::SBB"; case X86ISD::SMUL: return "X86ISD::SMUL"; case X86ISD::UMUL: return "X86ISD::UMUL"; case X86ISD::OR: return "X86ISD::OR"; case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; case X86ISD::BEXTR: return "X86ISD::BEXTR"; case X86ISD::BZHI: return "X86ISD::BZHI"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::MOVMSK: return "X86ISD::MOVMSK"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; case X86ISD::KORTEST: return "X86ISD::KORTEST"; case X86ISD::KTEST: return "X86ISD::KTEST"; case X86ISD::KADD: return "X86ISD::KADD"; case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL"; case X86ISD::KSHIFTR: return "X86ISD::KSHIFTR"; case X86ISD::PACKSS: return "X86ISD::PACKSS"; case X86ISD::PACKUS: return "X86ISD::PACKUS"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::VALIGN: return "X86ISD::VALIGN"; case X86ISD::VSHLD: return "X86ISD::VSHLD"; case X86ISD::VSHRD: return "X86ISD::VSHRD"; case X86ISD::VSHLDV: return "X86ISD::VSHLDV"; case X86ISD::VSHRDV: return "X86ISD::VSHRDV"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; case X86ISD::SHUFP: return "X86ISD::SHUFP"; case X86ISD::SHUF128: return "X86ISD::SHUF128"; case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP"; case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP"; case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP"; case X86ISD::MOVSD: return "X86ISD::MOVSD"; case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST"; case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV"; case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; case X86ISD::VPERMV: return "X86ISD::VPERMV"; case X86ISD::VPERMV3: return "X86ISD::VPERMV3"; case X86ISD::VPERMI: return "X86ISD::VPERMI"; case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG"; case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM"; case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS"; case X86ISD::VRANGE: return "X86ISD::VRANGE"; 
case X86ISD::VRANGE_RND: return "X86ISD::VRANGE_RND"; case X86ISD::VRANGES: return "X86ISD::VRANGES"; case X86ISD::VRANGES_RND: return "X86ISD::VRANGES_RND"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::PMULDQ: return "X86ISD::PMULDQ"; case X86ISD::PSADBW: return "X86ISD::PSADBW"; case X86ISD::DBPSADBW: return "X86ISD::DBPSADBW"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER"; case X86ISD::MFENCE: return "X86ISD::MFENCE"; case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; case X86ISD::SAHF: return "X86ISD::SAHF"; case X86ISD::RDRAND: return "X86ISD::RDRAND"; case X86ISD::RDSEED: return "X86ISD::RDSEED"; case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW"; case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD"; case X86ISD::VPSHA: return "X86ISD::VPSHA"; case X86ISD::VPSHL: return "X86ISD::VPSHL"; case X86ISD::VPCOM: return "X86ISD::VPCOM"; case X86ISD::VPCOMU: return "X86ISD::VPCOMU"; case X86ISD::VPERMIL2: return "X86ISD::VPERMIL2"; case X86ISD::FMSUB: return "X86ISD::FMSUB"; case X86ISD::FNMADD: return "X86ISD::FNMADD"; case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND"; case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND"; case X86ISD::FMSUB_RND: return "X86ISD::FMSUB_RND"; case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND"; case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND"; case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND"; case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H"; case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L"; case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE"; case X86ISD::VRNDSCALE_RND: return "X86ISD::VRNDSCALE_RND"; case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES"; case X86ISD::VRNDSCALES_RND: return "X86ISD::VRNDSCALES_RND"; case X86ISD::VREDUCE: return "X86ISD::VREDUCE"; case X86ISD::VREDUCE_RND: return "X86ISD::VREDUCE_RND"; case X86ISD::VREDUCES: return "X86ISD::VREDUCES"; case X86ISD::VREDUCES_RND: return "X86ISD::VREDUCES_RND"; case X86ISD::VGETMANT: return "X86ISD::VGETMANT"; case X86ISD::VGETMANT_RND: return "X86ISD::VGETMANT_RND"; case X86ISD::VGETMANTS: return "X86ISD::VGETMANTS"; case X86ISD::VGETMANTS_RND: return "X86ISD::VGETMANTS_RND"; case X86ISD::PCMPESTR: return "X86ISD::PCMPESTR"; case X86ISD::PCMPISTR: return "X86ISD::PCMPISTR"; case X86ISD::XTEST: return "X86ISD::XTEST"; case X86ISD::COMPRESS: return "X86ISD::COMPRESS"; case X86ISD::EXPAND: return "X86ISD::EXPAND"; case X86ISD::SELECTS: return "X86ISD::SELECTS"; case X86ISD::ADDSUB: return "X86ISD::ADDSUB"; case X86ISD::RCP14: return "X86ISD::RCP14"; case X86ISD::RCP14S: return "X86ISD::RCP14S"; case X86ISD::RCP28: return "X86ISD::RCP28"; case X86ISD::RCP28S: return "X86ISD::RCP28S"; case X86ISD::EXP2: return "X86ISD::EXP2"; case X86ISD::RSQRT14: return "X86ISD::RSQRT14"; case X86ISD::RSQRT14S: return "X86ISD::RSQRT14S"; case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S"; case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND"; case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND"; case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND"; case 
X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND"; case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND"; case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND"; case X86ISD::SCALEF: return "X86ISD::SCALEF"; case X86ISD::SCALEFS: return "X86ISD::SCALEFS"; case X86ISD::AVG: return "X86ISD::AVG"; case X86ISD::MULHRS: return "X86ISD::MULHRS"; case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI"; case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI"; case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI"; case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI"; case X86ISD::CVTTP2SI_RND: return "X86ISD::CVTTP2SI_RND"; case X86ISD::CVTTP2UI_RND: return "X86ISD::CVTTP2UI_RND"; case X86ISD::CVTTS2SI: return "X86ISD::CVTTS2SI"; case X86ISD::CVTTS2UI: return "X86ISD::CVTTS2UI"; case X86ISD::CVTTS2SI_RND: return "X86ISD::CVTTS2SI_RND"; case X86ISD::CVTTS2UI_RND: return "X86ISD::CVTTS2UI_RND"; case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P"; case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P"; case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS"; case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS"; case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT"; case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND"; case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND"; case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH"; case X86ISD::MCVTPS2PH: return "X86ISD::MCVTPS2PH"; case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS"; case X86ISD::CVTPH2PS_RND: return "X86ISD::CVTPH2PS_RND"; case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI"; case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI"; case X86ISD::MCVTP2SI: return "X86ISD::MCVTP2SI"; case X86ISD::MCVTP2UI: return "X86ISD::MCVTP2UI"; case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND"; case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND"; case X86ISD::CVTS2SI: return "X86ISD::CVTS2SI"; case X86ISD::CVTS2UI: return "X86ISD::CVTS2UI"; case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND"; case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; case X86ISD::LWPINS: return "X86ISD::LWPINS"; case X86ISD::MGATHER: return "X86ISD::MGATHER"; case X86ISD::MSCATTER: return "X86ISD::MSCATTER"; case X86ISD::VPDPBUSD: return "X86ISD::VPDPBUSD"; case X86ISD::VPDPBUSDS: return "X86ISD::VPDPBUSDS"; case X86ISD::VPDPWSSD: return "X86ISD::VPDPWSSD"; case X86ISD::VPDPWSSDS: return "X86ISD::VPDPWSSDS"; case X86ISD::VPSHUFBITQMB: return "X86ISD::VPSHUFBITQMB"; case X86ISD::GF2P8MULB: return "X86ISD::GF2P8MULB"; case X86ISD::GF2P8AFFINEQB: return "X86ISD::GF2P8AFFINEQB"; case X86ISD::GF2P8AFFINEINVQB: return "X86ISD::GF2P8AFFINEINVQB"; case X86ISD::NT_CALL: return "X86ISD::NT_CALL"; case X86ISD::NT_BRIND: return "X86ISD::NT_BRIND"; case X86ISD::UMWAIT: return "X86ISD::UMWAIT"; case X86ISD::TPAUSE: return "X86ISD::TPAUSE"; } return nullptr; } /// Return true if the addressing mode represented by AM is legal for this /// target, for a load/store of the specified type. bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { // X86 supports extremely general addressing modes. CodeModel::Model M = getTargetMachine().getCodeModel(); // X86 allows a sign-extended 32-bit immediate field as a displacement. 
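  //
  // The general addressing form being validated here is
  //   BaseGV + BaseReg + IndexReg * {1,2,4,8} + Disp,
  // e.g. "movl 16(%rdi,%rcx,4), %eax" addresses base + index*4 + 16. Scales
  // of 3, 5 and 9 are additionally accepted below by encoding the index
  // register as both base and scaled index (index + index*{2,4,8}), which is
  // why they are only legal while no base register is in use.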
if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr)) return false; if (AM.BaseGV) { unsigned GVFlags = Subtarget.classifyGlobalReference(AM.BaseGV); // If a reference to this global requires an extra load, we can't fold it. if (isGlobalStubReference(GVFlags)) return false; // If BaseGV requires a register for the PIC base, we cannot also have a // BaseReg specified. if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags)) return false; // If lower 4G is not available, then we must use rip-relative addressing. if ((M != CodeModel::Small || isPositionIndependent()) && Subtarget.is64Bit() && (AM.BaseOffs || AM.Scale > 1)) return false; } switch (AM.Scale) { case 0: case 1: case 2: case 4: case 8: // These scales always work. break; case 3: case 5: case 9: // These scales are formed with basereg+scalereg. Only accept if there is // no basereg yet. if (AM.HasBaseReg) return false; break; default: // Other stuff never works. return false; } return true; } bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { unsigned Bits = Ty->getScalarSizeInBits(); // 8-bit shifts are always expensive, but versions with a scalar amount aren't // particularly cheaper than those without. if (Bits == 8) return false; // XOP has v16i8/v8i16/v4i32/v2i64 variable vector shifts. if (Subtarget.hasXOP() && Ty->getPrimitiveSizeInBits() == 128 && (Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64)) return false; // AVX2 has vpsllv[dq] instructions (and other shifts) that make variable // shifts just as cheap as scalar ones. if (Subtarget.hasAVX2() && (Bits == 32 || Bits == 64)) return false; // AVX512BW has shifts such as vpsllvw. if (Subtarget.hasBWI() && Bits == 16) return false; // Otherwise, it's significantly cheaper to shift by a scalar amount than by a // fully general vector. return true; } bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); return NumBits1 > NumBits2; } bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; if (!isTypeLegal(EVT::getEVT(Ty1))) return false; assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); // Assuming the caller doesn't have a zeroext or signext return parameter, // truncation all the way down to i1 is valid. return true; } bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const { return isInt<32>(Imm); } bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const { // Can also use sub to handle negated immediates. return isInt<32>(Imm); } bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const { return isInt<32>(Imm); } bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); return NumBits1 > NumBits2; } bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. 
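  // For example, "movl %edi, %eax" already clears bits 63:32 of %rax, so no
  // separate instruction is needed to zero-extend an i32 result to i64.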
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit();
}

bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  EVT VT1 = Val.getValueType();
  if (isZExtFree(VT1, VT2))
    return true;

  if (Val.getOpcode() != ISD::LOAD)
    return false;

  if (!VT1.isSimple() || !VT1.isInteger() ||
      !VT2.isSimple() || !VT2.isInteger())
    return false;

  switch (VT1.getSimpleVT().SimpleTy) {
  default: break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    // X86 has 8, 16, and 32-bit zero-extending loads.
    return true;
  }

  return false;
}

bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  EVT SrcVT = ExtVal.getOperand(0).getValueType();

  // There is no extending load for vXi1.
  if (SrcVT.getScalarType() == MVT::i1)
    return false;

  return true;
}

bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  if (!Subtarget.hasAnyFMA())
    return false;

  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
  // i16 instructions are longer (0x66 prefix) and potentially slower.
  return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}

/// Targets can use this to indicate that they only support *some*
/// VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool X86TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
  if (!VT.isSimple())
    return false;

  // Not for i1 vectors
  if (VT.getSimpleVT().getScalarType() == MVT::i1)
    return false;

  // Very little shuffling can be done for 64-bit vectors right now.
  if (VT.getSimpleVT().getSizeInBits() == 64)
    return false;

  // We only care that the types being shuffled are legal. The lowering can
  // handle any possible shuffle mask that results.
  return isTypeLegal(VT.getSimpleVT());
}

bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
                                               EVT VT) const {
  // Don't convert an 'and' into a shuffle that we don't directly support.
  // vpblendw and vpshufb for 256-bit vectors are not available on AVX1.
  if (!Subtarget.hasAVX2())
    if (VT == MVT::v32i8 || VT == MVT::v16i16)
      return false;

  // Just delegate to the generic legality, clear masks aren't special.
  return isShuffleMaskLegal(Mask, VT);
}

bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
  // If the subtarget is using retpolines, we must not generate jump tables.
  if (Subtarget.useRetpolineIndirectBranches())
    return false;

  // Otherwise, fall back on the generic logic.
  return TargetLowering::areJTsAllowed(Fn);
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

/// Utility function to emit xbegin specifying the start of an RTM region.
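/// On a transactional abort the processor discards all transactional updates
/// and resumes at the fallback path with the abort status in EAX; on normal
/// entry XBEGIN falls through, and the expansion below materializes -1 (the
/// value of _XBEGIN_STARTED) as the result instead.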
static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, const TargetInstrInfo *TII) { DebugLoc DL = MI.getDebugLoc(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); // For the v = xbegin(), we generate // // thisMBB: // xbegin sinkMBB // // mainMBB: // s0 = -1 // // fallBB: // eax = # XABORT_DEF // s1 = eax // // sinkMBB: // v = phi(s0/mainBB, s1/fallBB) MachineBasicBlock *thisMBB = MBB; MachineFunction *MF = MBB->getParent(); MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); MF->insert(I, fallMBB); MF->insert(I, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned fallDstReg = MRI.createVirtualRegister(RC); // thisMBB: // xbegin fallMBB // # fallthrough to mainMBB // # abortion to fallMBB BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB); thisMBB->addSuccessor(mainMBB); thisMBB->addSuccessor(fallMBB); // mainMBB: // mainDstReg := -1 BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1); BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB); mainMBB->addSuccessor(sinkMBB); // fallMBB: // ; pseudo instruction to model hardware's definition from XABORT // EAX := XABORT_DEF // fallDstReg := EAX BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF)); BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg) .addReg(X86::EAX); fallMBB->addSuccessor(sinkMBB); // sinkMBB: // DstReg := phi(mainDstReg/mainBB, fallDstReg/fallBB) BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg) .addReg(mainDstReg).addMBB(mainMBB) .addReg(fallDstReg).addMBB(fallMBB); MI.eraseFromParent(); return sinkMBB; } static MachineBasicBlock *emitWRPKRU(MachineInstr &MI, MachineBasicBlock *BB, const X86Subtarget &Subtarget) { DebugLoc dl = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // insert input VAL into EAX BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) .addReg(MI.getOperand(0).getReg()); // insert zero to ECX BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::ECX); // insert zero to EDX BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::EDX); // insert WRPKRU instruction BuildMI(*BB, MI, dl, TII->get(X86::WRPKRUr)); MI.eraseFromParent(); // The pseudo is gone now. return BB; } static MachineBasicBlock *emitRDPKRU(MachineInstr &MI, MachineBasicBlock *BB, const X86Subtarget &Subtarget) { DebugLoc dl = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // insert zero to ECX BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::ECX); // insert RDPKRU instruction BuildMI(*BB, MI, dl, TII->get(X86::RDPKRUr)); BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) .addReg(X86::EAX); MI.eraseFromParent(); // The pseudo is gone now. 
return BB;
}

static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
                                      const X86Subtarget &Subtarget,
                                      unsigned Opc) {
  DebugLoc dl = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // Address into RAX/EAX, other two args into ECX, EDX.
  unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
  unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.add(MI.getOperand(i));

  unsigned ValOps = X86::AddrNumOperands;
  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
      .addReg(MI.getOperand(ValOps).getReg());
  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
      .addReg(MI.getOperand(ValOps + 1).getReg());

  // The instruction doesn't actually take any operands though.
  BuildMI(*BB, MI, dl, TII->get(Opc));

  MI.eraseFromParent(); // The pseudo is gone now.
  return BB;
}

static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB,
                                     const X86Subtarget &Subtarget) {
  DebugLoc dl = MI->getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // Address into RAX/EAX
  unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
  unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.add(MI->getOperand(i));

  // The instruction doesn't actually take any operands though.
  BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr));

  MI->eraseFromParent(); // The pseudo is gone now.
  return BB;
}

MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const {
  // Emit va_arg instruction on X86-64.

  // Operands to this pseudo-instruction:
  // 0  ) Output  : destination address (reg)
  // 1-5) Input   : va_list address (addr, i64mem)
  // 6  ) ArgSize : Size (in bytes) of vararg type
  // 7  ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
  // 8  ) Align   : Alignment of type
  // 9  ) EFLAGS (implicit-def)

  assert(MI.getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
  static_assert(X86::AddrNumOperands == 5,
                "VAARG_64 assumes 5 address operands");

  unsigned DestReg = MI.getOperand(0).getReg();
  MachineOperand &Base = MI.getOperand(1);
  MachineOperand &Scale = MI.getOperand(2);
  MachineOperand &Index = MI.getOperand(3);
  MachineOperand &Disp = MI.getOperand(4);
  MachineOperand &Segment = MI.getOperand(5);
  unsigned ArgSize = MI.getOperand(6).getImm();
  unsigned ArgMode = MI.getOperand(7).getImm();
  unsigned Align = MI.getOperand(8).getImm();

  // Memory Reference
  assert(MI.hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
  SmallVector<MachineMemOperand *, 1> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());

  // Machine Information
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
  const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
  DebugLoc DL = MI.getDebugLoc();

  // struct va_list {
  //   i32 gp_offset
  //   i32 fp_offset
  //   i64 overflow_area (address)
  //   i64 reg_save_area (address)
  // }
  // sizeof(va_list) = 24
  // alignment(va_list) = 8

  unsigned TotalNumIntRegs = 6;
  unsigned TotalNumXMMRegs = 8;
  bool UseGPOffset = (ArgMode == 1);
  bool UseFPOffset = (ArgMode == 2);
  unsigned MaxOffset = TotalNumIntRegs * 8 + (UseFPOffset ?
TotalNumXMMRegs * 16 : 0); /* Align ArgSize to a multiple of 8 */ unsigned ArgSizeA8 = (ArgSize + 7) & ~7; bool NeedsAlign = (Align > 8); MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *overflowMBB; MachineBasicBlock *offsetMBB; MachineBasicBlock *endMBB; unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB unsigned OffsetReg = 0; if (!UseGPOffset && !UseFPOffset) { // If we only pull from the overflow region, we don't create a branch. // We don't need to alter control flow. OffsetDestReg = 0; // unused OverflowDestReg = DestReg; offsetMBB = nullptr; overflowMBB = thisMBB; endMBB = thisMBB; } else { // First emit code to check if gp_offset (or fp_offset) is below the bound. // If so, pull the argument from reg_save_area. (branch to offsetMBB) // If not, pull from overflow_area. (branch to overflowMBB) // // thisMBB // | . // | . // offsetMBB overflowMBB // | . // | . // endMBB // Registers for the PHI in endMBB OffsetDestReg = MRI.createVirtualRegister(AddrRegClass); OverflowDestReg = MRI.createVirtualRegister(AddrRegClass); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); MachineFunction *MF = MBB->getParent(); overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB); offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB); endMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator MBBIter = ++MBB->getIterator(); // Insert the new basic blocks MF->insert(MBBIter, offsetMBB); MF->insert(MBBIter, overflowMBB); MF->insert(MBBIter, endMBB); // Transfer the remainder of MBB and its successor edges to endMBB. endMBB->splice(endMBB->begin(), thisMBB, std::next(MachineBasicBlock::iterator(MI)), thisMBB->end()); endMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Make offsetMBB and overflowMBB successors of thisMBB thisMBB->addSuccessor(offsetMBB); thisMBB->addSuccessor(overflowMBB); // endMBB is a successor of both offsetMBB and overflowMBB offsetMBB->addSuccessor(endMBB); overflowMBB->addSuccessor(endMBB); // Load the offset value into a register OffsetReg = MRI.createVirtualRegister(OffsetRegClass); BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg) .add(Base) .add(Scale) .add(Index) .addDisp(Disp, UseFPOffset ? 4 : 0) .add(Segment) .setMemRefs(MMOs); // Check if there is enough room left to pull this argument. BuildMI(thisMBB, DL, TII->get(X86::CMP32ri)) .addReg(OffsetReg) .addImm(MaxOffset + 8 - ArgSizeA8); // Branch to "overflowMBB" if offset >= max // Fall through to "offsetMBB" otherwise BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE))) .addMBB(overflowMBB); } // In offsetMBB, emit code to use the reg_save_area. if (offsetMBB) { assert(OffsetReg != 0); // Read the reg_save_area address. unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg) .add(Base) .add(Scale) .add(Index) .addDisp(Disp, 16) .add(Segment) .setMemRefs(MMOs); // Zero-extend the offset unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) .addImm(0) .addReg(OffsetReg) .addImm(X86::sub_32bit); // Add the offset to the reg_save_area to get the final address. 
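    // In the SysV x86-64 va_list, reg_save_area holds the six GP argument
    // registers at offsets 0-47 followed by the eight XMM argument registers
    // at offsets 48-175, so reg_save_area + {gp,fp}_offset addresses the
    // saved argument directly.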
BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg) .addReg(OffsetReg64) .addReg(RegSaveReg); // Compute the offset for the next argument unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg) .addReg(OffsetReg) .addImm(UseFPOffset ? 16 : 8); // Store it back into the va_list. BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr)) .add(Base) .add(Scale) .add(Index) .addDisp(Disp, UseFPOffset ? 4 : 0) .add(Segment) .addReg(NextOffsetReg) .setMemRefs(MMOs); // Jump to endMBB BuildMI(offsetMBB, DL, TII->get(X86::JMP_1)) .addMBB(endMBB); } // // Emit code to use overflow area // // Load the overflow_area address into a register. unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg) .add(Base) .add(Scale) .add(Index) .addDisp(Disp, 8) .add(Segment) .setMemRefs(MMOs); // If we need to align it, do so. Otherwise, just copy the address // to OverflowDestReg. if (NeedsAlign) { // Align the overflow address assert(isPowerOf2_32(Align) && "Alignment must be a power of 2"); unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass); // aligned_addr = (addr + (align-1)) & ~(align-1) BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) .addReg(OverflowAddrReg) .addImm(Align-1); BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg) .addReg(TmpReg) .addImm(~(uint64_t)(Align-1)); } else { BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg) .addReg(OverflowAddrReg); } // Compute the next overflow address after this argument. // (the overflow address should be kept 8-byte aligned) unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) .addReg(OverflowDestReg) .addImm(ArgSizeA8); // Store the new overflow address. BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr)) .add(Base) .add(Scale) .add(Index) .addDisp(Disp, 8) .add(Segment) .addReg(NextAddrReg) .setMemRefs(MMOs); // If we branched, emit the PHI to the front of endMBB. if (offsetMBB) { BuildMI(*endMBB, endMBB->begin(), DL, TII->get(X86::PHI), DestReg) .addReg(OffsetDestReg).addMBB(offsetMBB) .addReg(OverflowDestReg).addMBB(overflowMBB); } // Erase the pseudo instruction MI.eraseFromParent(); return endMBB; } MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { // Emit code to save XMM registers to the stack. The ABI says that the // number of registers to save is given in %al, so it's theoretically // possible to do an indirect jump trick to avoid saving all of them, // however this code takes a simpler approach and just executes all // of the stores if %al is non-zero. It's less code, and it's probably // easier on the hardware branch predictor, and stores aren't all that // expensive anyway. // Create the new basic blocks. One block contains all the XMM stores, // and one block is the final destination regardless of whether any // stores were performed. const BasicBlock *LLVM_BB = MBB->getBasicBlock(); MachineFunction *F = MBB->getParent(); MachineFunction::iterator MBBIter = ++MBB->getIterator(); MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(MBBIter, XMMSaveMBB); F->insert(MBBIter, EndMBB); // Transfer the remainder of MBB and its successor edges to EndMBB. 
EndMBB->splice(EndMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); EndMBB->transferSuccessorsAndUpdatePHIs(MBB); // The original block will now fall through to the XMM save block. MBB->addSuccessor(XMMSaveMBB); // The XMMSaveMBB will fall through to the end block. XMMSaveMBB->addSuccessor(EndMBB); // Now add the instructions. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); unsigned CountReg = MI.getOperand(0).getReg(); int64_t RegSaveFrameIndex = MI.getOperand(1).getImm(); int64_t VarArgsFPOffset = MI.getOperand(2).getImm(); if (!Subtarget.isCallingConvWin64(F->getFunction().getCallingConv())) { // If %al is 0, branch around the XMM save block. BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB); MBB->addSuccessor(EndMBB); } // Make sure the last operand is EFLAGS, which gets clobbered by the branch // that was just emitted, but clearly shouldn't be "saved". assert((MI.getNumOperands() <= 3 || !MI.getOperand(MI.getNumOperands() - 1).isReg() || MI.getOperand(MI.getNumOperands() - 1).getReg() == X86::EFLAGS) && "Expected last argument to be EFLAGS"); unsigned MOVOpc = Subtarget.hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; // In the XMM save block, save all the XMM argument registers. for (int i = 3, e = MI.getNumOperands() - 1; i != e; ++i) { int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; MachineMemOperand *MMO = F->getMachineMemOperand( MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset), MachineMemOperand::MOStore, /*Size=*/16, /*Align=*/16); BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc)) .addFrameIndex(RegSaveFrameIndex) .addImm(/*Scale=*/1) .addReg(/*IndexReg=*/0) .addImm(/*Disp=*/Offset) .addReg(/*Segment=*/0) .addReg(MI.getOperand(i).getReg()) .addMemOperand(MMO); } MI.eraseFromParent(); // The pseudo instruction is gone now. return EndMBB; } // The EFLAGS operand of SelectItr might be missing a kill marker // because there were multiple uses of EFLAGS, and ISel didn't know // which to mark. Figure out whether SelectItr should have had a // kill marker, and set it if it should. Returns the correct kill // marker value. static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock* BB, const TargetRegisterInfo* TRI) { // Scan forward through BB for a use/def of EFLAGS. MachineBasicBlock::iterator miI(std::next(SelectItr)); for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) { const MachineInstr& mi = *miI; if (mi.readsRegister(X86::EFLAGS)) return false; if (mi.definesRegister(X86::EFLAGS)) break; // Should have kill-flag - update below. } // If we hit the end of the block, check whether EFLAGS is live into a // successor. if (miI == BB->end()) { for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), sEnd = BB->succ_end(); sItr != sEnd; ++sItr) { MachineBasicBlock* succ = *sItr; if (succ->isLiveIn(X86::EFLAGS)) return false; } } // We found a def, or hit the end of the basic block and EFLAGS wasn't live // out. SelectMI should have a kill flag on EFLAGS. SelectItr->addRegisterKilled(X86::EFLAGS, TRI); return true; } // Return true if it is OK for this CMOV pseudo-opcode to be cascaded // together with other CMOV pseudo-opcodes into a single basic-block with // conditional jump around it. 
static bool isCMOVPseudo(MachineInstr &MI) { switch (MI.getOpcode()) { case X86::CMOV_FR32: case X86::CMOV_FR64: case X86::CMOV_GR8: case X86::CMOV_GR16: case X86::CMOV_GR32: case X86::CMOV_RFP32: case X86::CMOV_RFP64: case X86::CMOV_RFP80: case X86::CMOV_VR128: case X86::CMOV_VR128X: case X86::CMOV_VR256: case X86::CMOV_VR256X: case X86::CMOV_VR512: case X86::CMOV_VK2: case X86::CMOV_VK4: case X86::CMOV_VK8: case X86::CMOV_VK16: case X86::CMOV_VK32: case X86::CMOV_VK64: return true; default: return false; } } // Helper function, which inserts PHI functions into SinkMBB: // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], // where %FalseValue(i) and %TrueValue(i) are taken from the consecutive CMOVs // in the [MIItBegin, MIItEnd) range. It returns the MachineInstrBuilder for the // last PHI inserted. static MachineInstrBuilder createPHIsForCMOVsInSinkBB( MachineBasicBlock::iterator MIItBegin, MachineBasicBlock::iterator MIItEnd, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB) { MachineFunction *MF = TrueMBB->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); DebugLoc DL = MIItBegin->getDebugLoc(); X86::CondCode CC = X86::CondCode(MIItBegin->getOperand(3).getImm()); X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); // As we are creating the PHIs, we have to be careful if there is more than // one. Later CMOVs may reference the results of earlier CMOVs, but later // PHIs have to reference the individual true/false inputs from earlier PHIs. // That also means that PHI construction must work forward from earlier to // later, and that the code must maintain a mapping from each earlier PHI's // destination register to the registers that went into that PHI. DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; MachineInstrBuilder MIB; for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { unsigned DestReg = MIIt->getOperand(0).getReg(); unsigned Op1Reg = MIIt->getOperand(1).getReg(); unsigned Op2Reg = MIIt->getOperand(2).getReg(); // If this CMOV we are generating is the opposite condition from // the jump we generated, then we have to swap the operands for the // PHI that is going to be generated. if (MIIt->getOperand(3).getImm() == OppCC) std::swap(Op1Reg, Op2Reg); if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end()) Op1Reg = RegRewriteTable[Op1Reg].first; if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end()) Op2Reg = RegRewriteTable[Op2Reg].second; MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg) .addReg(Op1Reg) .addMBB(FalseMBB) .addReg(Op2Reg) .addMBB(TrueMBB); // Add this PHI to the rewrite table. RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg); } return MIB; } // Lower cascaded selects in the form (SecondCMOV (FirstCMOV F, T, cc1), T, cc2). MachineBasicBlock * X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV, MachineInstr &SecondCascadedCMOV, MachineBasicBlock *ThisMBB) const { const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = FirstCMOV.getDebugLoc(); // We lower cascaded CMOVs such as // // (SecondCascadedCMOV (FirstCMOV F, T, cc1), T, cc2) // // to two successive branches. // // Without this, we would add a PHI between the two jumps, which ends up // creating a few copies all around.
For instance, for // // (sitofp (zext (fcmp une))) // // we would generate: // // ucomiss %xmm1, %xmm0 // movss <1.0f>, %xmm0 // movaps %xmm0, %xmm1 // jne .LBB5_2 // xorps %xmm1, %xmm1 // .LBB5_2: // jp .LBB5_4 // movaps %xmm1, %xmm0 // .LBB5_4: // retq // // because this custom-inserter would have generated: // // A // | \ // | B // | / // C // | \ // | D // | / // E // // A: X = ...; Y = ... // B: empty // C: Z = PHI [X, A], [Y, B] // D: empty // E: PHI [X, C], [Z, D] // // If we lower both CMOVs in a single step, we can instead generate: // // A // | \ // | C // | /| // |/ | // | | // | D // | / // E // // A: X = ...; Y = ... // D: empty // E: PHI [X, A], [X, C], [Y, D] // // Which, in our sitofp/fcmp example, gives us something like: // // ucomiss %xmm1, %xmm0 // movss <1.0f>, %xmm0 // jne .LBB5_4 // jp .LBB5_4 // xorps %xmm0, %xmm0 // .LBB5_4: // retq // // We lower cascaded CMOV into two successive branches to the same block. // EFLAGS is used by both, so mark it as live in the second. const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); MachineFunction *F = ThisMBB->getParent(); MachineBasicBlock *FirstInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *SecondInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++ThisMBB->getIterator(); F->insert(It, FirstInsertedMBB); F->insert(It, SecondInsertedMBB); F->insert(It, SinkMBB); // For a cascaded CMOV, we lower it to two successive branches to // the same block (SinkMBB). EFLAGS is used by both, so mark it as live in // the FirstInsertedMBB. FirstInsertedMBB->addLiveIn(X86::EFLAGS); // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); if (!SecondCascadedCMOV.killsRegister(X86::EFLAGS) && !checkAndUpdateEFLAGSKill(SecondCascadedCMOV, ThisMBB, TRI)) { SecondInsertedMBB->addLiveIn(X86::EFLAGS); SinkMBB->addLiveIn(X86::EFLAGS); } // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. SinkMBB->splice(SinkMBB->begin(), ThisMBB, std::next(MachineBasicBlock::iterator(FirstCMOV)), ThisMBB->end()); SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); // Fallthrough block for ThisMBB. ThisMBB->addSuccessor(FirstInsertedMBB); // The true block target of the first branch is always SinkMBB. ThisMBB->addSuccessor(SinkMBB); // Fallthrough block for FirstInsertedMBB. FirstInsertedMBB->addSuccessor(SecondInsertedMBB); // The true block for the branch of FirstInsertedMBB. FirstInsertedMBB->addSuccessor(SinkMBB); // This is fallthrough. SecondInsertedMBB->addSuccessor(SinkMBB); // Create the conditional branch instructions. 
X86::CondCode FirstCC = X86::CondCode(FirstCMOV.getOperand(3).getImm()); unsigned Opc = X86::GetCondBranchFromCond(FirstCC); BuildMI(ThisMBB, DL, TII->get(Opc)).addMBB(SinkMBB); X86::CondCode SecondCC = X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm()); unsigned Opc2 = X86::GetCondBranchFromCond(SecondCC); BuildMI(FirstInsertedMBB, DL, TII->get(Opc2)).addMBB(SinkMBB); // SinkMBB: // %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ] unsigned DestReg = FirstCMOV.getOperand(0).getReg(); unsigned Op1Reg = FirstCMOV.getOperand(1).getReg(); unsigned Op2Reg = FirstCMOV.getOperand(2).getReg(); MachineInstrBuilder MIB = BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg) .addReg(Op1Reg) .addMBB(SecondInsertedMBB) .addReg(Op2Reg) .addMBB(ThisMBB); // FirstInsertedMBB provides the same incoming value as ThisMBB (the True // operand of the SELECT_CC/CMOV nodes); whichever of the two branches is // taken, the shared True value is selected. MIB.addReg(FirstCMOV.getOperand(2).getReg()).addMBB(FirstInsertedMBB); // Copy the PHI result to the register defined by the second CMOV. BuildMI(*SinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), DL, TII->get(TargetOpcode::COPY), SecondCascadedCMOV.getOperand(0).getReg()) .addReg(FirstCMOV.getOperand(0).getReg()); // Now remove the CMOVs. FirstCMOV.eraseFromParent(); SecondCascadedCMOV.eraseFromParent(); return SinkMBB; } MachineBasicBlock * X86TargetLowering::EmitLoweredSelect(MachineInstr &MI, MachineBasicBlock *ThisMBB) const { const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the // destination vreg to set, the condition code register to branch on, the // true/false values to select between and a branch opcode to use. // ThisMBB: // ... // TrueVal = ... // cmpTY ccX, r1, r2 // bCC copy1MBB // fallthrough --> FalseMBB // This code lowers all pseudo-CMOV instructions. Generally it lowers these // as described above, by inserting a BB, and then making a PHI at the join // point to select the true and false operands of the CMOV in the PHI. // // The code also handles two different cases of multiple CMOV opcodes // in a row. // // Case 1: // In this case, there are multiple CMOVs in a row, all of which are based on // the same condition setting (or the exact opposite condition setting). // In this case we can lower all the CMOVs using a single inserted BB, and // then make a number of PHIs at the join point to model the CMOVs. The only // trickiness here is that in a case like: // // t2 = CMOV cond1 t1, f1 // t3 = CMOV cond1 t2, f2 // // when rewriting this into PHIs, we have to perform some renaming on the // temps since you cannot have a PHI operand refer to a PHI result earlier // in the same block. The "simple" but wrong lowering would be: // // t2 = PHI t1(BB1), f1(BB2) // t3 = PHI t2(BB1), f2(BB2) // // but clearly t2 is not defined in BB1, so that is incorrect. The proper // renaming is to note that on the path through BB1, t2 is really just a // copy of t1, and do that renaming, properly generating: // // t2 = PHI t1(BB1), f1(BB2) // t3 = PHI t1(BB1), f2(BB2) // // Case 2: // CMOV ((CMOV F, T, cc1), T, cc2) is checked here and handled by a separate // function - EmitLoweredCascadedSelect.
X86::CondCode CC = X86::CondCode(MI.getOperand(3).getImm()); X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); MachineInstr *LastCMOV = &MI; MachineBasicBlock::iterator NextMIIt = std::next(MachineBasicBlock::iterator(MI)); // Check for case 1, where there are multiple CMOVs with the same condition // first. Of the two cases of multiple CMOV lowerings, case 1 reduces the // number of jumps the most. if (isCMOVPseudo(MI)) { // See if we have a string of CMOVS with the same condition. while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) && (NextMIIt->getOperand(3).getImm() == CC || NextMIIt->getOperand(3).getImm() == OppCC)) { LastCMOV = &*NextMIIt; ++NextMIIt; } } // This checks for case 2, but only do this if we didn't already find // case 1, as indicated by LastCMOV == MI. if (LastCMOV == &MI && NextMIIt != ThisMBB->end() && NextMIIt->getOpcode() == MI.getOpcode() && NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() && NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() && NextMIIt->getOperand(1).isKill()) { return EmitLoweredCascadedSelect(MI, *NextMIIt, ThisMBB); } const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); MachineFunction *F = ThisMBB->getParent(); MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++ThisMBB->getIterator(); F->insert(It, FalseMBB); F->insert(It, SinkMBB); // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); if (!LastCMOV->killsRegister(X86::EFLAGS) && !checkAndUpdateEFLAGSKill(LastCMOV, ThisMBB, TRI)) { FalseMBB->addLiveIn(X86::EFLAGS); SinkMBB->addLiveIn(X86::EFLAGS); } // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. SinkMBB->splice(SinkMBB->begin(), ThisMBB, std::next(MachineBasicBlock::iterator(LastCMOV)), ThisMBB->end()); SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); // Fallthrough block for ThisMBB. ThisMBB->addSuccessor(FalseMBB); // The true block target of the first (or only) branch is always a SinkMBB. ThisMBB->addSuccessor(SinkMBB); // Fallthrough block for FalseMBB. FalseMBB->addSuccessor(SinkMBB); // Create the conditional branch instruction. unsigned Opc = X86::GetCondBranchFromCond(CC); BuildMI(ThisMBB, DL, TII->get(Opc)).addMBB(SinkMBB); // SinkMBB: // %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ] // ... MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); MachineBasicBlock::iterator MIItEnd = std::next(MachineBasicBlock::iterator(LastCMOV)); createPHIsForCMOVsInSinkBB(MIItBegin, MIItEnd, ThisMBB, FalseMBB, SinkMBB); // Now remove the CMOV(s). ThisMBB->erase(MIItBegin, MIItEnd); return SinkMBB; } MachineBasicBlock * X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI, MachineBasicBlock *BB) const { // Combine the following atomic floating-point modification pattern: // a.store(reg OP a.load(acquire), release) // Transform them into: // OPss (%gpr), %xmm // movss %xmm, (%gpr) // Or sd equivalent for 64-bit operations. 
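// As a sketch of the IR shape being matched here (names are illustrative only): // %old = load atomic float, float* %p acquire, align 4 // %new = fadd float %old, %val // store atomic float %new, float* %p release, align 4 // ISel has already folded this into the RELEASE_FADD32mr pseudo expanded below.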
unsigned MOp, FOp; switch (MI.getOpcode()) { default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP"); case X86::RELEASE_FADD32mr: FOp = X86::ADDSSrm; MOp = X86::MOVSSmr; break; case X86::RELEASE_FADD64mr: FOp = X86::ADDSDrm; MOp = X86::MOVSDmr; break; } const X86InstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned ValOpIdx = X86::AddrNumOperands; unsigned VSrc = MI.getOperand(ValOpIdx).getReg(); MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(FOp), MRI.createVirtualRegister(MRI.getRegClass(VSrc))) .addReg(VSrc); for (int i = 0; i < X86::AddrNumOperands; ++i) { MachineOperand &Operand = MI.getOperand(i); // Clear any kill flags on register operands as we'll create a second // instruction using the same address operands. if (Operand.isReg()) Operand.setIsKill(false); MIB.add(Operand); } MachineInstr *FOpMI = MIB; MIB = BuildMI(*BB, MI, DL, TII->get(MOp)); for (int i = 0; i < X86::AddrNumOperands; ++i) MIB.add(MI.getOperand(i)); MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } MachineBasicBlock * X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); assert(MF->shouldSplitStack()); const bool Is64Bit = Subtarget.is64Bit(); const bool IsLP64 = Subtarget.isTarget64BitLP64(); const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS; const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30; // BB: // ... [Till the alloca] // If stacklet is not large enough, jump to mallocMBB // // bumpMBB: // Allocate by subtracting from RSP // Jump to continueMBB // // mallocMBB: // Allocate by call to runtime // // continueMBB: // ... // [rest of original BB] // MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *AddrRegClass = getRegClassFor(getPointerTy(MF->getDataLayout())); unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass), bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass), tmpSPVReg = MRI.createVirtualRegister(AddrRegClass), SPLimitVReg = MRI.createVirtualRegister(AddrRegClass), sizeVReg = MI.getOperand(1).getReg(), physSPReg = IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP; MachineFunction::iterator MBBIter = ++BB->getIterator(); MF->insert(MBBIter, bumpMBB); MF->insert(MBBIter, mallocMBB); MF->insert(MBBIter, continueMBB); continueMBB->splice(continueMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); continueMBB->transferSuccessorsAndUpdatePHIs(BB); // Add code to the main basic block to check if the stack limit has been hit, // and if so, jump to mallocMBB otherwise to bumpMBB. BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg); BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg) .addReg(tmpSPVReg).addReg(sizeVReg); BuildMI(BB, DL, TII->get(IsLP64 ? 
X86::CMP64mr:X86::CMP32mr)) .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg) .addReg(SPLimitVReg); BuildMI(BB, DL, TII->get(X86::JG_1)).addMBB(mallocMBB); // bumpMBB simply decreases the stack pointer, since we know the current // stacklet has enough space. BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg) .addReg(SPLimitVReg); BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg) .addReg(SPLimitVReg); BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB); // Calls into a routine in libgcc to allocate more space from the heap. const uint32_t *RegMask = Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C); if (IsLP64) { BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) .addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) .addExternalSymbol("__morestack_allocate_stack_space") .addRegMask(RegMask) .addReg(X86::RDI, RegState::Implicit) .addReg(X86::RAX, RegState::ImplicitDefine); } else if (Is64Bit) { BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI) .addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) .addExternalSymbol("__morestack_allocate_stack_space") .addRegMask(RegMask) .addReg(X86::EDI, RegState::Implicit) .addReg(X86::EAX, RegState::ImplicitDefine); } else { BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg) .addImm(12); BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32)) .addExternalSymbol("__morestack_allocate_stack_space") .addRegMask(RegMask) .addReg(X86::EAX, RegState::ImplicitDefine); } if (!Is64Bit) BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg) .addImm(16); BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg) .addReg(IsLP64 ? X86::RAX : X86::EAX); BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB); // Set up the CFG correctly. BB->addSuccessor(bumpMBB); BB->addSuccessor(mallocMBB); mallocMBB->addSuccessor(continueMBB); bumpMBB->addSuccessor(continueMBB); // Take care of the PHI nodes. BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI), MI.getOperand(0).getReg()) .addReg(mallocPtrVReg) .addMBB(mallocMBB) .addReg(bumpSPPtrVReg) .addMBB(bumpMBB); // Delete the original pseudo instruction. MI.eraseFromParent(); // And we're done. return continueMBB; } MachineBasicBlock * X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB(); DebugLoc DL = MI.getDebugLoc(); assert(!isAsynchronousEHPersonality( classifyEHPersonality(MF->getFunction().getPersonalityFn())) && "SEH does not use catchret!"); // Only 32-bit EH needs to worry about manually restoring stack pointers. if (!Subtarget.is32Bit()) return BB; // C++ EH creates a new target block to hold the restore code, and wires up // the new block to the return destination with a normal JMP_4. 
MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB->getBasicBlock()); assert(BB->succ_size() == 1); MF->insert(std::next(BB->getIterator()), RestoreMBB); RestoreMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(RestoreMBB); MI.getOperand(0).setMBB(RestoreMBB); auto RestoreMBBI = RestoreMBB->begin(); BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::EH_RESTORE)); BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB); return BB; } MachineBasicBlock * X86TargetLowering::EmitLoweredCatchPad(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); const Constant *PerFn = MF->getFunction().getPersonalityFn(); bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn)); // Only 32-bit SEH requires special handling for catchpad. if (IsSEH && Subtarget.is32Bit()) { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE)); } MI.eraseFromParent(); return BB; } MachineBasicBlock * X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI, MachineBasicBlock *BB) const { // Here we replace TLSADDR with the sequence: // adjust_stackdown -> TLSADDR -> adjust_stackup. // We need this because TLSADDR is lowered into calls // inside MC, therefore without the two markers shrink-wrapping // may push the prologue/epilogue past them. const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); MachineFunction &MF = *BB->getParent(); // Emit CALLSEQ_START right before the instruction. unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); MachineInstrBuilder CallseqStart = BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0); BB->insert(MachineBasicBlock::iterator(MI), CallseqStart); // Emit CALLSEQ_END right after the instruction. // We don't call erase from parent because we want to keep the // original instruction around. unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); MachineInstrBuilder CallseqEnd = BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0); BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd); return BB; } MachineBasicBlock * X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI, MachineBasicBlock *BB) const { // This is pretty easy. We're taking the value that we received from // our load from the relocation, sticking it in either RDI (x86-64) // or EAX and doing an indirect call. The return value will then // be in the normal return register. MachineFunction *F = BB->getParent(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?"); assert(MI.getOperand(3).isGlobal() && "This should be a global"); // Get a register mask for the lowered call. // FIXME: The 32-bit calls have non-standard calling conventions. Use a // proper register mask. const uint32_t *RegMask = Subtarget.is64Bit() ?
Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() : Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); if (Subtarget.is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) .addReg(X86::RIP) .addImm(0) .addReg(0) .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, MI.getOperand(3).getTargetFlags()) .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); addDirectMem(MIB, X86::RDI); MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask); } else if (!isPositionIndependent()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) .addReg(0) .addImm(0) .addReg(0) .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, MI.getOperand(3).getTargetFlags()) .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); addDirectMem(MIB, X86::EAX); MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); } else { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) .addReg(TII->getGlobalBaseReg(F)) .addImm(0) .addReg(0) .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, MI.getOperand(3).getTargetFlags()) .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); addDirectMem(MIB, X86::EAX); MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); } MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } static unsigned getOpcodeForRetpoline(unsigned RPOpc) { switch (RPOpc) { case X86::RETPOLINE_CALL32: return X86::CALLpcrel32; case X86::RETPOLINE_CALL64: return X86::CALL64pcrel32; case X86::RETPOLINE_TCRETURN32: return X86::TCRETURNdi; case X86::RETPOLINE_TCRETURN64: return X86::TCRETURNdi64; } llvm_unreachable("not retpoline opcode"); } static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, unsigned Reg) { if (Subtarget.useRetpolineExternalThunk()) { // When using an external thunk for retpolines, we pick names that match the // names GCC happens to use as well. This helps simplify the implementation // of the thunks for kernels where they have no easy ability to create // aliases and are doing non-trivial configuration of the thunk's body. For // example, the Linux kernel will do boot-time hot patching of the thunk // bodies and cannot easily export aliases of these to loaded modules. // // Note that at any point in the future, we may need to change the semantics // of how we implement retpolines and at that time will likely change the // name of the called thunk. Essentially, there is no hard guarantee that // LLVM will generate calls to specific thunks, we merely make a best-effort // attempt to help out kernels and other systems where duplicating the // thunks is costly. switch (Reg) { case X86::EAX: assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); return "__x86_indirect_thunk_eax"; case X86::ECX: assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); return "__x86_indirect_thunk_ecx"; case X86::EDX: assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); return "__x86_indirect_thunk_edx"; case X86::EDI: assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); return "__x86_indirect_thunk_edi"; case X86::R11: assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); return "__x86_indirect_thunk_r11"; } llvm_unreachable("unexpected reg for retpoline"); } // When targeting an internal COMDAT thunk use an LLVM-specific name. 
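// These __llvm_retpoline_* thunks are emitted by the backend itself (see X86RetpolineThunks.cpp) as COMDAT functions, so every translation unit can reference them without duplicate-definition problems; the suffix names the register that carries the callee address into the thunk.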
switch (Reg) { case X86::EAX: assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); return "__llvm_retpoline_eax"; case X86::ECX: assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); return "__llvm_retpoline_ecx"; case X86::EDX: assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); return "__llvm_retpoline_edx"; case X86::EDI: assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); return "__llvm_retpoline_edi"; case X86::R11: assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); return "__llvm_retpoline_r11"; } llvm_unreachable("unexpected reg for retpoline"); } MachineBasicBlock * X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, MachineBasicBlock *BB) const { // Copy the virtual register into the R11 physical register and // call the retpoline thunk. DebugLoc DL = MI.getDebugLoc(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); unsigned CalleeVReg = MI.getOperand(0).getReg(); unsigned Opc = getOpcodeForRetpoline(MI.getOpcode()); // Find an available scratch register to hold the callee. On 64-bit, we can // just use R11, but we scan for uses anyway to ensure we don't generate // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't // already a register use operand to the call to hold the callee. If none // are available, use EDI instead. EDI is chosen because EBX is the PIC base // register and ESI is the base pointer to realigned stack frames with VLAs. SmallVector<unsigned, 3> AvailableRegs; if (Subtarget.is64Bit()) AvailableRegs.push_back(X86::R11); else AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI}); // Zero out any registers that are already used. for (const auto &MO : MI.operands()) { if (MO.isReg() && MO.isUse()) for (unsigned &Reg : AvailableRegs) if (Reg == MO.getReg()) Reg = 0; } // Choose the first remaining non-zero available register. unsigned AvailableReg = 0; for (unsigned MaybeReg : AvailableRegs) { if (MaybeReg) { AvailableReg = MaybeReg; break; } } if (!AvailableReg) report_fatal_error("calling convention incompatible with retpoline, no " "available registers"); const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg) .addReg(CalleeVReg); MI.getOperand(0).ChangeToES(Symbol); MI.setDesc(TII->get(Opc)); MachineInstrBuilder(*BB->getParent(), &MI) .addReg(AvailableReg, RegState::Implicit | RegState::Kill); return BB; } /// SetJmp implies future control flow change upon calling the corresponding /// LongJmp. /// Instead of using the 'return' instruction, the long jump fixes the stack and /// performs an indirect branch. To do so it uses the registers that were stored /// in the jump buffer (when calling SetJmp). /// In case the shadow stack is enabled we need to fix it as well, because some /// return addresses will be skipped. /// The function will save the SSP for future fixing in the function /// emitLongJmpShadowStackFix. /// \sa emitLongJmpShadowStackFix /// \param [in] MI The temporary Machine Instruction for the builtin. /// \param [in] MBB The Machine Basic Block that will be modified. void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineInstrBuilder MIB; // Memory Reference.
SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), MI.memoperands_end()); // Initialize a register with zero. MVT PVT = getPointerTy(MF->getDataLayout()); const TargetRegisterClass *PtrRC = getRegClassFor(PVT); unsigned ZReg = MRI.createVirtualRegister(PtrRC); unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr; BuildMI(*MBB, MI, DL, TII->get(XorRROpc)) .addDef(ZReg) .addReg(ZReg, RegState::Undef) .addReg(ZReg, RegState::Undef); // Read the current SSP Register value to the zeroed register. unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC); unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD; BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); // Write the SSP register value to offset 3 in input memory buffer. unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr; MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc)); const int64_t SSPOffset = 3 * PVT.getStoreSize(); const unsigned MemOpndSlot = 1; for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) MIB.addDisp(MI.getOperand(MemOpndSlot + i), SSPOffset); else MIB.add(MI.getOperand(MemOpndSlot + i)); } MIB.addReg(SSPCopyReg); MIB.setMemRefs(MMOs); } MachineBasicBlock * X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); // Memory Reference SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), MI.memoperands_end()); unsigned DstReg; unsigned MemOpndSlot = 0; unsigned CurOp = 0; DstReg = MI.getOperand(CurOp++).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); (void)TRI; unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); MemOpndSlot = CurOp; MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate // // thisMBB: // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB // SjLjSetup restoreMBB // // mainMBB: // v_main = 0 // // sinkMBB: // v = phi(main, restore) // // restoreMBB: // if base pointer being used, load it from frame // v_restore = 1 MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); MF->insert(I, sinkMBB); MF->push_back(restoreMBB); restoreMBB->setHasAddressTaken(); MachineInstrBuilder MIB; // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // thisMBB: unsigned PtrStoreOpc = 0; unsigned LabelReg = 0; const int64_t LabelOffset = 1 * PVT.getStoreSize(); bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) && !isPositionIndependent(); // Prepare IP either in reg or imm. if (!UseImmLabel) { PtrStoreOpc = (PVT == MVT::i64) ?
X86::MOV64mr : X86::MOV32mr; const TargetRegisterClass *PtrRC = getRegClassFor(PVT); LabelReg = MRI.createVirtualRegister(PtrRC); if (Subtarget.is64Bit()) { MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg) .addReg(X86::RIP) .addImm(0) .addReg(0) .addMBB(restoreMBB) .addReg(0); } else { const X86InstrInfo *XII = static_cast<const X86InstrInfo *>(TII); MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg) .addReg(XII->getGlobalBaseReg(MF)) .addImm(0) .addReg(0) .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference()) .addReg(0); } } else PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi; // Store IP MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc)); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset); else MIB.add(MI.getOperand(MemOpndSlot + i)); } if (!UseImmLabel) MIB.addReg(LabelReg); else MIB.addMBB(restoreMBB); MIB.setMemRefs(MMOs); if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) { emitSetJmpShadowStackFix(MI, thisMBB); } // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup)) .addMBB(restoreMBB); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); MIB.addRegMask(RegInfo->getNoPreservedMask()); thisMBB->addSuccessor(mainMBB); thisMBB->addSuccessor(restoreMBB); // mainMBB: // EAX = 0 BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg); mainMBB->addSuccessor(sinkMBB); // sinkMBB: BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg) .addReg(mainDstReg).addMBB(mainMBB) .addReg(restoreDstReg).addMBB(restoreMBB); // restoreMBB: if (RegInfo->hasBasePointer(*MF)) { const bool Uses64BitFramePtr = Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64(); X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); X86FI->setRestoreBasePointer(MF); unsigned FramePtr = RegInfo->getFrameRegister(*MF); unsigned BasePtr = RegInfo->getBaseRegister(); unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm; addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr), FramePtr, true, X86FI->getRestoreBasePointerOffset()) .setMIFlag(MachineInstr::FrameSetup); } BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1); BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB); restoreMBB->addSuccessor(sinkMBB); MI.eraseFromParent(); return sinkMBB; } /// Fix the shadow stack using the previously saved SSP pointer. /// \sa emitSetJmpShadowStackFix /// \param [in] MI The temporary Machine Instruction for the builtin. /// \param [in] MBB The Machine Basic Block that will be modified. /// \return The sink MBB that will perform the future indirect branch.
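/// As a rough sketch of the algorithm below: the delta between the saved and the current SSP is converted into a count of shadow-stack entries; since INCSSP consumes only the low 8 bits of its operand, any remainder beyond 255 entries is retired 128 entries at a time by the fixShadowLoopMBB loop.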
MachineBasicBlock * X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Memory Reference SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), MI.memoperands_end()); MVT PVT = getPointerTy(MF->getDataLayout()); const TargetRegisterClass *PtrRC = getRegClassFor(PVT); // checkSspMBB: // xor vreg1, vreg1 // rdssp vreg1 // test vreg1, vreg1 // je sinkMBB # Jump if Shadow Stack is not supported // fallMBB: // mov buf+24/12(%rip), vreg2 // sub vreg1, vreg2 // jbe sinkMBB # No need to fix the Shadow Stack // fixShadowMBB: // shr 3/2, vreg2 // incssp vreg2 # fix the SSP according to the lower 8 bits // shr 8, vreg2 // je sinkMBB // fixShadowLoopPrepareMBB: // shl vreg2 // mov 128, vreg3 // fixShadowLoopMBB: // incssp vreg3 // dec vreg2 // jne fixShadowLoopMBB # Iterate until you finish fixing // # the Shadow Stack // sinkMBB: MachineFunction::iterator I = ++MBB->getIterator(); const BasicBlock *BB = MBB->getBasicBlock(); MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, checkSspMBB); MF->insert(I, fallMBB); MF->insert(I, fixShadowMBB); MF->insert(I, fixShadowLoopPrepareMBB); MF->insert(I, fixShadowLoopMBB); MF->insert(I, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, MachineBasicBlock::iterator(MI), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); MBB->addSuccessor(checkSspMBB); // Initialize a register with zero. unsigned ZReg = MRI.createVirtualRegister(PtrRC); unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr; BuildMI(checkSspMBB, DL, TII->get(XorRROpc)) .addDef(ZReg) .addReg(ZReg, RegState::Undef) .addReg(ZReg, RegState::Undef); // Read the current SSP Register value to the zeroed register. unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC); unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD; BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); // Check whether the result of the SSP register is zero and jump directly // to the sink. unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr; BuildMI(checkSspMBB, DL, TII->get(TestRROpc)) .addReg(SSPCopyReg) .addReg(SSPCopyReg); BuildMI(checkSspMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB); checkSspMBB->addSuccessor(sinkMBB); checkSspMBB->addSuccessor(fallMBB); // Reload the previously saved SSP register value. unsigned PrevSSPReg = MRI.createVirtualRegister(PtrRC); unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; const int64_t SPPOffset = 3 * PVT.getStoreSize(); MachineInstrBuilder MIB = BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { const MachineOperand &MO = MI.getOperand(i); if (i == X86::AddrDisp) MIB.addDisp(MO, SPPOffset); else if (MO.isReg()) // Don't add the whole operand, we don't want to // preserve kill flags. MIB.addReg(MO.getReg()); else MIB.add(MO); } MIB.setMemRefs(MMOs); // Subtract the current SSP from the previous SSP.
unsigned SspSubReg = MRI.createVirtualRegister(PtrRC); unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr; BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg) .addReg(PrevSSPReg) .addReg(SSPCopyReg); // Jump to sink in case PrevSSPReg <= SSPCopyReg. BuildMI(fallMBB, DL, TII->get(X86::JBE_1)).addMBB(sinkMBB); fallMBB->addSuccessor(sinkMBB); fallMBB->addSuccessor(fixShadowMBB); // Shift right by 2/3 for 32/64 because incssp multiplies the argument by 4/8. unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri; unsigned Offset = (PVT == MVT::i64) ? 3 : 2; unsigned SspFirstShrReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg) .addReg(SspSubReg) .addImm(Offset); // Increase SSP when looking only at the lower 8 bits of the delta. unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD; BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg); // Reset the lower 8 bits. unsigned SspSecondShrReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg) .addReg(SspFirstShrReg) .addImm(8); // Jump if the result of the shift is zero. BuildMI(fixShadowMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB); fixShadowMBB->addSuccessor(sinkMBB); fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB); // Do a single shift left. unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1; unsigned SspAfterShlReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg) .addReg(SspSecondShrReg); // Save the value 128 to a register (will be used next with incssp). unsigned Value128InReg = MRI.createVirtualRegister(PtrRC); unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri; BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg) .addImm(128); fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB); // Since incssp only looks at the lower 8 bits, we might need to do several // iterations of incssp until we finish fixing the shadow stack. unsigned DecReg = MRI.createVirtualRegister(PtrRC); unsigned CounterReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg) .addReg(SspAfterShlReg) .addMBB(fixShadowLoopPrepareMBB) .addReg(DecReg) .addMBB(fixShadowLoopMBB); // Every iteration we increase the SSP by 128. BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg); // Every iteration we decrement the counter by 1. unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r; BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg); // Jump if the counter is not zero yet. BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JNE_1)).addMBB(fixShadowLoopMBB); fixShadowLoopMBB->addSuccessor(sinkMBB); fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB); return sinkMBB; } MachineBasicBlock * X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Memory Reference SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), MI.memoperands_end()); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); const TargetRegisterClass *RC = (PVT == MVT::i64) ?
&X86::GR64RegClass : &X86::GR32RegClass; unsigned Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP; unsigned SP = RegInfo->getStackRegister(); MachineInstrBuilder MIB; const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t SPOffset = 2 * PVT.getStoreSize(); unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r; MachineBasicBlock *thisMBB = MBB; // When CET and shadow stack is enabled, we need to fix the Shadow Stack. if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) { thisMBB = emitLongJmpShadowStackFix(MI, thisMBB); } // Reload FP MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { const MachineOperand &MO = MI.getOperand(i); if (MO.isReg()) // Don't add the whole operand, we don't want to // preserve kill flags. MIB.addReg(MO.getReg()); else MIB.add(MO); } MIB.setMemRefs(MMOs); // Reload IP MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { const MachineOperand &MO = MI.getOperand(i); if (i == X86::AddrDisp) MIB.addDisp(MO, LabelOffset); else if (MO.isReg()) // Don't add the whole operand, we don't want to // preserve kill flags. MIB.addReg(MO.getReg()); else MIB.add(MO); } MIB.setMemRefs(MMOs); // Reload SP MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) MIB.addDisp(MI.getOperand(i), SPOffset); else MIB.add(MI.getOperand(i)); // We can preserve the kill flags here, it's // the last instruction of the expansion. } MIB.setMemRefs(MMOs); // Jump BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp); MI.eraseFromParent(); return thisMBB; } void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); unsigned Op = 0; unsigned VR = 0; bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) && !isPositionIndependent(); if (UseImmLabel) { Op = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi; } else { const TargetRegisterClass *TRC = (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; VR = MRI->createVirtualRegister(TRC); Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr; if (Subtarget.is64Bit()) BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR) .addReg(X86::RIP) .addImm(1) .addReg(0) .addMBB(DispatchBB) .addReg(0); else BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR) .addReg(0) /* TII->getGlobalBaseReg(MF) */ .addImm(1) .addReg(0) .addMBB(DispatchBB, Subtarget.classifyBlockAddressReference()) .addReg(0); } MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op)); addFrameReference(MIB, FI, Subtarget.is64Bit() ? 
56 : 36); if (UseImmLabel) MIB.addMBB(DispatchBB); else MIB.addReg(VR); } MachineBasicBlock * X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *BB) const { DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = BB->getParent(); MachineFrameInfo &MFI = MF->getFrameInfo(); MachineRegisterInfo *MRI = &MF->getRegInfo(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); int FI = MFI.getFunctionContextIndex(); // Get a mapping of the call site numbers to all of the landing pads they're // associated with. DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad; unsigned MaxCSNum = 0; for (auto &MBB : *MF) { if (!MBB.isEHPad()) continue; MCSymbol *Sym = nullptr; for (const auto &MI : MBB) { if (MI.isDebugInstr()) continue; assert(MI.isEHLabel() && "expected EH_LABEL"); Sym = MI.getOperand(0).getMCSymbol(); break; } if (!MF->hasCallSiteLandingPad(Sym)) continue; for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) { CallSiteNumToLPad[CSI].push_back(&MBB); MaxCSNum = std::max(MaxCSNum, CSI); } } // Get an ordered list of the machine basic blocks for the jump table. std::vector<MachineBasicBlock *> LPadList; SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs; LPadList.reserve(CallSiteNumToLPad.size()); for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) { for (auto &LP : CallSiteNumToLPad[CSI]) { LPadList.push_back(LP); InvokeBBs.insert(LP->pred_begin(), LP->pred_end()); } } assert(!LPadList.empty() && "No landing pad destinations for the dispatch jump table!"); // Create the MBBs for the dispatch code. // Shove the dispatch's address into the return slot in the function context. MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); DispatchBB->setIsEHPad(true); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); BuildMI(TrapBB, DL, TII->get(X86::TRAP)); DispatchBB->addSuccessor(TrapBB); MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); DispatchBB->addSuccessor(DispContBB); // Insert MBBs. MF->push_back(DispatchBB); MF->push_back(DispContBB); MF->push_back(TrapBB); // Insert code into the entry block that creates and registers the function // context. SetupEntryBlockForSjLj(MI, BB, DispatchBB, FI); // Create the jump table and associated information unsigned JTE = getJumpTableEncoding(); MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE); unsigned MJTI = JTI->createJumpTableIndex(LPadList); const X86RegisterInfo &RI = TII->getRegisterInfo(); // Add a register mask with no preserved registers. This results in all // registers being marked as clobbered. if (RI.hasBasePointer(*MF)) { const bool FPIs64Bit = Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64(); X86MachineFunctionInfo *MFI = MF->getInfo<X86MachineFunctionInfo>(); MFI->setRestoreBasePointer(MF); unsigned FP = RI.getFrameRegister(*MF); unsigned BP = RI.getBaseRegister(); unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm; addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true, MFI->getRestoreBasePointerOffset()) .addRegMask(RI.getNoPreservedMask()); } else { BuildMI(DispatchBB, DL, TII->get(X86::NOOP)) .addRegMask(RI.getNoPreservedMask()); } // IReg is used as an index in a memory operand and therefore can't be SP unsigned IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass); addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI, Subtarget.is64Bit() ?
8 : 4); BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri)) .addReg(IReg) .addImm(LPadList.size()); BuildMI(DispatchBB, DL, TII->get(X86::JAE_1)).addMBB(TrapBB); if (Subtarget.is64Bit()) { unsigned BReg = MRI->createVirtualRegister(&X86::GR64RegClass); unsigned IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); // leaq .LJTI0_0(%rip), BReg BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg) .addReg(X86::RIP) .addImm(1) .addReg(0) .addJumpTableIndex(MJTI) .addReg(0); // movzx IReg64, IReg BuildMI(DispContBB, DL, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64) .addImm(0) .addReg(IReg) .addImm(X86::sub_32bit); switch (JTE) { case MachineJumpTableInfo::EK_BlockAddress: // jmpq *(BReg,IReg64,8) BuildMI(DispContBB, DL, TII->get(X86::JMP64m)) .addReg(BReg) .addImm(8) .addReg(IReg64) .addImm(0) .addReg(0); break; case MachineJumpTableInfo::EK_LabelDifference32: { unsigned OReg = MRI->createVirtualRegister(&X86::GR32RegClass); unsigned OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass); unsigned TReg = MRI->createVirtualRegister(&X86::GR64RegClass); // movl (BReg,IReg64,4), OReg BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg) .addReg(BReg) .addImm(4) .addReg(IReg64) .addImm(0) .addReg(0); // movsx OReg64, OReg BuildMI(DispContBB, DL, TII->get(X86::MOVSX64rr32), OReg64).addReg(OReg); // addq BReg, OReg64, TReg BuildMI(DispContBB, DL, TII->get(X86::ADD64rr), TReg) .addReg(OReg64) .addReg(BReg); // jmpq *TReg BuildMI(DispContBB, DL, TII->get(X86::JMP64r)).addReg(TReg); break; } default: llvm_unreachable("Unexpected jump table encoding"); } } else { // jmpl *.LJTI0_0(,IReg,4) BuildMI(DispContBB, DL, TII->get(X86::JMP32m)) .addReg(0) .addImm(4) .addReg(IReg) .addJumpTableIndex(MJTI) .addReg(0); } // Add the jump table entries as successors to the MBB. SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs; for (auto &LP : LPadList) if (SeenMBBs.insert(LP).second) DispContBB->addSuccessor(LP); // N.B. the order the invoke BBs are processed in doesn't matter here. SmallVector<MachineBasicBlock *, 64> MBBLPads; const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs(); for (MachineBasicBlock *MBB : InvokeBBs) { // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. // Keep a copy of Successors since it's modified inside the loop. SmallVector<MachineBasicBlock *, 4> Successors(MBB->succ_rbegin(), MBB->succ_rend()); // FIXME: Avoid quadratic complexity. for (auto MBBS : Successors) { if (MBBS->isEHPad()) { MBB->removeSuccessor(MBBS); MBBLPads.push_back(MBBS); } } MBB->addSuccessor(DispatchBB); // Find the invoke call and mark all of the callee-saved registers as // 'implicit defined' so that they're spilled. This prevents code from // moving instructions to before the EH block, where they will never be // executed. for (auto &II : reverse(*MBB)) { if (!II.isCall()) continue; DenseMap<unsigned, bool> DefRegs; for (auto &MOp : II.operands()) if (MOp.isReg()) DefRegs[MOp.getReg()] = true; MachineInstrBuilder MIB(*MF, &II); for (unsigned RI = 0; SavedRegs[RI]; ++RI) { unsigned Reg = SavedRegs[RI]; if (!DefRegs[Reg]) MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); } break; } } // Mark all former landing pads as non-landing pads. The dispatch is the only // landing pad now. for (auto &LP : MBBLPads) LP->setIsEHPad(false); // The instruction is gone now.
MI.eraseFromParent(); return BB; } MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); case X86::TLS_addr32: case X86::TLS_addr64: case X86::TLS_base_addr32: case X86::TLS_base_addr64: return EmitLoweredTLSAddr(MI, BB); case X86::RETPOLINE_CALL32: case X86::RETPOLINE_CALL64: case X86::RETPOLINE_TCRETURN32: case X86::RETPOLINE_TCRETURN64: return EmitLoweredRetpoline(MI, BB); case X86::CATCHRET: return EmitLoweredCatchRet(MI, BB); case X86::CATCHPAD: return EmitLoweredCatchPad(MI, BB); case X86::SEG_ALLOCA_32: case X86::SEG_ALLOCA_64: return EmitLoweredSegAlloca(MI, BB); case X86::TLSCall_32: case X86::TLSCall_64: return EmitLoweredTLSCall(MI, BB); case X86::CMOV_FR32: case X86::CMOV_FR64: case X86::CMOV_GR8: case X86::CMOV_GR16: case X86::CMOV_GR32: case X86::CMOV_RFP32: case X86::CMOV_RFP64: case X86::CMOV_RFP80: case X86::CMOV_VR128: case X86::CMOV_VR128X: case X86::CMOV_VR256: case X86::CMOV_VR256X: case X86::CMOV_VR512: case X86::CMOV_VK2: case X86::CMOV_VK4: case X86::CMOV_VK8: case X86::CMOV_VK16: case X86::CMOV_VK32: case X86::CMOV_VK64: return EmitLoweredSelect(MI, BB); case X86::RDFLAGS32: case X86::RDFLAGS64: { unsigned PushF = MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64; unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r; MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF)); // Permit reads of the EFLAGS and DF registers without them being defined. // This intrinsic exists to read external processor state in flags, such as // the trap flag, interrupt flag, and direction flag, none of which are // modeled by the backend. assert(Push->getOperand(2).getReg() == X86::EFLAGS && "Unexpected register in operand!"); Push->getOperand(2).setIsUndef(); assert(Push->getOperand(3).getReg() == X86::DF && "Unexpected register in operand!"); Push->getOperand(3).setIsUndef(); BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg()); MI.eraseFromParent(); // The pseudo is gone now. return BB; } case X86::WRFLAGS32: case X86::WRFLAGS64: { unsigned Push = MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r; unsigned PopF = MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64; BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg()); BuildMI(*BB, MI, DL, TII->get(PopF)); MI.eraseFromParent(); // The pseudo is gone now. return BB; } case X86::RELEASE_FADD32mr: case X86::RELEASE_FADD64mr: return EmitLoweredAtomicFP(MI, BB); case X86::FP32_TO_INT16_IN_MEM: case X86::FP32_TO_INT32_IN_MEM: case X86::FP32_TO_INT64_IN_MEM: case X86::FP64_TO_INT16_IN_MEM: case X86::FP64_TO_INT32_IN_MEM: case X86::FP64_TO_INT64_IN_MEM: case X86::FP80_TO_INT16_IN_MEM: case X86::FP80_TO_INT32_IN_MEM: case X86::FP80_TO_INT64_IN_MEM: { // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. int CWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... unsigned OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); // Set the high part to be round to zero... 
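// (0xC7F sets the x87 rounding-control field, bits 11:10, to 11b = round toward zero and keeps all exception mask bits set; this is how x87 code implements C's truncating float-to-int conversion without SSE's cvttss2si.)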
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
        .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
                      CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
        .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    X86AddressMode AM = getAddressFromInstr(&MI, 0);
    addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
        .addReg(MI.getOperand(X86::AddrNumOperands).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
                      CWFrameIdx);

    MI.eraseFromParent(); // The pseudo instruction is gone now.
    return BB;
  }

  // Thread synchronization.
  case X86::MONITOR:
    return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
  case X86::MONITORX:
    return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);

  // Cache line zero
  case X86::CLZERO:
    return emitClzero(&MI, BB, Subtarget);

  // PKU feature
  case X86::WRPKRU:
    return emitWRPKRU(MI, BB, Subtarget);
  case X86::RDPKRU:
    return emitRDPKRU(MI, BB, Subtarget);

  // xbegin
  case X86::XBEGIN:
    return emitXBegin(MI, BB, Subtarget.getInstrInfo());

  case X86::VASTART_SAVE_XMM_REGS:
    return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);

  case X86::VAARG_64:
    return EmitVAARG64WithCustomInserter(MI, BB);

  case X86::EH_SjLj_SetJmp32:
  case X86::EH_SjLj_SetJmp64:
    return emitEHSjLjSetJmp(MI, BB);

  case X86::EH_SjLj_LongJmp32:
  case X86::EH_SjLj_LongJmp64:
    return emitEHSjLjLongJmp(MI, BB);

  case X86::Int_eh_sjlj_setup_dispatch:
    return EmitSjLjDispatchBlock(MI, BB);

  case TargetOpcode::STATEPOINT:
    // As an implementation detail, STATEPOINT shares the STACKMAP format at
    // this point in the process. We diverge later.
    return emitPatchPoint(MI, BB);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, BB);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return emitXRayCustomEvent(MI, BB);

  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    return emitXRayTypedEvent(MI, BB);

  case X86::LCMPXCHG8B: {
    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
    // In addition to the four E[ABCD] registers implied by its encoding,
    // CMPXCHG8B requires a memory operand. If it happens that the current
    // architecture is i686 and the current function needs a base pointer
    // - which is ESI for i686 - the register allocator would not be able to
    // allocate registers for an address in the form X(%reg, %reg, Y):
    // there would never be enough unreserved registers during regalloc
    // (without the need for a base pointer the only option would be
    // X(%edi, %esi, Y)). We give the register allocator a hand by
    // precomputing the address in a new vreg using LEA.

    // If it is not i686 or there is no base pointer - nothing to do here.
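    // Illustrative sketch (not emitted verbatim): with ESI reserved as the
    // base pointer, an operand such as X(%esi,%ecx,4) is precomputed via
    // "leal X(%esi,%ecx,4), %vreg" so that CMPXCHG8B only needs (%vreg).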
    if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
      return BB;

    // Even though this code does not necessarily need the base pointer to
    // be ESI, we check for that. The reason: if this assert fails, then
    // something has changed in the compiler's base pointer handling, which
    // most probably has to be addressed somehow here as well.
    assert(TRI->getBaseRegister() == X86::ESI &&
           "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
           "base pointer in mind");

    MachineRegisterInfo &MRI = MF->getRegInfo();
    MVT SPTy = getPointerTy(MF->getDataLayout());
    const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
    unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);

    X86AddressMode AM = getAddressFromInstr(&MI, 0);
    // Regalloc does not need any help when the memory operand of CMPXCHG8B
    // does not use an index register.
    if (AM.IndexReg == X86::NoRegister)
      return BB;

    // After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its
    // four operand definitions that are E[ABCD] registers. We skip them and
    // then insert the LEA.
    MachineBasicBlock::iterator MBBI(MI);
    while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) ||
           MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX))
      --MBBI;
    addFullAddress(
        BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);

    setDirectAddressInInstr(&MI, 0, computedAddrVReg);

    return BB;
  }
  case X86::LCMPXCHG16B:
    return BB;
  case X86::LCMPXCHG8B_SAVE_EBX:
  case X86::LCMPXCHG16B_SAVE_RBX: {
    unsigned BasePtr =
        MI.getOpcode() == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX;
    if (!BB->isLiveIn(BasePtr))
      BB->addLiveIn(BasePtr);
    return BB;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

bool
X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
                                                const APInt &Demanded,
                                                TargetLoweringOpt &TLO) const {
  // Only optimize Ands to prevent shrinking a constant that could be
  // matched by movzx.
  if (Op.getOpcode() != ISD::AND)
    return false;

  EVT VT = Op.getValueType();

  // Ignore vectors.
  if (VT.isVector())
    return false;

  unsigned Size = VT.getSizeInBits();

  // Make sure the RHS really is a constant.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & Demanded;

  // Find the width of the shrunk mask.
  unsigned Width = ShrunkMask.getActiveBits();

  // If the mask is all 0s there's nothing to do here.
  if (Width == 0)
    return false;

  // Find the next power of 2 width, rounding up to a byte.
  Width = PowerOf2Ceil(std::max(Width, 8U));
  // Truncate the width to size to handle illegal types.
  Width = std::min(Width, Size);

  // Calculate a possible zero extend mask for this constant.
  APInt ZeroExtendMask = APInt::getLowBitsSet(Size, Width);

  // If we aren't changing the mask, just return true to keep it and prevent
  // the caller from optimizing.
  if (ZeroExtendMask == Mask)
    return true;

  // Make sure the new mask can be represented by a combination of mask bits
  // and non-demanded bits.
  if (!ZeroExtendMask.isSubsetOf(Mask | ~Demanded))
    return false;

  // Replace the constant with the zero extend mask.
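  // Worked example: for 'and $0x1F0, %eax' where only bits 4-7 are demanded,
  // ShrunkMask is 0xF0, Width rounds up to 8, and ZeroExtendMask = 0xFF
  // differs from the original mask only in non-demanded bits, so the constant
  // becomes 0xFF and the AND can later be matched as a movzbl.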
  SDLoc DL(Op);
  SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT);
  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
  return TLO.CombineTo(Op, NewOp);
}

void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    Known.Zero.setBitsFrom(1);
    break;
  case X86ISD::MOVMSK: {
    unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements();
    Known.Zero.setBitsFrom(NumLoBits);
    break;
  }
  case X86ISD::PEXTRB:
  case X86ISD::PEXTRW: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
                                            Op.getConstantOperandVal(1));
    Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
    break;
  }
  case X86ISD::VSRAI:
  case X86ISD::VSHLI:
  case X86ISD::VSRLI: {
    if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      if (ShiftImm->getAPIntValue().uge(VT.getScalarSizeInBits())) {
        Known.setAllZero();
        break;
      }

      Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
      unsigned ShAmt = ShiftImm->getZExtValue();
      if (Opc == X86ISD::VSHLI) {
        Known.Zero <<= ShAmt;
        Known.One <<= ShAmt;
        // Low bits are known zero.
        Known.Zero.setLowBits(ShAmt);
      } else if (Opc == X86ISD::VSRLI) {
        Known.Zero.lshrInPlace(ShAmt);
        Known.One.lshrInPlace(ShAmt);
        // High bits are known zero.
        Known.Zero.setHighBits(ShAmt);
      } else {
        Known.Zero.ashrInPlace(ShAmt);
        Known.One.ashrInPlace(ShAmt);
      }
    }
    break;
  }
  case X86ISD::PACKUS: {
    // PACKUS is just a truncation if the upper half is zero.
    APInt DemandedLHS, DemandedRHS;
    getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);

    Known.One = APInt::getAllOnesValue(BitWidth * 2);
    Known.Zero = APInt::getAllOnesValue(BitWidth * 2);

    KnownBits Known2;
    if (!!DemandedLHS) {
      Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedLHS, Depth + 1);
      Known.One &= Known2.One;
      Known.Zero &= Known2.Zero;
    }
    if (!!DemandedRHS) {
      Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedRHS, Depth + 1);
      Known.One &= Known2.One;
      Known.Zero &= Known2.Zero;
    }

    if (Known.countMinLeadingZeros() < BitWidth)
      Known.resetAll();
    Known = Known.trunc(BitWidth);
    break;
  }
  case X86ISD::CMOV: {
    Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth+1);

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    break;
  }
  }

  // Handle target shuffles.
  // TODO - use resolveTargetShuffleInputs once we can limit recursive depth.
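  // For example, given a unary PSHUFD with mask {0,0,1,1}, only source
  // elements 0 and 1 are demanded, so the result's known bits are the
  // intersection of the known bits of just those two source elements.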
  if (isTargetShuffle(Opc)) {
    bool IsUnary;
    SmallVector<int, 64> Mask;
    SmallVector<SDValue, 2> Ops;
    if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask,
                             IsUnary)) {
      unsigned NumOps = Ops.size();
      unsigned NumElts = VT.getVectorNumElements();
      if (Mask.size() == NumElts) {
        SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
        Known.Zero.setAllBits(); Known.One.setAllBits();
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i])
            continue;
          int M = Mask[i];
          if (M == SM_SentinelUndef) {
            // For UNDEF elements, we don't know anything about the common
            // state of the shuffle result.
            Known.resetAll();
            break;
          } else if (M == SM_SentinelZero) {
            Known.One.clearAllBits();
            continue;
          }
          assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
                 "Shuffle index out of range");

          unsigned OpIdx = (unsigned)M / NumElts;
          unsigned EltIdx = (unsigned)M % NumElts;
          if (Ops[OpIdx].getValueType() != VT) {
            // TODO - handle target shuffle ops with different value types.
            Known.resetAll();
            break;
          }
          DemandedOps[OpIdx].setBit(EltIdx);
        }
        // Known bits are the values that are shared by every demanded element.
        for (unsigned i = 0; i != NumOps && !Known.isUnknown(); ++i) {
          if (!DemandedOps[i])
            continue;
          KnownBits Known2 =
              DAG.computeKnownBits(Ops[i], DemandedOps[i], Depth + 1);
          Known.One &= Known2.One;
          Known.Zero &= Known2.Zero;
        }
      }
    }
  }
}

unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  unsigned VTBits = Op.getScalarValueSizeInBits();
  unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  case X86ISD::SETCC_CARRY:
    // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
    return VTBits;

  case X86ISD::VTRUNC: {
    // TODO: Add DemandedElts support.
    SDValue Src = Op.getOperand(0);
    unsigned NumSrcBits = Src.getScalarValueSizeInBits();
    assert(VTBits < NumSrcBits && "Illegal truncation input type");
    unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
    if (Tmp > (NumSrcBits - VTBits))
      return Tmp - (NumSrcBits - VTBits);
    return 1;
  }

  case X86ISD::PACKSS: {
    // PACKSS is just a truncation if the sign bits extend to the packed size.
    APInt DemandedLHS, DemandedRHS;
    getPackDemandedElts(Op.getValueType(), DemandedElts, DemandedLHS,
                        DemandedRHS);

    unsigned SrcBits = Op.getOperand(0).getScalarValueSizeInBits();
    unsigned Tmp0 = SrcBits, Tmp1 = SrcBits;
    if (!!DemandedLHS)
      Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
    if (!!DemandedRHS)
      Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
    unsigned Tmp = std::min(Tmp0, Tmp1);
    if (Tmp > (SrcBits - VTBits))
      return Tmp - (SrcBits - VTBits);
    return 1;
  }

  case X86ISD::VSHLI: {
    SDValue Src = Op.getOperand(0);
    APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
    if (ShiftVal.uge(VTBits))
      return VTBits; // Shifted all bits out --> zero.
    unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
    if (ShiftVal.uge(Tmp))
      return 1; // Shifted all sign bits out --> unknown.
    return Tmp - ShiftVal.getZExtValue();
  }

  case X86ISD::VSRAI: {
    SDValue Src = Op.getOperand(0);
    APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
    if (ShiftVal.uge(VTBits - 1))
      return VTBits; // Sign splat.
    unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
    ShiftVal += Tmp;
    return ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
  }

  case X86ISD::PCMPGT:
  case X86ISD::PCMPEQ:
  case X86ISD::CMPP:
  case X86ISD::VPCOM:
  case X86ISD::VPCOMU:
    // Vector compares return zero/all-bits result values.
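    // E.g. PCMPGT yields 0x0000 or 0xFFFF per i16 element, so every result
    // bit is a copy of the sign bit.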
    return VTBits;

  case X86ISD::CMOV: {
    unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1);
    if (Tmp0 == 1) return 1;  // Early out.
    unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
    return std::min(Tmp0, Tmp1);
  }
  }

  // Fallback case.
  return 1;
}

SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
  if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP)
    return N->getOperand(0);
  return N;
}

// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
                                    bool AllowFloatDomain, bool AllowIntDomain,
                                    SDValue &V1, const SDLoc &DL,
                                    SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget,
                                    unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
  unsigned NumMaskElts = Mask.size();
  unsigned MaskEltSize = MaskVT.getScalarSizeInBits();

  // Match against a VZEXT_MOVL vXi32 zero-extending instruction.
  if (MaskEltSize == 32 && isUndefOrEqual(Mask[0], 0) &&
      isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) {
    Shuffle = X86ISD::VZEXT_MOVL;
    SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
    return true;
  }

  // Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
  // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
  if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
                         (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
    unsigned MaxScale = 64 / MaskEltSize;
    for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
      bool Match = true;
      unsigned NumDstElts = NumMaskElts / Scale;
      for (unsigned i = 0; i != NumDstElts && Match; ++i) {
        Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
        Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
      }
      if (Match) {
        unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
        MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType()
                                          : MVT::getIntegerVT(MaskEltSize);
        SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
        if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
          V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
        if (SrcVT.getVectorNumElements() == NumDstElts)
          Shuffle = unsigned(ISD::ZERO_EXTEND);
        else
          Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);

        DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
        DstVT = MVT::getVectorVT(DstVT, NumDstElts);
        return true;
      }
    }
  }

  // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
  if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
      isUndefOrEqual(Mask[0], 0) &&
      isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
    Shuffle = X86ISD::VZEXT_MOVL;
    SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
    return true;
  }

  // Check if we have SSE3 which will let us use MOVDDUP etc. These
  // instructions are no slower than UNPCKLPD but have the option to
  // fold the input operand into even an unaligned memory load.
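  // E.g. 'movddup (%rax), %xmm0' folds an unaligned 64-bit load and
  // duplicates it into both lanes, while a pre-AVX UNPCKLPD memory operand
  // would require 16-byte alignment.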
  if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
    if (!Subtarget.hasAVX2() && isTargetShuffleEquivalent(Mask, {0, 0})) {
      Shuffle = X86ISD::MOVDDUP;
      SrcVT = DstVT = MVT::v2f64;
      return true;
    }
    if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
      Shuffle = X86ISD::MOVSLDUP;
      SrcVT = DstVT = MVT::v4f32;
      return true;
    }
    if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3})) {
      Shuffle = X86ISD::MOVSHDUP;
      SrcVT = DstVT = MVT::v4f32;
      return true;
    }
  }

  if (MaskVT.is256BitVector() && AllowFloatDomain) {
    assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
    if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
      Shuffle = X86ISD::MOVDDUP;
      SrcVT = DstVT = MVT::v4f64;
      return true;
    }
    if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
      Shuffle = X86ISD::MOVSLDUP;
      SrcVT = DstVT = MVT::v8f32;
      return true;
    }
    if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3, 5, 5, 7, 7})) {
      Shuffle = X86ISD::MOVSHDUP;
      SrcVT = DstVT = MVT::v8f32;
      return true;
    }
  }

  if (MaskVT.is512BitVector() && AllowFloatDomain) {
    assert(Subtarget.hasAVX512() &&
           "AVX512 required for 512-bit vector shuffles");
    if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
      Shuffle = X86ISD::MOVDDUP;
      SrcVT = DstVT = MVT::v8f64;
      return true;
    }
    if (isTargetShuffleEquivalent(
            Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14})) {
      Shuffle = X86ISD::MOVSLDUP;
      SrcVT = DstVT = MVT::v16f32;
      return true;
    }
    if (isTargetShuffleEquivalent(
            Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15})) {
      Shuffle = X86ISD::MOVSHDUP;
      SrcVT = DstVT = MVT::v16f32;
      return true;
    }
  }

  // Attempt to match against broadcast-from-vector.
  if (Subtarget.hasAVX2()) {
    SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
    if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
      SrcVT = DstVT = MaskVT;
      Shuffle = X86ISD::VBROADCAST;
      return true;
    }
  }

  return false;
}

// Attempt to match a combined shuffle mask against supported unary immediate
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
                                           const APInt &Zeroable,
                                           bool AllowFloatDomain,
                                           bool AllowIntDomain,
                                           const X86Subtarget &Subtarget,
                                           unsigned &Shuffle, MVT &ShuffleVT,
                                           unsigned &PermuteImm) {
  unsigned NumMaskElts = Mask.size();
  unsigned InputSizeInBits = MaskVT.getSizeInBits();
  unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
  MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);

  bool ContainsZeros =
      llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });

  // Handle VPERMI/VPERMILPD vXi64/vXf64 patterns.
  if (!ContainsZeros && MaskScalarSizeInBits == 64) {
    // Check for lane crossing permutes.
    if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
      // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
      if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) {
        Shuffle = X86ISD::VPERMI;
        ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
        PermuteImm = getV4X86ShuffleImm(Mask);
        return true;
      }
      if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) {
        SmallVector<int, 4> RepeatedMask;
        if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
          Shuffle = X86ISD::VPERMI;
          ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
          PermuteImm = getV4X86ShuffleImm(RepeatedMask);
          return true;
        }
      }
    } else if (AllowFloatDomain && Subtarget.hasAVX()) {
      // VPERMILPD can permute with a non-repeating shuffle.
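      // VPERMILPD uses one immediate bit per element, selecting within each
      // 128-bit lane; e.g. a v4f64 mask {1,0,3,2} yields PermuteImm = 0b0101
      // from the loop below.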
      Shuffle = X86ISD::VPERMILPI;
      ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
      PermuteImm = 0;
      for (int i = 0, e = Mask.size(); i != e; ++i) {
        int M = Mask[i];
        if (M == SM_SentinelUndef)
          continue;
        assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index");
        PermuteImm |= (M & 1) << i;
      }
      return true;
    }
  }

  // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns.
  // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
  // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
  if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
      !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
    SmallVector<int, 4> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
      // Narrow the repeated mask to create 32-bit element permutes.
      SmallVector<int, 4> WordMask = RepeatedMask;
      if (MaskScalarSizeInBits == 64)
        scaleShuffleMask(2, RepeatedMask, WordMask);

      Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
      ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
      ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
      PermuteImm = getV4X86ShuffleImm(WordMask);
      return true;
    }
  }

  // Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
  if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
    SmallVector<int, 4> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
      ArrayRef<int> LoMask(Mask.data() + 0, 4);
      ArrayRef<int> HiMask(Mask.data() + 4, 4);

      // PSHUFLW: permute lower 4 elements only.
      if (isUndefOrInRange(LoMask, 0, 4) &&
          isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
        Shuffle = X86ISD::PSHUFLW;
        ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
        PermuteImm = getV4X86ShuffleImm(LoMask);
        return true;
      }

      // PSHUFHW: permute upper 4 elements only.
      if (isUndefOrInRange(HiMask, 4, 8) &&
          isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
        // Offset the HiMask so that we can create the shuffle immediate.
        int OffsetHiMask[4];
        for (int i = 0; i != 4; ++i)
          OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);

        Shuffle = X86ISD::PSHUFHW;
        ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
        PermuteImm = getV4X86ShuffleImm(OffsetHiMask);
        return true;
      }
    }
  }

  // Attempt to match against byte/bit shifts.
  // FIXME: Add 512-bit support.
  if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
                         (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
    int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
                                             MaskScalarSizeInBits, Mask,
                                             0, Zeroable, Subtarget);
    if (0 < ShiftAmt) {
      PermuteImm = (unsigned)ShiftAmt;
      return true;
    }
  }

  return false;
}

// Attempt to match a combined unary shuffle mask against supported binary
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
                                     bool AllowFloatDomain, bool AllowIntDomain,
                                     SDValue &V1, SDValue &V2, const SDLoc &DL,
                                     SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget,
                                     unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
                                     bool IsUnary) {
  unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();

  if (MaskVT.is128BitVector()) {
    if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) {
      V2 = V1;
      V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1);
      Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS;
      SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
      return true;
    }
    if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) {
      V2 = V1;
      Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS;
      SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
      return true;
    }
    if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() &&
        (AllowFloatDomain || !Subtarget.hasSSE41())) {
      std::swap(V1, V2);
      Shuffle = X86ISD::MOVSD;
      SrcVT = DstVT = MVT::v2f64;
      return true;
    }
    if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) &&
        (AllowFloatDomain || !Subtarget.hasSSE41())) {
      Shuffle = X86ISD::MOVSS;
      SrcVT = DstVT = MVT::v4f32;
      return true;
    }
  }

  // Attempt to match against either an unary or binary PACKSS/PACKUS shuffle.
  if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) ||
      ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) &&
       Subtarget.hasInt256()) ||
      ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
    if (matchVectorShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
                                   Subtarget)) {
      DstVT = MaskVT;
      return true;
    }
  }

  // Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
  if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
      (MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
      (MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
      (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
      (MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
    if (matchVectorShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL,
                                    DAG, Subtarget)) {
      SrcVT = DstVT = MaskVT;
      if (MaskVT.is256BitVector() && !Subtarget.hasAVX2())
        SrcVT = DstVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
      return true;
    }
  }

  return false;
}

static bool matchBinaryPermuteVectorShuffle(
    MVT MaskVT, ArrayRef<int> Mask, const APInt &Zeroable,
    bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2,
    const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget,
    unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) {
  unsigned NumMaskElts = Mask.size();
  unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();

  // Attempt to match against PALIGNR byte rotate.
  if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
                         (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
    int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask);
    if (0 < ByteRotation) {
      Shuffle = X86ISD::PALIGNR;
      ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8);
      PermuteImm = ByteRotation;
      return true;
    }
  }

  // Attempt to combine to X86ISD::BLENDI.
  if ((NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
                            (Subtarget.hasAVX() && MaskVT.is256BitVector()))) ||
      (MaskVT == MVT::v16i16 && Subtarget.hasAVX2())) {
    uint64_t BlendMask = 0;
    bool ForceV1Zero = false, ForceV2Zero = false;
    SmallVector<int, 64> TargetMask(Mask.begin(), Mask.end());
    if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero,
                                  BlendMask)) {
      if (MaskVT == MVT::v16i16) {
        // We can only use v16i16 PBLENDW if the lanes are repeated.
        SmallVector<int, 8> RepeatedMask;
        if (isRepeatedTargetShuffleMask(128, MaskVT, TargetMask,
                                        RepeatedMask)) {
          assert(RepeatedMask.size() == 8 &&
                 "Repeated mask size doesn't match!");
          PermuteImm = 0;
          for (int i = 0; i < 8; ++i)
            if (RepeatedMask[i] >= 8)
              PermuteImm |= 1 << i;
          V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
          V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
          Shuffle = X86ISD::BLENDI;
          ShuffleVT = MaskVT;
          return true;
        }
      } else {
        // Determine a type compatible with X86ISD::BLENDI.
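        // BLENDI consumes one immediate bit per ShuffleVT element, so the
        // blend mask must be rescaled along with the type; e.g. a v2i64 blend
        // {0,3} (BlendMask = 0b10) becomes 0b1100 as v4i32 or 0xF0 as v8i16.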
        ShuffleVT = MaskVT;
        if (Subtarget.hasAVX2()) {
          if (ShuffleVT == MVT::v4i64)
            ShuffleVT = MVT::v8i32;
          else if (ShuffleVT == MVT::v2i64)
            ShuffleVT = MVT::v4i32;
        } else {
          if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
            ShuffleVT = MVT::v8i16;
          else if (ShuffleVT == MVT::v4i64)
            ShuffleVT = MVT::v4f64;
          else if (ShuffleVT == MVT::v8i32)
            ShuffleVT = MVT::v8f32;
        }

        if (!ShuffleVT.isFloatingPoint()) {
          int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits();
          BlendMask =
              scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
          ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale);
          ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale);
        }

        V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
        V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
        PermuteImm = (unsigned)BlendMask;
        Shuffle = X86ISD::BLENDI;
        return true;
      }
    }
  }

  // Attempt to combine to INSERTPS.
  if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
      MaskVT.is128BitVector()) {
    if (Zeroable.getBoolValue() &&
        matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
      Shuffle = X86ISD::INSERTPS;
      ShuffleVT = MVT::v4f32;
      return true;
    }
  }

  // Attempt to combine to SHUFPD.
  if (AllowFloatDomain && EltSizeInBits == 64 &&
      ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
       (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
       (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
    if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
      Shuffle = X86ISD::SHUFP;
      ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
      return true;
    }
  }

  // Attempt to combine to SHUFPS.
  if (AllowFloatDomain && EltSizeInBits == 32 &&
      ((MaskVT.is128BitVector() && Subtarget.hasSSE1()) ||
       (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
       (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
    SmallVector<int, 4> RepeatedMask;
    if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) {
      // Match each half of the repeated mask, to determine if it's just
      // referencing one of the vectors, is zeroable or entirely undef.
      auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) {
        int M0 = RepeatedMask[Offset];
        int M1 = RepeatedMask[Offset + 1];
        if (isUndefInRange(RepeatedMask, Offset, 2)) {
          return DAG.getUNDEF(MaskVT);
        } else if (isUndefOrZeroInRange(RepeatedMask, Offset, 2)) {
          S0 = (SM_SentinelUndef == M0 ? -1 : 0);
          S1 = (SM_SentinelUndef == M1 ? -1 : 1);
          return getZeroVector(MaskVT, Subtarget, DAG, DL);
        } else if (isUndefOrInRange(M0, 0, 4) && isUndefOrInRange(M1, 0, 4)) {
          S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
          S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
          return V1;
        } else if (isUndefOrInRange(M0, 4, 8) && isUndefOrInRange(M1, 4, 8)) {
          S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
          S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
          return V2;
        }
        return SDValue();
      };

      int ShufMask[4] = {-1, -1, -1, -1};
      SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]);
      SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]);

      if (Lo && Hi) {
        V1 = Lo;
        V2 = Hi;
        Shuffle = X86ISD::SHUFP;
        ShuffleVT = MVT::getVectorVT(MVT::f32, MaskVT.getSizeInBits() / 32);
        PermuteImm = getV4X86ShuffleImm(ShufMask);
        return true;
      }
    }
  }

  return false;
}

/// Combine an arbitrary chain of shuffles into a single instruction if
/// possible.
///
/// This is the leaf of the recursive combine below. When we have found some
/// chain of single-use x86 shuffle instructions and accumulated the combined
/// shuffle mask represented by them, this will try to pattern match that mask
/// into either a single instruction if there is a special purpose instruction
/// for this operation, or into a PSHUFB instruction which is a fully general
/// instruction but should only be used to replace chains over a certain depth.
static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
                                      ArrayRef<int> BaseMask, int Depth,
                                      bool HasVariableMask,
                                      bool AllowVariableMask, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
  assert((Inputs.size() == 1 || Inputs.size() == 2) &&
         "Unexpected number of shuffle inputs!");

  // Find the inputs that enter the chain. Note that multiple uses are OK
  // here, we're not going to remove the operands we find.
  bool UnaryShuffle = (Inputs.size() == 1);
  SDValue V1 = peekThroughBitcasts(Inputs[0]);
  SDValue V2 = (UnaryShuffle ? DAG.getUNDEF(V1.getValueType())
                             : peekThroughBitcasts(Inputs[1]));

  MVT VT1 = V1.getSimpleValueType();
  MVT VT2 = V2.getSimpleValueType();
  MVT RootVT = Root.getSimpleValueType();
  assert(VT1.getSizeInBits() == RootVT.getSizeInBits() &&
         VT2.getSizeInBits() == RootVT.getSizeInBits() &&
         "Vector size mismatch");

  SDLoc DL(Root);
  SDValue Res;

  unsigned NumBaseMaskElts = BaseMask.size();
  if (NumBaseMaskElts == 1) {
    assert(BaseMask[0] == 0 && "Invalid shuffle index found!");
    return DAG.getBitcast(RootVT, V1);
  }

  unsigned RootSizeInBits = RootVT.getSizeInBits();
  unsigned NumRootElts = RootVT.getVectorNumElements();
  unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
  bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
                     (RootVT.isFloatingPoint() && Depth >= 2) ||
                     (RootVT.is256BitVector() && !Subtarget.hasAVX2());

  // Don't combine if we are an AVX512/EVEX target and the mask element size
  // is different from the root element size - this would prevent writemasks
  // from being reused.
  // TODO - this currently prevents all lane shuffles from occurring.
  // TODO - check for writemasks usage instead of always preventing combining.
  // TODO - attempt to narrow Mask back to writemask size.
  bool IsEVEXShuffle =
      RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);

  // TODO - handle 128/256-bit lane shuffles of 512-bit vectors.

  // Handle 128-bit lane shuffles of 256-bit vectors.
  // If we have AVX2, prefer to use VPERMQ/VPERMPD for unary shuffles unless
  // we need to use the zeroing feature.
  // TODO - this should support binary shuffles.
  if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
      !(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) &&
      !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
    if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
      return SDValue(); // Nothing to do!
    MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
    unsigned PermMask = 0;
    PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
    PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);

    Res = DAG.getBitcast(ShuffleVT, V1);
    Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
                      DAG.getUNDEF(ShuffleVT),
                      DAG.getConstant(PermMask, DL, MVT::i8));
    return DAG.getBitcast(RootVT, Res);
  }

  // For masks that have been widened to 128-bit elements or more,
  // narrow back down to 64-bit elements.
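  // E.g. a 256-bit root whose widened BaseMask is {1,0} (two 128-bit
  // elements) is rescaled by MaskScale = 2 into the 64-bit element mask
  // {2,3,0,1} before instruction matching.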
  SmallVector<int, 64> Mask;
  if (BaseMaskEltSizeInBits > 64) {
    assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
    int MaskScale = BaseMaskEltSizeInBits / 64;
    scaleShuffleMask(MaskScale, BaseMask, Mask);
  } else {
    Mask = SmallVector<int, 64>(BaseMask.begin(), BaseMask.end());
  }

  unsigned NumMaskElts = Mask.size();
  unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts;

  // Determine the effective mask value type.
  FloatDomain &= (32 <= MaskEltSizeInBits);
  MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits)
                           : MVT::getIntegerVT(MaskEltSizeInBits);
  MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts);

  // Only allow legal mask types.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
    return SDValue();

  // Attempt to match the mask against known shuffle patterns.
  MVT ShuffleSrcVT, ShuffleVT;
  unsigned Shuffle, PermuteImm;

  // Which shuffle domains are permitted?
  // Permit domain crossing at higher combine depths.
  bool AllowFloatDomain = FloatDomain || (Depth > 3);
  bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() &&
                        (!MaskVT.is256BitVector() || Subtarget.hasAVX2());

  // Determine zeroable mask elements.
  APInt Zeroable(NumMaskElts, 0);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    if (isUndefOrZero(Mask[i]))
      Zeroable.setBit(i);

  if (UnaryShuffle) {
    // If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
    // directly if we don't shuffle the lower element and we shuffle the upper
    // (zero) elements within themselves.
    if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
        (V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) {
      unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits;
      ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
      if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
          isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
        return DAG.getBitcast(RootVT, V1);
      }
    }

    SDValue NewV1 = V1; // Save operand in case early exit happens.
    if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
                                NewV1, DL, DAG, Subtarget, Shuffle,
                                ShuffleSrcVT, ShuffleVT) &&
        (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
      if (Depth == 1 && Root.getOpcode() == Shuffle)
        return SDValue(); // Nothing to do!
      Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
      Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
      return DAG.getBitcast(RootVT, Res);
    }

    if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
                                       AllowIntDomain, Subtarget, Shuffle,
                                       ShuffleVT, PermuteImm) &&
        (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
      if (Depth == 1 && Root.getOpcode() == Shuffle)
        return SDValue(); // Nothing to do!
      Res = DAG.getBitcast(ShuffleVT, V1);
      Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
                        DAG.getConstant(PermuteImm, DL, MVT::i8));
      return DAG.getBitcast(RootVT, Res);
    }
  }

  SDValue NewV1 = V1; // Save operands in case early exit happens.
  SDValue NewV2 = V2;
  if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
                               NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
                               ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
      (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
    if (Depth == 1 && Root.getOpcode() == Shuffle)
      return SDValue(); // Nothing to do!
    NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
    NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
    Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
    return DAG.getBitcast(RootVT, Res);
  }

  NewV1 = V1; // Save operands in case early exit happens.
  NewV2 = V2;
  if (matchBinaryPermuteVectorShuffle(
          MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
          NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
      (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
    if (Depth == 1 && Root.getOpcode() == Shuffle)
      return SDValue(); // Nothing to do!
    NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
    NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
    Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
                      DAG.getConstant(PermuteImm, DL, MVT::i8));
    return DAG.getBitcast(RootVT, Res);
  }

  // Typically from here on, we need an integer version of MaskVT.
  MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits);
  IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts);

  // Annoyingly, SSE4A instructions don't map into the above match helpers.
  if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
    uint64_t BitLen, BitIdx;
    if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
                                  Zeroable)) {
      if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
        return SDValue(); // Nothing to do!
      V1 = DAG.getBitcast(IntMaskVT, V1);
      Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
                        DAG.getConstant(BitLen, DL, MVT::i8),
                        DAG.getConstant(BitIdx, DL, MVT::i8));
      return DAG.getBitcast(RootVT, Res);
    }

    if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
      if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
        return SDValue(); // Nothing to do!
      V1 = DAG.getBitcast(IntMaskVT, V1);
      V2 = DAG.getBitcast(IntMaskVT, V2);
      Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
                        DAG.getConstant(BitLen, DL, MVT::i8),
                        DAG.getConstant(BitIdx, DL, MVT::i8));
      return DAG.getBitcast(RootVT, Res);
    }
  }

  // Don't try to re-form single instruction chains under any circumstances now
  // that we've done encoding canonicalization for them.
  if (Depth < 2)
    return SDValue();

  // Depth threshold above which we can efficiently use variable mask shuffles.
  int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
  AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;

  bool MaskContainsZeros =
      any_of(Mask, [](int M) { return M == SM_SentinelZero; });

  if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) {
    // If we have a single input lane-crossing shuffle then lower to VPERMV.
    if (UnaryShuffle && AllowVariableMask && !MaskContainsZeros &&
        ((Subtarget.hasAVX2() &&
          (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
         (Subtarget.hasAVX512() &&
          (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
           MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
         (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
         (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
         (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
         (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
      SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
      Res = DAG.getBitcast(MaskVT, V1);
      Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res);
      return DAG.getBitcast(RootVT, Res);
    }

    // Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero
    // vector as the second source.
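    // E.g. a v8i32 mask {0,Z,2,Z,4,Z,6,Z} (Z = SM_SentinelZero) is rewritten
    // below as {0,9,2,11,4,13,6,15}; indices 8-15 pick lanes of the all-zeros
    // second source.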
    if (UnaryShuffle && AllowVariableMask &&
        ((Subtarget.hasAVX512() &&
          (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
           MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
         (Subtarget.hasVLX() &&
          (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
           MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
         (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
         (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
         (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
         (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
      // Adjust shuffle mask - replace SM_SentinelZero with second source index.
      for (unsigned i = 0; i != NumMaskElts; ++i)
        if (Mask[i] == SM_SentinelZero)
          Mask[i] = NumMaskElts + i;

      SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
      Res = DAG.getBitcast(MaskVT, V1);
      SDValue Zero = getZeroVector(MaskVT, Subtarget, DAG, DL);
      Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, Res, VPermMask, Zero);
      return DAG.getBitcast(RootVT, Res);
    }

    // If we have a dual input lane-crossing shuffle then lower to VPERMV3.
    if (AllowVariableMask && !MaskContainsZeros &&
        ((Subtarget.hasAVX512() &&
          (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
           MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
         (Subtarget.hasVLX() &&
          (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
           MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
         (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
         (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
         (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
         (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
      SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
      V1 = DAG.getBitcast(MaskVT, V1);
      V2 = DAG.getBitcast(MaskVT, V2);
      Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
      return DAG.getBitcast(RootVT, Res);
    }
    return SDValue();
  }

  // See if we can combine a single input shuffle with zeros to a bit-mask,
  // which is much simpler than any shuffle.
  if (UnaryShuffle && MaskContainsZeros && AllowVariableMask &&
      isSequentialOrUndefOrZeroInRange(Mask, 0, NumMaskElts, 0) &&
      DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) {
    APInt Zero = APInt::getNullValue(MaskEltSizeInBits);
    APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits);
    APInt UndefElts(NumMaskElts, 0);
    SmallVector<APInt, 64> EltBits(NumMaskElts, Zero);
    for (unsigned i = 0; i != NumMaskElts; ++i) {
      int M = Mask[i];
      if (M == SM_SentinelUndef) {
        UndefElts.setBit(i);
        continue;
      }
      if (M == SM_SentinelZero)
        continue;
      EltBits[i] = AllOnes;
    }
    SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL);
    Res = DAG.getBitcast(MaskVT, V1);
    unsigned AndOpcode =
        FloatDomain ? unsigned(X86ISD::FAND) : unsigned(ISD::AND);
    Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask);
    return DAG.getBitcast(RootVT, Res);
  }

  // If we have a single input shuffle with different shuffle patterns in the
  // 128-bit lanes, use the variable mask to VPERMILPS.
  // TODO Combine other mask types at higher depths.
  if (UnaryShuffle && AllowVariableMask && !MaskContainsZeros &&
      ((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) ||
       (MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) {
    SmallVector<SDValue, 16> VPermIdx;
    for (int M : Mask) {
      SDValue Idx = M < 0 ? DAG.getUNDEF(MVT::i32)
                          : DAG.getConstant(M % 4, DL, MVT::i32);
      VPermIdx.push_back(Idx);
    }
    SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx);
    Res = DAG.getBitcast(MaskVT, V1);
    Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask);
    return DAG.getBitcast(RootVT, Res);
  }

  // With XOP, binary shuffles of 128/256-bit floating point vectors can
  // combine to VPERMIL2PD/VPERMIL2PS.
  if (AllowVariableMask && Subtarget.hasXOP() &&
      (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 ||
       MaskVT == MVT::v8f32)) {
    // VPERMIL2 Operation.
    // Bits[3] - Match Bit.
    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    unsigned NumLanes = MaskVT.getSizeInBits() / 128;
    unsigned NumEltsPerLane = NumMaskElts / NumLanes;
    SmallVector<int, 8> VPerm2Idx;
    unsigned M2ZImm = 0;
    for (int M : Mask) {
      if (M == SM_SentinelUndef) {
        VPerm2Idx.push_back(-1);
        continue;
      }
      if (M == SM_SentinelZero) {
        M2ZImm = 2;
        VPerm2Idx.push_back(8);
        continue;
      }
      int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane);
      Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index);
      VPerm2Idx.push_back(Index);
    }
    V1 = DAG.getBitcast(MaskVT, V1);
    V2 = DAG.getBitcast(MaskVT, V2);
    SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
    Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
                      DAG.getConstant(M2ZImm, DL, MVT::i8));
    return DAG.getBitcast(RootVT, Res);
  }

  // If we have 3 or more shuffle instructions or a chain involving a variable
  // mask, we can replace them with a single PSHUFB instruction profitably.
  // Intel's manuals suggest only using PSHUFB if doing so replacing 5
  // instructions, but in practice PSHUFB tends to be *very* fast so we're
  // more aggressive.
  if (UnaryShuffle && AllowVariableMask &&
      ((RootVT.is128BitVector() && Subtarget.hasSSSE3()) ||
       (RootVT.is256BitVector() && Subtarget.hasAVX2()) ||
       (RootVT.is512BitVector() && Subtarget.hasBWI()))) {
    SmallVector<SDValue, 64> PSHUFBMask;
    int NumBytes = RootVT.getSizeInBits() / 8;
    int Ratio = NumBytes / NumMaskElts;
    for (int i = 0; i < NumBytes; ++i) {
      int M = Mask[i / Ratio];
      if (M == SM_SentinelUndef) {
        PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
        continue;
      }
      if (M == SM_SentinelZero) {
        PSHUFBMask.push_back(DAG.getConstant(255, DL, MVT::i8));
        continue;
      }
      M = Ratio * M + i % Ratio;
      assert((M / 16) == (i / 16) && "Lane crossing detected");
      PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8));
    }
    MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
    Res = DAG.getBitcast(ByteVT, V1);
    SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask);
    Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp);
    return DAG.getBitcast(RootVT, Res);
  }

  // With XOP, if we have a 128-bit binary input shuffle we can always combine
  // to VPPERM. We match the depth requirement of PSHUFB - VPPERM is never
  // slower than PSHUFB on targets that support both.
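  // Illustrative selector bytes: for a v2i64 mask {1, SM_SentinelZero} the
  // loop below emits byte indices 8..15 (the high half of V1) for the low
  // eight result bytes and 0x80 (the ZERO permute operation) for the rest.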
  if (AllowVariableMask && RootVT.is128BitVector() && Subtarget.hasXOP()) {
    // VPPERM Mask Operation
    // Bits[4:0] - Byte Index (0 - 31)
    // Bits[7:5] - Permute Operation (0 - Source byte, 4 - ZERO)
    SmallVector<SDValue, 16> VPPERMMask;
    int NumBytes = 16;
    int Ratio = NumBytes / NumMaskElts;
    for (int i = 0; i < NumBytes; ++i) {
      int M = Mask[i / Ratio];
      if (M == SM_SentinelUndef) {
        VPPERMMask.push_back(DAG.getUNDEF(MVT::i8));
        continue;
      }
      if (M == SM_SentinelZero) {
        VPPERMMask.push_back(DAG.getConstant(128, DL, MVT::i8));
        continue;
      }
      M = Ratio * M + i % Ratio;
      VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8));
    }
    MVT ByteVT = MVT::v16i8;
    V1 = DAG.getBitcast(ByteVT, V1);
    V2 = DAG.getBitcast(ByteVT, V2);
    SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask);
    Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp);
    return DAG.getBitcast(RootVT, Res);
  }

  // Failed to find any combines.
  return SDValue();
}

// Attempt to constant fold all of the constant source ops.
// Returns true if the entire shuffle is folded to a constant.
// TODO: Extend this to merge multiple constant Ops and update the mask.
static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
                                           ArrayRef<int> Mask, SDValue Root,
                                           bool HasVariableMask,
                                           SelectionDAG &DAG,
                                           const X86Subtarget &Subtarget) {
  MVT VT = Root.getSimpleValueType();

  unsigned SizeInBits = VT.getSizeInBits();
  unsigned NumMaskElts = Mask.size();
  unsigned MaskSizeInBits = SizeInBits / NumMaskElts;
  unsigned NumOps = Ops.size();

  // Extract constant bits from each source op.
  bool OneUseConstantOp = false;
  SmallVector<APInt, 16> UndefEltsOps(NumOps);
  SmallVector<SmallVector<APInt, 16>, 16> RawBitsOps(NumOps);
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue SrcOp = Ops[i];
    OneUseConstantOp |= SrcOp.hasOneUse();
    if (!getTargetConstantBitsFromNode(SrcOp, MaskSizeInBits, UndefEltsOps[i],
                                       RawBitsOps[i]))
      return SDValue();
  }

  // Only fold if at least one of the constants is only used once or
  // the combined shuffle has included a variable mask shuffle, this
  // is to avoid constant pool bloat.
  if (!OneUseConstantOp && !HasVariableMask)
    return SDValue();

  // Shuffle the constant bits according to the mask.
  APInt UndefElts(NumMaskElts, 0);
  APInt ZeroElts(NumMaskElts, 0);
  APInt ConstantElts(NumMaskElts, 0);
  SmallVector<APInt, 16> ConstantBitData(NumMaskElts,
                                         APInt::getNullValue(MaskSizeInBits));
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    int M = Mask[i];
    if (M == SM_SentinelUndef) {
      UndefElts.setBit(i);
      continue;
    } else if (M == SM_SentinelZero) {
      ZeroElts.setBit(i);
      continue;
    }
    assert(0 <= M && M < (int)(NumMaskElts * NumOps));

    unsigned SrcOpIdx = (unsigned)M / NumMaskElts;
    unsigned SrcMaskIdx = (unsigned)M % NumMaskElts;

    auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
    if (SrcUndefElts[SrcMaskIdx]) {
      UndefElts.setBit(i);
      continue;
    }

    auto &SrcEltBits = RawBitsOps[SrcOpIdx];
    APInt &Bits = SrcEltBits[SrcMaskIdx];
    if (!Bits) {
      ZeroElts.setBit(i);
      continue;
    }

    ConstantElts.setBit(i);
    ConstantBitData[i] = Bits;
  }
  assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());

  // Create the constant data.
  MVT MaskSVT;
  if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
    MaskSVT = MVT::getFloatingPointVT(MaskSizeInBits);
  else
    MaskSVT = MVT::getIntegerVT(MaskSizeInBits);

  MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);

  SDLoc DL(Root);
  SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
  return DAG.getBitcast(VT, CstOp);
}

/// Fully generic combining of x86 shuffle instructions.
///
/// This should be the last combine run over the x86 shuffle instructions. Once
/// they have been fully optimized, this will recursively consider all chains
/// of single-use shuffle instructions, build a generic model of the cumulative
/// shuffle operation, and check for simpler instructions which implement this
/// operation. We use this primarily for two purposes:
///
/// 1) Collapse generic shuffles to specialized single instructions when
///    equivalent. In most cases, this is just an encoding size win, but
///    sometimes we will collapse multiple generic shuffles into a single
///    special-purpose shuffle.
/// 2) Look for sequences of shuffle instructions with 3 or more total
///    instructions, and replace them with the slightly more expensive SSSE3
///    PSHUFB instruction if available. We do this as the last combining step
///    to ensure we avoid using PSHUFB if we can implement the shuffle with
///    a suitable short sequence of other instructions. The PSHUFB will either
///    use a register or have to read from memory and so is slightly (but only
///    slightly) more expensive than the other shuffle instructions.
///
/// Because this is inherently a quadratic operation (for each shuffle in
/// a chain, we recurse up the chain), the depth is limited to 8 instructions.
/// This should never be an issue in practice as the shuffle lowering doesn't
/// produce sequences of more than 8 instructions.
///
/// FIXME: We will currently miss some cases where the redundant shuffling
/// would simplify under the threshold for PSHUFB formation because of
/// combine-ordering. To fix this, we should do the redundant instruction
/// combining in this recursive walk.
static SDValue combineX86ShufflesRecursively(
    ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
    ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
    bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
    const X86Subtarget &Subtarget) {
  // Bound the depth of our recursive combine because this is ultimately
  // quadratic in nature.
  const unsigned MaxRecursionDepth = 8;
  if (Depth > MaxRecursionDepth)
    return SDValue();

  // Directly rip through bitcasts to find the underlying operand.
  SDValue Op = SrcOps[SrcOpIndex];
  Op = peekThroughOneUseBitcasts(Op);

  MVT VT = Op.getSimpleValueType();
  if (!VT.isVector())
    return SDValue(); // Bail if we hit a non-vector.

  assert(Root.getSimpleValueType().isVector() &&
         "Shuffles operate on vector types!");
  assert(VT.getSizeInBits() == Root.getSimpleValueType().getSizeInBits() &&
         "Can only combine shuffles of the same vector register size.");

  // Extract target shuffle mask and resolve sentinels and inputs.
  SmallVector<int, 64> OpMask;
  SmallVector<SDValue, 2> OpInputs;
  if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
    return SDValue();

  // TODO - Add support for more than 2 inputs.
  if (2 < OpInputs.size())
    return SDValue();

  SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
  SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue());

  // Add the inputs to the Ops list, avoiding duplicates.
  SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
  auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
    if (!Input)
      return -1;
    // Attempt to find an existing match.
    SDValue InputBC = peekThroughBitcasts(Input);
    for (int i = 0, e = Ops.size(); i < e; ++i)
      if (InputBC == peekThroughBitcasts(Ops[i]))
        return i;
    // Match failed - should we replace an existing Op?
    if (InsertionPoint >= 0) {
      Ops[InsertionPoint] = Input;
      return InsertionPoint;
    }
    // Add to the end of the Ops list.
    Ops.push_back(Input);
    return Ops.size() - 1;
  };

  int InputIdx0 = AddOp(Input0, SrcOpIndex);
  int InputIdx1 = AddOp(Input1, -1);

  assert(((RootMask.size() > OpMask.size() &&
           RootMask.size() % OpMask.size() == 0) ||
          (OpMask.size() > RootMask.size() &&
           OpMask.size() % RootMask.size() == 0) ||
          OpMask.size() == RootMask.size()) &&
         "The smaller number of elements must divide the larger.");

  // This function can be performance-critical, so we rely on the power-of-2
  // knowledge that we have about the mask sizes to replace div/rem ops with
  // bit-masks and shifts.
  assert(isPowerOf2_32(RootMask.size()) &&
         "Non-power-of-2 shuffle mask sizes");
  assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes");
  unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size());
  unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size());

  unsigned MaskWidth = std::max(OpMask.size(), RootMask.size());
  unsigned RootRatio = std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
  unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
  assert((RootRatio == 1 || OpRatio == 1) &&
         "Must not have a ratio for both incoming and op masks!");

  assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes");
  assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes");
  assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes");
  unsigned RootRatioLog2 = countTrailingZeros(RootRatio);
  unsigned OpRatioLog2 = countTrailingZeros(OpRatio);

  SmallVector<int, 64> Mask(MaskWidth, SM_SentinelUndef);

  // Merge this shuffle operation's mask into our accumulated mask. Note that
  // this shuffle's mask will be the first applied to the input, followed by
  // the root mask to get us all the way to the root value arrangement. The
  // reason for this order is that we are recursing up the operation chain.
  for (unsigned i = 0; i < MaskWidth; ++i) {
    unsigned RootIdx = i >> RootRatioLog2;
    if (RootMask[RootIdx] < 0) {
      // This is a zero or undef lane, we're done.
      Mask[i] = RootMask[RootIdx];
      continue;
    }

    unsigned RootMaskedIdx =
        RootRatio == 1
            ? RootMask[RootIdx]
            : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));

    // Just insert the scaled root mask value if it references an input other
    // than the SrcOp we're currently inserting.
    if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
        (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
      Mask[i] = RootMaskedIdx;
      continue;
    }

    RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
    unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
    if (OpMask[OpIdx] < 0) {
      // The incoming lanes are zero or undef, it doesn't matter which ones we
      // are using.
      Mask[i] = OpMask[OpIdx];
      continue;
    }

    // Ok, we have non-zero lanes, map them through to one of the Op's inputs.
    unsigned OpMaskedIdx =
        OpRatio == 1
            ? OpMask[OpIdx]
            : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1));

    OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
    if (OpMask[OpIdx] < (int)OpMask.size()) {
      assert(0 <= InputIdx0 && "Unknown target shuffle input");
      OpMaskedIdx += InputIdx0 * MaskWidth;
    } else {
      assert(0 <= InputIdx1 && "Unknown target shuffle input");
      OpMaskedIdx += InputIdx1 * MaskWidth;
    }

    Mask[i] = OpMaskedIdx;
  }

  // Handle the all undef/zero cases early.
  if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; }))
    return DAG.getUNDEF(Root.getValueType());

  // TODO - should we handle the mixed zero/undef case as well? Just returning
  // a zero mask will lose information on undef elements possibly reducing
  // future combine possibilities.
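  // Worked example of the merge above (single input, equal mask widths):
  // composing this op's mask OpMask = {1,0,3,2} under RootMask = {2,3,0,1}
  // yields Mask[i] = OpMask[RootMask[i]] = {3,2,1,0}.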
  if (all_of(Mask, [](int Idx) { return Idx < 0; }))
    return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG,
                         SDLoc(Root));

  // Remove unused shuffle source ops.
  resolveTargetShuffleInputsAndMask(Ops, Mask);
  assert(!Ops.empty() && "Shuffle with no inputs detected");

  HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode());

  // Update the list of shuffle nodes that have been combined so far.
  SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes.begin(),
                                                SrcNodes.end());
  CombinedNodes.push_back(Op.getNode());

  // See if we can recurse into each shuffle source op (if it's a target
  // shuffle). The source op should only be generally combined if it either has
  // a single use (i.e. current Op) or all its users have already been combined,
  // if not then we can still combine but should prevent generation of variable
  // shuffles to avoid constant pool bloat.
  // Don't recurse if we already have more source ops than we can combine in
  // the remaining recursion depth.
  if (Ops.size() < (MaxRecursionDepth - Depth)) {
    for (int i = 0, e = Ops.size(); i < e; ++i) {
      bool AllowVar = false;
      if (Ops[i].getNode()->hasOneUse() ||
          SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
        AllowVar = AllowVariableMask;
      if (SDValue Res = combineX86ShufflesRecursively(
              Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
              AllowVar, DAG, Subtarget))
        return Res;
    }
  }

  // Attempt to constant fold all of the constant source ops.
  if (SDValue Cst = combineX86ShufflesConstants(
          Ops, Mask, Root, HasVariableMask, DAG, Subtarget))
    return Cst;

  // We can only combine unary and binary shuffle mask cases.
  if (Ops.size() > 2)
    return SDValue();

  // Minor canonicalization of the accumulated shuffle mask to make it easier
  // to match below. All this does is detect masks with sequential pairs of
  // elements, and shrink them to the half-width mask. It does this in a loop
  // so it will reduce the size of the mask to the minimal width mask which
  // performs an equivalent shuffle.
  SmallVector<int, 64> WidenedMask;
  while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
    Mask = std::move(WidenedMask);
  }

  // Canonicalization of binary shuffle masks to improve pattern matching by
  // commuting the inputs.
  if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
    ShuffleVectorSDNode::commuteMask(Mask);
    std::swap(Ops[0], Ops[1]);
  }

  // Finally, try to combine into a single shuffle instruction.
  return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask,
                                AllowVariableMask, DAG, Subtarget);
}

/// Get the PSHUF-style mask from PSHUF node.
///
/// This is a very minor wrapper around getTargetShuffleMask to ease forming v4
/// PSHUF-style masks that can be reused with such instructions.
static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
  MVT VT = N.getSimpleValueType();
  SmallVector<int, 4> Mask;
  SmallVector<SDValue, 2> Ops;
  bool IsUnary;
  bool HaveMask =
      getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask, IsUnary);
  (void)HaveMask;
  assert(HaveMask);

  // If we have more than 128-bits, only the low 128-bits of shuffle mask
  // matter. Check that the upper masks are repeats and remove them.
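  // E.g. a 256-bit PSHUFD mask {1,0,3,2,5,4,7,6} repeats per 128-bit lane,
  // so only the low-lane mask {1,0,3,2} is kept.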
if (VT.getSizeInBits() > 128) { int LaneElts = 128 / VT.getScalarSizeInBits(); #ifndef NDEBUG for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i) for (int j = 0; j < LaneElts; ++j) assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) && "Mask doesn't repeat in high 128-bit lanes!"); #endif Mask.resize(LaneElts); } switch (N.getOpcode()) { case X86ISD::PSHUFD: return Mask; case X86ISD::PSHUFLW: Mask.resize(4); return Mask; case X86ISD::PSHUFHW: Mask.erase(Mask.begin(), Mask.begin() + 4); for (int &M : Mask) M -= 4; return Mask; default: llvm_unreachable("No valid shuffle instruction found!"); } } /// Search for a combinable shuffle across a chain ending in pshufd. /// /// We walk up the chain and look for a combinable shuffle, skipping over /// shuffles that we could hoist this shuffle's transformation past without /// altering anything. static SDValue combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, SelectionDAG &DAG) { assert(N.getOpcode() == X86ISD::PSHUFD && "Called with something other than an x86 128-bit half shuffle!"); SDLoc DL(N); // Walk up a single-use chain looking for a combinable shuffle. Keep a stack // of the shuffles in the chain so that we can form a fresh chain to replace // this one. SmallVector Chain; SDValue V = N.getOperand(0); for (; V.hasOneUse(); V = V.getOperand(0)) { switch (V.getOpcode()) { default: return SDValue(); // Nothing combined! case ISD::BITCAST: // Skip bitcasts as we always know the type for the target specific // instructions. continue; case X86ISD::PSHUFD: // Found another dword shuffle. break; case X86ISD::PSHUFLW: // Check that the low words (being shuffled) are the identity in the // dword shuffle, and the high words are self-contained. if (Mask[0] != 0 || Mask[1] != 1 || !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4)) return SDValue(); Chain.push_back(V); continue; case X86ISD::PSHUFHW: // Check that the high words (being shuffled) are the identity in the // dword shuffle, and the low words are self-contained. if (Mask[2] != 2 || Mask[3] != 3 || !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2)) return SDValue(); Chain.push_back(V); continue; case X86ISD::UNPCKL: case X86ISD::UNPCKH: // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword // shuffle into a preceding word shuffle. if (V.getSimpleValueType().getVectorElementType() != MVT::i8 && V.getSimpleValueType().getVectorElementType() != MVT::i16) return SDValue(); // Search for a half-shuffle which we can combine with. unsigned CombineOp = V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW; if (V.getOperand(0) != V.getOperand(1) || !V->isOnlyUserOf(V.getOperand(0).getNode())) return SDValue(); Chain.push_back(V); V = V.getOperand(0); do { switch (V.getOpcode()) { default: return SDValue(); // Nothing to combine. case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: if (V.getOpcode() == CombineOp) break; Chain.push_back(V); LLVM_FALLTHROUGH; case ISD::BITCAST: V = V.getOperand(0); continue; } break; } while (V.hasOneUse()); break; } // Break out of the loop if we break out of the switch. break; } if (!V.hasOneUse()) // We fell out of the loop without finding a viable combining instruction. return SDValue(); // Merge this node's mask and our incoming mask. SmallVector VMask = getPSHUFShuffleMask(V); for (int &M : Mask) M = VMask[M]; V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0), getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); // Rebuild the chain around this new shuffle. 
while (!Chain.empty()) { SDValue W = Chain.pop_back_val(); if (V.getValueType() != W.getOperand(0).getValueType()) V = DAG.getBitcast(W.getOperand(0).getValueType(), V); switch (W.getOpcode()) { default: llvm_unreachable("Only PSHUF and UNPCK instructions get here!"); case X86ISD::UNPCKL: case X86ISD::UNPCKH: V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V); break; case X86ISD::PSHUFD: case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1)); break; } } if (V.getValueType() != N.getValueType()) V = DAG.getBitcast(N.getValueType(), V); // Return the new chain to replace N. return V; } /// Try to combine x86 target specific shuffles. static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDLoc DL(N); MVT VT = N.getSimpleValueType(); SmallVector Mask; unsigned Opcode = N.getOpcode(); // Combine binary shuffle of 2 similar 'Horizontal' instructions into a // single instruction. if (VT.getScalarSizeInBits() == 64 && (Opcode == X86ISD::MOVSD || Opcode == X86ISD::UNPCKH || Opcode == X86ISD::UNPCKL)) { auto BC0 = peekThroughBitcasts(N.getOperand(0)); auto BC1 = peekThroughBitcasts(N.getOperand(1)); EVT VT0 = BC0.getValueType(); EVT VT1 = BC1.getValueType(); unsigned Opcode0 = BC0.getOpcode(); unsigned Opcode1 = BC1.getOpcode(); if (Opcode0 == Opcode1 && VT0 == VT1 && (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD || Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB || Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS)) { SDValue Lo, Hi; if (Opcode == X86ISD::MOVSD) { Lo = BC1.getOperand(0); Hi = BC0.getOperand(1); } else { Lo = BC0.getOperand(Opcode == X86ISD::UNPCKH ? 1 : 0); Hi = BC1.getOperand(Opcode == X86ISD::UNPCKH ? 1 : 0); } SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi); return DAG.getBitcast(VT, Horiz); } } switch (Opcode) { case X86ISD::VBROADCAST: { // If broadcasting from another shuffle, attempt to simplify it. // TODO - we really need a general SimplifyDemandedVectorElts mechanism. SDValue Src = N.getOperand(0); SDValue BC = peekThroughBitcasts(Src); EVT SrcVT = Src.getValueType(); EVT BCVT = BC.getValueType(); if (isTargetShuffle(BC.getOpcode()) && VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) { unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits(); SmallVector DemandedMask(BCVT.getVectorNumElements(), SM_SentinelUndef); for (unsigned i = 0; i != Scale; ++i) DemandedMask[i] = i; if (SDValue Res = combineX86ShufflesRecursively( {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, DAG.getBitcast(SrcVT, Res)); } return SDValue(); } case X86ISD::PSHUFD: case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: Mask = getPSHUFShuffleMask(N); assert(Mask.size() == 4); break; case X86ISD::MOVSD: case X86ISD::MOVSS: { SDValue N0 = N.getOperand(0); SDValue N1 = N.getOperand(1); // Canonicalize scalar FPOps: // MOVS*(N0, OP(N0, N1)) --> MOVS*(N0, SCALAR_TO_VECTOR(OP(N0[0], N1[0]))) // If commutable, allow OP(N1[0], N0[0]). 
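    // For example (illustrative): (MOVSD X, (fadd X, Y)) becomes
    // (MOVSD X, (scalar_to_vector (fadd (extract_elt X, 0),
    //                                   (extract_elt Y, 0)))),
    // so only the low element is computed, with a scalar FP op.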
    unsigned Opcode1 = N1.getOpcode();
    if (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL || Opcode1 == ISD::FSUB ||
        Opcode1 == ISD::FDIV) {
      SDValue N10 = N1.getOperand(0);
      SDValue N11 = N1.getOperand(1);
      if (N10 == N0 ||
          (N11 == N0 && (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL))) {
        if (N10 != N0)
          std::swap(N10, N11);
        MVT SVT = VT.getVectorElementType();
        SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
        N10 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N10, ZeroIdx);
        N11 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N11, ZeroIdx);
        SDValue Scl = DAG.getNode(Opcode1, DL, SVT, N10, N11);
        SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
        return DAG.getNode(Opcode, DL, VT, N0, SclVec);
      }
    }

    return SDValue();
  }
  case X86ISD::INSERTPS: {
    assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32");
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);
    SDValue Op2 = N.getOperand(2);
    unsigned InsertPSMask = cast<ConstantSDNode>(Op2)->getZExtValue();
    unsigned SrcIdx = (InsertPSMask >> 6) & 0x3;
    unsigned DstIdx = (InsertPSMask >> 4) & 0x3;
    unsigned ZeroMask = InsertPSMask & 0xF;

    // If we zero out all elements from Op0 then we don't need to reference it.
    if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef())
      return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,
                         DAG.getConstant(InsertPSMask, DL, MVT::i8));

    // If we zero out the element from Op1 then we don't need to reference it.
    if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef())
      return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
                         DAG.getConstant(InsertPSMask, DL, MVT::i8));

    // Attempt to merge insertps Op1 with an inner target shuffle node.
    SmallVector<int, 8> TargetMask1;
    SmallVector<SDValue, 2> Ops1;
    if (setTargetShuffleZeroElements(Op1, TargetMask1, Ops1)) {
      int M = TargetMask1[SrcIdx];
      if (isUndefOrZero(M)) {
        // Zero/UNDEF insertion - zero out element and remove dependency.
        InsertPSMask |= (1u << DstIdx);
        return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
                           DAG.getConstant(InsertPSMask, DL, MVT::i8));
      }
      // Update insertps mask srcidx and reference the source input directly.
      assert(0 <= M && M < 8 && "Shuffle index out of range");
      InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
      Op1 = Ops1[M < 4 ? 0 : 1];
      return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
                         DAG.getConstant(InsertPSMask, DL, MVT::i8));
    }

    // Attempt to merge insertps Op0 with an inner target shuffle node.
    SmallVector<int, 8> TargetMask0;
    SmallVector<SDValue, 2> Ops0;
    if (!setTargetShuffleZeroElements(Op0, TargetMask0, Ops0))
      return SDValue();

    bool Updated = false;
    bool UseInput00 = false;
    bool UseInput01 = false;
    for (int i = 0; i != 4; ++i) {
      int M = TargetMask0[i];
      if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
        // No change if element is already zero or the inserted element.
        continue;
      } else if (isUndefOrZero(M)) {
        // If the target mask is undef/zero then we must zero the element.
        InsertPSMask |= (1u << i);
        Updated = true;
        continue;
      }

      // The input vector element must be inline.
      if (M != i && M != (i + 4))
        return SDValue();

      // Determine which inputs of the target shuffle we're using.
      UseInput00 |= (0 <= M && M < 4);
      UseInput01 |= (4 <= M);
    }

    // If we're not using both inputs of the target shuffle then use the
    // referenced input directly.
if (UseInput00 && !UseInput01) { Updated = true; Op0 = Ops0[0]; } else if (!UseInput00 && UseInput01) { Updated = true; Op0 = Ops0[1]; } if (Updated) return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, DAG.getConstant(InsertPSMask, DL, MVT::i8)); return SDValue(); } default: return SDValue(); } // Nuke no-op shuffles that show up after combining. if (isNoopShuffleMask(Mask)) return N.getOperand(0); // Look for simplifications involving one or two shuffle instructions. SDValue V = N.getOperand(0); switch (N.getOpcode()) { default: break; case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!"); // See if this reduces to a PSHUFD which is no more expensive and can // combine with more operations. Note that it has to at least flip the // dwords as otherwise it would have been removed as a no-op. if (makeArrayRef(Mask).equals({2, 3, 0, 1})) { int DMask[] = {0, 1, 2, 3}; int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2; DMask[DOffset + 0] = DOffset + 1; DMask[DOffset + 1] = DOffset + 0; MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); V = DAG.getBitcast(DVT, V); V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V, getV4X86ShuffleImm8ForMask(DMask, DL, DAG)); return DAG.getBitcast(VT, V); } // Look for shuffle patterns which can be implemented as a single unpack. // FIXME: This doesn't handle the location of the PSHUFD generically, and // only works when we have a PSHUFD followed by two half-shuffles. if (Mask[0] == Mask[1] && Mask[2] == Mask[3] && (V.getOpcode() == X86ISD::PSHUFLW || V.getOpcode() == X86ISD::PSHUFHW) && V.getOpcode() != N.getOpcode() && V.hasOneUse()) { SDValue D = peekThroughOneUseBitcasts(V.getOperand(0)); if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) { SmallVector VMask = getPSHUFShuffleMask(V); SmallVector DMask = getPSHUFShuffleMask(D); int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; int WordMask[8]; for (int i = 0; i < 4; ++i) { WordMask[i + NOffset] = Mask[i] + NOffset; WordMask[i + VOffset] = VMask[i] + VOffset; } // Map the word mask through the DWord mask. int MappedMask[8]; for (int i = 0; i < 8; ++i) MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2; if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) || makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) { // We can replace all three shuffles with an unpack. V = DAG.getBitcast(VT, D.getOperand(0)); return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, VT, V, V); } } } break; case X86ISD::PSHUFD: if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG)) return NewN; break; } return SDValue(); } /// Checks if the shuffle mask takes subsequent elements /// alternately from two vectors. /// For example <0, 5, 2, 7> or <8, 1, 10, 3, 12, 5, 14, 7> are both correct. static bool isAddSubOrSubAddMask(ArrayRef Mask, bool &Op0Even) { int ParitySrc[2] = {-1, -1}; unsigned Size = Mask.size(); for (unsigned i = 0; i != Size; ++i) { int M = Mask[i]; if (M < 0) continue; // Make sure we are using the matching element from the input. if ((M % Size) != i) return false; // Make sure we use the same input for all elements of the same parity. int Src = M / Size; if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src) return false; ParitySrc[i % 2] = Src; } // Make sure each input is used. 
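  // For example (illustrative), Mask = <0,5,2,7> with Size = 4 gives
  // ParitySrc = {0, 1}: every even lane comes from source 0 and every odd
  // lane from source 1, so Op0Even is set to true.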
if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1]) return false; Op0Even = ParitySrc[0] == 0; return true; } /// Returns true iff the shuffle node \p N can be replaced with ADDSUB(SUBADD) /// operation. If true is returned then the operands of ADDSUB(SUBADD) operation /// are written to the parameters \p Opnd0 and \p Opnd1. /// /// We combine shuffle to ADDSUB(SUBADD) directly on the abstract vector shuffle nodes /// so it is easier to generically match. We also insert dummy vector shuffle /// nodes for the operands which explicitly discard the lanes which are unused /// by this operation to try to flow through the rest of the combiner /// the fact that they're unused. static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, bool &IsSubAdd) { EVT VT = N->getValueType(0); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!Subtarget.hasSSE3() || !TLI.isTypeLegal(VT) || !VT.getSimpleVT().isFloatingPoint()) return false; // We only handle target-independent shuffles. // FIXME: It would be easy and harmless to use the target shuffle mask // extraction tool to support more. if (N->getOpcode() != ISD::VECTOR_SHUFFLE) return false; SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); // Make sure we have an FADD and an FSUB. if ((V1.getOpcode() != ISD::FADD && V1.getOpcode() != ISD::FSUB) || (V2.getOpcode() != ISD::FADD && V2.getOpcode() != ISD::FSUB) || V1.getOpcode() == V2.getOpcode()) return false; // If there are other uses of these operations we can't fold them. if (!V1->hasOneUse() || !V2->hasOneUse()) return false; // Ensure that both operations have the same operands. Note that we can // commute the FADD operands. SDValue LHS, RHS; if (V1.getOpcode() == ISD::FSUB) { LHS = V1->getOperand(0); RHS = V1->getOperand(1); if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) && (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS)) return false; } else { assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode"); LHS = V2->getOperand(0); RHS = V2->getOperand(1); if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) && (V1->getOperand(0) != RHS || V1->getOperand(1) != LHS)) return false; } ArrayRef Mask = cast(N)->getMask(); bool Op0Even; if (!isAddSubOrSubAddMask(Mask, Op0Even)) return false; // It's a subadd if the vector in the even parity is an FADD. IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD : V2->getOpcode() == ISD::FADD; Opnd0 = LHS; Opnd1 = RHS; return true; } /// Combine shuffle of two fma nodes into FMAddSub or FMSubAdd. static SDValue combineShuffleToFMAddSub(SDNode *N, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // We only handle target-independent shuffles. // FIXME: It would be easy and harmless to use the target shuffle mask // extraction tool to support more. if (N->getOpcode() != ISD::VECTOR_SHUFFLE) return SDValue(); MVT VT = N->getSimpleValueType(0); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!Subtarget.hasAnyFMA() || !TLI.isTypeLegal(VT)) return SDValue(); // We're trying to match (shuffle fma(a, b, c), X86Fmsub(a, b, c). 
SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue FMAdd = Op0, FMSub = Op1; if (FMSub.getOpcode() != X86ISD::FMSUB) std::swap(FMAdd, FMSub); if (FMAdd.getOpcode() != ISD::FMA || FMSub.getOpcode() != X86ISD::FMSUB || FMAdd.getOperand(0) != FMSub.getOperand(0) || !FMAdd.hasOneUse() || FMAdd.getOperand(1) != FMSub.getOperand(1) || !FMSub.hasOneUse() || FMAdd.getOperand(2) != FMSub.getOperand(2)) return SDValue(); // Check for correct shuffle mask. ArrayRef Mask = cast(N)->getMask(); bool Op0Even; if (!isAddSubOrSubAddMask(Mask, Op0Even)) return SDValue(); // FMAddSub takes zeroth operand from FMSub node. SDLoc DL(N); bool IsSubAdd = Op0Even ? Op0 == FMAdd : Op1 == FMAdd; unsigned Opcode = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; return DAG.getNode(Opcode, DL, VT, FMAdd.getOperand(0), FMAdd.getOperand(1), FMAdd.getOperand(2)); } /// Try to combine a shuffle into a target-specific add-sub or /// mul-add-sub node. static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const X86Subtarget &Subtarget, SelectionDAG &DAG) { if (SDValue V = combineShuffleToFMAddSub(N, Subtarget, DAG)) return V; SDValue Opnd0, Opnd1; bool IsSubAdd; if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd)) return SDValue(); MVT VT = N->getSimpleValueType(0); SDLoc DL(N); // Try to generate X86ISD::FMADDSUB node here. SDValue Opnd2; if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) { unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2); } if (IsSubAdd) return SDValue(); // Do not generate X86ISD::ADDSUB node for 512-bit types even though // the ADDSUB idiom has been successfully recognized. There are no known // X86 targets with 512-bit ADDSUB instructions! if (VT.is512BitVector()) return SDValue(); return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); } // We are looking for a shuffle where both sources are concatenated with undef // and have a width that is half of the output's width. AVX2 has VPERMD/Q, so // if we can express this as a single-source shuffle, that's preferable. static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (!Subtarget.hasAVX2() || !isa(N)) return SDValue(); EVT VT = N->getValueType(0); // We only care about shuffles of 128/256-bit vectors of 32/64-bit values. if (!VT.is128BitVector() && !VT.is256BitVector()) return SDValue(); if (VT.getVectorElementType() != MVT::i32 && VT.getVectorElementType() != MVT::i64 && VT.getVectorElementType() != MVT::f32 && VT.getVectorElementType() != MVT::f64) return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Check that both sources are concats with undef. if (N0.getOpcode() != ISD::CONCAT_VECTORS || N1.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 || N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef()) return SDValue(); // Construct the new shuffle mask. Elements from the first source retain their // index, but elements from the second source no longer need to skip an undef. SmallVector Mask; int NumElts = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast(N); for (int Elt : SVOp->getMask()) Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2)); SDLoc DL(N); SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0), N1.getOperand(0)); return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask); } /// Eliminate a redundant shuffle of a horizontal math op. 
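/// For example (illustrative), 128-bit HADD(X, X) on v4f32 computes
/// <X0+X1, X2+X3, X0+X1, X2+X3>, so its low and high halves are already
/// identical and a half-replicating shuffle of the result is redundant.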
static SDValue foldShuffleOfHorizOp(SDNode *N) { if (N->getOpcode() != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef()) return SDValue(); SDValue HOp = N->getOperand(0); if (HOp.getOpcode() != X86ISD::HADD && HOp.getOpcode() != X86ISD::FHADD && HOp.getOpcode() != X86ISD::HSUB && HOp.getOpcode() != X86ISD::FHSUB) return SDValue(); // 128-bit horizontal math instructions are defined to operate on adjacent // lanes of each operand as: // v4X32: A[0] + A[1] , A[2] + A[3] , B[0] + B[1] , B[2] + B[3] // ...similarly for v2f64 and v8i16. // TODO: Handle UNDEF operands. if (HOp.getOperand(0) != HOp.getOperand(1)) return SDValue(); // When the operands of a horizontal math op are identical, the low half of // the result is the same as the high half. If the shuffle is also replicating // low and high halves, we don't need the shuffle. // shuffle (hadd X, X), undef, [low half...high half] --> hadd X, X ArrayRef Mask = cast(N)->getMask(); // TODO: Other mask possibilities like {1,1} and {1,0} could be added here, // but this should be tied to whatever horizontal op matching and shuffle // canonicalization are producing. if (HOp.getValueSizeInBits() == 128 && (isTargetShuffleEquivalent(Mask, {0, 0}) || isTargetShuffleEquivalent(Mask, {0, 1, 0, 1}) || isTargetShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3}))) return HOp; if (HOp.getValueSizeInBits() == 256 && (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}) || isTargetShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) || isTargetShuffleEquivalent( Mask, {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11}))) return HOp; return SDValue(); } static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDLoc dl(N); EVT VT = N->getValueType(0); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If we have legalized the vector types, look for blends of FADD and FSUB // nodes that we can fuse into an ADDSUB, FMADDSUB, or FMSUBADD node. if (TLI.isTypeLegal(VT)) { if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG)) return AddSub; if (SDValue HAddSub = foldShuffleOfHorizOp(N)) return HAddSub; } // During Type Legalization, when promoting illegal vector types, // the backend might introduce new shuffle dag nodes and bitcasts. // // This code performs the following transformation: // fold: (shuffle (bitcast (BINOP A, B)), Undef, ) -> // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, ) // // We do this only if both the bitcast and the BINOP dag nodes have // one use. Also, perform this transformation only if the new binary // operation is legal. This is to avoid introducing dag nodes that // potentially need to be further expanded (or custom lowered) into a // less optimal sequence of dag nodes. if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::VECTOR_SHUFFLE && N->getOperand(0).getOpcode() == ISD::BITCAST && N->getOperand(1).isUndef() && N->getOperand(0).hasOneUse()) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue BC0 = N0.getOperand(0); EVT SVT = BC0.getValueType(); unsigned Opcode = BC0.getOpcode(); unsigned NumElts = VT.getVectorNumElements(); if (BC0.hasOneUse() && SVT.isVector() && SVT.getVectorNumElements() * 2 == NumElts && TLI.isOperationLegal(Opcode, VT)) { bool CanFold = false; switch (Opcode) { default : break; case ISD::ADD: case ISD::SUB: case ISD::MUL: // isOperationLegal lies for integer ops on floating point types. 
CanFold = VT.isInteger(); break; case ISD::FADD: case ISD::FSUB: case ISD::FMUL: // isOperationLegal lies for floating point ops on integer types. CanFold = VT.isFloatingPoint(); break; } unsigned SVTNumElts = SVT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast(N); for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i) CanFold = SVOp->getMaskElt(i) == (int)(i * 2); for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i) CanFold = SVOp->getMaskElt(i) < 0; if (CanFold) { SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0)); SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1)); SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01); return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, SVOp->getMask()); } } } // Combine a vector_shuffle that is equal to build_vector load1, load2, load3, // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are // consecutive, non-overlapping, and in the right order. SmallVector Elts; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) { Elts.push_back(Elt); continue; } Elts.clear(); break; } if (Elts.size() == VT.getVectorNumElements()) if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true)) return LD; // For AVX2, we sometimes want to combine // (vector_shuffle (concat_vectors t1, undef) // (concat_vectors t2, undef)) // Into: // (vector_shuffle (concat_vectors t1, t2), undef) // Since the latter can be efficiently lowered with VPERMD/VPERMQ if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget)) return ShufConcat; if (isTargetShuffle(N->getOpcode())) { SDValue Op(N, 0); if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget)) return Shuffle; // Try recursively combining arbitrary sequences of x86 shuffle // instructions into higher-order shuffles. We do this after combining // specific PSHUF instruction sequences into their minimal form so that we // can evaluate how many specialized shuffle instructions are involved in // a particular chain. if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; // Simplify source operands based on shuffle mask. // TODO - merge this into combineX86ShufflesRecursively. APInt KnownUndef, KnownZero; APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, DCI)) return SDValue(N, 0); } // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the // operands is an extend from v2i32 to v2i64. Turn it into a pmulld. // FIXME: This can probably go away once we default to widening legalization. 
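  // For example (illustrative): the low dwords of
  // (PMULUDQ (zext v2i32 X), (zext v2i32 Y)) are X*Y modulo 2^32, so a
  // <0,2,u,u> truncating shuffle of that product is just a v4i32 multiply
  // (PMULLD) of suitably shuffled inputs.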
if (Subtarget.hasSSE41() && VT == MVT::v4i32 && N->getOpcode() == ISD::VECTOR_SHUFFLE && N->getOperand(0).getOpcode() == ISD::BITCAST && N->getOperand(0).getOperand(0).getOpcode() == X86ISD::PMULUDQ) { SDValue BC = N->getOperand(0); SDValue MULUDQ = BC.getOperand(0); ShuffleVectorSDNode *SVOp = cast(N); ArrayRef Mask = SVOp->getMask(); if (BC.hasOneUse() && MULUDQ.hasOneUse() && Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) { SDValue Op0 = MULUDQ.getOperand(0); SDValue Op1 = MULUDQ.getOperand(1); if (Op0.getOpcode() == ISD::BITCAST && Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE && Op0.getOperand(0).getValueType() == MVT::v4i32) { ShuffleVectorSDNode *SVOp0 = cast(Op0.getOperand(0)); ArrayRef Mask2 = SVOp0->getMask(); if (Mask2[0] == 0 && Mask2[1] == -1 && Mask2[2] == 1 && Mask2[3] == -1) { Op0 = SVOp0->getOperand(0); Op1 = DAG.getBitcast(MVT::v4i32, Op1); Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask); return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1); } } if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE && Op1.getOperand(0).getValueType() == MVT::v4i32) { ShuffleVectorSDNode *SVOp1 = cast(Op1.getOperand(0)); ArrayRef Mask2 = SVOp1->getMask(); if (Mask2[0] == 0 && Mask2[1] == -1 && Mask2[2] == 1 && Mask2[3] == -1) { Op0 = DAG.getBitcast(MVT::v4i32, Op0); Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask); Op1 = SVOp1->getOperand(0); return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1); } } } } return SDValue(); } bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth) const { int NumElts = DemandedElts.getBitWidth(); unsigned Opc = Op.getOpcode(); EVT VT = Op.getValueType(); // Handle special case opcodes. switch (Opc) { case X86ISD::VSHL: case X86ISD::VSRL: case X86ISD::VSRA: { // We only need the bottom 64-bits of the (128-bit) shift amount. SDValue Amt = Op.getOperand(1); MVT AmtVT = Amt.getSimpleValueType(); assert(AmtVT.is128BitVector() && "Unexpected value type"); APInt AmtUndef, AmtZero; unsigned NumAmtElts = AmtVT.getVectorNumElements(); APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2); if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO, Depth + 1)) return true; LLVM_FALLTHROUGH; } case X86ISD::VSHLI: case X86ISD::VSRLI: case X86ISD::VSRAI: { SDValue Src = Op.getOperand(0); APInt SrcUndef; if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO, Depth + 1)) return true; // TODO convert SrcUndef to KnownUndef. break; } case X86ISD::CVTSI2P: case X86ISD::CVTUI2P: { SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); APInt SrcUndef, SrcZero; APInt SrcElts = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements()); if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; break; } case X86ISD::PACKSS: case X86ISD::PACKUS: { APInt DemandedLHS, DemandedRHS; getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); APInt SrcUndef, SrcZero; if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef, SrcZero, TLO, Depth + 1)) return true; if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef, SrcZero, TLO, Depth + 1)) return true; break; } case X86ISD::VBROADCAST: { SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); if (!SrcVT.isVector()) return false; // Don't bother broadcasting if we just need the 0'th element. 
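    // For example (illustrative), if only lane 0 of (VBROADCAST %src) is
    // demanded, the broadcast is a no-op and %src can be used directly once
    // widened to the result type.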
if (DemandedElts == 1) { if(Src.getValueType() != VT) Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG, SDLoc(Op)); return TLO.CombineTo(Op, Src); } APInt SrcUndef, SrcZero; APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0); if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; break; } case X86ISD::PSHUFB: { // TODO - simplify other variable shuffle masks. SDValue Mask = Op.getOperand(1); APInt MaskUndef, MaskZero; if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO, Depth + 1)) return true; break; } } // Simplify target shuffles. if (!isTargetShuffle(Opc) || !VT.isSimple()) return false; // Get target shuffle mask. bool IsUnary; SmallVector OpMask; SmallVector OpInputs; if (!getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, OpInputs, OpMask, IsUnary)) return false; // Shuffle inputs must be the same type as the result. if (llvm::any_of(OpInputs, [VT](SDValue V) { return VT != V.getValueType(); })) return false; // Clear known elts that might have been set above. KnownZero.clearAllBits(); KnownUndef.clearAllBits(); // Check if shuffle mask can be simplified to undef/zero/identity. int NumSrcs = OpInputs.size(); for (int i = 0; i != NumElts; ++i) { int &M = OpMask[i]; if (!DemandedElts[i]) M = SM_SentinelUndef; else if (0 <= M && OpInputs[M / NumElts].isUndef()) M = SM_SentinelUndef; } if (isUndefInRange(OpMask, 0, NumElts)) { KnownUndef.setAllBits(); return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); } if (isUndefOrZeroInRange(OpMask, 0, NumElts)) { KnownZero.setAllBits(); return TLO.CombineTo( Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op))); } for (int Src = 0; Src != NumSrcs; ++Src) if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts)) return TLO.CombineTo(Op, OpInputs[Src]); // Attempt to simplify inputs. for (int Src = 0; Src != NumSrcs; ++Src) { int Lo = Src * NumElts; APInt SrcElts = APInt::getNullValue(NumElts); for (int i = 0; i != NumElts; ++i) if (DemandedElts[i]) { int M = OpMask[i] - Lo; if (0 <= M && M < NumElts) SrcElts.setBit(M); } APInt SrcUndef, SrcZero; if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; } // Extract known zero/undef elements. // TODO - Propagate input undef/zero elts. for (int i = 0; i != NumElts; ++i) { if (OpMask[i] == SM_SentinelUndef) KnownUndef.setBit(i); if (OpMask[i] == SM_SentinelZero) KnownZero.setBit(i); } return false; } bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const { EVT VT = Op.getValueType(); unsigned BitWidth = OriginalDemandedBits.getBitWidth(); unsigned Opc = Op.getOpcode(); switch(Opc) { case X86ISD::PMULDQ: case X86ISD::PMULUDQ: { // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element. KnownBits KnownOp; SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); // FIXME: Can we bound this better? 
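    // Note (illustrative): PMULUDQ computes, per i64 element,
    // (zext lo32(LHS)) * (zext lo32(RHS)), so only the low 32 bits of each
    // 64-bit source element can affect the result.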
APInt DemandedMask = APInt::getLowBitsSet(64, 32); if (SimplifyDemandedBits(LHS, DemandedMask, KnownOp, TLO, Depth + 1)) return true; if (SimplifyDemandedBits(RHS, DemandedMask, KnownOp, TLO, Depth + 1)) return true; break; } case X86ISD::VSHLI: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); if (auto *ShiftImm = dyn_cast(Op1)) { if (ShiftImm->getAPIntValue().uge(BitWidth)) break; unsigned ShAmt = ShiftImm->getZExtValue(); APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt); // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a // single shift. We can do this if the bottom bits (which are shifted // out) are never demanded. if (Op0.getOpcode() == X86ISD::VSRLI && OriginalDemandedBits.countTrailingZeros() >= ShAmt) { if (auto *Shift2Imm = dyn_cast(Op0.getOperand(1))) { if (Shift2Imm->getAPIntValue().ult(BitWidth)) { int Diff = ShAmt - Shift2Imm->getZExtValue(); if (Diff == 0) return TLO.CombineTo(Op, Op0.getOperand(0)); unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI; SDValue NewShift = TLO.DAG.getNode( NewOpc, SDLoc(Op), VT, Op0.getOperand(0), TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8)); return TLO.CombineTo(Op, NewShift); } } } if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero <<= ShAmt; Known.One <<= ShAmt; // Low bits known zero. Known.Zero.setLowBits(ShAmt); } break; } case X86ISD::VSRLI: { if (auto *ShiftImm = dyn_cast(Op.getOperand(1))) { if (ShiftImm->getAPIntValue().uge(BitWidth)) break; unsigned ShAmt = ShiftImm->getZExtValue(); APInt DemandedMask = OriginalDemandedBits << ShAmt; if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, OriginalDemandedElts, Known, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); // High bits known zero. Known.Zero.setHighBits(ShAmt); } break; } case X86ISD::VSRAI: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); if (auto *ShiftImm = dyn_cast(Op1)) { if (ShiftImm->getAPIntValue().uge(BitWidth)) break; unsigned ShAmt = ShiftImm->getZExtValue(); APInt DemandedMask = OriginalDemandedBits << ShAmt; // If we just want the sign bit then we don't need to shift it. if (OriginalDemandedBits.isSignMask()) return TLO.CombineTo(Op, Op0); // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1 if (Op0.getOpcode() == X86ISD::VSHLI && Op1 == Op0.getOperand(1)) { SDValue Op00 = Op0.getOperand(0); unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op00, OriginalDemandedElts); if (ShAmt < NumSignBits) return TLO.CombineTo(Op, Op00); } // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. if (OriginalDemandedBits.countLeadingZeros() < ShAmt) DemandedMask.setSignBit(); if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. if (Known.Zero[BitWidth - ShAmt - 1] || OriginalDemandedBits.countLeadingZeros() >= ShAmt) return TLO.CombineTo( Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1)); // High bits are known one. 
if (Known.One[BitWidth - ShAmt - 1]) Known.One.setHighBits(ShAmt); } break; } case X86ISD::MOVMSK: { SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); unsigned SrcBits = SrcVT.getScalarSizeInBits(); unsigned NumElts = SrcVT.getVectorNumElements(); // If we don't need the sign bits at all just return zero. if (OriginalDemandedBits.countTrailingZeros() >= NumElts) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); // Only demand the vector elements of the sign bits we need. APInt KnownUndef, KnownZero; APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts); if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, TLO, Depth + 1)) return true; Known.Zero = KnownZero.zextOrSelf(BitWidth); Known.Zero.setHighBits(BitWidth - NumElts); // MOVMSK only uses the MSB from each vector element. KnownBits KnownSrc; if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), DemandedElts, KnownSrc, TLO, Depth + 1)) return true; if (KnownSrc.One[SrcBits - 1]) Known.One.setLowBits(NumElts); else if (KnownSrc.Zero[SrcBits - 1]) Known.Zero.setLowBits(NumElts); return false; } } return TargetLowering::SimplifyDemandedBitsForTargetNode( Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth); } /// Check if a vector extract from a target-specific shuffle of a load can be /// folded into a single element load. /// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but /// shuffles have been custom lowered so we need to handle those here. static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { if (DCI.isBeforeLegalizeOps()) return SDValue(); SDValue InVec = N->getOperand(0); SDValue EltNo = N->getOperand(1); EVT EltVT = N->getValueType(0); if (!isa(EltNo)) return SDValue(); EVT OriginalVT = InVec.getValueType(); // Peek through bitcasts, don't duplicate a load with other uses. InVec = peekThroughOneUseBitcasts(InVec); EVT CurrentVT = InVec.getValueType(); if (!CurrentVT.isVector() || CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements()) return SDValue(); if (!isTargetShuffle(InVec.getOpcode())) return SDValue(); // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) return SDValue(); SmallVector ShuffleMask; SmallVector ShuffleOps; bool UnaryShuffle; if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true, ShuffleOps, ShuffleMask, UnaryShuffle)) return SDValue(); // Select the input vector, guarding against out of range extract vector. unsigned NumElems = CurrentVT.getVectorNumElements(); int Elt = cast(EltNo)->getZExtValue(); int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt]; if (Idx == SM_SentinelZero) return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT) : DAG.getConstantFP(+0.0, SDLoc(N), EltVT); if (Idx == SM_SentinelUndef) return DAG.getUNDEF(EltVT); // Bail if any mask element is SM_SentinelZero - getVectorShuffle below // won't handle it. if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; })) return SDValue(); assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range"); SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] : ShuffleOps[1]; // If inputs to shuffle are the same for both ops, then allow 2 uses unsigned AllowedUses = (ShuffleOps.size() > 1 && ShuffleOps[0] == ShuffleOps[1]) ? 2 : 1; if (LdNode.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. 
    if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
      return SDValue();

    AllowedUses = 1; // only allow 1 load use if we have a bitcast
    LdNode = LdNode.getOperand(0);
  }

  if (!ISD::isNormalLoad(LdNode.getNode()))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
  if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
    return SDValue();

  // If there's a bitcast before the shuffle, check if the load type and
  // alignment are valid.
  unsigned Align = LN0->getAlignment();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      EltVT.getTypeForEVT(*DAG.getContext()));

  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
    return SDValue();

  // All checks match so transform back to vector_shuffle so that DAG combiner
  // can finish the job.
  SDLoc dl(N);

  // Create shuffle node taking into account the case that it's a unary
  // shuffle.
  SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1];
  Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle,
                                 ShuffleMask);
  Shuffle = DAG.getBitcast(OriginalVT, Shuffle);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
                     EltNo);
}

// Try to match patterns such as
//   (i16 bitcast (v16i1 x))
// ->
//   (i16 movmsk (v16i8 sext (v16i1 x)))
// before the illegal vector is scalarized on subtargets that don't have legal
// vxi1 types.
static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
                                  const X86Subtarget &Subtarget) {
  EVT VT = BitCast.getValueType();
  SDValue N0 = BitCast.getOperand(0);
  EVT VecVT = N0->getValueType(0);

  if (!VT.isScalarInteger() || !VecVT.isSimple())
    return SDValue();

  // If the input is a truncate from v16i8 or v32i8 go ahead and use a
  // movmskb even with avx512. This will be better than truncating to vXi1 and
  // using a kmov. This can especially help KNL if the input is a v16i8/v32i8
  // vpcmpeqb/vpcmpgtb.
  bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
                     (N0.getOperand(0).getValueType() == MVT::v16i8 ||
                      N0.getOperand(0).getValueType() == MVT::v32i8 ||
                      N0.getOperand(0).getValueType() == MVT::v64i8);

  // With AVX512 vxi1 types are legal and we prefer using k-regs.
  // MOVMSK is supported in SSE2 or later.
  if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !IsTruncated))
    return SDValue();

  // There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
  // v8f64. So all legal 128-bit and 256-bit vectors are covered except for
  // v8i16 and v16i16.
  // For these two cases, we can shuffle the upper element bytes to a
  // consecutive sequence at the start of the vector and treat the results as
  // v16i8 or v32i8, and for v16i8 this is the preferable solution. However,
  // for v16i16 this is not the case, because the shuffle is expensive, so we
  // avoid sign-extending to this type entirely.
  // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
  // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
  MVT SExtVT;
  switch (VecVT.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v2i1:
    SExtVT = MVT::v2i64;
    break;
  case MVT::v4i1:
    SExtVT = MVT::v4i32;
    // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
    // sign-extend to a 256-bit operation to avoid truncation.
if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() && N0->getOperand(0).getValueType().is256BitVector()) { SExtVT = MVT::v4i64; } break; case MVT::v8i1: SExtVT = MVT::v8i16; // For cases such as (i8 bitcast (v8i1 setcc v8i32 v1, v2)), // sign-extend to a 256-bit operation to match the compare. // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over // 256-bit because the shuffle is cheaper than sign extending the result of // the compare. if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() && (N0->getOperand(0).getValueType().is256BitVector() || N0->getOperand(0).getValueType().is512BitVector())) { SExtVT = MVT::v8i32; } break; case MVT::v16i1: SExtVT = MVT::v16i8; // For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)), // it is not profitable to sign-extend to 256-bit because this will // require an extra cross-lane shuffle which is more expensive than // truncating the result of the compare to 128-bits. break; case MVT::v32i1: SExtVT = MVT::v32i8; break; case MVT::v64i1: // If we have AVX512F, but not AVX512BW and the input is truncated from // v64i8 checked earlier. Then split the input and make two pmovmskbs. if (Subtarget.hasAVX512() && !Subtarget.hasBWI()) { SExtVT = MVT::v64i8; break; } return SDValue(); }; SDLoc DL(BitCast); SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, N0); if (SExtVT == MVT::v64i8) { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(V, DL); Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo); Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi); Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi, DAG.getConstant(32, DL, MVT::i8)); V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi); } else if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) { V = getPMOVMSKB(DL, V, DAG, Subtarget); } else { if (SExtVT == MVT::v8i16) V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V, DAG.getUNDEF(MVT::v8i16)); V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V); } return DAG.getZExtOrTrunc(V, DL, VT); } // Convert a vXi1 constant build vector to the same width scalar integer. static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) { EVT SrcVT = Op.getValueType(); assert(SrcVT.getVectorElementType() == MVT::i1 && "Expected a vXi1 vector"); assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && "Expected a constant build vector"); APInt Imm(SrcVT.getVectorNumElements(), 0); for (unsigned Idx = 0, e = Op.getNumOperands(); Idx < e; ++Idx) { SDValue In = Op.getOperand(Idx); if (!In.isUndef() && (cast(In)->getZExtValue() & 0x1)) Imm.setBit(Idx); } EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), Imm.getBitWidth()); return DAG.getConstant(Imm, SDLoc(Op), IntVT); } static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { assert(N->getOpcode() == ISD::BITCAST && "Expected a bitcast"); if (!DCI.isBeforeLegalizeOps()) return SDValue(); // Only do this if we have k-registers. if (!Subtarget.hasAVX512()) return SDValue(); EVT DstVT = N->getValueType(0); SDValue Op = N->getOperand(0); EVT SrcVT = Op.getValueType(); if (!Op.hasOneUse()) return SDValue(); // Look for logic ops. if (Op.getOpcode() != ISD::AND && Op.getOpcode() != ISD::OR && Op.getOpcode() != ISD::XOR) return SDValue(); // Make sure we have a bitcast between mask registers and a scalar type. 
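  // For example (illustrative): both (i16 bitcast (v16i1 and X, Y)) and
  // (v16i1 bitcast (i16 and X, Y)) qualify here.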
if (!(SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 && DstVT.isScalarInteger()) && !(DstVT.isVector() && DstVT.getVectorElementType() == MVT::i1 && SrcVT.isScalarInteger())) return SDValue(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); if (LHS.hasOneUse() && LHS.getOpcode() == ISD::BITCAST && LHS.getOperand(0).getValueType() == DstVT) return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT, LHS.getOperand(0), DAG.getBitcast(DstVT, RHS)); if (RHS.hasOneUse() && RHS.getOpcode() == ISD::BITCAST && RHS.getOperand(0).getValueType() == DstVT) return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT, DAG.getBitcast(DstVT, LHS), RHS.getOperand(0)); // If the RHS is a vXi1 build vector, this is a good reason to flip too. // Most of these have to move a constant from the scalar domain anyway. if (ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) { RHS = combinevXi1ConstantToInteger(RHS, DAG); return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT, DAG.getBitcast(DstVT, LHS), RHS); } return SDValue(); } static SDValue createMMXBuildVector(SDValue N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc DL(N); unsigned NumElts = N.getNumOperands(); auto *BV = cast(N); SDValue Splat = BV->getSplatValue(); // Build MMX element from integer GPR or SSE float values. auto CreateMMXElement = [&](SDValue V) { if (V.isUndef()) return DAG.getUNDEF(MVT::x86mmx); if (V.getValueType().isFloatingPoint()) { if (Subtarget.hasSSE1() && !isa(V)) { V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, V); V = DAG.getBitcast(MVT::v2i64, V); return DAG.getNode(X86ISD::MOVDQ2Q, DL, MVT::x86mmx, V); } V = DAG.getBitcast(MVT::i32, V); } else { V = DAG.getAnyExtOrTrunc(V, DL, MVT::i32); } return DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, V); }; // Convert build vector ops to MMX data in the bottom elements. SmallVector Ops; // Broadcast - use (PUNPCKL+)PSHUFW to broadcast single element. if (Splat) { if (Splat.isUndef()) return DAG.getUNDEF(MVT::x86mmx); Splat = CreateMMXElement(Splat); if (Subtarget.hasSSE1()) { // Unpack v8i8 to splat i8 elements to lowest 16-bits. if (NumElts == 8) Splat = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, DAG.getConstant(Intrinsic::x86_mmx_punpcklbw, DL, MVT::i32), Splat, Splat); // Use PSHUFW to repeat 16-bit elements. unsigned ShufMask = (NumElts > 2 ? 0 : 0x44); return DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, DAG.getConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), Splat, DAG.getConstant(ShufMask, DL, MVT::i8)); } Ops.append(NumElts, Splat); } else { for (unsigned i = 0; i != NumElts; ++i) Ops.push_back(CreateMMXElement(N.getOperand(i))); } // Use tree of PUNPCKLs to build up general MMX vector. while (Ops.size() > 1) { unsigned NumOps = Ops.size(); unsigned IntrinOp = (NumOps == 2 ? Intrinsic::x86_mmx_punpckldq : (NumOps == 4 ? 
Intrinsic::x86_mmx_punpcklwd : Intrinsic::x86_mmx_punpcklbw)); SDValue Intrin = DAG.getConstant(IntrinOp, DL, MVT::i32); for (unsigned i = 0; i != NumOps; i += 2) Ops[i / 2] = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, Intrin, Ops[i], Ops[i + 1]); Ops.resize(NumOps / 2); } return Ops[0]; } static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT SrcVT = N0.getValueType(); // Try to match patterns such as // (i16 bitcast (v16i1 x)) // -> // (i16 movmsk (16i8 sext (v16i1 x))) // before the setcc result is scalarized on subtargets that don't have legal // vxi1 types. if (DCI.isBeforeLegalize()) { if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget)) return V; // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer // type, widen both sides to avoid a trip through memory. if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && Subtarget.hasAVX512()) { SDLoc dl(N); N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0); N0 = DAG.getBitcast(MVT::v8i1, N0); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0, DAG.getIntPtrConstant(0, dl)); } // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer // type, widen both sides to avoid a trip through memory. if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && Subtarget.hasAVX512()) { SDLoc dl(N); unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); SmallVector Ops(NumConcats, DAG.getUNDEF(SrcVT)); Ops[0] = N0; N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); N0 = DAG.getBitcast(MVT::i8, N0); return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); } } // Since MMX types are special and don't usually play with other vector types, // it's better to handle them early to be sure we emit efficient code by // avoiding store-load conversions. if (VT == MVT::x86mmx) { // Detect MMX constant vectors. APInt UndefElts; SmallVector EltBits; if (getTargetConstantBitsFromNode(N0, 64, UndefElts, EltBits)) { SDLoc DL(N0); // Handle zero-extension of i32 with MOVD. if (EltBits[0].countLeadingZeros() >= 32) return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT, DAG.getConstant(EltBits[0].trunc(32), DL, MVT::i32)); // Else, bitcast to a double. // TODO - investigate supporting sext 32-bit immediates on x86_64. APFloat F64(APFloat::IEEEdouble(), EltBits[0]); return DAG.getBitcast(VT, DAG.getConstantFP(F64, DL, MVT::f64)); } // Detect bitcasts to x86mmx low word. if (N0.getOpcode() == ISD::BUILD_VECTOR && (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) && N0.getOperand(0).getValueType() == SrcVT.getScalarType()) { bool LowUndef = true, AllUndefOrZero = true; for (unsigned i = 1, e = SrcVT.getVectorNumElements(); i != e; ++i) { SDValue Op = N0.getOperand(i); LowUndef &= Op.isUndef() || (i >= e/2); AllUndefOrZero &= (Op.isUndef() || isNullConstant(Op)); } if (AllUndefOrZero) { SDValue N00 = N0.getOperand(0); SDLoc dl(N00); N00 = LowUndef ? DAG.getAnyExtOrTrunc(N00, dl, MVT::i32) : DAG.getZExtOrTrunc(N00, dl, MVT::i32); return DAG.getNode(X86ISD::MMX_MOVW2D, dl, VT, N00); } } // Detect bitcasts of 64-bit build vectors and convert to a // MMX UNPCK/PSHUFW which takes MMX type inputs with the value in the // lowest element. 
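    // For example (illustrative), (x86mmx bitcast (v8i8 build_vector ...))
    // becomes a tree of punpcklbw/punpcklwd/punpckldq intrinsics built by
    // createMMXBuildVector above.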
    if (N0.getOpcode() == ISD::BUILD_VECTOR &&
        (SrcVT == MVT::v2f32 || SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 ||
         SrcVT == MVT::v8i8))
      return createMMXBuildVector(N0, DAG, Subtarget);

    // Detect bitcasts between element or subvector extraction to x86mmx.
    if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
         N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) &&
        isNullConstant(N0.getOperand(1))) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getValueType().is128BitVector())
        return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT,
                           DAG.getBitcast(MVT::v2i64, N00));
    }

    // Detect bitcasts from FP_TO_SINT to x86mmx.
    if (SrcVT == MVT::v2i32 && N0.getOpcode() == ISD::FP_TO_SINT) {
      SDLoc DL(N0);
      SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
                                DAG.getUNDEF(MVT::v2i32));
      return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT,
                         DAG.getBitcast(MVT::v2i64, Res));
    }
  }

  // Try to remove a bitcast of constant vXi1 vector. We have to legalize
  // most of these to scalar anyway.
  if (Subtarget.hasAVX512() && VT.isScalarInteger() && SrcVT.isVector() &&
      SrcVT.getVectorElementType() == MVT::i1 &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    return combinevXi1ConstantToInteger(N0, DAG);
  }

  if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && VT.isVector() &&
      VT.getVectorElementType() == MVT::i1 && isa<ConstantSDNode>(N0)) {
    auto *C = cast<ConstantSDNode>(N0);
    if (C->isAllOnesValue())
      return DAG.getConstant(1, SDLoc(N0), VT);
    if (C->isNullValue())
      return DAG.getConstant(0, SDLoc(N0), VT);
  }

  // Try to remove bitcasts from input and output of mask arithmetic to
  // remove GPR<->K-register crossings.
  if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget))
    return V;

  // Convert a bitcasted integer logic operation that has one bitcasted
  // floating-point operand into a floating-point logic operation. This may
  // create a load of a constant, but that is cheaper than materializing the
  // constant in an integer register and transferring it to an SSE register or
  // transferring the SSE operand to integer register and back.
  unsigned FPOpcode;
  switch (N0.getOpcode()) {
  case ISD::AND: FPOpcode = X86ISD::FAND; break;
  case ISD::OR:  FPOpcode = X86ISD::FOR;  break;
  case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
  default: return SDValue();
  }

  if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
        (Subtarget.hasSSE2() && VT == MVT::f64)))
    return SDValue();

  SDValue LogicOp0 = N0.getOperand(0);
  SDValue LogicOp1 = N0.getOperand(1);
  SDLoc DL0(N0);

  // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
  if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST &&
      LogicOp0.hasOneUse() && LogicOp0.getOperand(0).getValueType() == VT &&
      !isa<ConstantSDNode>(LogicOp0.getOperand(0))) {
    SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
    return DAG.getNode(FPOpcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
  }
  // bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)
  if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST &&
      LogicOp1.hasOneUse() && LogicOp1.getOperand(0).getValueType() == VT &&
      !isa<ConstantSDNode>(LogicOp1.getOperand(0))) {
    SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
    return DAG.getNode(FPOpcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
  }

  return SDValue();
}

// Given a select, detect the following pattern:
// 1:    %2 = zext <N x i8> %0 to <N x i32>
// 2:    %3 = zext <N x i8> %1 to <N x i32>
// 3:    %4 = sub nsw <N x i32> %2, %3
// 4:    %5 = icmp sgt <N x i32> %4, [0 x N] or [-1 x N]
// 5:    %6 = sub nsw <N x i32> zeroinitializer, %4
// 6:    %7 = select <N x i1> %5, <N x i32> %4, <N x i32> %6
// This is useful as it is the input into a SAD pattern.
static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
                              SDValue &Op1) {
  // Check the condition of the select instruction is greater-than.
  SDValue SetCC = Select->getOperand(0);
  if (SetCC.getOpcode() != ISD::SETCC)
    return false;
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
  if (CC != ISD::SETGT && CC != ISD::SETLT)
    return false;

  SDValue SelectOp1 = Select->getOperand(1);
  SDValue SelectOp2 = Select->getOperand(2);

  // The following instructions assume SelectOp1 is the subtraction operand
  // and SelectOp2 is the negation operand.
  // In the case of SETLT this is the other way around.
  if (CC == ISD::SETLT)
    std::swap(SelectOp1, SelectOp2);

  // The second operand of the select should be the negation of the first
  // operand, which is implemented as 0 - SelectOp1.
  if (!(SelectOp2.getOpcode() == ISD::SUB &&
        ISD::isBuildVectorAllZeros(SelectOp2.getOperand(0).getNode()) &&
        SelectOp2.getOperand(1) == SelectOp1))
    return false;

  // The first operand of SetCC is the first operand of the select, which is
  // the difference between the two input vectors.
  if (SetCC.getOperand(0) != SelectOp1)
    return false;

  // In the SETLT case, the second operand of the comparison can be either 1
  // or 0.
  APInt SplatVal;
  if ((CC == ISD::SETLT) &&
      !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
         SplatVal.isOneValue()) ||
        (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
    return false;

  // In the SETGT case, the second operand of the comparison can be either -1
  // or 0.
  if ((CC == ISD::SETGT) &&
      !(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
        ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode())))
    return false;

  // The first operand of the select is the difference between the two input
  // vectors.
  if (SelectOp1.getOpcode() != ISD::SUB)
    return false;

  Op0 = SelectOp1.getOperand(0);
  Op1 = SelectOp1.getOperand(1);

  // Check if the operands of the sub are zero-extended from vectors of i8.
  if (Op0.getOpcode() != ISD::ZERO_EXTEND ||
      Op0.getOperand(0).getValueType().getVectorElementType() != MVT::i8 ||
      Op1.getOpcode() != ISD::ZERO_EXTEND ||
      Op1.getOperand(0).getValueType().getVectorElementType() != MVT::i8)
    return false;

  return true;
}

// Given two zexts of <k x i8> to <k x i32>, create a PSADBW of the inputs
// to these zexts.
static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
                            const SDValue &Zext1, const SDLoc &DL,
                            const X86Subtarget &Subtarget) {
  // Find the appropriate width for the PSADBW.
  EVT InVT = Zext0.getOperand(0).getValueType();
  unsigned RegSize = std::max(128u, InVT.getSizeInBits());

  // "Zero-extend" the i8 vectors. This is not a per-element zext, rather we
  // fill in the missing vector elements with 0.
  unsigned NumConcat = RegSize / InVT.getSizeInBits();
  SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, InVT));
  Ops[0] = Zext0.getOperand(0);
  MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8);
  SDValue SadOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
  Ops[0] = Zext1.getOperand(0);
  SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);

  // Actually build the SAD, split as 128/256/512 bits for SSE/AVX2/AVX512BW.
  auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                          ArrayRef<SDValue> Ops) {
    MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64);
    return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops);
  };
  MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);
  return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, { SadOp0, SadOp1 },
                          PSADBWBuilder);
}

// Attempt to replace a min/max v8i16/v16i8 horizontal reduction with
// PHMINPOSUW.
static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // Bail without SSE41. if (!Subtarget.hasSSE41()) return SDValue(); EVT ExtractVT = Extract->getValueType(0); if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8) return SDValue(); // Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns. ISD::NodeType BinOp; SDValue Src = DAG.matchBinOpReduction( Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}); if (!Src) return SDValue(); EVT SrcVT = Src.getValueType(); EVT SrcSVT = SrcVT.getScalarType(); if (SrcSVT != ExtractVT || (SrcVT.getSizeInBits() % 128) != 0) return SDValue(); SDLoc DL(Extract); SDValue MinPos = Src; // First, reduce the source down to 128-bit, applying BinOp to lo/hi. while (SrcVT.getSizeInBits() > 128) { unsigned NumElts = SrcVT.getVectorNumElements(); unsigned NumSubElts = NumElts / 2; SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcSVT, NumSubElts); unsigned SubSizeInBits = SrcVT.getSizeInBits(); SDValue Lo = extractSubVector(MinPos, 0, DAG, DL, SubSizeInBits); SDValue Hi = extractSubVector(MinPos, NumSubElts, DAG, DL, SubSizeInBits); MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi); } assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) || (SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) && "Unexpected value type"); // PHMINPOSUW applies to UMIN(v8i16), for SMIN/SMAX/UMAX we must apply a mask // to flip the value accordingly. SDValue Mask; unsigned MaskEltsBits = ExtractVT.getSizeInBits(); if (BinOp == ISD::SMAX) Mask = DAG.getConstant(APInt::getSignedMaxValue(MaskEltsBits), DL, SrcVT); else if (BinOp == ISD::SMIN) Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT); else if (BinOp == ISD::UMAX) Mask = DAG.getConstant(APInt::getAllOnesValue(MaskEltsBits), DL, SrcVT); if (Mask) MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos); // For v16i8 cases we need to perform UMIN on pairs of byte elements, // shuffling each upper element down and insert zeros. This means that the // v16i8 UMIN will leave the upper element as zero, performing zero-extension // ready for the PHMINPOS. if (ExtractVT == MVT::i8) { SDValue Upper = DAG.getVectorShuffle( SrcVT, DL, MinPos, DAG.getConstant(0, DL, MVT::v16i8), {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16}); MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper); } // Perform the PHMINPOS on a v8i16 vector, MinPos = DAG.getBitcast(MVT::v8i16, MinPos); MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, MVT::v8i16, MinPos); MinPos = DAG.getBitcast(SrcVT, MinPos); if (Mask) MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, MinPos, DAG.getIntPtrConstant(0, DL)); } // Attempt to replace an all_of/any_of style horizontal reduction with a MOVMSK. static SDValue combineHorizontalPredicateResult(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // Bail without SSE2 or with AVX512VL (which uses predicate registers). if (!Subtarget.hasSSE2() || Subtarget.hasVLX()) return SDValue(); EVT ExtractVT = Extract->getValueType(0); unsigned BitWidth = ExtractVT.getSizeInBits(); if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 && ExtractVT != MVT::i8) return SDValue(); // Check for OR(any_of) and AND(all_of) horizontal reduction patterns. 
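// Editorial example (added by this edit, not part of the upstream source):
// for a v4i32 reduction of compare results (every lane 0 or -1) this becomes
//   movmskps %xmm0, %eax      ; one bit per lane
//   testl    %eax, %eax       ; any_of -> MOVMSK != 0
// or, for all_of,
//   cmpl     $15, %eax        ; all_of -> MOVMSK == (1 << 4) - 1
// followed by a setcc/sext back to the scalar result type.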
  ISD::NodeType BinOp;
  SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
  if (!Match)
    return SDValue();

  // EXTRACT_VECTOR_ELT can require implicit extension of the vector element
  // which we can't support here for now.
  if (Match.getScalarValueSizeInBits() != BitWidth)
    return SDValue();

  // We require AVX2 for PMOVMSKB for v16i16/v32i8.
  unsigned MatchSizeInBits = Match.getValueSizeInBits();
  if (!(MatchSizeInBits == 128 ||
        (MatchSizeInBits == 256 &&
         ((Subtarget.hasAVX() && BitWidth >= 32) || Subtarget.hasAVX2()))))
    return SDValue();

  // Don't bother performing this for 2-element vectors.
  if (Match.getValueType().getVectorNumElements() <= 2)
    return SDValue();

  // Check that we are extracting a reduction of all sign bits.
  if (DAG.ComputeNumSignBits(Match) != BitWidth)
    return SDValue();

  // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
  MVT MaskVT;
  if (64 == BitWidth || 32 == BitWidth)
    MaskVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
                              MatchSizeInBits / BitWidth);
  else
    MaskVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);

  APInt CompareBits;
  ISD::CondCode CondCode;
  if (BinOp == ISD::OR) {
    // any_of -> MOVMSK != 0
    CompareBits = APInt::getNullValue(32);
    CondCode = ISD::CondCode::SETNE;
  } else {
    // all_of -> MOVMSK == ((1 << NumElts) - 1)
    CompareBits = APInt::getLowBitsSet(32, MaskVT.getVectorNumElements());
    CondCode = ISD::CondCode::SETEQ;
  }

  // Perform the select as i32/i64 and then truncate to avoid partial register
  // stalls.
  unsigned ResWidth = std::max(BitWidth, 32u);
  EVT ResVT = EVT::getIntegerVT(*DAG.getContext(), ResWidth);
  SDLoc DL(Extract);
  SDValue Zero = DAG.getConstant(0, DL, ResVT);
  SDValue Ones = DAG.getAllOnesConstant(DL, ResVT);
  SDValue Res = DAG.getBitcast(MaskVT, Match);
  Res = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Res);
  Res = DAG.getSelectCC(DL, Res, DAG.getConstant(CompareBits, DL, MVT::i32),
                        Ones, Zero, CondCode);
  return DAG.getSExtOrTrunc(Res, DL, ExtractVT);
}

static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  // PSADBW is only supported on SSE2 and up.
  if (!Subtarget.hasSSE2())
    return SDValue();

  // Verify the type we're extracting from is any integer type above i16.
  EVT VT = Extract->getOperand(0).getValueType();
  if (!VT.isSimple() || !(VT.getVectorElementType().getSizeInBits() > 16))
    return SDValue();

  unsigned RegSize = 128;
  if (Subtarget.useBWIRegs())
    RegSize = 512;
  else if (Subtarget.hasAVX())
    RegSize = 256;

  // We handle up to v16i* for SSE2 / v32i* for AVX / v64i* for AVX512.
  // TODO: We should be able to handle larger vectors by splitting them before
  // feeding them into several SADs, and then reducing over those.
  if (RegSize / VT.getVectorNumElements() < 8)
    return SDValue();

  // Match shuffle + add pyramid.
  ISD::NodeType BinOp;
  SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});

  // The operand is expected to be zero extended from i8
  // (verified in detectZextAbsDiff).
  // In order to convert to i64 and above, additional any/zero/sign
  // extend is expected.
  // The zero extend from 32 bit has no mathematical effect on the result.
  // Also the sign extend is basically zero extend
  // (extends the sign bit which is zero).
  // So it is correct to skip the sign/zero extend instruction.
  if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
               Root.getOpcode() == ISD::ZERO_EXTEND ||
               Root.getOpcode() == ISD::ANY_EXTEND))
    Root = Root.getOperand(0);

  // If there was a match, we want Root to be a select that is the root of an
  // abs-diff pattern.
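// Editorial note (added by this edit, not part of the upstream source): the
// overall shape being matched is the vectorized form of the classic SAD loop
//   for (i = 0; i < n; i++)
//     sum += abs(a[i] - b[i]);     // a, b unsigned char
// i.e. zext/sub/abs-select feeding a shuffle+add reduction pyramid, which is
// rewritten below into PSADBW plus a handful of horizontal adds.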
if (!Root || (Root.getOpcode() != ISD::VSELECT)) return SDValue(); // Check whether we have an abs-diff pattern feeding into the select. SDValue Zext0, Zext1; if (!detectZextAbsDiff(Root, Zext0, Zext1)) return SDValue(); // Create the SAD instruction. SDLoc DL(Extract); SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL, Subtarget); // If the original vector was wider than 8 elements, sum over the results // in the SAD vector. unsigned Stages = Log2_32(VT.getVectorNumElements()); MVT SadVT = SAD.getSimpleValueType(); if (Stages > 3) { unsigned SadElems = SadVT.getVectorNumElements(); for(unsigned i = Stages - 3; i > 0; --i) { SmallVector Mask(SadElems, -1); for(unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j) Mask[j] = MaskEnd + j; SDValue Shuffle = DAG.getVectorShuffle(SadVT, DL, SAD, DAG.getUNDEF(SadVT), Mask); SAD = DAG.getNode(ISD::ADD, DL, SadVT, SAD, Shuffle); } } MVT Type = Extract->getSimpleValueType(0); unsigned TypeSizeInBits = Type.getSizeInBits(); // Return the lowest TypeSizeInBits bits. MVT ResVT = MVT::getVectorVT(Type, SadVT.getSizeInBits() / TypeSizeInBits); SAD = DAG.getBitcast(ResVT, SAD); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Type, SAD, Extract->getOperand(1)); } // Attempt to peek through a target shuffle and extract the scalar from the // source. static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); SDValue Src = N->getOperand(0); SDValue Idx = N->getOperand(1); EVT VT = N->getValueType(0); EVT SrcVT = Src.getValueType(); EVT SrcSVT = SrcVT.getVectorElementType(); unsigned NumSrcElts = SrcVT.getVectorNumElements(); // Don't attempt this for boolean mask vectors or unknown extraction indices. if (SrcSVT == MVT::i1 || !isa(Idx)) return SDValue(); // Handle extract(broadcast(scalar_value)), it doesn't matter what index is. if (X86ISD::VBROADCAST == Src.getOpcode() && Src.getOperand(0).getValueType() == VT) return Src.getOperand(0); // Resolve the target shuffle inputs and mask. SmallVector Mask; SmallVector Ops; if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG)) return SDValue(); // Attempt to narrow/widen the shuffle mask to the correct size. if (Mask.size() != NumSrcElts) { if ((NumSrcElts % Mask.size()) == 0) { SmallVector ScaledMask; int Scale = NumSrcElts / Mask.size(); scaleShuffleMask(Scale, Mask, ScaledMask); Mask = std::move(ScaledMask); } else if ((Mask.size() % NumSrcElts) == 0) { // Simplify Mask based on demanded element. int ExtractIdx = (int)N->getConstantOperandVal(1); int Scale = Mask.size() / NumSrcElts; int Lo = Scale * ExtractIdx; int Hi = Scale * (ExtractIdx + 1); for (int i = 0, e = (int)Mask.size(); i != e; ++i) if (i < Lo || Hi <= i) Mask[i] = SM_SentinelUndef; SmallVector WidenedMask; while (Mask.size() > NumSrcElts && canWidenShuffleElements(Mask, WidenedMask)) Mask = std::move(WidenedMask); // TODO - investigate support for wider shuffle masks with known upper // undef/zero elements for implicit zero-extension. } } // Check if narrowing/widening failed. if (Mask.size() != NumSrcElts) return SDValue(); int SrcIdx = Mask[N->getConstantOperandVal(1)]; SDLoc dl(N); // If the shuffle source element is undef/zero then we can just accept it. if (SrcIdx == SM_SentinelUndef) return DAG.getUNDEF(VT); if (SrcIdx == SM_SentinelZero) return VT.isFloatingPoint() ? 
DAG.getConstantFP(0.0, dl, VT) : DAG.getConstant(0, dl, VT); SDValue SrcOp = Ops[SrcIdx / Mask.size()]; SrcOp = DAG.getBitcast(SrcVT, SrcOp); SrcIdx = SrcIdx % Mask.size(); // We can only extract other elements from 128-bit vectors and in certain // circumstances, depending on SSE-level. // TODO: Investigate using extract_subvector for larger vectors. // TODO: Investigate float/double extraction if it will be just stored. if ((SrcVT == MVT::v4i32 || SrcVT == MVT::v2i64) && ((SrcIdx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) { assert(SrcSVT == VT && "Unexpected extraction type"); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcSVT, SrcOp, DAG.getIntPtrConstant(SrcIdx, dl)); } if ((SrcVT == MVT::v8i16 && Subtarget.hasSSE2()) || (SrcVT == MVT::v16i8 && Subtarget.hasSSE41())) { assert(VT.getSizeInBits() >= SrcSVT.getSizeInBits() && "Unexpected extraction type"); unsigned OpCode = (SrcVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB); SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp, DAG.getIntPtrConstant(SrcIdx, dl)); return DAG.getZExtOrTrunc(ExtOp, dl, VT); } return SDValue(); } /// Detect vector gather/scatter index generation and convert it from being a /// bunch of shuffles and extracts into a somewhat faster sequence. /// For i686, the best sequence is apparently storing the value and loading /// scalars back, while for x64 we should use 64-bit extracts and shifts. static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget)) return NewOp; // TODO - Remove this once we can handle the implicit zero-extension of // X86ISD::PEXTRW/X86ISD::PEXTRB in: // XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and // combineBasicSADPattern. if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI)) return NewOp; SDValue InputVector = N->getOperand(0); SDValue EltIdx = N->getOperand(1); EVT SrcVT = InputVector.getValueType(); EVT VT = N->getValueType(0); SDLoc dl(InputVector); // Detect mmx extraction of all bits as a i64. It works better as a bitcast. if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() && VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) { SDValue MMXSrc = InputVector.getOperand(0); // The bitcast source is a direct mmx result. if (MMXSrc.getValueType() == MVT::x86mmx) return DAG.getBitcast(VT, InputVector); } // Detect mmx to i32 conversion through a v2i32 elt extract. if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() && VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) { SDValue MMXSrc = InputVector.getOperand(0); // The bitcast source is a direct mmx result. if (MMXSrc.getValueType() == MVT::x86mmx) return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc); } if (VT == MVT::i1 && InputVector.getOpcode() == ISD::BITCAST && isa(EltIdx) && isa(InputVector.getOperand(0))) { uint64_t ExtractedElt = N->getConstantOperandVal(1); auto *InputC = cast(InputVector.getOperand(0)); const APInt &InputValue = InputC->getAPIntValue(); uint64_t Res = InputValue[ExtractedElt]; return DAG.getConstant(Res, dl, MVT::i1); } // Check whether this extract is the root of a sum of absolute differences // pattern. 
This has to be done here because we really want it to happen
  // pre-legalization.
  if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
    return SAD;

  // Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.
  if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))
    return Cmp;

  // Attempt to replace min/max v8i16/v16i8 reductions with PHMINPOSUW.
  if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
    return MinMax;

  return SDValue();
}

/// If a vector select has an operand that is -1 or 0, try to simplify the
/// select to a bitwise logic operation.
/// TODO: Move to DAGCombiner, possibly using TargetLowering::hasAndNot()?
static SDValue
combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = LHS.getValueType();
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (N->getOpcode() != ISD::VSELECT)
    return SDValue();

  assert(CondVT.isVector() && "Vector select expects a vector selector!");

  bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
  // Check if the first operand is all zeros and Cond type is vXi1.
  // This situation only applies to avx512.
  if (TValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() &&
      CondVT.getVectorElementType() == MVT::i1) {
    // Invert the cond to not(cond) : xor(op,allones)=not(op)
    SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
    // Vselect cond, op1, op2 = Vselect not(cond), op2, op1
    return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
  }

  // To use the condition operand as a bitwise mask, it must have elements
  // that are the same size as the select elements. I.e., the condition
  // operand must have already been promoted from the IR select condition
  // type <N x i1>.
  // Don't check if the types themselves are equal because that excludes
  // vector floating-point selects.
  if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
    return SDValue();

  bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

  // Try to invert the condition if true value is not all 1s and false value
  // is not all 0s.
  if (!TValIsAllOnes && !FValIsAllZeros &&
      // Check if the selector will be produced by CMPP*/PCMP*.
      Cond.getOpcode() == ISD::SETCC &&
      // Check if SETCC has already been promoted.
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
          CondVT) {
    bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());

    if (TValIsAllZeros || FValIsAllOnes) {
      SDValue CC = Cond.getOperand(2);
      ISD::CondCode NewCC =
          ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                               Cond.getOperand(0).getValueType().isInteger());
      Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
                          NewCC);
      std::swap(LHS, RHS);
      TValIsAllOnes = FValIsAllOnes;
      FValIsAllZeros = TValIsAllZeros;
    }
  }

  // Cond value must be 'sign splat' to be converted to a logical op.
  if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
    return SDValue();

  // vselect Cond, 111..., 000... -> Cond
  if (TValIsAllOnes && FValIsAllZeros)
    return DAG.getBitcast(VT, Cond);

  if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT))
    return SDValue();

  // vselect Cond, 111..., X -> or Cond, X
  if (TValIsAllOnes) {
    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
    SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
    return DAG.getBitcast(VT, Or);
  }

  // vselect Cond, X, 000... -> and Cond, X
  if (FValIsAllZeros) {
    SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
    SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
    return DAG.getBitcast(VT, And);
  }

  // vselect Cond, 000..., X -> andn Cond, X
  if (TValIsAllZeros) {
    MVT AndNVT = MVT::getVectorVT(MVT::i64, CondVT.getSizeInBits() / 64);
    SDValue CastCond = DAG.getBitcast(AndNVT, Cond);
    SDValue CastRHS = DAG.getBitcast(AndNVT, RHS);
    SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, AndNVT, CastCond, CastRHS);
    return DAG.getBitcast(VT, AndN);
  }

  return SDValue();
}

static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  SDLoc DL(N);

  auto *TrueC = dyn_cast<ConstantSDNode>(LHS);
  auto *FalseC = dyn_cast<ConstantSDNode>(RHS);
  if (!TrueC || !FalseC)
    return SDValue();

  // Don't do this for crazy integer types.
  EVT VT = N->getValueType(0);
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  // We're going to use the condition bit in math or logic ops. We could allow
  // this with a wider condition value (post-legalization it becomes an i8),
  // but if nothing is creating selects that late, it doesn't matter.
  if (Cond.getValueType() != MVT::i1)
    return SDValue();

  // A power-of-2 multiply is just a shift. LEA also cheaply handles multiply
  // by 3, 5, or 9 with i32/i64, so those get transformed too.
  // TODO: For constants that overflow or do not differ by power-of-2 or small
  // multiplier, convert to 'and' + 'add'.
  const APInt &TrueVal = TrueC->getAPIntValue();
  const APInt &FalseVal = FalseC->getAPIntValue();
  bool OV;
  APInt Diff = TrueVal.ssub_ov(FalseVal, OV);
  if (OV)
    return SDValue();

  APInt AbsDiff = Diff.abs();
  if (AbsDiff.isPowerOf2() ||
      ((VT == MVT::i32 || VT == MVT::i64) &&
       (AbsDiff == 3 || AbsDiff == 5 || AbsDiff == 9))) {

    // We need a positive multiplier constant for shift/LEA codegen. The 'not'
    // of the condition can usually be folded into a compare predicate, but
    // even without that, the sequence should be cheaper than a CMOV
    // alternative.
    if (TrueVal.slt(FalseVal)) {
      Cond = DAG.getNOT(DL, Cond, MVT::i1);
      std::swap(TrueC, FalseC);
    }

    // select Cond, TC, FC --> (zext(Cond) * (TC - FC)) + FC
    SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);

    // Multiply condition by the difference if non-one.
    if (!AbsDiff.isOneValue())
      R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT));

    // Add the base if non-zero.
    if (!FalseC->isNullValue())
      R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0));

    return R;
  }

  return SDValue();
}

/// If this is a *dynamic* select (non-constant condition) and we can match
/// this node with one of the variable blend instructions, restructure the
/// condition so that blends can use the high (sign) bit of each element.
/// This function will also call SimplifyDemandedBits on already created
/// BLENDV to perform additional simplifications.
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  if ((N->getOpcode() != ISD::VSELECT &&
       N->getOpcode() != X86ISD::BLENDV) ||
      ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
    return SDValue();

  // Don't optimize before the condition has been transformed to a legal type
  // and don't ever optimize vector selects that map to AVX512 mask-registers.
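// Editorial note (added by this edit, not part of the upstream source): the
// variable-blend instructions targeted here (PBLENDVB, BLENDVPS, BLENDVPD)
// select each element purely on the sign bit of the corresponding mask
// element, e.g. for pblendvb:
//   dst[i] = (mask[i] & 0x80) ? src[i] : dst[i]
// which is why the SimplifyDemandedBits call below demands only the sign bit
// of each condition element.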
unsigned BitWidth = Cond.getScalarValueSizeInBits(); if (BitWidth < 8 || BitWidth > 64) return SDValue(); // We can only handle the cases where VSELECT is directly legal on the // subtarget. We custom lower VSELECT nodes with constant conditions and // this makes it hard to see whether a dynamic VSELECT will correctly // lower, so we both check the operation's status and explicitly handle the // cases where a *dynamic* blend will fail even though a constant-condition // blend could be custom lowered. // FIXME: We should find a better way to handle this class of problems. // Potentially, we should combine constant-condition vselect nodes // pre-legalization into shuffles and not mark as many types as custom // lowered. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = N->getValueType(0); if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) return SDValue(); // FIXME: We don't support i16-element blends currently. We could and // should support them by making *all* the bits in the condition be set // rather than just the high bit and using an i8-element blend. if (VT.getVectorElementType() == MVT::i16) return SDValue(); // Dynamic blending was only available from SSE4.1 onward. if (VT.is128BitVector() && !Subtarget.hasSSE41()) return SDValue(); // Byte blends are only available in AVX2 if (VT == MVT::v32i8 && !Subtarget.hasAVX2()) return SDValue(); // There are no 512-bit blend instructions that use sign bits. if (VT.is512BitVector()) return SDValue(); // TODO: Add other opcodes eventually lowered into BLEND. for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end(); UI != UE; ++UI) if ((UI->getOpcode() != ISD::VSELECT && UI->getOpcode() != X86ISD::BLENDV) || UI.getOperandNo() != 0) return SDValue(); APInt DemandedMask(APInt::getSignMask(BitWidth)); KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); if (!TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO, 0, true)) return SDValue(); // If we changed the computation somewhere in the DAG, this change will // affect all users of Cond. Update all the nodes so that we do not use // the generic VSELECT anymore. Otherwise, we may perform wrong // optimizations as we messed with the actual expectation for the vector // boolean values. for (SDNode *U : Cond->uses()) { if (U->getOpcode() == X86ISD::BLENDV) continue; SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0), Cond, U->getOperand(1), U->getOperand(2)); DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB); DCI.AddToWorklist(U); } DCI.CommitTargetLoweringOpt(TLO); return SDValue(N, 0); } /// Do target-specific dag combines on SELECT and VSELECT nodes. static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDLoc DL(N); SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); // Try simplification again because we use this function to optimize // BLENDV nodes that are not handled by the generic combiner. if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS)) return V; EVT VT = LHS.getValueType(); EVT CondVT = Cond.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Convert vselects with constant condition into shuffles. 
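// Editorial example (added by this edit, not part of the upstream source):
// (vselect <i1 1, i1 0, i1 0, i1 1>, %A, %B) selects A0, B1, B2, A3 and so
// becomes shufflevector %A, %B, <0, 5, 6, 3>, letting the normal shuffle
// lowering pick the best blend/permute sequence.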
  if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
      DCI.isBeforeLegalizeOps()) {
    SmallVector<int, 64> Mask;
    if (createShuffleMaskFromVSELECT(Mask, Cond))
      return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
  }

  // If we have SSE[12] support, try to form min/max nodes. SSE min/max
  // instructions match the semantics of the common C idiom x<y?x:y but not
  // x<=y?x:y, because of how they handle negative zero (which can be
  // ignored in unsafe-math mode).
  // We also try to create v2f32 min/max nodes, which we later widen to v4f32.
  if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
      VT != MVT::f80 && VT != MVT::f128 &&
      (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
      (Subtarget.hasSSE2() ||
       (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

    unsigned Opcode = 0;
    // Check for x CC y ? x : y.
    if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
        DAG.isEqualTo(RHS, Cond.getOperand(1))) {
      switch (CC) {
      default: break;
      case ISD::SETULT:
        // Converting this to a min would handle NaNs incorrectly, and swapping
        // the operands would cause it to handle comparisons between positive
        // and negative zero incorrectly.
        if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
          if (!DAG.getTarget().Options.UnsafeFPMath &&
              !(DAG.isKnownNeverZeroFloat(LHS) ||
                DAG.isKnownNeverZeroFloat(RHS)))
            break;
          std::swap(LHS, RHS);
        }
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETOLE:
        // Converting this to a min would handle comparisons between positive
        // and negative zero incorrectly.
        if (!DAG.getTarget().Options.UnsafeFPMath &&
            !DAG.isKnownNeverZeroFloat(LHS) &&
            !DAG.isKnownNeverZeroFloat(RHS))
          break;
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETULE:
        // Converting this to a min would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
        std::swap(LHS, RHS);
        LLVM_FALLTHROUGH;
      case ISD::SETOLT:
      case ISD::SETLT:
      case ISD::SETLE:
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETOGE:
        // Converting this to a max would handle comparisons between positive
        // and negative zero incorrectly.
        if (!DAG.getTarget().Options.UnsafeFPMath &&
            !DAG.isKnownNeverZeroFloat(LHS) &&
            !DAG.isKnownNeverZeroFloat(RHS))
          break;
        Opcode = X86ISD::FMAX;
        break;
      case ISD::SETUGT:
        // Converting this to a max would handle NaNs incorrectly, and swapping
        // the operands would cause it to handle comparisons between positive
        // and negative zero incorrectly.
        if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
          if (!DAG.getTarget().Options.UnsafeFPMath &&
              !(DAG.isKnownNeverZeroFloat(LHS) ||
                DAG.isKnownNeverZeroFloat(RHS)))
            break;
          std::swap(LHS, RHS);
        }
        Opcode = X86ISD::FMAX;
        break;
      case ISD::SETUGE:
        // Converting this to a max would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
        std::swap(LHS, RHS);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETGT:
      case ISD::SETGE:
        Opcode = X86ISD::FMAX;
        break;
      }
      // Check for x CC y ? y : x -- a min/max with reversed arms.
    } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
               DAG.isEqualTo(RHS, Cond.getOperand(0))) {
      switch (CC) {
      default: break;
      case ISD::SETOGE:
        // Converting this to a min would handle comparisons between positive
        // and negative zero incorrectly, and swapping the operands would
        // cause it to handle NaNs incorrectly.
        if (!DAG.getTarget().Options.UnsafeFPMath &&
            !(DAG.isKnownNeverZeroFloat(LHS) ||
              DAG.isKnownNeverZeroFloat(RHS))) {
          if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
            break;
          std::swap(LHS, RHS);
        }
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETUGT:
        // Converting this to a min would handle NaNs incorrectly.
        if (!DAG.getTarget().Options.UnsafeFPMath &&
            (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
          break;
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETUGE:
        // Converting this to a min would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETGT: case ISD::SETGE: Opcode = X86ISD::FMIN; break; case ISD::SETULT: // Converting this to a max would handle NaNs incorrectly. if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; Opcode = X86ISD::FMAX; break; case ISD::SETOLE: // Converting this to a max would handle comparisons between positive // and negative zero incorrectly, and swapping the operands would // cause it to handle NaNs incorrectly. if (!DAG.getTarget().Options.UnsafeFPMath && !DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS)) { if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETULE: // Converting this to a max would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ISD::SETOLT: case ISD::SETLT: case ISD::SETLE: Opcode = X86ISD::FMAX; break; } } if (Opcode) return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS); } // Some mask scalar intrinsics rely on checking if only one bit is set // and implement it in C code like this: // A[0] = (U & 1) ? A[0] : W[0]; // This creates some redundant instructions that break pattern matching. // fold (select (setcc (and (X, 1), 0, seteq), Y, Z)) -> select(and(X, 1),Z,Y) if (Subtarget.hasAVX512() && N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC && (VT == MVT::f32 || VT == MVT::f64)) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); SDValue AndNode = Cond.getOperand(0); if (AndNode.getOpcode() == ISD::AND && CC == ISD::SETEQ && isNullConstant(Cond.getOperand(1)) && isOneConstant(AndNode.getOperand(1))) { // LHS and RHS swapped due to // setcc outputting 1 when AND resulted in 0 and vice versa. AndNode = DAG.getZExtOrTrunc(AndNode, DL, MVT::i8); return DAG.getNode(ISD::SELECT, DL, VT, AndNode, RHS, LHS); } } // v16i8 (select v16i1, v16i8, v16i8) does not have a proper // lowering on KNL. In this case we convert it to // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction. // The same situation all vectors of i8 and i16 without BWI. // Make sure we extend these even before type legalization gets a chance to // split wide vectors. // Since SKX these selects have a proper lowering. if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() && CondVT.getVectorElementType() == MVT::i1 && (ExperimentalVectorWideningLegalization || VT.getVectorNumElements() > 4) && (VT.getVectorElementType() == MVT::i8 || VT.getVectorElementType() == MVT::i16)) { Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS); } if (SDValue V = combineSelectOfTwoConstants(N, DAG)) return V; // Canonicalize max and min: // (x > y) ? x : y -> (x >= y) ? x : y // (x < y) ? x : y -> (x <= y) ? x : y // This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates // the need for an extra compare // against zero. e.g. // (x - y) > 0 : (x - y) ? 0 -> (x - y) >= 0 : (x - y) ? 0 // subl %esi, %edi // testl %edi, %edi // movl $0, %eax // cmovgl %edi, %eax // => // xorl %eax, %eax // subl %esi, $edi // cmovsl %eax, %edi if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC && DAG.isEqualTo(LHS, Cond.getOperand(0)) && DAG.isEqualTo(RHS, Cond.getOperand(1))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); switch (CC) { default: break; case ISD::SETLT: case ISD::SETGT: { ISD::CondCode NewCC = (CC == ISD::SETLT) ? 
ISD::SETLE : ISD::SETGE; Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), Cond.getOperand(0), Cond.getOperand(1), NewCC); return DAG.getSelect(DL, VT, Cond, LHS, RHS); } } } // Match VSELECTs into subs with unsigned saturation. if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && // psubus is available in SSE2 for i8 and i16 vectors. Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 && isPowerOf2_32(VT.getVectorNumElements()) && (VT.getVectorElementType() == MVT::i8 || VT.getVectorElementType() == MVT::i16)) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); // Check if one of the arms of the VSELECT is a zero vector. If it's on the // left side invert the predicate to simplify logic below. SDValue Other; if (ISD::isBuildVectorAllZeros(LHS.getNode())) { Other = RHS; CC = ISD::getSetCCInverse(CC, true); } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) { Other = LHS; } if (Other.getNode() && Other->getNumOperands() == 2 && Other->getOperand(0) == Cond.getOperand(0)) { SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1); SDValue CondRHS = Cond->getOperand(1); // Look for a general sub with unsigned saturation first. // x >= y ? x-y : 0 --> subus x, y // x > y ? x-y : 0 --> subus x, y if ((CC == ISD::SETUGE || CC == ISD::SETUGT) && Other->getOpcode() == ISD::SUB && OpRHS == CondRHS) return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); if (auto *OpRHSBV = dyn_cast(OpRHS)) { if (isa(CondRHS)) { // If the RHS is a constant we have to reverse the const // canonicalization. // x > C-1 ? x+-C : 0 --> subus x, C // TODO: Handle build_vectors with undef elements. auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1); }; if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT)) { OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), OpRHS); return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); } // Another special case: If C was a sign bit, the sub has been // canonicalized into a xor. // FIXME: Would it be better to use computeKnownBits to determine // whether it's safe to decanonicalize the xor? // x s< 0 ? x^C : 0 --> subus x, C if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) { if (CC == ISD::SETLT && Other.getOpcode() == ISD::XOR && ISD::isBuildVectorAllZeros(CondRHS.getNode()) && OpRHSConst->getAPIntValue().isSignMask()) { // Note that we have to rebuild the RHS constant here to ensure we // don't rely on particular values of undef lanes. OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT); return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); } } } } } } // Match VSELECTs into add with unsigned saturation. if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && // paddus is available in SSE2 for i8 and i16 vectors. Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 && isPowerOf2_32(VT.getVectorNumElements()) && (VT.getVectorElementType() == MVT::i8 || VT.getVectorElementType() == MVT::i16)) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); SDValue CondLHS = Cond->getOperand(0); SDValue CondRHS = Cond->getOperand(1); // Check if one of the arms of the VSELECT is vector with all bits set. // If it's on the left side invert the predicate to simplify logic below. 
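// Editorial example (added by this edit, not part of the upstream source):
// the saturating-add idiom being matched looks like
//   %sum = add <16 x i8> %x, %y
//   %ovf = icmp ule <16 x i8> %x, %sum        ; no unsigned wrap occurred
//   %res = select <16 x i1> %ovf, <16 x i8> %sum, <16 x i8> <all ones>
// which becomes a single (UADDSAT %x, %y), i.e. paddusb.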
    SDValue Other;
    if (ISD::isBuildVectorAllOnes(LHS.getNode())) {
      Other = RHS;
      CC = ISD::getSetCCInverse(CC, true);
    } else if (ISD::isBuildVectorAllOnes(RHS.getNode())) {
      Other = LHS;
    }

    if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
      SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);

      // Canonicalize condition operands.
      if (CC == ISD::SETUGE) {
        std::swap(CondLHS, CondRHS);
        CC = ISD::SETULE;
      }

      // We can test against either of the addition operands.
      // x <= x+y ? x+y : ~0 --> addus x, y
      // x+y >= x ? x+y : ~0 --> addus x, y
      if (CC == ISD::SETULE && Other == CondRHS &&
          (OpLHS == CondLHS || OpRHS == CondLHS))
        return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);

      if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
          CondLHS == OpLHS) {
        // If the RHS is a constant we have to reverse the const
        // canonicalization.
        // x > ~C ? x+C : ~0 --> addus x, C
        auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
          return Cond->getAPIntValue() == ~Op->getAPIntValue();
        };
        if (CC == ISD::SETULE &&
            ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
          return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
      }
    }
  }

  // Early exit check.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
    return V;

  if (SDValue V = combineVSelectToBLENDV(N, DAG, DCI, Subtarget))
    return V;

  // Custom action for SELECT MMX
  if (VT == MVT::x86mmx) {
    LHS = DAG.getBitcast(MVT::i64, LHS);
    RHS = DAG.getBitcast(MVT::i64, RHS);
    SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::i64, Cond, LHS, RHS);
    return DAG.getBitcast(VT, newSelect);
  }

  return SDValue();
}

/// Combine:
///   (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
/// to:
///   (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
/// Note that this is only legal for some op/cc combinations.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
                                       SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
  // This combine only operates on CMP-like nodes.
  if (!(Cmp.getOpcode() == X86ISD::CMP ||
        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
    return SDValue();

  // Can't replace the cmp if it has more uses than the one we're looking at.
  // FIXME: We would like to be able to handle this, but would need to make
  // sure all uses were updated.
  if (!Cmp.hasOneUse())
    return SDValue();

  // This only applies to variations of the common case:
  //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
  //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
  //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
  //   (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0)
  // Using the proper condcodes (see below), overflow is checked for.
  // FIXME: We can generalize both constraints:
  // - XOR/OR/AND (if they were made to survive AtomicExpand)
  // - LHS != 1
  // if the result is compared.

  SDValue CmpLHS = Cmp.getOperand(0);
  SDValue CmpRHS = Cmp.getOperand(1);

  if (!CmpLHS.hasOneUse())
    return SDValue();

  unsigned Opc = CmpLHS.getOpcode();
  if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
    return SDValue();

  SDValue OpRHS = CmpLHS.getOperand(2);
  auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
  if (!OpRHSC)
    return SDValue();

  APInt Addend = OpRHSC->getAPIntValue();
  if (Opc == ISD::ATOMIC_LOAD_SUB)
    Addend = -Addend;

  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
  if (!CmpRHSC)
    return SDValue();

  APInt Comparison = CmpRHSC->getAPIntValue();

  // If the addend is the negation of the comparison value, then we can do
  // a full comparison by emitting the atomic arithmetic as a locked sub.
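// Editorial example (added by this edit, not part of the upstream source):
// for (cmp (atomic_load_add %p, -5), 5, COND_E) the addend (-5) is the
// negation of the comparison value (5), so the whole thing can be emitted as
//   lock subl $5, (%rdi)   ; leaves ZF exactly as the separate cmp would
// (register illustrative) and the flags reused directly, instead of an xadd
// followed by a re-read and compare.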
  if (Comparison == -Addend) {
    // The CC is fine, but we need to rewrite the LHS of the comparison as an
    // atomic sub.
    auto *AN = cast<AtomicSDNode>(CmpLHS.getNode());
    auto AtomicSub = DAG.getAtomic(
        ISD::ATOMIC_LOAD_SUB, SDLoc(CmpLHS), CmpLHS.getValueType(),
        /*Chain*/ CmpLHS.getOperand(0),
        /*LHS*/ CmpLHS.getOperand(1),
        /*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
        AN->getMemOperand());
    auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
                                  DAG.getUNDEF(CmpLHS.getValueType()));
    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
    return LockOp;
  }

  // We can handle comparisons with zero in a number of cases by manipulating
  // the CC used.
  if (!Comparison.isNullValue())
    return SDValue();

  if (CC == X86::COND_S && Addend == 1)
    CC = X86::COND_LE;
  else if (CC == X86::COND_NS && Addend == 1)
    CC = X86::COND_G;
  else if (CC == X86::COND_G && Addend == -1)
    CC = X86::COND_GE;
  else if (CC == X86::COND_LE && Addend == -1)
    CC = X86::COND_L;
  else
    return SDValue();

  SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG, Subtarget);
  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
                                DAG.getUNDEF(CmpLHS.getValueType()));
  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
  return LockOp;
}

// Check whether a boolean test is testing a boolean value generated by
// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
// code.
//
// Simplify the following patterns:
// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
// to (Op EFLAGS Cond)
//
// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
// to (Op EFLAGS !Cond)
//
// where Op could be BRCOND or CMOV.
//
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
  // This combine only operates on CMP-like nodes.
  if (!(Cmp.getOpcode() == X86ISD::CMP ||
        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
    return SDValue();

  // Quit if not used as a boolean value.
  if (CC != X86::COND_E && CC != X86::COND_NE)
    return SDValue();

  // Check CMP operands. One of them should be 0 or 1 and the other should be
  // an SetCC or extended from it.
  SDValue Op1 = Cmp.getOperand(0);
  SDValue Op2 = Cmp.getOperand(1);

  SDValue SetCC;
  const ConstantSDNode* C = nullptr;
  bool needOppositeCond = (CC == X86::COND_E);
  bool checkAgainstTrue = false; // Is it a comparison against 1?

  if ((C = dyn_cast<ConstantSDNode>(Op1)))
    SetCC = Op2;
  else if ((C = dyn_cast<ConstantSDNode>(Op2)))
    SetCC = Op1;
  else // Quit if all operands are not constants.
    return SDValue();

  if (C->getZExtValue() == 1) {
    needOppositeCond = !needOppositeCond;
    checkAgainstTrue = true;
  } else if (C->getZExtValue() != 0)
    // Quit if the constant is neither 0 or 1.
    return SDValue();

  bool truncatedToBoolWithAnd = false;
  // Skip (zext $x), (trunc $x), or (and $x, 1) node.
  while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
         SetCC.getOpcode() == ISD::TRUNCATE ||
         SetCC.getOpcode() == ISD::AND) {
    if (SetCC.getOpcode() == ISD::AND) {
      int OpIdx = -1;
      if (isOneConstant(SetCC.getOperand(0)))
        OpIdx = 1;
      if (isOneConstant(SetCC.getOperand(1)))
        OpIdx = 0;
      if (OpIdx < 0)
        break;
      SetCC = SetCC.getOperand(OpIdx);
      truncatedToBoolWithAnd = true;
    } else
      SetCC = SetCC.getOperand(0);
  }

  switch (SetCC.getOpcode()) {
  case X86ISD::SETCC_CARRY:
    // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
    // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
    // i.e. it's a comparison against true but the result of SETCC_CARRY is not
    // truncated to i1 using 'and'.
    if (checkAgainstTrue && !truncatedToBoolWithAnd)
      break;
    assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
           "Invalid use of SETCC_CARRY!");
    LLVM_FALLTHROUGH;
  case X86ISD::SETCC:
    // Set the condition code or opposite one if necessary.
    CC = X86::CondCode(SetCC.getConstantOperandVal(0));
    if (needOppositeCond)
      CC = X86::GetOppositeBranchCondition(CC);
    return SetCC.getOperand(1);
  case X86ISD::CMOV: {
    // Check whether false/true value has canonical one, i.e. 0 or 1.
    ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
    ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
    // Quit if true value is not a constant.
    if (!TVal)
      return SDValue();
    // Quit if false value is not a constant.
    if (!FVal) {
      SDValue Op = SetCC.getOperand(0);
      // Skip 'zext' or 'trunc' node.
      if (Op.getOpcode() == ISD::ZERO_EXTEND ||
          Op.getOpcode() == ISD::TRUNCATE)
        Op = Op.getOperand(0);
      // A special case for rdrand/rdseed, where 0 is set if false cond is
      // found.
      if ((Op.getOpcode() != X86ISD::RDRAND &&
           Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
        return SDValue();
    }
    // Quit if false value is not the constant 0 or 1.
    bool FValIsFalse = true;
    if (FVal && FVal->getZExtValue() != 0) {
      if (FVal->getZExtValue() != 1)
        return SDValue();
      // If FVal is 1, opposite cond is needed.
      needOppositeCond = !needOppositeCond;
      FValIsFalse = false;
    }
    // Quit if TVal is not the constant opposite of FVal.
    if (FValIsFalse && TVal->getZExtValue() != 1)
      return SDValue();
    if (!FValIsFalse && TVal->getZExtValue() != 0)
      return SDValue();
    CC = X86::CondCode(SetCC.getConstantOperandVal(2));
    if (needOppositeCond)
      CC = X86::GetOppositeBranchCondition(CC);
    return SetCC.getOperand(3);
  }
  }

  return SDValue();
}

/// Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS.
/// Match:
///   (X86or (X86setcc) (X86setcc))
///   (X86cmp (and (X86setcc) (X86setcc)), 0)
static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
                                           X86::CondCode &CC1, SDValue &Flags,
                                           bool &isAnd) {
  if (Cond->getOpcode() == X86ISD::CMP) {
    if (!isNullConstant(Cond->getOperand(1)))
      return false;

    Cond = Cond->getOperand(0);
  }

  isAnd = false;

  SDValue SetCC0, SetCC1;
  switch (Cond->getOpcode()) {
  default: return false;
  case ISD::AND:
  case X86ISD::AND:
    isAnd = true;
    LLVM_FALLTHROUGH;
  case ISD::OR:
  case X86ISD::OR:
    SetCC0 = Cond->getOperand(0);
    SetCC1 = Cond->getOperand(1);
    break;
  }

  // Make sure we have SETCC nodes, using the same flags value.
  if (SetCC0.getOpcode() != X86ISD::SETCC ||
      SetCC1.getOpcode() != X86ISD::SETCC ||
      SetCC0->getOperand(1) != SetCC1->getOperand(1))
    return false;

  CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0);
  CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0);
  Flags = SetCC0->getOperand(1);
  return true;
}

// When legalizing carry, we create carries via add X, -1.
// If that comes from an actual carry, via setcc, we use the
// carry directly.
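// Editorial note (added by this edit, not part of the upstream source): for
// a boolean X in {0, 1}, (add X, -1) produces a carry-out of exactly X, so
// when X traces back to (setcc COND_B, %flags) the original %flags value can
// be fed to the carry user directly, dropping the materialized 0/1 value.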
static SDValue combineCarryThroughADD(SDValue EFLAGS) {
  if (EFLAGS.getOpcode() == X86ISD::ADD) {
    if (isAllOnesConstant(EFLAGS.getOperand(1))) {
      SDValue Carry = EFLAGS.getOperand(0);
      while (Carry.getOpcode() == ISD::TRUNCATE ||
             Carry.getOpcode() == ISD::ZERO_EXTEND ||
             Carry.getOpcode() == ISD::SIGN_EXTEND ||
             Carry.getOpcode() == ISD::ANY_EXTEND ||
             (Carry.getOpcode() == ISD::AND &&
              isOneConstant(Carry.getOperand(1))))
        Carry = Carry.getOperand(0);
      if (Carry.getOpcode() == X86ISD::SETCC ||
          Carry.getOpcode() == X86ISD::SETCC_CARRY) {
        if (Carry.getConstantOperandVal(0) == X86::COND_B)
          return Carry.getOperand(1);
      }
    }
  }

  return SDValue();
}

/// Optimize an EFLAGS definition used according to the condition code \p CC
/// into a simpler EFLAGS value, potentially returning a new \p CC and
/// replacing uses of chain values.
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
                                  SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  if (CC == X86::COND_B)
    if (SDValue Flags = combineCarryThroughADD(EFLAGS))
      return Flags;

  if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
    return R;
  return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
}

/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
  SDLoc DL(N);

  SDValue FalseOp = N->getOperand(0);
  SDValue TrueOp = N->getOperand(1);
  X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
  SDValue Cond = N->getOperand(3);

  // Try to simplify the EFLAGS and condition code operands.
  // We can't always do this as FCMOV only supports a subset of X86 cond.
  if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) {
    if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
      SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
                       Flags};
      return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
    }
  }

  // If this is a select between two integer constants, try to do some
  // optimizations. Note that the operands are ordered the opposite of SELECT
  // operands.
  if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
    if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
      // Canonicalize the TrueC/FalseC values so that TrueC (the true value)
      // is larger than FalseC (the false value).
      if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
        CC = X86::GetOppositeBranchCondition(CC);
        std::swap(TrueC, FalseC);
        std::swap(TrueOp, FalseOp);
      }

      // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
      // This is efficient for any integer data type (including i8/i16) and
      // shift amount.
      if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
        Cond = getSETCC(CC, Cond, DL, DAG);

        // Zero extend the condition if needed.
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);

        unsigned ShAmt = TrueC->getAPIntValue().logBase2();
        Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
                           DAG.getConstant(ShAmt, DL, MVT::i8));
        return Cond;
      }

      // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst. This is efficient
      // for any integer data type, including i8/i16.
      if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
        Cond = getSETCC(CC, Cond, DL, DAG);

        // Zero extend the condition if needed.
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
                           FalseC->getValueType(0), Cond);
        Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
                           SDValue(FalseC, 0));
        return Cond;
      }

      // Optimize cases that will turn into an LEA instruction.
This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; bool isFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { default: break; case 1: // result = add base, cond case 2: // result = lea base( , cond*2) case 3: // result = lea base(cond, cond*2) case 4: // result = lea base( , cond*4) case 5: // result = lea base(cond, cond*4) case 8: // result = lea base( , cond*8) case 9: // result = lea base(cond, cond*8) isFastMultiplier = true; break; } } if (isFastMultiplier) { APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); Cond = getSETCC(CC, Cond, DL ,DAG); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); // Scale the condition by the difference. if (Diff != 1) Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, DAG.getConstant(Diff, DL, Cond.getValueType())); // Add the base if non-zero. if (FalseC->getAPIntValue() != 0) Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); return Cond; } } } } // Handle these cases: // (select (x != c), e, c) -> select (x != c), e, x), // (select (x == c), c, e) -> select (x == c), x, e) // where the c is an integer constant, and the "select" is the combination // of CMOV and CMP. // // The rationale for this change is that the conditional-move from a constant // needs two instructions, however, conditional-move from a register needs // only one instruction. // // CAVEAT: By replacing a constant with a symbolic value, it may obscure // some instruction-combining opportunities. This opt needs to be // postponed as late as possible. // if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) { // the DCI.xxxx conditions are provided to postpone the optimization as // late as possible. ConstantSDNode *CmpAgainst = nullptr; if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) && (CmpAgainst = dyn_cast(Cond.getOperand(1))) && !isa(Cond.getOperand(0))) { if (CC == X86::COND_NE && CmpAgainst == dyn_cast(FalseOp)) { CC = X86::GetOppositeBranchCondition(CC); std::swap(TrueOp, FalseOp); } if (CC == X86::COND_E && CmpAgainst == dyn_cast(TrueOp)) { SDValue Ops[] = { FalseOp, Cond.getOperand(0), DAG.getConstant(CC, DL, MVT::i8), Cond }; return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } } // Fold and/or of setcc's to double CMOV: // (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2) // (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2) // // This combine lets us generate: // cmovcc1 (jcc1 if we don't have CMOV) // cmovcc2 (same) // instead of: // setcc1 // setcc2 // and/or // cmovne (jne if we don't have CMOV) // When we can't use the CMOV instruction, it might increase branch // mispredicts. // When we can use CMOV, or when there is no mispredict, this improves // throughput and reduces register pressure. 
  //
  if (CC == X86::COND_NE) {
    SDValue Flags;
    X86::CondCode CC0, CC1;
    bool isAndSetCC;
    if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) {
      if (isAndSetCC) {
        std::swap(FalseOp, TrueOp);
        CC0 = X86::GetOppositeBranchCondition(CC0);
        CC1 = X86::GetOppositeBranchCondition(CC1);
      }

      SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8),
                        Flags};
      SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps);
      SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8),
                       Flags};
      SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
      return CMOV;
    }
  }

  // Fold (CMOV C1, (ADD (CTTZ X), C2), (X != 0)) ->
  //      (ADD (CMOV C1-C2, (CTTZ X), (X != 0)), C2)
  // Or (CMOV (ADD (CTTZ X), C2), C1, (X == 0)) ->
  //    (ADD (CMOV (CTTZ X), C1-C2, (X == 0)), C2)
  if ((CC == X86::COND_NE || CC == X86::COND_E) &&
      Cond.getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1))) {
    SDValue Add = TrueOp;
    SDValue Const = FalseOp;
    // Canonicalize the condition code for easier matching and output.
    if (CC == X86::COND_E)
      std::swap(Add, Const);

    // We might have replaced the constant in the cmov with the LHS of the
    // compare. If so change it to the RHS of the compare.
    if (Const == Cond.getOperand(0))
      Const = Cond.getOperand(1);

    // Ok, now make sure that Add is (add (cttz X), C2) and Const is a
    // constant.
    if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD &&
        Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) &&
        (Add.getOperand(0).getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
         Add.getOperand(0).getOpcode() == ISD::CTTZ) &&
        Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) {
      EVT VT = N->getValueType(0);
      // This should constant fold.
      SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));
      SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),
                                 DAG.getConstant(X86::COND_NE, DL, MVT::i8),
                                 Cond);
      return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1));
    }
  }

  return SDValue();
}

/// Different mul shrinking modes.
enum ShrinkMode { MULS8, MULU8, MULS16, MULU16 };

static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG,
                               ShrinkMode &Mode) {
  EVT VT = N->getOperand(0).getValueType();
  if (VT.getScalarSizeInBits() != 32)
    return false;

  assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2");
  unsigned SignBits[2] = {1, 1};
  bool IsPositive[2] = {false, false};
  for (unsigned i = 0; i < 2; i++) {
    SDValue Opd = N->getOperand(i);
    SignBits[i] = DAG.ComputeNumSignBits(Opd);
    IsPositive[i] = DAG.SignBitIsZero(Opd);
  }

  bool AllPositive = IsPositive[0] && IsPositive[1];
  unsigned MinSignBits = std::min(SignBits[0], SignBits[1]);
  // When ranges are from -128 ~ 127, use MULS8 mode.
  if (MinSignBits >= 25)
    Mode = MULS8;
  // When ranges are from 0 ~ 255, use MULU8 mode.
  else if (AllPositive && MinSignBits >= 24)
    Mode = MULU8;
  // When ranges are from -32768 ~ 32767, use MULS16 mode.
  else if (MinSignBits >= 17)
    Mode = MULS16;
  // When ranges are from 0 ~ 65535, use MULU16 mode.
  else if (AllPositive && MinSignBits >= 16)
    Mode = MULU16;
  else
    return false;
  return true;
}

/// When the operands of a vector mul are extended from smaller size values,
/// like i8 and i16, the type of mul may be shrunk to generate more
/// efficient code. Two typical patterns are handled:
/// Pattern1:
///     %2 = sext/zext <N x i8> %1 to <N x i32>
///     %4 = sext/zext <N x i8> %3 to <N x i32>
///  or %4 = build_vector <N x i32> %C1, ..., %CN (%C1..%CN are constants)
///     %5 = mul <N x i32> %2, %4
///
/// Pattern2:
///     %2 = zext/sext <N x i16> %1 to <N x i32>
///     %4 = zext/sext <N x i16> %3 to <N x i32>
///  or %4 = build_vector <N x i32> %C1, ..., %CN (%C1..%CN are constants)
///     %5 = mul <N x i32> %2, %4
///
/// There are four mul shrinking modes:
/// If %2 == sext32(trunc8(%2)), i.e., the scalar value range of %2 is
/// -128 to 127, and the scalar value range of %4 is also -128 to 127,
/// generate pmullw+sext32 for it (MULS8 mode).
/// If %2 == zext32(trunc8(%2)), i.e., the scalar value range of %2 is
/// 0 to 255, and the scalar value range of %4 is also 0 to 255,
/// generate pmullw+zext32 for it (MULU8 mode).
/// If %2 == sext32(trunc16(%2)), i.e., the scalar value range of %2 is
/// -32768 to 32767, and the scalar value range of %4 is also -32768 to 32767,
/// generate pmullw+pmulhw for it (MULS16 mode).
/// If %2 == zext32(trunc16(%2)), i.e., the scalar value range of %2 is
/// 0 to 65535, and the scalar value range of %4 is also 0 to 65535,
/// generate pmullw+pmulhuw for it (MULU16 mode).
static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  // Check for legality
  // pmullw/pmulhw are not supported by SSE.
  if (!Subtarget.hasSSE2())
    return SDValue();

  // Check for profitability
  // pmulld is supported since SSE41. It is better to use pmulld
  // instead of pmullw+pmulhw, except for subtargets where pmulld is slower
  // than the expansion.
  bool OptForMinSize = DAG.getMachineFunction().getFunction().optForMinSize();
  if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
    return SDValue();

  ShrinkMode Mode;
  if (!canReduceVMulWidth(N, DAG, Mode))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getOperand(0).getValueType();
  unsigned NumElts = VT.getVectorNumElements();
  if ((NumElts % 2) != 0)
    return SDValue();

  unsigned RegSize = 128;
  MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
  EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);

  // Shrink the operands of mul.
  SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
  SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);

  if (ExperimentalVectorWideningLegalization ||
      NumElts >= OpsVT.getVectorNumElements()) {
    // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
    // lower part is needed.
    SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
    if (Mode == MULU8 || Mode == MULS8)
      return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
                         DL, VT, MulLo);

    MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
    // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
    // the higher part is also needed.
    SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
                                ReducedVT, NewN0, NewN1);

    // Repack the lower part and higher part result of mul into a wider
    // result.
    // Generate shuffle functioning as punpcklwd.
    SmallVector<int, 16> ShuffleMask(NumElts);
    for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
      ShuffleMask[2 * i] = i;
      ShuffleMask[2 * i + 1] = i + NumElts;
    }
    SDValue ResLo =
        DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
    ResLo = DAG.getBitcast(ResVT, ResLo);
    // Generate shuffle functioning as punpckhwd.
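    // Editorial example (added by this edit, not part of the upstream
    // source): with NumElts == 8 the two interleaving masks come out as
    //   punpcklwd: <0, 8, 1, 9, 2, 10, 3, 11>
    //   punpckhwd: <4, 12, 5, 13, 6, 14, 7, 15>
    // pairing each low-half product (from pmullw) with its high-half product
    // (from pmulhw/pmulhuw) to rebuild the full 32-bit results.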
for (unsigned i = 0, e = NumElts / 2; i < e; i++) { ShuffleMask[2 * i] = i + NumElts / 2; ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2; } SDValue ResHi = DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); ResHi = DAG.getBitcast(ResVT, ResHi); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi); } // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want // to legalize the mul explicitly because implicit legalization for type // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack // instructions which will not exist when we explicitly legalize it by // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with // <4 x i16> undef). // // Legalize the operands of mul. // FIXME: We may be able to handle non-concatenated vectors by insertion. unsigned ReducedSizeInBits = ReducedVT.getSizeInBits(); if ((RegSize % ReducedSizeInBits) != 0) return SDValue(); SmallVector Ops(RegSize / ReducedSizeInBits, DAG.getUNDEF(ReducedVT)); Ops[0] = NewN0; NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops); Ops[0] = NewN1; NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops); if (Mode == MULU8 || Mode == MULS8) { // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower // part is needed. SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1); // convert the type of mul result to VT. MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32); SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::SIGN_EXTEND_VECTOR_INREG, DL, ResVT, Mul); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, DAG.getIntPtrConstant(0, DL)); } // Generate the lower and higher part of mul: pmulhw/pmulhuw. For // MULU16/MULS16, both parts are needed. SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1); SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL, OpsVT, NewN0, NewN1); // Repack the lower part and higher part result of mul into a wider // result. Make sure the type of mul result is VT. MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32); SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi); Res = DAG.getBitcast(ResVT, Res); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, DAG.getIntPtrConstant(0, DL)); } static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, EVT VT, const SDLoc &DL) { auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) { SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), DAG.getConstant(Mult, DL, VT)); Result = DAG.getNode(ISD::SHL, DL, VT, Result, DAG.getConstant(Shift, DL, MVT::i8)); Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, N->getOperand(0)); return Result; }; auto combineMulMulAddOrSub = [&](int Mul1, int Mul2, bool isAdd) { SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), DAG.getConstant(Mul1, DL, VT)); Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, Result, DAG.getConstant(Mul2, DL, VT)); Result = DAG.getNode(isAdd ? 
ISD::ADD : ISD::SUB, DL, VT, Result, N->getOperand(0)); return Result; }; switch (MulAmt) { default: break; case 11: // mul x, 11 => add ((shl (mul x, 5), 1), x) return combineMulShlAddOrSub(5, 1, /*isAdd*/ true); case 21: // mul x, 21 => add ((shl (mul x, 5), 2), x) return combineMulShlAddOrSub(5, 2, /*isAdd*/ true); case 41: // mul x, 41 => add ((shl (mul x, 5), 3), x) return combineMulShlAddOrSub(5, 3, /*isAdd*/ true); case 22: // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x) return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), combineMulShlAddOrSub(5, 2, /*isAdd*/ true)); case 19: // mul x, 19 => add ((shl (mul x, 9), 1), x) return combineMulShlAddOrSub(9, 1, /*isAdd*/ true); case 37: // mul x, 37 => add ((shl (mul x, 9), 2), x) return combineMulShlAddOrSub(9, 2, /*isAdd*/ true); case 73: // mul x, 73 => add ((shl (mul x, 9), 3), x) return combineMulShlAddOrSub(9, 3, /*isAdd*/ true); case 13: // mul x, 13 => add ((shl (mul x, 3), 2), x) return combineMulShlAddOrSub(3, 2, /*isAdd*/ true); case 23: // mul x, 23 => sub ((shl (mul x, 3), 3), x) return combineMulShlAddOrSub(3, 3, /*isAdd*/ false); case 26: // mul x, 26 => add ((mul (mul x, 5), 5), x) return combineMulMulAddOrSub(5, 5, /*isAdd*/ true); case 28: // mul x, 28 => add ((mul (mul x, 9), 3), x) return combineMulMulAddOrSub(9, 3, /*isAdd*/ true); case 29: // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x) return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), combineMulMulAddOrSub(9, 3, /*isAdd*/ true)); } // Another trick. If this is a power of 2 + 2/4/8, we can use a shift followed // by a single LEA. // First check if this is a sum of two powers of 2 because that's easy. Then // count how many zeros there are up to the first set bit. // TODO: We can do this even without LEA at a cost of two shifts and an add. if (isPowerOf2_64(MulAmt & (MulAmt - 1))) { unsigned ScaleShift = countTrailingZeros(MulAmt); if (ScaleShift >= 1 && ScaleShift < 4) { unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1))); SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(ShiftAmt, DL, MVT::i8)); SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(ScaleShift, DL, MVT::i8)); return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2); } } return SDValue(); } // If the upper 17 bits of each element are zero then we can use PMADDWD, // which is always at least as quick as PMULLD, except on KNL. static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (!Subtarget.hasSSE2()) return SDValue(); if (Subtarget.isPMADDWDSlow()) return SDValue(); EVT VT = N->getValueType(0); // Only support vXi32 vectors. if (!VT.isVector() || VT.getVectorElementType() != MVT::i32) return SDValue(); // Make sure the vXi16 type is legal. This covers the AVX512 without BWI case. // Also allow v2i32 if it will be widened. MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements()); if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) || DAG.getTargetLoweringInfo().isTypeLegal(WVT))) return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // If we are zero extending two steps without SSE4.1, it's better to reduce // the vmul width instead.
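// Illustrative note on why 17 zero bits suffice: vpmaddwd computes
// a[2i]*b[2i] + a[2i+1]*b[2i+1] as a signed i16 x i16 -> i32 multiply-add.
// With the upper 17 bits of each i32 element zero, every odd i16 lane is
// zero and every even lane is a non-negative value <= 0x7FFF, so the
// multiply-add cannot overflow and equals the plain i32 multiply. The
// Mask17 check below enforces exactly this.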
if (!Subtarget.hasSSE41() && (N0.getOpcode() == ISD::ZERO_EXTEND && N0.getOperand(0).getScalarValueSizeInBits() <= 8) && (N1.getOpcode() == ISD::ZERO_EXTEND && N1.getOperand(0).getScalarValueSizeInBits() <= 8)) return SDValue(); APInt Mask17 = APInt::getHighBitsSet(32, 17); if (!DAG.MaskedValueIsZero(N1, Mask17) || !DAG.MaskedValueIsZero(N0, Mask17)) return SDValue(); // Use SplitOpsAndApply to handle AVX splitting. auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef<SDValue> Ops) { MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32); return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops); }; return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1) }, PMADDWDBuilder); } static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (!Subtarget.hasSSE2()) return SDValue(); EVT VT = N->getValueType(0); // Only support vXi64 vectors. if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 || VT.getVectorNumElements() < 2 || !isPowerOf2_32(VT.getVectorNumElements())) return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // PMULDQ returns the 64-bit result of the signed multiplication of the lower // 32-bits. We can lower with this if the sign bits stretch that far. if (Subtarget.hasSSE41() && DAG.ComputeNumSignBits(N0) > 32 && DAG.ComputeNumSignBits(N1) > 32) { auto PMULDQBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef<SDValue> Ops) { return DAG.getNode(X86ISD::PMULDQ, DL, Ops[0].getValueType(), Ops); }; return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 }, PMULDQBuilder, /*CheckBWI*/false); } // If the upper bits are zero we can use a single pmuludq. APInt Mask = APInt::getHighBitsSet(64, 32); if (DAG.MaskedValueIsZero(N0, Mask) && DAG.MaskedValueIsZero(N1, Mask)) { auto PMULUDQBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef<SDValue> Ops) { return DAG.getNode(X86ISD::PMULUDQ, DL, Ops[0].getValueType(), Ops); }; return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 }, PMULUDQBuilder, /*CheckBWI*/false); } return SDValue(); } /// Optimize a single multiply with constant into two operations in order to /// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA. static SDValue combineMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); if (SDValue V = combineMulToPMADDWD(N, DAG, Subtarget)) return V; if (SDValue V = combineMulToPMULDQ(N, DAG, Subtarget)) return V; if (DCI.isBeforeLegalize() && VT.isVector()) return reduceVMULWidth(N, DAG, Subtarget); if (!MulConstantOptimization) return SDValue(); // An imul is usually smaller than the alternative sequence. if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); if (VT != MVT::i64 && VT != MVT::i32) return SDValue(); ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (!C) return SDValue(); if (isPowerOf2_64(C->getZExtValue())) return SDValue(); int64_t SignMulAmt = C->getSExtValue(); assert(SignMulAmt != INT64_MIN && "Int min should have been handled!"); uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt; SDLoc DL(N); if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) { SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), DAG.getConstant(AbsMulAmt, DL, VT)); if (SignMulAmt < 0) NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); return NewMul; } uint64_t MulAmt1 = 0; uint64_t MulAmt2 = 0; if ((AbsMulAmt % 9) == 0) { MulAmt1 = 9; MulAmt2 = AbsMulAmt / 9; } else if ((AbsMulAmt % 5) == 0) { MulAmt1 = 5; MulAmt2 = AbsMulAmt / 5; } else if ((AbsMulAmt % 3) == 0) { MulAmt1 = 3; MulAmt2 = AbsMulAmt / 3; } SDValue NewMul; // For negative multiply amounts, only allow MulAmt2 to be a power of 2. if (MulAmt2 && (isPowerOf2_64(MulAmt2) || (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) { if (isPowerOf2_64(MulAmt2) && !(SignMulAmt >= 0 && N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD)) // If the second multiplier is pow2, issue it first. We want the multiply // by 3, 5, or 9 to be folded into the addressing mode unless the lone use // is an add. Only do this for positive multiply amounts since the // negate would prevent it from being used as an address mode anyway. std::swap(MulAmt1, MulAmt2); if (isPowerOf2_64(MulAmt1)) NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8)); else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), DAG.getConstant(MulAmt1, DL, VT)); if (isPowerOf2_64(MulAmt2)) NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul, DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8)); else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, DL, VT)); // Negate the result. if (SignMulAmt < 0) NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); } else if (!Subtarget.slowLEA()) NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL); if (!NewMul) { assert(C->getZExtValue() != 0 && C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) && "Both cases that could cause potential overflows should have " "already been handled."); if (isPowerOf2_64(AbsMulAmt - 1)) { // (mul x, 2^N + 1) => (add (shl x, N), x) NewMul = DAG.getNode( ISD::ADD, DL, VT, N->getOperand(0), DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(AbsMulAmt - 1), DL, MVT::i8))); // To negate, subtract the number from zero. if (SignMulAmt < 0) NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); } else if (isPowerOf2_64(AbsMulAmt + 1)) { // (mul x, 2^N - 1) => (sub (shl x, N), x) NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(AbsMulAmt + 1), DL, MVT::i8)); // To negate, reverse the operands of the subtract.
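// E.g. mul x, 7 becomes (sub (shl x, 3), x), while mul x, -7 becomes
// (sub x, (shl x, 3)).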
if (SignMulAmt < 0) NewMul = DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), NewMul); else NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2)) { // (mul x, 2^N + 2) => (add (add (shl x, N), x), x) NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(AbsMulAmt - 2), DL, MVT::i8)); NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0)); NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0)); } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2)) { // (mul x, 2^N - 2) => (sub (sub (shl x, N), x), x) NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(AbsMulAmt + 2), DL, MVT::i8)); NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); } } return NewMul; } static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)) // since the result of setcc_c is all zero's or all ones. if (VT.isInteger() && !VT.isVector() && N1C && N0.getOpcode() == ISD::AND && N0.getOperand(1).getOpcode() == ISD::Constant) { SDValue N00 = N0.getOperand(0); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask <<= N1C->getAPIntValue(); bool MaskOK = false; // We can handle cases concerning bit-widening nodes containing setcc_c if // we carefully interrogate the mask to make sure we are semantics // preserving. // The transform is not safe if the result of C1 << C2 exceeds the bitwidth // of the underlying setcc_c operation if the setcc_c was zero extended. // Consider the following example: // zext(setcc_c) -> i32 0x0000FFFF // c1 -> i32 0x0000FFFF // c2 -> i32 0x00000001 // (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE // (and setcc_c, (c1 << c2)) -> i32 0x0000FFFE if (N00.getOpcode() == X86ISD::SETCC_CARRY) { MaskOK = true; } else if (N00.getOpcode() == ISD::SIGN_EXTEND && N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { MaskOK = true; } else if ((N00.getOpcode() == ISD::ZERO_EXTEND || N00.getOpcode() == ISD::ANY_EXTEND) && N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits()); } if (MaskOK && Mask != 0) { SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT)); } } // Hardware support for vector shifts is sparse which makes us scalarize the // vector operations in many cases. Also, on sandybridge ADD is faster than // shl. // (shl V, 1) -> add V,V if (auto *N1BV = dyn_cast(N1)) if (auto *N1SplatC = N1BV->getConstantSplatNode()) { assert(N0.getValueType().isVector() && "Invalid vector shift type"); // We shift all of the values by one. In many cases we do not have // hardware support for this operation. This is better expressed as an ADD // of two values. 
if (N1SplatC->getAPIntValue() == 1) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); } return SDValue(); } static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); unsigned Size = VT.getSizeInBits(); // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) // depending on sign of (SarConst - [56,48,32,24,16]) // sexts in X86 are MOVs. The MOVs have the same code size // as above SHIFTs (only SHIFT on 1 has lower code size). // However the MOVs have 2 advantages to a SHIFT: // 1. MOVs can write to a register that differs from source // 2. MOVs accept memory operands if (VT.isVector() || N1.getOpcode() != ISD::Constant || N0.getOpcode() != ISD::SHL || !N0.hasOneUse() || N0.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); APInt ShlConst = (cast(N01))->getAPIntValue(); APInt SarConst = (cast(N1))->getAPIntValue(); EVT CVT = N1.getValueType(); if (SarConst.isNegative()) return SDValue(); for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { unsigned ShiftSize = SVT.getSizeInBits(); // skipping types without corresponding sext/zext and // ShlConst that is not one of [56,48,32,24,16] if (ShiftSize >= Size || ShlConst != Size - ShiftSize) continue; SDLoc DL(N); SDValue NN = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); SarConst = SarConst - (Size - ShiftSize); if (SarConst == 0) return NN; else if (SarConst.isNegative()) return DAG.getNode(ISD::SHL, DL, VT, NN, DAG.getConstant(-SarConst, DL, CVT)); else return DAG.getNode(ISD::SRA, DL, VT, NN, DAG.getConstant(SarConst, DL, CVT)); } return SDValue(); } static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); // Only do this on the last DAG combine as it can interfere with other // combines. if (!DCI.isAfterLegalizeDAG()) return SDValue(); // Try to improve a sequence of srl (and X, C1), C2 by inverting the order. // TODO: This is a generic DAG combine that became an x86-only combine to // avoid shortcomings in other folds such as bswap, bit-test ('bt'), and // and-not ('andn'). if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) return SDValue(); auto *ShiftC = dyn_cast(N1); auto *AndC = dyn_cast(N0.getOperand(1)); if (!ShiftC || !AndC) return SDValue(); // If we can shrink the constant mask below 8-bits or 32-bits, then this // transform should reduce code size. It may also enable secondary transforms // from improved known-bits analysis or instruction selection. APInt MaskVal = AndC->getAPIntValue(); // If this can be matched by a zero extend, don't optimize. 
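// E.g. (srl (and X, 0xFF), 4) is left alone, since (and X, 0xFF) is just a
// movzbl; but (srl (and X, 0x7F0), 4) becomes (and (srl X, 4), 0x7F),
// shrinking the mask to an 8-bit immediate.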
if (MaskVal.isMask()) { unsigned TO = MaskVal.countTrailingOnes(); if (TO >= 8 && isPowerOf2_32(TO)) return SDValue(); } APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue()); unsigned OldMaskSize = MaskVal.getMinSignedBits(); unsigned NewMaskSize = NewMaskVal.getMinSignedBits(); if ((OldMaskSize > 8 && NewMaskSize <= 8) || (OldMaskSize > 32 && NewMaskSize <= 32)) { // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC) SDLoc DL(N); SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT); SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1); return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask); } return SDValue(); } static SDValue combineShift(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { if (N->getOpcode() == ISD::SHL) if (SDValue V = combineShiftLeft(N, DAG)) return V; if (N->getOpcode() == ISD::SRA) if (SDValue V = combineShiftRightArithmetic(N, DAG)) return V; if (N->getOpcode() == ISD::SRL) if (SDValue V = combineShiftRightLogical(N, DAG, DCI)) return V; return SDValue(); } static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { unsigned Opcode = N->getOpcode(); assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) && "Unexpected shift opcode"); EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); unsigned DstBitsPerElt = VT.getScalarSizeInBits(); unsigned SrcBitsPerElt = 2 * DstBitsPerElt; assert(N0.getScalarValueSizeInBits() == SrcBitsPerElt && N1.getScalarValueSizeInBits() == SrcBitsPerElt && "Unexpected PACKSS/PACKUS input type"); bool IsSigned = (X86ISD::PACKSS == Opcode); // Constant Folding. APInt UndefElts0, UndefElts1; SmallVector EltBits0, EltBits1; if ((N0->isUndef() || N->isOnlyUserOf(N0.getNode())) && (N1->isUndef() || N->isOnlyUserOf(N1.getNode())) && getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) && getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) { unsigned NumLanes = VT.getSizeInBits() / 128; unsigned NumDstElts = VT.getVectorNumElements(); unsigned NumSrcElts = NumDstElts / 2; unsigned NumDstEltsPerLane = NumDstElts / NumLanes; unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes; APInt Undefs(NumDstElts, 0); SmallVector Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt)); for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) { unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane; auto &UndefElts = (Elt >= NumSrcEltsPerLane ? UndefElts1 : UndefElts0); auto &EltBits = (Elt >= NumSrcEltsPerLane ? EltBits1 : EltBits0); if (UndefElts[SrcIdx]) { Undefs.setBit(Lane * NumDstEltsPerLane + Elt); continue; } APInt &Val = EltBits[SrcIdx]; if (IsSigned) { // PACKSS: Truncate signed value with signed saturation. // Source values less than dst minint are saturated to minint. // Source values greater than dst maxint are saturated to maxint. if (Val.isSignedIntN(DstBitsPerElt)) Val = Val.trunc(DstBitsPerElt); else if (Val.isNegative()) Val = APInt::getSignedMinValue(DstBitsPerElt); else Val = APInt::getSignedMaxValue(DstBitsPerElt); } else { // PACKUS: Truncate signed value with unsigned saturation. // Source values less than zero are saturated to zero. // Source values greater than dst maxuint are saturated to maxuint. 
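// E.g. for an i16 -> i8 PACKUSWB fold: a source value of 300 saturates to
// 255, -5 saturates to 0, and 77 passes through unchanged.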
if (Val.isIntN(DstBitsPerElt)) Val = Val.trunc(DstBitsPerElt); else if (Val.isNegative()) Val = APInt::getNullValue(DstBitsPerElt); else Val = APInt::getAllOnesValue(DstBitsPerElt); } Bits[Lane * NumDstEltsPerLane + Elt] = Val; } } return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N)); } // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular // truncate to create a larger truncate. if (Subtarget.hasAVX512() && N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 && N0.getOperand(0).getValueType() == MVT::v8i32) { if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) || (!IsSigned && DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) { if (Subtarget.hasVLX()) return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0)); // Widen input to v16i32 so we can truncate that. SDLoc dl(N); SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32, N0.getOperand(0), DAG.getUNDEF(MVT::v8i32)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat); } } // Attempt to combine as shuffle. SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; return SDValue(); } static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() || X86ISD::VSRL == N->getOpcode()) && "Unexpected shift opcode"); EVT VT = N->getValueType(0); // Shift zero -> zero. if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) return DAG.getConstant(0, SDLoc(N), VT); APInt KnownUndef, KnownZero; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, KnownZero, DCI)) return SDValue(N, 0); return SDValue(); } static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { unsigned Opcode = N->getOpcode(); assert((X86ISD::VSHLI == Opcode || X86ISD::VSRAI == Opcode || X86ISD::VSRLI == Opcode) && "Unexpected shift opcode"); bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode; EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); unsigned NumBitsPerElt = VT.getScalarSizeInBits(); assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 && "Unexpected value type"); assert(N1.getValueType() == MVT::i8 && "Unexpected shift amount type"); // Out of range logical bit shifts are guaranteed to be zero. // Out of range arithmetic bit shifts splat the sign bit. unsigned ShiftVal = cast(N1)->getZExtValue(); if (ShiftVal >= NumBitsPerElt) { if (LogicalShift) return DAG.getConstant(0, SDLoc(N), VT); else ShiftVal = NumBitsPerElt - 1; } // Shift N0 by zero -> N0. if (!ShiftVal) return N0; // Shift zero -> zero. if (ISD::isBuildVectorAllZeros(N0.getNode())) return DAG.getConstant(0, SDLoc(N), VT); // Fold (VSRAI (VSRAI X, C1), C2) --> (VSRAI X, (C1 + C2)) with (C1 + C2) // clamped to (NumBitsPerElt - 1). 
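// E.g. for v4i32, (VSRAI (VSRAI X, 20), 20) implies a shift by 40, which is
// clamped to (VSRAI X, 31); both simply splat the sign bit.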
if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSRAI) { unsigned ShiftVal2 = cast(N0.getOperand(1))->getZExtValue(); unsigned NewShiftVal = ShiftVal + ShiftVal2; if (NewShiftVal >= NumBitsPerElt) NewShiftVal = NumBitsPerElt - 1; return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8)); } // We can decode 'whole byte' logical bit shifts as shuffles. if (LogicalShift && (ShiftVal % 8) == 0) { SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } // Constant Folding. APInt UndefElts; SmallVector EltBits; if (N->isOnlyUserOf(N0.getNode()) && getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) { assert(EltBits.size() == VT.getVectorNumElements() && "Unexpected shift value type"); for (APInt &Elt : EltBits) { if (X86ISD::VSHLI == Opcode) Elt <<= ShiftVal; else if (X86ISD::VSRAI == Opcode) Elt.ashrInPlace(ShiftVal); else Elt.lshrInPlace(ShiftVal); } return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N)); } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(NumBitsPerElt), DCI)) return SDValue(N, 0); return SDValue(); } static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { assert( ((N->getOpcode() == X86ISD::PINSRB && N->getValueType(0) == MVT::v16i8) || (N->getOpcode() == X86ISD::PINSRW && N->getValueType(0) == MVT::v8i16)) && "Unexpected vector insertion"); // Attempt to combine PINSRB/PINSRW patterns to a shuffle. SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; return SDValue(); } /// Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs /// reference the same FP CMP, and rewrite for CMPEQSS and friends. Likewise for /// OR -> CMPNEQSS. static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { unsigned opcode; // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but // we're requiring SSE2 for both. if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CMP0 = N0->getOperand(1); SDValue CMP1 = N1->getOperand(1); SDLoc DL(N); // The SETCCs should both refer to the same CMP. 
if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1) return SDValue(); SDValue CMP00 = CMP0->getOperand(0); SDValue CMP01 = CMP0->getOperand(1); EVT VT = CMP00.getValueType(); if (VT == MVT::f32 || VT == MVT::f64) { bool ExpectingFlags = false; // Check for any users that want flags: for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); !ExpectingFlags && UI != UE; ++UI) switch (UI->getOpcode()) { default: case ISD::BR_CC: case ISD::BRCOND: case ISD::SELECT: ExpectingFlags = true; break; case ISD::CopyToReg: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: break; } if (!ExpectingFlags) { enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0); enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0); if (cc1 == X86::COND_E || cc1 == X86::COND_NE) { X86::CondCode tmp = cc0; cc0 = cc1; cc1 = tmp; } if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) || (cc0 == X86::COND_NE && cc1 == X86::COND_P)) { // FIXME: need symbolic constants for these magic numbers. // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; if (Subtarget.hasAVX512()) { SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1, DAG.getConstant(0, DL, MVT::v16i1), FSetCC, DAG.getIntPtrConstant(0, DL)); return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL, N->getSimpleValueType(0)); } SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); bool is64BitFP = (CMP00.getValueType() == MVT::f64); MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; if (is64BitFP && !Subtarget.is64Bit()) { // On a 32-bit target, we cannot bitcast the 64-bit float to a // 64-bit integer, since that's not a legal type. Since // OnesOrZeroesF is all ones or all zeroes, we don't need all the // bits, but can do this little dance to extract the lowest 32 bits // and work with those going forward. SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, OnesOrZeroesF); SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64); OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Vector32, DAG.getIntPtrConstant(0, DL)); IntVT = MVT::i32; } SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF); SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI, DAG.getConstant(1, DL, IntVT)); SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed); return OneBitOfTruth; } } } } return SDValue(); } /// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
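/// E.g. with v4i32 operands, (and (xor %m, <-1,-1,-1,-1>), %y) becomes
/// (ANDNP %m, %y), matching the PANDN semantics of ~src1 & src2.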
static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) { assert(N->getOpcode() == ISD::AND); MVT VT = N->getSimpleValueType(0); if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) return SDValue(); SDValue X, Y; SDValue N0 = peekThroughBitcasts(N->getOperand(0)); SDValue N1 = peekThroughBitcasts(N->getOperand(1)); if (N0.getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) { X = N0.getOperand(0); Y = N1; } else if (N1.getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) { X = N1.getOperand(0); Y = N0; } else return SDValue(); X = DAG.getBitcast(VT, X); Y = DAG.getBitcast(VT, Y); return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y); } // On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized // register. In most cases we actually compare or select YMM-sized registers // and mixing the two types creates horrible code. This method optimizes // some of the transition sequences. // Even with AVX-512 this is still useful for removing casts around logical // operations on vXi1 mask types. static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); assert(VT.isVector() && "Expected vector type"); assert((N->getOpcode() == ISD::ANY_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node"); SDValue Narrow = N->getOperand(0); EVT NarrowVT = Narrow.getValueType(); if (Narrow->getOpcode() != ISD::XOR && Narrow->getOpcode() != ISD::AND && Narrow->getOpcode() != ISD::OR) return SDValue(); SDValue N0 = Narrow->getOperand(0); SDValue N1 = Narrow->getOperand(1); SDLoc DL(Narrow); // The Left side has to be a trunc. if (N0.getOpcode() != ISD::TRUNCATE) return SDValue(); // The type of the truncated inputs. if (N0->getOperand(0).getValueType() != VT) return SDValue(); // The right side has to be a 'trunc' or a constant vector. bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getValueType() == VT; if (!RHSTrunc && !ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), VT)) return SDValue(); // Set N0 and N1 to hold the inputs to the new wide operation. N0 = N0->getOperand(0); if (RHSTrunc) N1 = N1->getOperand(0); else N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1); // Generate the wide operation. SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1); unsigned Opcode = N->getOpcode(); switch (Opcode) { default: llvm_unreachable("Unexpected opcode"); case ISD::ANY_EXTEND: return Op; case ISD::ZERO_EXTEND: return DAG.getZeroExtendInReg(Op, DL, NarrowVT.getScalarType()); case ISD::SIGN_EXTEND: return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, DAG.getValueType(NarrowVT)); } } /// If both input operands of a logic op are being cast from floating point /// types, try to convert this into a floating point logic node to avoid /// unnecessary moves from SSE to integer registers. 
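/// E.g. (and (bitcast float %a to i32), (bitcast float %b to i32)) can be
/// rewritten as (bitcast (FAND %a, %b) to i32), keeping both values in XMM
/// registers.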
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { unsigned FPOpcode = ISD::DELETED_NODE; if (N->getOpcode() == ISD::AND) FPOpcode = X86ISD::FAND; else if (N->getOpcode() == ISD::OR) FPOpcode = X86ISD::FOR; else if (N->getOpcode() == ISD::XOR) FPOpcode = X86ISD::FXOR; assert(FPOpcode != ISD::DELETED_NODE && "Unexpected input node for FP logic conversion"); EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDLoc DL(N); if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST && ((Subtarget.hasSSE1() && VT == MVT::i32) || (Subtarget.hasSSE2() && VT == MVT::i64))) { SDValue N00 = N0.getOperand(0); SDValue N10 = N1.getOperand(0); EVT N00Type = N00.getValueType(); EVT N10Type = N10.getValueType(); if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) { SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10); return DAG.getBitcast(VT, FPLogic); } } return SDValue(); } /// If this is a zero/all-bits result that is bitwise-anded with a low bits /// mask. (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and' /// with a shift-right to eliminate loading the vector constant mask value. static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue Op0 = peekThroughBitcasts(N->getOperand(0)); SDValue Op1 = peekThroughBitcasts(N->getOperand(1)); EVT VT0 = Op0.getValueType(); EVT VT1 = Op1.getValueType(); if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger()) return SDValue(); APInt SplatVal; if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) || !SplatVal.isMask()) return SDValue(); // Don't prevent creation of ANDN. if (isBitwiseNot(Op0)) return SDValue(); if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL)) return SDValue(); unsigned EltBitWidth = VT0.getScalarSizeInBits(); if (EltBitWidth != DAG.ComputeNumSignBits(Op0)) return SDValue(); SDLoc DL(N); unsigned ShiftVal = SplatVal.countTrailingOnes(); SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8); SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt); return DAG.getBitcast(N->getValueType(0), Shift); } // Get the index node from the lowered DAG of a GEP IR instruction with one // indexing dimension. static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld) { if (Ld->isIndexed()) return SDValue(); SDValue Base = Ld->getBasePtr(); if (Base.getOpcode() != ISD::ADD) return SDValue(); SDValue ShiftedIndex = Base.getOperand(0); if (ShiftedIndex.getOpcode() != ISD::SHL) return SDValue(); return ShiftedIndex.getOperand(0); } static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) { if (Subtarget.hasBMI2() && VT.isScalarInteger()) { switch (VT.getSizeInBits()) { default: return false; case 64: return Subtarget.is64Bit() ? true : false; case 32: return true; } } return false; } // This function recognizes cases where X86 bzhi instruction can replace and // 'and-load' sequence. // In case of loading integer value from an array of constants which is defined // as follows: // // int array[SIZE] = {0x0, 0x1, 0x3, 0x7, 0xF ..., 2^(SIZE-1) - 1} // // then applying a bitwise and on the result with another input. // It's equivalent to performing bzhi (zero high bits) on the input, with the // same index of the load. 
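// E.g. with int array[32] = {0x0, 0x1, 0x3, 0x7, ...}, array[idx] equals
// (1 << idx) - 1, so (array[idx] & inp) clears all bits of inp at position
// idx and above -- exactly bzhi(inp, idx).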
static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = Node->getSimpleValueType(0); SDLoc dl(Node); // Check if subtarget has BZHI instruction for the node's type if (!hasBZHI(Subtarget, VT)) return SDValue(); // Try matching the pattern for both operands. for (unsigned i = 0; i < 2; i++) { SDValue N = Node->getOperand(i); LoadSDNode *Ld = dyn_cast(N.getNode()); // continue if the operand is not a load instruction if (!Ld) return SDValue(); const Value *MemOp = Ld->getMemOperand()->getValue(); if (!MemOp) return SDValue(); if (const GetElementPtrInst *GEP = dyn_cast(MemOp)) { if (GlobalVariable *GV = dyn_cast(GEP->getOperand(0))) { if (GV->isConstant() && GV->hasDefinitiveInitializer()) { Constant *Init = GV->getInitializer(); Type *Ty = Init->getType(); if (!isa(Init) || !Ty->getArrayElementType()->isIntegerTy() || Ty->getArrayElementType()->getScalarSizeInBits() != VT.getSizeInBits() || Ty->getArrayNumElements() > Ty->getArrayElementType()->getScalarSizeInBits()) continue; // Check if the array's constant elements are suitable to our case. uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); bool ConstantsMatch = true; for (uint64_t j = 0; j < ArrayElementCount; j++) { ConstantInt *Elem = dyn_cast(Init->getAggregateElement(j)); if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) { ConstantsMatch = false; break; } } if (!ConstantsMatch) continue; // Do the transformation (For 32-bit type): // -> (and (load arr[idx]), inp) // <- (and (srl 0xFFFFFFFF, (sub 32, idx))) // that will be replaced with one bzhi instruction. SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0); SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32); // Get the Node which indexes into the array. SDValue Index = getIndexFromUnindexedLoad(Ld); if (!Index) return SDValue(); Index = DAG.getZExtOrTrunc(Index, dl, MVT::i32); SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, SizeC, Index); Sub = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Sub); SDValue AllOnes = DAG.getAllOnesConstant(dl, VT); SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub); return DAG.getNode(ISD::AND, dl, VT, Inp, LShr); } } } } return SDValue(); } // Look for (and (ctpop X), 1) which is the IR form of __builtin_parity. // Turn it into series of XORs and a setnp. static SDValue combineParity(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); // We only support 64-bit and 32-bit. 64-bit requires special handling // unless the 64-bit popcnt instruction is legal. if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT)) return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // LHS needs to be a single use CTPOP. if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse()) return SDValue(); // RHS needs to be 1. if (!isOneConstant(N1)) return SDValue(); SDLoc DL(N); SDValue X = N0.getOperand(0); // If this is 64-bit, its always best to xor the two 32-bit pieces together // even if we have popcnt. if (VT == MVT::i64) { SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, DAG.getNode(ISD::SRL, DL, VT, X, DAG.getConstant(32, DL, MVT::i8))); SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi); // Generate a 32-bit parity idiom. This will bring us back here if we need // to expand it too. 
SDValue Parity = DAG.getNode(ISD::AND, DL, MVT::i32, DAG.getNode(ISD::CTPOP, DL, MVT::i32, X), DAG.getConstant(1, DL, MVT::i32)); return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Parity); } assert(VT == MVT::i32 && "Unexpected VT!"); // Xor the high and low 16-bits together using a 32-bit operation. SDValue Hi16 = DAG.getNode(ISD::SRL, DL, VT, X, DAG.getConstant(16, DL, MVT::i8)); X = DAG.getNode(ISD::XOR, DL, VT, X, Hi16); // Finally xor the low 2 bytes together and use a 8-bit flag setting xor. // This should allow an h-reg to be used to save a shift. // FIXME: We only get an h-reg in 32-bit mode. SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, DAG.getNode(ISD::SRL, DL, VT, X, DAG.getConstant(8, DL, MVT::i8))); SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32); SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1); // Copy the inverse of the parity flag into a register with setcc. SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); // Zero extend to original type. return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), Setnp); } static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); // If this is SSE1 only convert to FAND to avoid scalarization. if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { return DAG.getBitcast( MVT::v4i32, DAG.getNode(X86ISD::FAND, SDLoc(N), MVT::v4f32, DAG.getBitcast(MVT::v4f32, N->getOperand(0)), DAG.getBitcast(MVT::v4f32, N->getOperand(1)))); } // Use a 32-bit and+zext if upper bits known zero. if (VT == MVT::i64 && Subtarget.is64Bit() && !isa(N->getOperand(1))) { APInt HiMask = APInt::getHighBitsSet(64, 32); if (DAG.MaskedValueIsZero(N->getOperand(1), HiMask) || DAG.MaskedValueIsZero(N->getOperand(0), HiMask)) { SDLoc dl(N); SDValue LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(0)); SDValue RHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(1)); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, DAG.getNode(ISD::AND, dl, MVT::i32, LHS, RHS)); } } // This must be done before legalization has expanded the ctpop. if (SDValue V = combineParity(N, DAG, Subtarget)) return V; if (DCI.isBeforeLegalizeOps()) return SDValue(); if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget)) return R; if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) return FPLogic; if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG)) return R; if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget)) return ShiftRight; if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget)) return R; // Attempt to recursively combine a bitmask AND with shuffles. if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } // Attempt to combine a scalar bitmask AND with an extracted shuffle. if ((VT.getScalarSizeInBits() % 8) == 0 && N->getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && isa(N->getOperand(0).getOperand(1))) { SDValue BitMask = N->getOperand(1); SDValue SrcVec = N->getOperand(0).getOperand(0); EVT SrcVecVT = SrcVec.getValueType(); // Check that the constant bitmask masks whole bytes. 
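// E.g. extracting i32 element 0 and masking with 0x00FF00FF is the same as
// zeroing bytes 1 and 3 of that element inside a byte shuffle of the source
// vector and then extracting, which lets the mask fold away.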
APInt UndefElts; SmallVector EltBits; if (VT == SrcVecVT.getScalarType() && N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) && getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) && llvm::all_of(EltBits, [](APInt M) { return M.isNullValue() || M.isAllOnesValue(); })) { unsigned NumElts = SrcVecVT.getVectorNumElements(); unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8; unsigned Idx = N->getOperand(0).getConstantOperandVal(1); // Create a root shuffle mask from the byte mask and the extracted index. SmallVector ShuffleMask(NumElts * Scale, SM_SentinelUndef); for (unsigned i = 0; i != Scale; ++i) { if (UndefElts[i]) continue; int VecIdx = Scale * Idx + i; ShuffleMask[VecIdx] = EltBits[i].isNullValue() ? SM_SentinelZero : VecIdx; } if (SDValue Shuffle = combineX86ShufflesRecursively( {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle, N->getOperand(0).getOperand(1)); } } return SDValue(); } // Try to match OR(AND(~MASK,X),AND(MASK,Y)) logic pattern. static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) { if (N->getOpcode() != ISD::OR) return false; SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Canonicalize AND to LHS. if (N1.getOpcode() == ISD::AND) std::swap(N0, N1); // Attempt to match OR(AND(M,Y),ANDNP(M,X)). if (N0.getOpcode() != ISD::AND || N1.getOpcode() != X86ISD::ANDNP) return false; Mask = N1.getOperand(0); X = N1.getOperand(1); // Check to see if the mask appeared in both the AND and ANDNP. if (N0.getOperand(0) == Mask) Y = N0.getOperand(1); else if (N0.getOperand(1) == Mask) Y = N0.getOperand(0); else return false; // TODO: Attempt to match against AND(XOR(-1,M),Y) as well, waiting for // ANDNP combine allows other combines to happen that prevent matching. return true; } // Try to fold: // (or (and (m, y), (pandn m, x))) // into: // (vselect m, x, y) // As a special case, try to fold: // (or (and (m, (sub 0, x)), (pandn m, x))) // into: // (sub (xor X, M), M) static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); EVT VT = N->getValueType(0); if (!((VT.is128BitVector() && Subtarget.hasSSE2()) || (VT.is256BitVector() && Subtarget.hasInt256()))) return SDValue(); SDValue X, Y, Mask; if (!matchLogicBlend(N, X, Y, Mask)) return SDValue(); // Validate that X, Y, and Mask are bitcasts, and see through them. Mask = peekThroughBitcasts(Mask); X = peekThroughBitcasts(X); Y = peekThroughBitcasts(Y); EVT MaskVT = Mask.getValueType(); unsigned EltBits = MaskVT.getScalarSizeInBits(); // TODO: Attempt to handle floating point cases as well? if (!MaskVT.isInteger() || DAG.ComputeNumSignBits(Mask) != EltBits) return SDValue(); SDLoc DL(N); // Try to match: // (or (and (M, (sub 0, X)), (pandn M, X))) // which is a special case of vselect: // (vselect M, (sub 0, X), X) // Per: // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate // We know that, if fNegate is 0 or 1: // (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate) // // Here, we have a mask, M (all 1s or 0), and, similarly, we know that: // ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1)) // ( M ? 
-X : X) == ((X ^ M ) + (M & 1)) // This lets us transform our vselect to: // (add (xor X, M), (and M, 1)) // And further to: // (sub (xor X, M), M) if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT && DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT)) { auto IsNegV = [](SDNode *N, SDValue V) { return N->getOpcode() == ISD::SUB && N->getOperand(1) == V && ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()); }; SDValue V; if (IsNegV(Y.getNode(), X)) V = X; else if (IsNegV(X.getNode(), Y)) V = Y; if (V) { SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask); SDValue SubOp2 = Mask; // If the negate was on the false side of the select, then // the operands of the SUB need to be swapped. PR 27251. // This is because the pattern being matched above is // (vselect M, (sub (0, X), X) -> (sub (xor X, M), M) // but if the pattern matched was // (vselect M, X, (sub (0, X))), that is really negation of the pattern // above, -(vselect M, (sub 0, X), X), and therefore the replacement // pattern also needs to be a negation of the replacement pattern above. // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the // sub accomplishes the negation of the replacement pattern. if (V == Y) std::swap(SubOp1, SubOp2); SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2); return DAG.getBitcast(VT, Res); } } // PBLENDVB is only available on SSE 4.1. if (!Subtarget.hasSSE41()) return SDValue(); MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8; X = DAG.getBitcast(BlendVT, X); Y = DAG.getBitcast(BlendVT, Y); Mask = DAG.getBitcast(BlendVT, Mask); Mask = DAG.getSelect(DL, BlendVT, Mask, Y, X); return DAG.getBitcast(VT, Mask); } // Helper function for combineOrCmpEqZeroToCtlzSrl // Transforms: // seteq(cmp x, 0) // into: // srl(ctlz x), log2(bitsize(x)) // Input pattern is checked by caller. static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, SelectionDAG &DAG) { SDValue Cmp = Op.getOperand(1); EVT VT = Cmp.getOperand(0).getValueType(); unsigned Log2b = Log2_32(VT.getSizeInBits()); SDLoc dl(Op); SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0)); // The result of the shift is true or false, and on X86, the 32-bit // encoding of shr and lzcnt is more desirable. SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32); SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc, DAG.getConstant(Log2b, dl, MVT::i8)); return DAG.getZExtOrTrunc(Scc, dl, ExtTy); } // Try to transform: // zext(or(setcc(eq, (cmp x, 0)), setcc(eq, (cmp y, 0)))) // into: // srl(or(ctlz(x), ctlz(y)), log2(bitsize(x)) // Will also attempt to match more generic cases, eg: // zext(or(or(setcc(eq, cmp 0), setcc(eq, cmp 0)), setcc(eq, cmp 0))) // Only applies if the target supports the FastLZCNT feature. static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast()) return SDValue(); auto isORCandidate = [](SDValue N) { return (N->getOpcode() == ISD::OR && N->hasOneUse()); }; // Check the zero extend is extending to 32-bit or more. The code generated by // srl(ctlz) for 16-bit or less variants of the pattern would require extra // instructions to clear the upper bits. 
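// E.g. for i32 inputs, zext(or(seteq(x, 0), seteq(y, 0))) can become
// srl(or(ctlz(x), ctlz(y)), 5): ctlz of a 32-bit value only produces 32
// (bit 5 set) when its input is zero, so the shift by log2(32) extracts
// exactly that condition.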
if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) || !isORCandidate(N->getOperand(0))) return SDValue(); // Check the node matches: setcc(eq, cmp 0) auto isSetCCCandidate = [](SDValue N) { return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() && X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E && N->getOperand(1).getOpcode() == X86ISD::CMP && isNullConstant(N->getOperand(1).getOperand(1)) && N->getOperand(1).getValueType().bitsGE(MVT::i32); }; SDNode *OR = N->getOperand(0).getNode(); SDValue LHS = OR->getOperand(0); SDValue RHS = OR->getOperand(1); // Save nodes matching or(or, setcc(eq, cmp 0)). SmallVector<SDNode *, 2> ORNodes; while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) || (isORCandidate(RHS) && isSetCCCandidate(LHS)))) { ORNodes.push_back(OR); OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode(); LHS = OR->getOperand(0); RHS = OR->getOperand(1); } // The last OR node should match or(setcc(eq, cmp 0), setcc(eq, cmp 0)). if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) || !isORCandidate(SDValue(OR, 0))) return SDValue(); // We have an or(setcc(eq, cmp 0), setcc(eq, cmp 0)) pattern, try to lower it // to // or(srl(ctlz),srl(ctlz)). // The dag combiner can then fold it into: // srl(or(ctlz, ctlz)). EVT VT = OR->getValueType(0); SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG); SDValue Ret, NewRHS; if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG))) Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS); if (!Ret) return SDValue(); // Try to lower nodes matching the or(or, setcc(eq, cmp 0)) pattern. while (ORNodes.size() > 0) { OR = ORNodes.pop_back_val(); LHS = OR->getOperand(0); RHS = OR->getOperand(1); // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or). if (RHS->getOpcode() == ISD::OR) std::swap(LHS, RHS); EVT VT = OR->getValueType(0); SDValue NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG); if (!NewRHS) return SDValue(); Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS); } if (Ret) Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); return Ret; } static SDValue combineOr(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); // If this is SSE1 only convert to FOR to avoid scalarization. if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { return DAG.getBitcast(MVT::v4i32, DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32, DAG.getBitcast(MVT::v4f32, N0), DAG.getBitcast(MVT::v4f32, N1))); } if (DCI.isBeforeLegalizeOps()) return SDValue(); if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget)) return R; if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) return FPLogic; if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget)) return R; // Attempt to recursively combine an OR of shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) bool OptForSize = DAG.getMachineFunction().getFunction().optForSize(); unsigned Bits = VT.getScalarSizeInBits(); // SHLD/SHRD instructions have lower register pressure, but on some // platforms they have higher latency than the equivalent // series of shifts/or that would otherwise be generated. // Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions // have higher latencies and we are not optimizing for size. if (!OptForSize && Subtarget.isSHLDSlow()) return SDValue(); if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); if (!N0.hasOneUse() || !N1.hasOneUse()) return SDValue(); SDValue ShAmt0 = N0.getOperand(1); if (ShAmt0.getValueType() != MVT::i8) return SDValue(); SDValue ShAmt1 = N1.getOperand(1); if (ShAmt1.getValueType() != MVT::i8) return SDValue(); // Peek through any modulo shift masks. SDValue ShMsk0; if (ShAmt0.getOpcode() == ISD::AND && isa(ShAmt0.getOperand(1)) && ShAmt0.getConstantOperandVal(1) == (Bits - 1)) { ShMsk0 = ShAmt0; ShAmt0 = ShAmt0.getOperand(0); } SDValue ShMsk1; if (ShAmt1.getOpcode() == ISD::AND && isa(ShAmt1.getOperand(1)) && ShAmt1.getConstantOperandVal(1) == (Bits - 1)) { ShMsk1 = ShAmt1; ShAmt1 = ShAmt1.getOperand(0); } if (ShAmt0.getOpcode() == ISD::TRUNCATE) ShAmt0 = ShAmt0.getOperand(0); if (ShAmt1.getOpcode() == ISD::TRUNCATE) ShAmt1 = ShAmt1.getOperand(0); SDLoc DL(N); unsigned Opc = X86ISD::SHLD; SDValue Op0 = N0.getOperand(0); SDValue Op1 = N1.getOperand(0); if (ShAmt0.getOpcode() == ISD::SUB || ShAmt0.getOpcode() == ISD::XOR) { Opc = X86ISD::SHRD; std::swap(Op0, Op1); std::swap(ShAmt0, ShAmt1); std::swap(ShMsk0, ShMsk1); } // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C ) // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C ) // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C ) // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C ) // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> SHLD( X, Y, C ) // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> SHRD( X, Y, C ) if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (auto *SumC = dyn_cast(Sum)) { SDValue ShAmt1Op1 = ShAmt1.getOperand(1); if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE) ShAmt1Op1 = ShAmt1Op1.getOperand(0); if ((SumC->getAPIntValue() == Bits || (SumC->getAPIntValue() == 0 && ShMsk1)) && ShAmt1Op1 == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); } } else if (auto *ShAmt1C = dyn_cast(ShAmt1)) { auto *ShAmt0C = dyn_cast(ShAmt0); if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), N1.getOperand(0), DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); } else if (ShAmt1.getOpcode() == ISD::XOR) { SDValue Mask = ShAmt1.getOperand(1); if (auto *MaskC = dyn_cast(Mask)) { unsigned InnerShift = (X86ISD::SHLD == Opc ? 
ISD::SRL : ISD::SHL); SDValue ShAmt1Op0 = ShAmt1.getOperand(0); if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE) ShAmt1Op0 = ShAmt1Op0.getOperand(0); if (MaskC->getSExtValue() == (Bits - 1) && (ShAmt1Op0 == ShAmt0 || ShAmt1Op0 == ShMsk0)) { if (Op1.getOpcode() == InnerShift && isa(Op1.getOperand(1)) && Op1.getConstantOperandVal(1) == 1) { return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0), DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); } // Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ). if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD && Op1.getOperand(0) == Op1.getOperand(1)) { return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0), DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); } } } } return SDValue(); } /// Try to turn tests against the signbit in the form of: /// XOR(TRUNCATE(SRL(X, size(X)-1)), 1) /// into: /// SETGT(X, -1) static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) { // This is only worth doing if the output type is i8 or i1. EVT ResultType = N->getValueType(0); if (ResultType != MVT::i8 && ResultType != MVT::i1) return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // We should be performing an xor against a truncated shift. if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse()) return SDValue(); // Make sure we are performing an xor against one. if (!isOneConstant(N1)) return SDValue(); // SetCC on x86 zero extends so only act on this if it's a logical shift. SDValue Shift = N0.getOperand(0); if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse()) return SDValue(); // Make sure we are truncating from one of i16, i32 or i64. EVT ShiftTy = Shift.getValueType(); if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64) return SDValue(); // Make sure the shift amount extracts the sign bit. if (!isa(Shift.getOperand(1)) || Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1) return SDValue(); // Create a greater-than comparison against -1. // N.B. Using SETGE against 0 works but we want a canonical looking // comparison, using SETGT matches up with what TranslateX86CC. SDLoc DL(N); SDValue ShiftOp = Shift.getOperand(0); EVT ShiftOpTy = ShiftOp.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResultType); SDValue Cond = DAG.getSetCC(DL, SetCCResultType, ShiftOp, DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT); if (SetCCResultType != ResultType) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, ResultType, Cond); return Cond; } /// Turn vector tests of the signbit in the form of: /// xor (sra X, elt_size(X)-1), -1 /// into: /// pcmpgt X, -1 /// /// This should be called before type legalization because the pattern may not /// persist after that. static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); if (!VT.isSimple()) return SDValue(); switch (VT.getSimpleVT().SimpleTy) { default: return SDValue(); case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: if (!Subtarget.hasSSE2()) return SDValue(); break; case MVT::v2i64: if (!Subtarget.hasSSE42()) return SDValue(); break; case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break; } // There must be a shift right algebraic before the xor, and the xor must be a // 'not' operation. 
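// E.g. (xor (sra %x, <31,31,31,31>), <-1,-1,-1,-1>) yields all-ones exactly
// in the lanes where the sign bit of %x is clear, which is
// (pcmpgt %x, -1): all-ones where %x > -1, i.e. where %x is non-negative.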
SDValue Shift = N->getOperand(0); SDValue Ones = N->getOperand(1); if (Shift.getOpcode() != ISD::SRA || !Shift.hasOneUse() || !ISD::isBuildVectorAllOnes(Ones.getNode())) return SDValue(); // The shift should be smearing the sign bit across each vector element. auto *ShiftBV = dyn_cast(Shift.getOperand(1)); if (!ShiftBV) return SDValue(); EVT ShiftEltTy = Shift.getValueType().getVectorElementType(); auto *ShiftAmt = ShiftBV->getConstantSplatNode(); if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1) return SDValue(); // Create a greater-than comparison against -1. We don't use the more obvious // greater-than-or-equal-to-zero because SSE/AVX don't have that instruction. return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); } /// Check if truncation with saturation form type \p SrcVT to \p DstVT /// is valid for the given \p Subtarget. static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) { if (!Subtarget.hasAVX512()) return false; // FIXME: Scalar type may be supported if we move it to vector register. if (!SrcVT.isVector()) return false; EVT SrcElVT = SrcVT.getScalarType(); EVT DstElVT = DstVT.getScalarType(); if (DstElVT != MVT::i8 && DstElVT != MVT::i16 && DstElVT != MVT::i32) return false; if (SrcVT.is512BitVector() || Subtarget.hasVLX()) return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); return false; } /// Detect patterns of truncation with unsigned saturation: /// /// 1. (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). /// Return the source value x to be truncated or SDValue() if the pattern was /// not matched. /// /// 2. (truncate (smin (smax (x, C1), C2)) to dest_type), /// where C1 >= 0 and C2 is unsigned max of destination type. /// /// (truncate (smax (smin (x, C2), C1)) to dest_type) /// where C1 >= 0, C2 is unsigned max of destination type and C1 <= C2. /// /// These two patterns are equivalent to: /// (truncate (umin (smax(x, C1), unsigned_max_of_dest_type)) to dest_type) /// So return the smax(x, C1) value to be truncated or SDValue() if the /// pattern was not matched. static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL) { EVT InVT = In.getValueType(); // Saturation with truncation. We truncate from InVT to VT. assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() && "Unexpected types for truncate operation"); // Match min/max and return limit value as a parameter. auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue { if (V.getOpcode() == Opcode && ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit)) return V.getOperand(0); return SDValue(); }; APInt C1, C2; if (SDValue UMin = MatchMinMax(In, ISD::UMIN, C2)) // C2 should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according // the element size of the destination type. 
  if (C2.isMask(VT.getScalarSizeInBits()))
      return UMin;

  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, C2))
    if (MatchMinMax(SMin, ISD::SMAX, C1))
      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()))
        return SMin;

  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, C1))
    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, C2))
      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) &&
          C2.uge(C1)) {
        return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
      }

  return SDValue();
}

/// Detect patterns of truncation with signed saturation:
/// (truncate (smin ((smax (x, signed_min_of_dest_type)),
///                  signed_max_of_dest_type)) to dest_type)
/// or:
/// (truncate (smax ((smin (x, signed_max_of_dest_type)),
///                  signed_min_of_dest_type)) to dest_type).
/// With MatchPackUS, the smax/smin range is [0, unsigned_max_of_dest_type].
/// Return the source value to be truncated or SDValue() if the pattern was not
/// matched.
static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) {
  unsigned NumDstBits = VT.getScalarSizeInBits();
  unsigned NumSrcBits = In.getScalarValueSizeInBits();
  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");

  auto MatchMinMax = [](SDValue V, unsigned Opcode,
                        const APInt &Limit) -> SDValue {
    APInt C;
    if (V.getOpcode() == Opcode &&
        ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit)
      return V.getOperand(0);
    return SDValue();
  };

  APInt SignedMax, SignedMin;
  if (MatchPackUS) {
    SignedMax = APInt::getAllOnesValue(NumDstBits).zext(NumSrcBits);
    SignedMin = APInt(NumSrcBits, 0);
  } else {
    SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
    SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
  }

  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax))
    if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin))
      return SMax;

  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin))
    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax))
      return SMin;

  return SDValue();
}

/// Detect a pattern of truncation with signed saturation.
/// The types should allow use of the VPMOVSS* instruction on AVX512.
/// Return the source value to be truncated or SDValue() if the pattern was not
/// matched.
static SDValue detectAVX512SSatPattern(SDValue In, EVT VT,
                                       const X86Subtarget &Subtarget,
                                       const TargetLowering &TLI) {
  if (!TLI.isTypeLegal(In.getValueType()))
    return SDValue();
  if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
    return SDValue();
  return detectSSatPattern(In, VT);
}

/// Detect a pattern of truncation with unsigned saturation:
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// The types should allow use of the VPMOVUS* instruction on AVX512.
/// Return the source value to be truncated or SDValue() if the pattern was not
/// matched.
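// Editor's aside -- an illustration, not part of the original patch. In
// scalar terms the unsigned-saturation pattern above is
//   uint8_t r = (uint8_t)(x < 255u ? x : 255u);   // trunc(umin(x, 255))
// The umin clamp makes the following truncation lossless, which is what
// allows it to legalize to a single saturating truncate such as VPMOVUSDB.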
static SDValue detectAVX512USatPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                                       const SDLoc &DL,
                                       const X86Subtarget &Subtarget,
                                       const TargetLowering &TLI) {
  if (!TLI.isTypeLegal(In.getValueType()))
    return SDValue();
  if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
    return SDValue();
  return detectUSatPattern(In, VT, DAG, DL);
}

static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
                                      SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  EVT SVT = VT.getScalarType();
  EVT InVT = In.getValueType();
  EVT InSVT = InVT.getScalarType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isTypeLegal(InVT) && TLI.isTypeLegal(VT) &&
      isSATValidOnAVX512Subtarget(InVT, VT, Subtarget)) {
    if (auto SSatVal = detectSSatPattern(In, VT))
      return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
    if (auto USatVal = detectUSatPattern(In, VT, DAG, DL))
      return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
  }
  if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) &&
      !Subtarget.hasAVX512() &&
      (SVT == MVT::i8 || SVT == MVT::i16) &&
      (InSVT == MVT::i16 || InSVT == MVT::i32)) {
    if (auto USatVal = detectSSatPattern(In, VT, true)) {
      // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW).
      if (SVT == MVT::i8 && InSVT == MVT::i32) {
        EVT MidVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                                     VT.getVectorNumElements());
        SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
                                             DAG, Subtarget);
        if (Mid)
          return truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
                                        Subtarget);
      } else if (SVT == MVT::i8 || Subtarget.hasSSE41())
        return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG,
                                      Subtarget);
    }
    if (auto SSatVal = detectSSatPattern(In, VT))
      return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG,
                                    Subtarget);
  }
  return SDValue();
}

/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replaces this operation with the
/// efficient X86ISD::AVG instruction.
static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget,
                                const SDLoc &DL) {
  if (!VT.isVector())
    return SDValue();
  EVT InVT = In.getValueType();
  unsigned NumElems = VT.getVectorNumElements();

  EVT ScalarVT = VT.getVectorElementType();
  if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
        NumElems >= 2 && isPowerOf2_32(NumElems)))
    return SDValue();

  // InScalarVT is the intermediate type in the AVG pattern and it should be
  // wider than the original input type (i8/i16).
  EVT InScalarVT = InVT.getVectorElementType();
  if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
    return SDValue();

  if (!Subtarget.hasSSE2())
    return SDValue();

  // Detect the following pattern:
  //
  //   %1 = zext <N x i8> %a to <N x i32>
  //   %2 = zext <N x i8> %b to <N x i32>
  //   %3 = add nuw nsw <N x i32> %1, <i32 1 x N>
  //   %4 = add nuw nsw <N x i32> %3, %2
  //   %5 = lshr <N x i32> %4, <i32 1 x N>
  //   %6 = trunc <N x i32> %5 to <N x i8>
  //
  // In AVX512, the last instruction can also be a trunc store.
  if (In.getOpcode() != ISD::SRL)
    return SDValue();

  // A lambda that checks whether the given SDValue is a constant vector with
  // every element in the range [Min, Max].
  auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
    BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
    if (!BV || !BV->isConstant())
      return false;
    for (SDValue Op : V->ops()) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
      if (!C)
        return false;
      const APInt &Val = C->getAPIntValue();
      if (Val.ult(Min) || Val.ugt(Max))
        return false;
    }
    return true;
  };

  // Check if each element of the vector is left-shifted by one.
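  // Editor's note -- a worked example, not part of the original patch: for i8
  // inputs a = 200 and b = 100, the pattern computes (200 + 100 + 1) >> 1 =
  // 150 in the wider type, with no overflow; PAVGB yields the same rounded-up
  // average directly on the i8 lanes.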
auto LHS = In.getOperand(0); auto RHS = In.getOperand(1); if (!IsConstVectorInRange(RHS, 1, 1)) return SDValue(); if (LHS.getOpcode() != ISD::ADD) return SDValue(); // Detect a pattern of a + b + 1 where the order doesn't matter. SDValue Operands[3]; Operands[0] = LHS.getOperand(0); Operands[1] = LHS.getOperand(1); auto AVGBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef Ops) { return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops); }; // Take care of the case when one of the operands is a constant vector whose // element is in the range [1, 256]. if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) && Operands[0].getOpcode() == ISD::ZERO_EXTEND && Operands[0].getOperand(0).getValueType() == VT) { // The pattern is detected. Subtract one from the constant vector, then // demote it and emit X86ISD::AVG instruction. SDValue VecOnes = DAG.getConstant(1, DL, InVT); Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes); Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]); return SplitOpsAndApply(DAG, Subtarget, DL, VT, { Operands[0].getOperand(0), Operands[1] }, AVGBuilder); } if (Operands[0].getOpcode() == ISD::ADD) std::swap(Operands[0], Operands[1]); else if (Operands[1].getOpcode() != ISD::ADD) return SDValue(); Operands[2] = Operands[1].getOperand(0); Operands[1] = Operands[1].getOperand(1); // Now we have three operands of two additions. Check that one of them is a // constant vector with ones, and the other two are promoted from i8/i16. for (int i = 0; i < 3; ++i) { if (!IsConstVectorInRange(Operands[i], 1, 1)) continue; std::swap(Operands[i], Operands[2]); // Check if Operands[0] and Operands[1] are results of type promotion. for (int j = 0; j < 2; ++j) if (Operands[j].getOpcode() != ISD::ZERO_EXTEND || Operands[j].getOperand(0).getValueType() != VT) return SDValue(); // The pattern is detected, emit X86ISD::AVG instruction(s). return SplitOpsAndApply(DAG, Subtarget, DL, VT, { Operands[0].getOperand(0), Operands[1].getOperand(0) }, AVGBuilder); } return SDValue(); } static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { LoadSDNode *Ld = cast(N); EVT RegVT = Ld->getValueType(0); EVT MemVT = Ld->getMemoryVT(); SDLoc dl(Ld); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // For chips with slow 32-byte unaligned loads, break the 32-byte operation // into two 16-byte operations. Also split non-temporal aligned loads on // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads. 
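// Editor's note -- an illustration, not part of the original patch: a v8f32
// load from Ptr is rewritten as two v4f32 loads, from Ptr and Ptr+16, whose
// chains are joined by a TokenFactor and whose values are reassembled with
// CONCAT_VECTORS.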
ISD::LoadExtType Ext = Ld->getExtensionType(); bool Fast; unsigned AddressSpace = Ld->getAddressSpace(); unsigned Alignment = Ld->getAlignment(); if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && Ext == ISD::NON_EXTLOAD && ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) || (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, AddressSpace, Alignment, &Fast) && !Fast))) { unsigned NumElems = RegVT.getVectorNumElements(); if (NumElems < 2) return SDValue(); SDValue Ptr = Ld->getBasePtr(); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), NumElems/2); SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Alignment, Ld->getMemOperand()->getFlags()); Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl); SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo().getWithOffset(16), MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags()); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2); return DCI.CombineTo(N, NewVec, TF, true); } return SDValue(); } /// If V is a build vector of boolean constants and exactly one of those /// constants is true, return the operand index of that true element. /// Otherwise, return -1. static int getOneTrueElt(SDValue V) { // This needs to be a build vector of booleans. // TODO: Checking for the i1 type matches the IR definition for the mask, // but the mask check could be loosened to i8 or other types. That might // also require checking more than 'allOnesValue'; eg, the x86 HW // instructions only require that the MSB is set for each mask element. // The ISD::MSTORE comments/definition do not specify how the mask operand // is formatted. auto *BV = dyn_cast(V); if (!BV || BV->getValueType(0).getVectorElementType() != MVT::i1) return -1; int TrueIndex = -1; unsigned NumElts = BV->getValueType(0).getVectorNumElements(); for (unsigned i = 0; i < NumElts; ++i) { const SDValue &Op = BV->getOperand(i); if (Op.isUndef()) continue; auto *ConstNode = dyn_cast(Op); if (!ConstNode) return -1; if (ConstNode->getAPIntValue().isAllOnesValue()) { // If we already found a one, this is too many. if (TrueIndex >= 0) return -1; TrueIndex = i; } } return TrueIndex; } /// Given a masked memory load/store operation, return true if it has one mask /// bit set. If it has one mask bit set, then also return the memory address of /// the scalar element to load/store, the vector index to insert/extract that /// scalar element, and the alignment for the scalar memory access. static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp, SelectionDAG &DAG, SDValue &Addr, SDValue &Index, unsigned &Alignment) { int TrueMaskElt = getOneTrueElt(MaskedOp->getMask()); if (TrueMaskElt < 0) return false; // Get the address of the one scalar element that is specified by the mask // using the appropriate offset from the base pointer. EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType(); Addr = MaskedOp->getBasePtr(); if (TrueMaskElt != 0) { unsigned Offset = TrueMaskElt * EltVT.getStoreSize(); Addr = DAG.getMemBasePlusOffset(Addr, Offset, SDLoc(MaskedOp)); } Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp)); Alignment = MinAlign(MaskedOp->getAlignment(), EltVT.getStoreSize()); return true; } /// If exactly one element of the mask is set for a non-extending masked load, /// it is a scalar load and vector insert. 
/// Note: It is expected that the degenerate cases of an all-zeros or all-ones /// mask have already been optimized in IR, so we don't bother with those here. static SDValue reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { // TODO: This is not x86-specific, so it could be lifted to DAGCombiner. // However, some target hooks may need to be added to know when the transform // is profitable. Endianness would also have to be considered. SDValue Addr, VecIndex; unsigned Alignment; if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment)) return SDValue(); // Load the one scalar element that is specified by the mask using the // appropriate offset from the base pointer. SDLoc DL(ML); EVT VT = ML->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDValue Load = DAG.getLoad(EltVT, DL, ML->getChain(), Addr, ML->getPointerInfo(), Alignment, ML->getMemOperand()->getFlags()); // Insert the loaded element into the appropriate place in the vector. SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, ML->getPassThru(), Load, VecIndex); return DCI.CombineTo(ML, Insert, Load.getValue(1), true); } static SDValue combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode())) return SDValue(); SDLoc DL(ML); EVT VT = ML->getValueType(0); // If we are loading the first and last elements of a vector, it is safe and // always faster to load the whole vector. Replace the masked load with a // vector load and select. unsigned NumElts = VT.getVectorNumElements(); BuildVectorSDNode *MaskBV = cast(ML->getMask()); bool LoadFirstElt = !isNullConstant(MaskBV->getOperand(0)); bool LoadLastElt = !isNullConstant(MaskBV->getOperand(NumElts - 1)); if (LoadFirstElt && LoadLastElt) { SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(), ML->getMemOperand()); SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, ML->getPassThru()); return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true); } // Convert a masked load with a constant mask into a masked load and a select. // This allows the select operation to use a faster kind of select instruction // (for example, vblendvps -> vblendps). // Don't try this if the pass-through operand is already undefined. That would // cause an infinite loop because that's what we're about to create. if (ML->getPassThru().isUndef()) return SDValue(); // The new masked load has an undef pass-through operand. The select uses the // original pass-through operand. SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(), ML->getMask(), DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(), ML->getExtensionType()); SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getPassThru()); return DCI.CombineTo(ML, Blend, NewML.getValue(1), true); } static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { MaskedLoadSDNode *Mld = cast(N); // TODO: Expanding load with constant mask may be optimized as well. if (Mld->isExpandingLoad()) return SDValue(); if (Mld->getExtensionType() == ISD::NON_EXTLOAD) { if (SDValue ScalarLoad = reduceMaskedLoadToScalarLoad(Mld, DAG, DCI)) return ScalarLoad; // TODO: Do some AVX512 subsets benefit from this transform? 
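// Editor's note -- an illustration, not part of the original patch: for a
// v4f32 masked load with constant mask <1,0,0,1>, both the first and last
// elements are live, so the whole 16-byte range must be accessible anyway and
// the combine above emits one plain vector load plus a select against the
// pass-through value.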
if (!Subtarget.hasAVX512()) if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI)) return Blend; } if (Mld->getExtensionType() != ISD::SEXTLOAD) return SDValue(); // Resolve extending loads. EVT VT = Mld->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); EVT LdVT = Mld->getMemoryVT(); SDLoc dl(Mld); assert(LdVT != VT && "Cannot extend to the same type"); unsigned ToSz = VT.getScalarSizeInBits(); unsigned FromSz = LdVT.getScalarSizeInBits(); // From/To sizes and ElemCount must be pow of two. assert (isPowerOf2_32(NumElems * FromSz * ToSz) && "Unexpected size for extending masked load"); unsigned SizeRatio = ToSz / FromSz; assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle. EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), LdVT.getScalarType(), NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); // Convert PassThru value. SDValue WidePassThru = DAG.getBitcast(WideVecVT, Mld->getPassThru()); if (!Mld->getPassThru().isUndef()) { SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; // Can't shuffle using an illegal type. assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && "WideVecVT should be legal"); WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru, DAG.getUNDEF(WideVecVT), ShuffleVec); } // Prepare the new mask. SDValue NewMask; SDValue Mask = Mld->getMask(); if (Mask.getValueType() == VT) { // Mask and original value have the same type. NewMask = DAG.getBitcast(WideVecVT, Mask); SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i) ShuffleVec[i] = NumElems * SizeRatio; NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, DAG.getConstant(0, dl, WideVecVT), ShuffleVec); } else { assert(Mask.getValueType().getVectorElementType() == MVT::i1); unsigned WidenNumElts = NumElems*SizeRatio; unsigned MaskNumElts = VT.getVectorNumElements(); EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WidenNumElts); unsigned NumConcat = WidenNumElts / MaskNumElts; SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); SmallVector Ops(NumConcat, ZeroVal); Ops[0] = Mask; NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); } SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(), Mld->getBasePtr(), NewMask, WidePassThru, Mld->getMemoryVT(), Mld->getMemOperand(), ISD::NON_EXTLOAD); SDValue NewVec = getExtendInVec(/*Signed*/true, dl, VT, WideLd, DAG); return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true); } /// If exactly one element of the mask is set for a non-truncating masked store, /// it is a vector extract and scalar store. /// Note: It is expected that the degenerate cases of an all-zeros or all-ones /// mask have already been optimized in IR, so we don't bother with those here. static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS, SelectionDAG &DAG) { // TODO: This is not x86-specific, so it could be lifted to DAGCombiner. // However, some target hooks may need to be added to know when the transform // is profitable. Endianness would also have to be considered. SDValue Addr, VecIndex; unsigned Alignment; if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment)) return SDValue(); // Extract the one scalar element that is actually being stored. 
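// Editor's note -- an illustration, not part of the original patch: a v4f32
// masked store with mask <0,0,1,0> becomes an EXTRACT_VECTOR_ELT at index 2
// followed by a plain scalar store to BasePtr plus 8 bytes (2 * 4-byte
// elements), with the alignment reduced accordingly.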
SDLoc DL(MS); EVT VT = MS->getValue().getValueType(); EVT EltVT = VT.getVectorElementType(); SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, MS->getValue(), VecIndex); // Store that element at the appropriate offset from the base pointer. return DAG.getStore(MS->getChain(), DL, Extract, Addr, MS->getPointerInfo(), Alignment, MS->getMemOperand()->getFlags()); } static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { MaskedStoreSDNode *Mst = cast(N); if (Mst->isCompressingStore()) return SDValue(); EVT VT = Mst->getValue().getValueType(); if (!Mst->isTruncatingStore()) { if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG)) return ScalarStore; // If the mask value has been legalized to a non-boolean vector, try to // simplify ops leading up to it. We only demand the MSB of each lane. SDValue Mask = Mst->getMask(); if (Mask.getScalarValueSizeInBits() != 1) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits())); if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) return SDValue(N, 0); } // TODO: AVX512 targets should also be able to simplify something like the // pattern above, but that pattern will be different. It will either need to // match setcc more generally or match PCMPGTM later (in tablegen?). return SDValue(); } // Resolve truncating stores. unsigned NumElems = VT.getVectorNumElements(); EVT StVT = Mst->getMemoryVT(); SDLoc dl(Mst); assert(StVT != VT && "Cannot truncate to the same type"); unsigned FromSz = VT.getScalarSizeInBits(); unsigned ToSz = StVT.getScalarSizeInBits(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // The truncating store is legal in some cases. For example // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw // are designated for truncate store. // In this case we don't need any further transformations. if (TLI.isTruncStoreLegal(VT, StVT)) return SDValue(); // From/To sizes and ElemCount must be pow of two. assert (isPowerOf2_32(NumElems * FromSz * ToSz) && "Unexpected size for truncating masked store"); // We are going to use the original vector elt for storing. // Accumulated smaller vector elements must be a multiple of the store size. assert (((NumElems * FromSz) % ToSz) == 0 && "Unexpected ratio for truncating masked store"); unsigned SizeRatio = FromSz / ToSz; assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle. EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue()); SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; // Can't shuffle using an illegal type. assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && "WideVecVT should be legal"); SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec, DAG.getUNDEF(WideVecVT), ShuffleVec); SDValue NewMask; SDValue Mask = Mst->getMask(); if (Mask.getValueType() == VT) { // Mask and original value have the same type. 
NewMask = DAG.getBitcast(WideVecVT, Mask); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) ShuffleVec[i] = NumElems*SizeRatio; NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, DAG.getConstant(0, dl, WideVecVT), ShuffleVec); } else { assert(Mask.getValueType().getVectorElementType() == MVT::i1); unsigned WidenNumElts = NumElems*SizeRatio; unsigned MaskNumElts = VT.getVectorNumElements(); EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WidenNumElts); unsigned NumConcat = WidenNumElts / MaskNumElts; SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); SmallVector Ops(NumConcat, ZeroVal); Ops[0] = Mask; NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); } return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(), NewMask, StVT, Mst->getMemOperand(), false); } static SDValue combineStore(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { StoreSDNode *St = cast(N); EVT VT = St->getValue().getValueType(); EVT StVT = St->getMemoryVT(); SDLoc dl(St); SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Convert a store of vXi1 into a store of iX and a bitcast. if (!Subtarget.hasAVX512() && VT == StVT && VT.isVector() && VT.getVectorElementType() == MVT::i1) { EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements()); StoredVal = DAG.getBitcast(NewVT, StoredVal); return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); } // If this is a store of a scalar_to_vector to v1i1, just use a scalar store. // This will avoid a copy to k-register. if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() && StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR && StoredVal.getOperand(0).getValueType() == MVT::i8) { return DAG.getStore(St->getChain(), dl, StoredVal.getOperand(0), St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); } // Widen v2i1/v4i1 stores to v8i1. if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && Subtarget.hasAVX512()) { unsigned NumConcats = 8 / VT.getVectorNumElements(); SmallVector Ops(NumConcats, DAG.getUNDEF(VT)); Ops[0] = StoredVal; StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); } // Turn vXi1 stores of constants into a scalar store. if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 || VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) && ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) { // If its a v64i1 store without 64-bit support, we need two stores. 
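// Editor's note -- an illustration, not part of the original patch: on a
// 32-bit target, a constant v64i1 store is emitted as two i32 stores -- the
// low 32 mask bits at the base pointer and the high 32 bits at offset 4 --
// with the two chains joined by a TokenFactor, as done below.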
if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl, StoredVal->ops().slice(0, 32)); Lo = combinevXi1ConstantToInteger(Lo, DAG); SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl, StoredVal->ops().slice(32, 32)); Hi = combinevXi1ConstantToInteger(Hi, DAG); unsigned Alignment = St->getAlignment(); SDValue Ptr0 = St->getBasePtr(); SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 4, dl); SDValue Ch0 = DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags()); SDValue Ch1 = DAG.getStore(St->getChain(), dl, Hi, Ptr1, St->getPointerInfo().getWithOffset(4), MinAlign(Alignment, 4U), St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG); return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); } // If we are saving a concatenation of two XMM registers and 32-byte stores // are slow, such as on Sandy Bridge, perform two 16-byte stores. bool Fast; unsigned AddressSpace = St->getAddressSpace(); unsigned Alignment = St->getAlignment(); if (VT.is256BitVector() && StVT == VT && TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, AddressSpace, Alignment, &Fast) && !Fast) { unsigned NumElems = VT.getVectorNumElements(); if (NumElems < 2) return SDValue(); SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, dl); SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, dl); SDValue Ptr0 = St->getBasePtr(); SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, dl); SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags()); SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo().getWithOffset(16), MinAlign(Alignment, 16U), St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } // Optimize trunc store (of multiple scalars) to shuffle and store. // First, pack all of the elements in one place. Next, store to memory // in fewer chunks. if (St->isTruncatingStore() && VT.isVector()) { // Check if we can detect an AVG pattern from the truncation. If yes, // replace the trunc store by a normal store with the result of X86ISD::AVG // instruction. if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, Subtarget, dl)) return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (SDValue Val = detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget, TLI)) return EmitTruncSStore(true /* Signed saturation */, St->getChain(), dl, Val, St->getBasePtr(), St->getMemoryVT(), St->getMemOperand(), DAG); if (SDValue Val = detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), DAG, dl, Subtarget, TLI)) return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), dl, Val, St->getBasePtr(), St->getMemoryVT(), St->getMemOperand(), DAG); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); unsigned FromSz = VT.getScalarSizeInBits(); unsigned ToSz = StVT.getScalarSizeInBits(); // The truncating store is legal in some cases. For example // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw // are designated for truncate store. 
// In this case we don't need any further transformations. if (TLI.isTruncStoreLegalOrCustom(VT, StVT)) return SDValue(); // From, To sizes and ElemCount must be pow of two if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); // We are going to use the original vector elt for storing. // Accumulated smaller vector elements must be a multiple of the store size. if (0 != (NumElems * FromSz) % ToSz) return SDValue(); unsigned SizeRatio = FromSz / ToSz; assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue()); SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec, DAG.getUNDEF(WideVecVT), ShuffleVec); // At this point all of the data is stored at the bottom of the // register. We now need to save it to mem. // Find the largest store unit MVT StoreType = MVT::i8; for (MVT Tp : MVT::integer_valuetypes()) { if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz) StoreType = Tp; } // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64. if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 && (64 <= NumElems * ToSz)) StoreType = MVT::f64; // Bitcast the original vector into a vector of store-size units EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), StoreType, VT.getSizeInBits()/StoreType.getSizeInBits()); assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff); SmallVector Chains; SDValue Ptr = St->getBasePtr(); // Perform one or more big stores into memory. for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) { SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StoreType, ShuffWide, DAG.getIntPtrConstant(i, dl)); SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); Ptr = DAG.getMemBasePlusOffset(Ptr, StoreType.getStoreSize(), dl); Chains.push_back(Ch); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } // Turn load->store of MMX types into GPR load/stores. This avoids clobbering // the FP state in cases where an emms may be missing. // A preferable solution to the general problem is to figure out the right // places to insert EMMS. This qualifies as a quick hack. // Similarly, turn load->store of i64 into double load/stores in 32-bit mode. if (VT.getSizeInBits() != 64) return SDValue(); const Function &F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat); bool F64IsLegal = !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) && isa(St->getValue()) && !cast(St->getValue())->isVolatile() && St->getChain().hasOneUse() && !St->isVolatile()) { LoadSDNode *Ld = cast(St->getValue().getNode()); SmallVector Ops; if (!ISD::isNormalLoad(Ld)) return SDValue(); // If this is not the MMX case, i.e. we are just turning i64 load/store // into f64 load/store, avoid the transformation if there are multiple // uses of the loaded value. 
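// Editor's note -- an illustration, not part of the original patch: with SSE2
// on a 32-bit target, an i64 load that feeds only a store is re-emitted as an
// f64 load/store pair (one 8-byte access) instead of being split into two i32
// load/store pairs.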
if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0)) return SDValue(); SDLoc LdDL(Ld); SDLoc StDL(N); // If we are a 64-bit capable x86, lower to a single movq load/store pair. // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. if (Subtarget.is64Bit() || F64IsLegal) { MVT LdVT = (Subtarget.is64Bit() && (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getMemOperand()); // Make sure new load is placed in same chain order. DAG.makeEquivalentMemoryOrdering(Ld, NewLd); return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(), St->getMemOperand()); } // Otherwise, lower to two pairs of 32-bit loads / stores. SDValue LoAddr = Ld->getBasePtr(); SDValue HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, LdDL); SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, Ld->getPointerInfo().getWithOffset(4), MinAlign(Ld->getAlignment(), 4), Ld->getMemOperand()->getFlags()); // Make sure new loads are placed in same chain order. DAG.makeEquivalentMemoryOrdering(Ld, LoLd); DAG.makeEquivalentMemoryOrdering(Ld, HiLd); LoAddr = St->getBasePtr(); HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, StDL); SDValue LoSt = DAG.getStore(St->getChain(), StDL, LoLd, LoAddr, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); SDValue HiSt = DAG.getStore(St->getChain(), StDL, HiLd, HiAddr, St->getPointerInfo().getWithOffset(4), MinAlign(St->getAlignment(), 4), St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } // This is similar to the above case, but here we handle a scalar 64-bit // integer store that is extracted from a vector on a 32-bit target. // If we have SSE2, then we can treat it like a floating-point double // to get past legalization. The execution dependencies fixup pass will // choose the optimal machine instruction for the store if this really is // an integer or v2f32 rather than an f64. if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() && St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue OldExtract = St->getOperand(1); SDValue ExtOp0 = OldExtract.getOperand(0); unsigned VecSize = ExtOp0.getValueSizeInBits(); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64); SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, BitCast, OldExtract.getOperand(1)); return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); } return SDValue(); } /// Return 'true' if this vector operation is "horizontal" /// and return the operands for the horizontal operation in LHS and RHS. A /// horizontal operation performs the binary operation on successive elements /// of its first operand, then on successive elements of its second operand, /// returning the resulting values in a vector. For example, if /// A = < float a0, float a1, float a2, float a3 > /// and /// B = < float b0, float b1, float b2, float b3 > /// then the result of doing a horizontal operation on A and B is /// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >. 
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form /// A horizontal-op B, for some already available A and B, and if so then LHS is /// set to A, RHS to B, and the routine returns 'true'. static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) { // If either operand is undef, bail out. The binop should be simplified. if (LHS.isUndef() || RHS.isUndef()) return false; // Look for the following pattern: // A = < float a0, float a1, float a2, float a3 > // B = < float b0, float b1, float b2, float b3 > // and // LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6> // RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7> // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 > // which is A horizontal-op B. // At least one of the operands should be a vector shuffle. if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE && RHS.getOpcode() != ISD::VECTOR_SHUFFLE) return false; MVT VT = LHS.getSimpleValueType(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for horizontal add/sub"); // View LHS in the form // LHS = VECTOR_SHUFFLE A, B, LMask // If LHS is not a shuffle, then pretend it is the identity shuffle: // LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1> // NOTE: A default initialized SDValue represents an UNDEF of type VT. unsigned NumElts = VT.getVectorNumElements(); SDValue A, B; SmallVector LMask(NumElts); if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) { if (!LHS.getOperand(0).isUndef()) A = LHS.getOperand(0); if (!LHS.getOperand(1).isUndef()) B = LHS.getOperand(1); ArrayRef Mask = cast(LHS.getNode())->getMask(); llvm::copy(Mask, LMask.begin()); } else { A = LHS; for (unsigned i = 0; i != NumElts; ++i) LMask[i] = i; } // Likewise, view RHS in the form // RHS = VECTOR_SHUFFLE C, D, RMask SDValue C, D; SmallVector RMask(NumElts); if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) { if (!RHS.getOperand(0).isUndef()) C = RHS.getOperand(0); if (!RHS.getOperand(1).isUndef()) D = RHS.getOperand(1); ArrayRef Mask = cast(RHS.getNode())->getMask(); llvm::copy(Mask, RMask.begin()); } else { C = RHS; for (unsigned i = 0; i != NumElts; ++i) RMask[i] = i; } // If A and B occur in reverse order in RHS, then canonicalize by commuting // RHS operands and shuffle mask. if (A != C) { std::swap(C, D); ShuffleVectorSDNode::commuteMask(RMask); } // Check that the shuffles are both shuffling the same vectors. if (!(A == C && B == D)) return false; // LHS and RHS are now: // LHS = shuffle A, B, LMask // RHS = shuffle A, B, RMask // Check that the masks correspond to performing a horizontal operation. // AVX defines horizontal add/sub to operate independently on 128-bit lanes, // so we just repeat the inner loop if this is a 256-bit op. unsigned Num128BitChunks = VT.getSizeInBits() / 128; unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks; assert((NumEltsPer128BitChunk % 2 == 0) && "Vector type should have an even number of elements in each lane"); for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) { for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) { // Ignore undefined components. int LIdx = LMask[i + j], RIdx = RMask[i + j]; if (LIdx < 0 || RIdx < 0 || (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) || (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts))) continue; // The low half of the 128-bit result must choose from A. // The high half of the 128-bit result must choose from B, // unless B is undef. In that case, we are always choosing from A. 
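// Editor's note -- a worked example, not part of the original patch: for a
// v4f32 HADD, LMask = <0,2,4,6> and RMask = <1,3,5,7>, so result lane i is
// elt(2i) op elt(2i+1) of concat(A,B), drawn from A for the low lanes and
// from B for the high lanes -- exactly the successive-element check performed
// below.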
unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2; unsigned Src = B.getNode() ? i >= NumEltsPer64BitChunk : 0; // Check that successive elements are being operated on. If not, this is // not a horizontal operation. int Index = 2 * (i % NumEltsPer64BitChunk) + NumElts * Src + j; if (!(LIdx == Index && RIdx == Index + 1) && !(IsCommutative && LIdx == Index + 1 && RIdx == Index)) return false; } } LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it. RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it. return true; } /// Do target-specific dag combines on floating-point adds/subs. static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); bool IsFadd = N->getOpcode() == ISD::FADD; auto HorizOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB; assert((IsFadd || N->getOpcode() == ISD::FSUB) && "Wrong opcode"); // Try to synthesize horizontal add/sub from adds/subs of shuffles. if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, IsFadd) && shouldUseHorizontalOp(LHS == RHS, DAG, Subtarget)) return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS); return SDValue(); } /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify /// the codegen. /// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) ) /// TODO: This overlaps with the generic combiner's visitTRUNCATE. Remove /// anything that is guaranteed to be transformed by DAGCombiner. static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL) { assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode"); SDValue Src = N->getOperand(0); unsigned Opcode = Src.getOpcode(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = N->getValueType(0); EVT SrcVT = Src.getValueType(); auto IsFreeTruncation = [VT](SDValue Op) { unsigned TruncSizeInBits = VT.getScalarSizeInBits(); // See if this has been extended from a smaller/equal size to // the truncation size, allowing a truncation to combine with the extend. unsigned Opcode = Op.getOpcode(); if ((Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND) && Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits) return true; // See if this is a single use constant which can be constant folded. SDValue BC = peekThroughOneUseBitcasts(Op); return ISD::isBuildVectorOfConstantSDNodes(BC.getNode()); }; auto TruncateArithmetic = [&](SDValue N0, SDValue N1) { SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0); SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1); return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1); }; // Don't combine if the operation has other uses. if (!Src.hasOneUse()) return SDValue(); // Only support vector truncation for now. // TODO: i64 scalar math would benefit as well. if (!VT.isVector()) return SDValue(); // In most cases its only worth pre-truncating if we're only facing the cost // of one truncation. // i.e. if one of the inputs will constant fold or the input is repeated. 
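// Editor's note -- a worked example, not part of the original patch: for
// trunc.v8i16(add.v8i32 X, splat(7)), the splat constant-folds after
// truncation, so rewriting to add.v8i16(trunc(X), splat(7)) pays for only one
// real truncate while the add itself runs at the narrower width.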
switch (Opcode) { case ISD::AND: case ISD::XOR: case ISD::OR: { SDValue Op0 = Src.getOperand(0); SDValue Op1 = Src.getOperand(1); if (TLI.isOperationLegalOrPromote(Opcode, VT) && (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1))) return TruncateArithmetic(Op0, Op1); break; } case ISD::MUL: // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its // better to truncate if we have the chance. if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) && !TLI.isOperationLegal(Opcode, SrcVT)) return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1)); LLVM_FALLTHROUGH; case ISD::ADD: { SDValue Op0 = Src.getOperand(0); SDValue Op1 = Src.getOperand(1); if (TLI.isOperationLegal(Opcode, VT) && (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1))) return TruncateArithmetic(Op0, Op1); break; } case ISD::SUB: { // TODO: ISD::SUB We are conservative and require both sides to be freely // truncatable to avoid interfering with combineSubToSubus. SDValue Op0 = Src.getOperand(0); SDValue Op1 = Src.getOperand(1); if (TLI.isOperationLegal(Opcode, VT) && (Op0 == Op1 || (IsFreeTruncation(Op0) && IsFreeTruncation(Op1)))) return TruncateArithmetic(Op0, Op1); break; } } return SDValue(); } /// Truncate using ISD::AND mask and X86ISD::PACKUS. static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue In = N->getOperand(0); EVT InVT = In.getValueType(); EVT InSVT = InVT.getVectorElementType(); EVT OutVT = N->getValueType(0); EVT OutSVT = OutVT.getVectorElementType(); // Split a long vector into vectors of legal type and mask to unset all bits // that won't appear in the result to prevent saturation. // TODO - we should be doing this at the maximum legal size but this is // causing regressions where we're concatenating back to max width just to // perform the AND and then extracting back again..... unsigned NumSubRegs = InVT.getSizeInBits() / 128; unsigned NumSubRegElts = 128 / InSVT.getSizeInBits(); EVT SubRegVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubRegElts); SmallVector SubVecs(NumSubRegs); APInt Mask = APInt::getLowBitsSet(InSVT.getSizeInBits(), OutSVT.getSizeInBits()); SDValue MaskVal = DAG.getConstant(Mask, DL, SubRegVT); for (unsigned i = 0; i < NumSubRegs; i++) { SDValue Sub = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubRegVT, In, DAG.getIntPtrConstant(i * NumSubRegElts, DL)); SubVecs[i] = DAG.getNode(ISD::AND, DL, SubRegVT, Sub, MaskVal); } In = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, SubVecs); return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget); } /// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS. static SDValue combineVectorTruncationWithPACKSS(SDNode *N, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue In = N->getOperand(0); EVT InVT = In.getValueType(); EVT OutVT = N->getValueType(0); In = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, InVT, In, DAG.getValueType(OutVT)); return truncateVectorWithPACK(X86ISD::PACKSS, OutVT, In, DL, DAG, Subtarget); } /// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into /// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type /// legalization the truncation will be translated into a BUILD_VECTOR with each /// element that is extracted from a vector and then truncated, and it is /// difficult to do this optimization based on them. 
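// Editor's illustrative sketch -- not part of the original patch;
// combineVectorTruncation, documented above, follows after this aside. A
// scalar model of the PACKUS-based truncation: each lane is first masked down
// to the bits that survive the truncation (the ISD::AND step in
// combineVectorTruncationWithPACKUS above), so the PACKUS saturation can no
// longer fire and the pack degenerates into a pure truncation. The helper
// name is hypothetical.
static inline unsigned char packusTruncLaneSketch(unsigned Lane) {
  unsigned Masked = Lane & 0xFFu; // unset bits that won't appear in the result
  // PACKUS would clamp to 255, but Masked is already <= 255, so the clamp is
  // the identity and the lane is simply truncated.
  return (unsigned char)(Masked > 0xFFu ? 0xFFu : Masked);
}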
static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT OutVT = N->getValueType(0); if (!OutVT.isVector()) return SDValue(); SDValue In = N->getOperand(0); if (!In.getValueType().isSimple()) return SDValue(); EVT InVT = In.getValueType(); unsigned NumElems = OutVT.getVectorNumElements(); // TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on // SSE2, and we need to take care of it specially. // AVX512 provides vpmovdb. if (!Subtarget.hasSSE2() || Subtarget.hasAVX2()) return SDValue(); EVT OutSVT = OutVT.getVectorElementType(); EVT InSVT = InVT.getVectorElementType(); if (!((InSVT == MVT::i32 || InSVT == MVT::i64) && (OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) && NumElems >= 8)) return SDValue(); // SSSE3's pshufb results in less instructions in the cases below. if (Subtarget.hasSSSE3() && NumElems == 8 && ((OutSVT == MVT::i8 && InSVT != MVT::i64) || (InSVT == MVT::i32 && OutSVT == MVT::i16))) return SDValue(); SDLoc DL(N); // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to // truncate 2 x v4i32 to v8i16. if (Subtarget.hasSSE41() || OutSVT == MVT::i8) return combineVectorTruncationWithPACKUS(N, DL, Subtarget, DAG); if (InSVT == MVT::i32) return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG); return SDValue(); } /// This function transforms vector truncation of 'extended sign-bits' or /// 'extended zero-bits' values. /// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations. static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // Requires SSE2 but AVX512 has fast truncate. if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) return SDValue(); if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple()) return SDValue(); SDValue In = N->getOperand(0); if (!In.getValueType().isSimple()) return SDValue(); MVT VT = N->getValueType(0).getSimpleVT(); MVT SVT = VT.getScalarType(); MVT InVT = In.getValueType().getSimpleVT(); MVT InSVT = InVT.getScalarType(); // Check we have a truncation suited for PACKSS/PACKUS. if (!VT.is128BitVector() && !VT.is256BitVector()) return SDValue(); if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32) return SDValue(); if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64) return SDValue(); unsigned NumPackedSignBits = std::min(SVT.getSizeInBits(), 16); unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8; // Use PACKUS if the input has zero-bits that extend all the way to the // packed/truncated value. e.g. masks, zext_in_reg, etc. KnownBits Known = DAG.computeKnownBits(In); unsigned NumLeadingZeroBits = Known.countMinLeadingZeros(); if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits)) return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget); // Use PACKSS if the input has sign-bits that extend all the way to the // packed/truncated value. e.g. Comparison result, sext_in_reg, etc. unsigned NumSignBits = DAG.ComputeNumSignBits(In); if (NumSignBits > (InSVT.getSizeInBits() - NumPackedSignBits)) return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget); return SDValue(); } // Try to form a MULHU or MULHS node by looking for // (trunc (srl (mul ext, ext), 16)) // TODO: This is X86 specific because we want to be able to handle wide types // before type legalization. 
But we can only do it if the vector will be // legalized via widening/splitting. Type legalization can't handle promotion // of a MULHU/MULHS. There isn't a way to convey this to the generic DAG // combiner. static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // First instruction should be a right shift of a multiply. if (Src.getOpcode() != ISD::SRL || Src.getOperand(0).getOpcode() != ISD::MUL) return SDValue(); if (!Subtarget.hasSSE2()) return SDValue(); // Only handle vXi16 types that are at least 128-bits unless they will be // widened. if (!VT.isVector() || VT.getVectorElementType() != MVT::i16 || (!ExperimentalVectorWideningLegalization && VT.getVectorNumElements() < 8)) return SDValue(); // Input type should be vXi32. EVT InVT = Src.getValueType(); if (InVT.getVectorElementType() != MVT::i32) return SDValue(); // Need a shift by 16. APInt ShiftAmt; if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) || ShiftAmt != 16) return SDValue(); SDValue LHS = Src.getOperand(0).getOperand(0); SDValue RHS = Src.getOperand(0).getOperand(1); unsigned ExtOpc = LHS.getOpcode(); if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) || RHS.getOpcode() != ExtOpc) return SDValue(); // Peek through the extends. LHS = LHS.getOperand(0); RHS = RHS.getOperand(0); // Ensure the input types match. if (LHS.getValueType() != VT || RHS.getValueType() != VT) return SDValue(); unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU; return DAG.getNode(Opc, DL, VT, LHS, RHS); } // Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes // from one vector with signed bytes from another vector, adds together // adjacent pairs of 16-bit products, and saturates the result before // truncating to 16-bits. // // Which looks something like this: // (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))), // (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B)))))))) static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL) { if (!VT.isVector() || !Subtarget.hasSSSE3()) return SDValue(); unsigned NumElems = VT.getVectorNumElements(); EVT ScalarVT = VT.getVectorElementType(); if (ScalarVT != MVT::i16 || NumElems < 8 || !isPowerOf2_32(NumElems)) return SDValue(); SDValue SSatVal = detectSSatPattern(In, VT); if (!SSatVal || SSatVal.getOpcode() != ISD::ADD) return SDValue(); // Ok this is a signed saturation of an ADD. See if this ADD is adding pairs // of multiplies from even/odd elements. SDValue N0 = SSatVal.getOperand(0); SDValue N1 = SSatVal.getOperand(1); if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); SDValue N10 = N1.getOperand(0); SDValue N11 = N1.getOperand(1); // TODO: Handle constant vectors and use knownbits/computenumsignbits? // Canonicalize zero_extend to LHS. if (N01.getOpcode() == ISD::ZERO_EXTEND) std::swap(N00, N01); if (N11.getOpcode() == ISD::ZERO_EXTEND) std::swap(N10, N11); // Ensure we have a zero_extend and a sign_extend. if (N00.getOpcode() != ISD::ZERO_EXTEND || N01.getOpcode() != ISD::SIGN_EXTEND || N10.getOpcode() != ISD::ZERO_EXTEND || N11.getOpcode() != ISD::SIGN_EXTEND) return SDValue(); // Peek through the extends. N00 = N00.getOperand(0); N01 = N01.getOperand(0); N10 = N10.getOperand(0); N11 = N11.getOperand(0); // Ensure the extend is from vXi8. 
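// Editor's note -- a worked example, not part of the original patch: for
// unsigned bytes a0,a1 from one vector and signed bytes b0,b1 from the other,
// PMADDUBSW computes ssat16(a0*b0 + a1*b1) per i16 lane -- the same even/odd
// zext*sext multiply-add chain being matched element by element below.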
if (N00.getValueType().getVectorElementType() != MVT::i8 || N01.getValueType().getVectorElementType() != MVT::i8 || N10.getValueType().getVectorElementType() != MVT::i8 || N11.getValueType().getVectorElementType() != MVT::i8) return SDValue(); // All inputs should be build_vectors. if (N00.getOpcode() != ISD::BUILD_VECTOR || N01.getOpcode() != ISD::BUILD_VECTOR || N10.getOpcode() != ISD::BUILD_VECTOR || N11.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); // N00/N10 are zero extended. N01/N11 are sign extended. // For each element, we need to ensure we have an odd element from one vector // multiplied by the odd element of another vector and the even element from // one of the same vectors being multiplied by the even element from the // other vector. So we need to make sure for each element i, this operator // is being performed: // A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1] SDValue ZExtIn, SExtIn; for (unsigned i = 0; i != NumElems; ++i) { SDValue N00Elt = N00.getOperand(i); SDValue N01Elt = N01.getOperand(i); SDValue N10Elt = N10.getOperand(i); SDValue N11Elt = N11.getOperand(i); // TODO: Be more tolerant to undefs. if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); auto *ConstN00Elt = dyn_cast(N00Elt.getOperand(1)); auto *ConstN01Elt = dyn_cast(N01Elt.getOperand(1)); auto *ConstN10Elt = dyn_cast(N10Elt.getOperand(1)); auto *ConstN11Elt = dyn_cast(N11Elt.getOperand(1)); if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt) return SDValue(); unsigned IdxN00 = ConstN00Elt->getZExtValue(); unsigned IdxN01 = ConstN01Elt->getZExtValue(); unsigned IdxN10 = ConstN10Elt->getZExtValue(); unsigned IdxN11 = ConstN11Elt->getZExtValue(); // Add is commutative so indices can be reordered. if (IdxN00 > IdxN10) { std::swap(IdxN00, IdxN10); std::swap(IdxN01, IdxN11); } // N0 indices be the even element. N1 indices must be the next odd element. if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i || IdxN11 != 2 * i + 1) return SDValue(); SDValue N00In = N00Elt.getOperand(0); SDValue N01In = N01Elt.getOperand(0); SDValue N10In = N10Elt.getOperand(0); SDValue N11In = N11Elt.getOperand(0); // First time we find an input capture it. if (!ZExtIn) { ZExtIn = N00In; SExtIn = N01In; } if (ZExtIn != N00In || SExtIn != N01In || ZExtIn != N10In || SExtIn != N11In) return SDValue(); } auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef Ops) { // Shrink by adding truncate nodes and let DAGCombine fold with the // sources. EVT InVT = Ops[0].getValueType(); assert(InVT.getScalarType() == MVT::i8 && "Unexpected scalar element type"); assert(InVT == Ops[1].getValueType() && "Operands' types mismatch"); EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, InVT.getVectorNumElements() / 2); return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]); }; return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn }, PMADDBuilder); } static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); SDValue Src = N->getOperand(0); SDLoc DL(N); // Attempt to pre-truncate inputs to arithmetic ops instead. if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL)) return V; // Try to detect AVG pattern first. 
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) return Avg; // Try to detect PMADD. if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL)) return PMAdd; // Try to combine truncation with signed/unsigned saturation. if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget)) return Val; // Try to combine PMULHUW/PMULHW for vXi16. if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget)) return V; // The bitcast source is a direct mmx result. // Detect bitcasts from i32 to x86mmx. if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { SDValue BCSrc = Src.getOperand(0); if (BCSrc.getValueType() == MVT::x86mmx) return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc); } // Try to truncate extended sign/zero bits with PACKSS/PACKUS. if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget)) return V; return combineVectorTruncation(N, DAG, Subtarget); } /// Returns the negated value if the node \p N flips the sign of an FP value. /// /// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000) /// or FSUB(0, x). /// AVX512F does not have FXOR, so FNEG is lowered as /// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))). /// In this case we go through all bitcasts. /// This also recognizes a splat of a negated value and returns the splat of /// that value. static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { if (N->getOpcode() == ISD::FNEG) return N->getOperand(0); SDValue Op = peekThroughBitcasts(SDValue(N, 0)); auto VT = Op->getValueType(0); if (auto SVOp = dyn_cast<ShuffleVectorSDNode>(Op.getNode())) { // For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate // of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here. if (!SVOp->getOperand(1).isUndef()) return SDValue(); if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode())) return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT), SVOp->getMask()); return SDValue(); } unsigned Opc = Op.getOpcode(); if (Opc == ISD::INSERT_VECTOR_ELT) { // Negate of INSERT_VECTOR_ELT(UNDEF, V, INDEX) is INSERT_VECTOR_ELT(UNDEF, // -V, INDEX). SDValue InsVector = Op.getOperand(0); SDValue InsVal = Op.getOperand(1); if (!InsVector.isUndef()) return SDValue(); if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode())) return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector, NegInsVal, Op.getOperand(2)); return SDValue(); } if (Opc != X86ISD::FXOR && Opc != ISD::XOR && Opc != ISD::FSUB) return SDValue(); SDValue Op1 = peekThroughBitcasts(Op.getOperand(1)); if (!Op1.getValueType().isFloatingPoint()) return SDValue(); SDValue Op0 = peekThroughBitcasts(Op.getOperand(0)); // For XOR and FXOR, we want to check if constant bits of Op1 are sign bit // masks. For FSUB, we have to check if constant bits of Op0 are sign bit // masks and hence we swap the operands. if (Opc == ISD::FSUB) std::swap(Op0, Op1); APInt UndefElts; SmallVector<APInt, 16> EltBits; // Extract constant bits and see if they are all sign bit masks. Ignore the // undef elements. if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(), UndefElts, EltBits, /* AllowWholeUndefs */ true, /* AllowPartialUndefs */ false)) { for (unsigned I = 0, E = EltBits.size(); I < E; I++) if (!UndefElts[I] && !EltBits[I].isSignMask()) return SDValue(); return peekThroughBitcasts(Op0); } return SDValue(); } /// Do target-specific dag combines on floating point negations.
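/// For example, with FMA available and no signed zeros, FNEG(FMUL(a, b)) can be /// rewritten as FNMSUB(a, b, 0.0), and FNEG(FMA(a, b, c)) as FNMSUB(a, b, c), /// folding the negation into the fused opcode (see the opcode switch below).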
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT OrigVT = N->getValueType(0); SDValue Arg = isFNEG(DAG, N); if (!Arg) return SDValue(); EVT VT = Arg.getValueType(); EVT SVT = VT.getScalarType(); SDLoc DL(N); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); // If we're negating an FMUL node on a target with FMA, then we can avoid the // use of a constant by performing (-0 - A*B) instead. // FIXME: Check rounding control flags as well once it becomes available. if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) && Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) { SDValue Zero = DAG.getConstantFP(0.0, DL, VT); SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0), Arg.getOperand(1), Zero); return DAG.getBitcast(OrigVT, NewNode); } // If we're negating an FMA node, then we can adjust the // instruction to include the extra negation. unsigned NewOpcode = 0; if (Arg.hasOneUse() && Subtarget.hasAnyFMA()) { switch (Arg.getOpcode()) { case ISD::FMA: NewOpcode = X86ISD::FNMSUB; break; case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break; case X86ISD::FNMADD: NewOpcode = X86ISD::FMSUB; break; case X86ISD::FNMSUB: NewOpcode = ISD::FMA; break; case X86ISD::FMADD_RND: NewOpcode = X86ISD::FNMSUB_RND; break; case X86ISD::FMSUB_RND: NewOpcode = X86ISD::FNMADD_RND; break; case X86ISD::FNMADD_RND: NewOpcode = X86ISD::FMSUB_RND; break; case X86ISD::FNMSUB_RND: NewOpcode = X86ISD::FMADD_RND; break; // We can't handle a scalar intrinsic node here because it would only // invert one element and not the whole vector. But we could try to handle // a negation of the lower element only. } } if (NewOpcode) return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, Arg.getNode()->ops())); return SDValue(); } static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = N->getSimpleValueType(0); // If we have integer vector types available, use the integer opcodes. if (!VT.isVector() || !Subtarget.hasSSE2()) return SDValue(); SDLoc dl(N); unsigned IntBits = VT.getScalarSizeInBits(); MVT IntSVT = MVT::getIntegerVT(IntBits); MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits); SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0)); SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1)); unsigned IntOpcode; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected FP logic op"); case X86ISD::FOR: IntOpcode = ISD::OR; break; case X86ISD::FXOR: IntOpcode = ISD::XOR; break; case X86ISD::FAND: IntOpcode = ISD::AND; break; case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; } SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); return DAG.getBitcast(VT, IntOp); } /// Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val) static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() != ISD::XOR) return SDValue(); SDValue LHS = N->getOperand(0); auto *RHSC = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (!RHSC || RHSC->getZExtValue() != 1 || LHS->getOpcode() != X86ISD::SETCC) return SDValue(); X86::CondCode NewCC = X86::GetOppositeBranchCondition( X86::CondCode(LHS->getConstantOperandVal(0))); SDLoc DL(N); return getSETCC(NewCC, LHS->getOperand(1), DL, DAG); } static SDValue combineXor(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { // If this is SSE1 only, convert to FXOR to avoid scalarization.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && N->getValueType(0) == MVT::v4i32) { return DAG.getBitcast( MVT::v4i32, DAG.getNode(X86ISD::FXOR, SDLoc(N), MVT::v4f32, DAG.getBitcast(MVT::v4f32, N->getOperand(0)), DAG.getBitcast(MVT::v4f32, N->getOperand(1)))); } if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget)) return Cmp; if (DCI.isBeforeLegalizeOps()) return SDValue(); if (SDValue SetCC = foldXor1SetCC(N, DAG)) return SetCC; if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG)) return RV; if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) return FPLogic; return combineFneg(N, DAG, Subtarget); } static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); EVT VT = N->getValueType(0); unsigned NumBits = VT.getSizeInBits(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // TODO - Constant Folding. if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) { // Reduce Cst1 to the bottom 16 bits. // NOTE: SimplifyDemandedBits won't do this for constants. const APInt &Val1 = Cst1->getAPIntValue(); APInt MaskedVal1 = Val1 & 0xFFFF; if (MaskedVal1 != Val1) return DAG.getNode(X86ISD::BEXTR, SDLoc(N), VT, Op0, DAG.getConstant(MaskedVal1, SDLoc(N), VT)); } // Only the bottom 16 bits of the control bits are required. APInt DemandedMask(APInt::getLowBitsSet(NumBits, 16)); if (TLI.SimplifyDemandedBits(Op1, DemandedMask, DCI)) return SDValue(N, 0); return SDValue(); } static bool isNullFPScalarOrVectorConst(SDValue V) { return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode()); } /// If a value is a scalar FP zero or a vector FP zero (potentially including /// undefined elements), return a zero constant that may be used to fold away /// that value. In the case of a vector, the returned constant will not contain /// undefined elements even if the input parameter does. This makes it suitable /// to be used as a replacement operand with operations (eg, bitwise-and) where /// an undef should not propagate. static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (!isNullFPScalarOrVectorConst(V)) return SDValue(); if (V.getValueType().isVector()) return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V)); return V; } static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); // Vector types are handled in combineANDXORWithAllOnesIntoANDNP(). if (!((VT == MVT::f32 && Subtarget.hasSSE1()) || (VT == MVT::f64 && Subtarget.hasSSE2()) || (VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2()))) return SDValue(); auto isAllOnesConstantFP = [](SDValue V) { if (V.getSimpleValueType().isVector()) return ISD::isBuildVectorAllOnes(V.getNode()); auto *C = dyn_cast<ConstantFPSDNode>(V); return C && C->getConstantFPValue()->isAllOnesValue(); }; // fand (fxor X, -1), Y --> fandn X, Y if (N0.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N0.getOperand(1))) return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1); // fand X, (fxor Y, -1) --> fandn Y, X if (N1.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N1.getOperand(1))) return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0); return SDValue(); } /// Do target-specific dag combines on X86ISD::FAND nodes.
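/// FAND(0.0, x) and FAND(x, 0.0) both fold to 0.0; when integer vector types /// are available, the node is also rewritten as an integer AND.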
static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // FAND(0.0, x) -> 0.0 if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget)) return V; // FAND(x, 0.0) -> 0.0 if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget)) return V; if (SDValue V = combineFAndFNotToFAndn(N, DAG, Subtarget)) return V; return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FANDN nodes. static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // FANDN(0.0, x) -> x if (isNullFPScalarOrVectorConst(N->getOperand(0))) return N->getOperand(1); // FANDN(x, 0.0) -> 0.0 if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget)) return V; return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes. static SDValue combineFOr(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); // F[X]OR(0.0, x) -> x if (isNullFPScalarOrVectorConst(N->getOperand(0))) return N->getOperand(1); // F[X]OR(x, 0.0) -> x if (isNullFPScalarOrVectorConst(N->getOperand(1))) return N->getOperand(0); if (SDValue NewVal = combineFneg(N, DAG, Subtarget)) return NewVal; return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes. static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) { assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX); // Only perform optimizations if UnsafeMath is used. if (!DAG.getTarget().Options.UnsafeFPMath) return SDValue(); // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes // into FMINC and FMAXC, which are Commutative operations. unsigned NewOp = 0; switch (N->getOpcode()) { default: llvm_unreachable("unknown opcode"); case X86ISD::FMIN: NewOp = X86ISD::FMINC; break; case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break; } return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), N->getOperand(0), N->getOperand(1)); } static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (Subtarget.useSoftFloat()) return SDValue(); // TODO: If an operand is already known to be a NaN or not a NaN, this // should be an optional swap and FMAX/FMIN. EVT VT = N->getValueType(0); if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || (Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) || (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64)))) return SDValue(); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDLoc DL(N); auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN; // If we don't have to respect NaN inputs, this is a direct translation to x86 // min/max instructions. if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs()) return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); // If we have to respect NaN inputs, this takes at least 3 instructions. // Favor a library call when operating on a scalar and minimizing code size. 
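// (The fallback sequence built below is three nodes: a min/max, an unordered // self-compare of Op0, and a select.)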
if (!VT.isVector() && DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); EVT SetCCType = DAG.getTargetLoweringInfo().getSetCCResultType( DAG.getDataLayout(), *DAG.getContext(), VT); // There are 4 possibilities involving NaN inputs, and these are the required // outputs: // Op1 // Num NaN // ---------------- // Num | Max | Op0 | // Op0 ---------------- // NaN | Op1 | NaN | // ---------------- // // The SSE FP max/min instructions were not designed for this case, but rather // to implement: // Min = Op1 < Op0 ? Op1 : Op0 // Max = Op1 > Op0 ? Op1 : Op0 // // So they always return Op0 if either input is a NaN. However, we can still // use those instructions for fmaxnum by selecting away a NaN input. // If either operand is NaN, the 2nd source operand (Op0) is passed through. SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0); SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType, Op0, Op0, ISD::SETUO); // If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands // are NaN, the NaN value of Op1 is the result. return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax); } static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { EVT VT = N->getValueType(0); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt KnownUndef, KnownZero; APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, KnownZero, DCI)) return SDValue(N, 0); return SDValue(); } /// Do target-specific dag combines on X86ISD::ANDNP nodes. static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { MVT VT = N->getSimpleValueType(0); // ANDNP(0, x) -> x if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) return N->getOperand(1); // ANDNP(x, 0) -> 0 if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode())) return DAG.getConstant(0, SDLoc(N), VT); // Turn ANDNP back to AND if input is inverted. if (VT.isVector() && N->getOperand(0).getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N->getOperand(0).getOperand(1).getNode())) { return DAG.getNode(ISD::AND, SDLoc(N), VT, N->getOperand(0).getOperand(0), N->getOperand(1)); } // Attempt to recursively combine a bitmask ANDNP with shuffles. if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } return SDValue(); } static SDValue combineBT(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // BT ignores high bits in the bit index operand. unsigned BitWidth = N1.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); if (SDValue DemandedN1 = DAG.GetDemandedBits(N1, DemandedMask)) return DAG.getNode(X86ISD::BT, SDLoc(N), MVT::i32, N0, DemandedN1); return SDValue(); } // Try to combine sext_in_reg of a cmov of constants by extending the constants. static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT ExtraVT = cast<VTSDNode>(N1)->getVT(); if (ExtraVT != MVT::i16) return SDValue(); // Look through single use any_extends. if (N0.getOpcode() == ISD::ANY_EXTEND && N0.hasOneUse()) N0 = N0.getOperand(0); // See if we have a single use cmov.
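// The pattern being matched is (sext_in_reg (cmov C0, C1)), where both arms // are constants; the sign extension is applied to the constants instead.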
if (N0.getOpcode() != X86ISD::CMOV || !N0.hasOneUse()) return SDValue(); SDValue CMovOp0 = N0.getOperand(0); SDValue CMovOp1 = N0.getOperand(1); // Make sure both operands are constants. if (!isa<ConstantSDNode>(CMovOp0.getNode()) || !isa<ConstantSDNode>(CMovOp1.getNode())) return SDValue(); SDLoc DL(N); // If we looked through an any_extend above, add one to the constants. if (N0.getValueType() != VT) { CMovOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, VT, CMovOp0); CMovOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, VT, CMovOp1); } CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, CMovOp0, N1); CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, CMovOp1, N1); return DAG.getNode(X86ISD::CMOV, DL, VT, CMovOp0, CMovOp1, N0.getOperand(2), N0.getOperand(3)); } static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (SDValue V = combineSextInRegCmov(N, DAG)) return V; EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT ExtraVT = cast<VTSDNode>(N1)->getVT(); SDLoc dl(N); // SIGN_EXTEND_INREG to v4i64 is an expensive operation on // both SSE and AVX2 since there is no sign-extended shift right // operation on a vector with 64-bit elements. // (sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) -> // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT))) if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND)) { SDValue N00 = N0.getOperand(0); // EXTLOAD has a better solution on AVX2, // it may be replaced with X86ISD::VSEXT node. if (N00.getOpcode() == ISD::LOAD && Subtarget.hasInt256()) if (!ISD::isNormalLoad(N00.getNode())) return SDValue(); if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) { SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1); return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp); } } return SDValue(); } /// sext(add_nsw(x, C)) --> add(sext(x), C_sext) /// zext(add_nuw(x, C)) --> add(zext(x), C_zext) /// Promoting a sign/zero extension ahead of a no overflow 'add' exposes /// opportunities to combine math ops, use an LEA, or use a complex addressing /// mode. This can eliminate extend, add, and shift instructions. static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (Ext->getOpcode() != ISD::SIGN_EXTEND && Ext->getOpcode() != ISD::ZERO_EXTEND) return SDValue(); // TODO: This should be valid for other integer types. EVT VT = Ext->getValueType(0); if (VT != MVT::i64) return SDValue(); SDValue Add = Ext->getOperand(0); if (Add.getOpcode() != ISD::ADD) return SDValue(); bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND; bool NSW = Add->getFlags().hasNoSignedWrap(); bool NUW = Add->getFlags().hasNoUnsignedWrap(); // We need an 'add nsw' feeding into the 'sext' or 'add nuw' feeding // into the 'zext'. if ((Sext && !NSW) || (!Sext && !NUW)) return SDValue(); // Having a constant operand to the 'add' ensures that we are not increasing // the instruction count because the constant is extended for free below. // A constant operand can also become the displacement field of an LEA. auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1)); if (!AddOp1) return SDValue(); // Don't make the 'add' bigger if there's no hope of combining it with some // other 'add' or 'shl' instruction. // TODO: It may be profitable to generate simpler LEA instructions in place // of single 'add' instructions, but the cost model for selecting an LEA // currently has a high threshold.
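// Scan the users of the extend: an ADD or SHL user suggests the wider add // created below can fold into an LEA.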
bool HasLEAPotential = false; for (auto *User : Ext->uses()) { if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) { HasLEAPotential = true; break; } } if (!HasLEAPotential) return SDValue(); // Everything looks good, so pull the '{s|z}ext' ahead of the 'add'. int64_t AddConstant = Sext ? AddOp1->getSExtValue() : AddOp1->getZExtValue(); SDValue AddOp0 = Add.getOperand(0); SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0); SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT); // The wider add is guaranteed to not wrap because both operands are // sign-extended. SDNodeFlags Flags; Flags.setNoSignedWrap(NSW); Flags.setNoUnsignedWrap(NUW); return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags); } // If we face {ANY,SIGN,ZERO}_EXTEND that is applied to a CMOV with constant // operands and the result of the CMOV is not used anywhere else, promote the // CMOV itself instead of promoting its result. This could be beneficial, because: // 1) X86TargetLowering::EmitLoweredSelect later can do merging of two // (or more) pseudo-CMOVs only when they go one-after-another and // getting rid of result extension code after CMOV will help that. // 2) Promotion of constant CMOV arguments is free, hence the // {ANY,SIGN,ZERO}_EXTEND will just be deleted. // 3) 16-bit CMOV encoding is 4 bytes, 32-bit CMOV is 3 bytes, so this // promotion is also good in terms of code-size. // (64-bit CMOV is 4 bytes, that's why we don't do 32-bit => 64-bit // promotion). static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) { SDValue CMovN = Extend->getOperand(0); if (CMovN.getOpcode() != X86ISD::CMOV || !CMovN.hasOneUse()) return SDValue(); EVT TargetVT = Extend->getValueType(0); unsigned ExtendOpcode = Extend->getOpcode(); SDLoc DL(Extend); EVT VT = CMovN.getValueType(); SDValue CMovOp0 = CMovN.getOperand(0); SDValue CMovOp1 = CMovN.getOperand(1); if (!isa<ConstantSDNode>(CMovOp0.getNode()) || !isa<ConstantSDNode>(CMovOp1.getNode())) return SDValue(); // Only extend to i32 or i64. if (TargetVT != MVT::i32 && TargetVT != MVT::i64) return SDValue(); // Only extend from i16 unless it's a sign_extend from i32. Zext/aext from i32 // are free. if (VT != MVT::i16 && !(ExtendOpcode == ISD::SIGN_EXTEND && VT == MVT::i32)) return SDValue(); // If this is a zero extend to i64, we should only extend to i32 and use a free // zero extend to finish. EVT ExtendVT = TargetVT; if (TargetVT == MVT::i64 && ExtendOpcode != ISD::SIGN_EXTEND) ExtendVT = MVT::i32; CMovOp0 = DAG.getNode(ExtendOpcode, DL, ExtendVT, CMovOp0); CMovOp1 = DAG.getNode(ExtendOpcode, DL, ExtendVT, CMovOp1); SDValue Res = DAG.getNode(X86ISD::CMOV, DL, ExtendVT, CMovOp0, CMovOp1, CMovN.getOperand(2), CMovN.getOperand(3)); // Finish extending if needed. if (ExtendVT != TargetVT) Res = DAG.getNode(ExtendOpcode, DL, TargetVT, Res); return Res; } // Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)). // This is more or less the reverse of combineBitcastvxi1.
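// For example, (v16i8 zext (v16i1 bitcast (i16 x))) becomes a broadcast of x, // an AND with a per-element bit mask, and a compare against that mask.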
static SDValue combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { unsigned Opcode = N->getOpcode(); if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND && Opcode != ISD::ANY_EXTEND) return SDValue(); if (!DCI.isBeforeLegalizeOps()) return SDValue(); if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) return SDValue(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT SVT = VT.getScalarType(); EVT InSVT = N0.getValueType().getScalarType(); unsigned EltSizeInBits = SVT.getSizeInBits(); // Input type must be extending a bool vector (bit-casted from a scalar // integer) to legal integer types. if (!VT.isVector()) return SDValue(); if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8) return SDValue(); if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST) return SDValue(); SDValue N00 = N0.getOperand(0); EVT SclVT = N0.getOperand(0).getValueType(); if (!SclVT.isScalarInteger()) return SDValue(); SDLoc DL(N); SDValue Vec; SmallVector<int, 16> ShuffleMask; unsigned NumElts = VT.getVectorNumElements(); assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size"); // Broadcast the scalar integer to the vector elements. if (NumElts > EltSizeInBits) { // If the scalar integer is greater than the vector element size, then we // must split it down into sub-sections for broadcasting. For example: // i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections. // i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections. assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale"); unsigned Scale = NumElts / EltSizeInBits; EVT BroadcastVT = EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits); Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00); Vec = DAG.getBitcast(VT, Vec); for (unsigned i = 0; i != Scale; ++i) ShuffleMask.append(EltSizeInBits, i); } else { // For a smaller scalar integer, we can simply any-extend it to the vector // element size (we don't care about the upper bits) and broadcast it to all // elements. SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT); Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl); ShuffleMask.append(NumElts, 0); } Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask); // Now, mask the relevant bit in each element. SmallVector<SDValue, 32> Bits; for (unsigned i = 0; i != NumElts; ++i) { int BitIdx = (i % EltSizeInBits); APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1); Bits.push_back(DAG.getConstant(Bit, DL, SVT)); } SDValue BitMask = DAG.getBuildVector(VT, DL, Bits); Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask); // Compare against the bitmask and extend the result. EVT CCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts); Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ); Vec = DAG.getSExtOrTrunc(Vec, DL, VT); // For SEXT, this is now done, otherwise shift the result down for // zero-extension. if (Opcode == ISD::SIGN_EXTEND) return Vec; return DAG.getNode(ISD::SRL, DL, VT, Vec, DAG.getConstant(EltSizeInBits - 1, DL, VT)); } /// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or /// ZERO_EXTEND_VECTOR_INREG; this requires splitting (or concatenating with /// UNDEFs) the input into vectors of the same size as the target type, which /// then extends the lowest elements.
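/// For example, (v4i32 (sext (v4i16 x))) is rebuilt as /// (v4i32 (sign_extend_vector_inreg (v8i16 (concat_vectors x, undef)))).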
static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { if (ExperimentalVectorWideningLegalization) return SDValue(); unsigned Opcode = N->getOpcode(); if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND) return SDValue(); if (!DCI.isBeforeLegalizeOps()) return SDValue(); if (!Subtarget.hasSSE2()) return SDValue(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT SVT = VT.getScalarType(); EVT InVT = N0.getValueType(); EVT InSVT = InVT.getScalarType(); // FIXME: Generic DAGCombiner previously had a bug that would cause a // sign_extend of setcc to sometimes return the original node and trick it // into thinking CombineTo was used, which prevented the target combines from // running. // Early out here to avoid regressions like this: // (v4i32 (sext (v4i1 (setcc (v4i16))))) // Becomes // (v4i32 (sext_invec (v8i16 (concat (v4i16 (setcc (v4i16))), undef)))) // Type legalized to // (v4i32 (sext_invec (v8i16 (trunc_invec (v4i32 (setcc (v4i32))))))) // Leading to a packssdw+pmovsxwd. // We could write a DAG combine to fix this, but really we shouldn't be // creating a sext_invec that's forcing v8i16 into the DAG. if (N0.getOpcode() == ISD::SETCC) return SDValue(); // Input type must be a vector and we must be extending legal integer types. if (!VT.isVector() || VT.getVectorNumElements() < 2) return SDValue(); if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16) return SDValue(); if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8) return SDValue(); // If the input/output types are both legal then we have at least AVX1 and // we will be able to use SIGN_EXTEND/ZERO_EXTEND directly. if (DAG.getTargetLoweringInfo().isTypeLegal(VT) && DAG.getTargetLoweringInfo().isTypeLegal(InVT)) return SDValue(); SDLoc DL(N); auto ExtendVecSize = [&DAG](const SDLoc &DL, SDValue N, unsigned Size) { EVT InVT = N.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(), Size / InVT.getScalarSizeInBits()); SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(), DAG.getUNDEF(InVT)); Opnds[0] = N; return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds); }; // If target-size is less than 128-bits, extend to a type that would extend // to 128 bits, extend that and extract the original target vector. if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits())) { unsigned Scale = 128 / VT.getSizeInBits(); EVT ExVT = EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits()); SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits()); SDValue SExt = DAG.getNode(Opcode, DL, ExVT, Ex); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt, DAG.getIntPtrConstant(0, DL)); } // If target-size is 128-bits (or 256-bits on AVX target), then convert to // ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT. // Also use this if we don't have SSE41 to allow the legalizer do its job. if (!Subtarget.hasSSE41() || VT.is128BitVector() || (VT.is256BitVector() && Subtarget.hasAVX()) || (VT.is512BitVector() && Subtarget.useAVX512Regs())) { SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits()); Opcode = Opcode == ISD::SIGN_EXTEND ?
ISD::SIGN_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND_VECTOR_INREG; return DAG.getNode(Opcode, DL, VT, ExOp); } auto SplitAndExtendInReg = [&](unsigned SplitSize) { unsigned NumVecs = VT.getSizeInBits() / SplitSize; unsigned NumSubElts = SplitSize / SVT.getSizeInBits(); EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts); EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts); unsigned IROpc = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND_VECTOR_INREG; SmallVector<SDValue, 8> Opnds; for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) { SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0, DAG.getIntPtrConstant(Offset, DL)); SrcVec = ExtendVecSize(DL, SrcVec, SplitSize); SrcVec = DAG.getNode(IROpc, DL, SubVT, SrcVec); Opnds.push_back(SrcVec); } return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds); }; // On pre-AVX targets, split into 128-bit nodes of // ISD::*_EXTEND_VECTOR_INREG. if (!Subtarget.hasAVX() && !(VT.getSizeInBits() % 128)) return SplitAndExtendInReg(128); // On pre-AVX512 targets, split into 256-bit nodes of // ISD::*_EXTEND_VECTOR_INREG. if (!Subtarget.useAVX512Regs() && !(VT.getSizeInBits() % 256)) return SplitAndExtendInReg(256); return SDValue(); } // Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm // result type. static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); SDLoc dl(N); // Only do this combine with AVX512 for vector extends. if (!Subtarget.hasAVX512() || !VT.isVector() || N0->getOpcode() != ISD::SETCC) return SDValue(); // Only combine legal element types. EVT SVT = VT.getVectorElementType(); if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 && SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64) return SDValue(); // We can only do this if the vector size is 256 bits or less. unsigned Size = VT.getSizeInBits(); if (Size > 256) return SDValue(); // Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since // those are the only integer compares we have. ISD::CondCode CC = cast<CondCodeSDNode>(N0->getOperand(2))->get(); if (ISD::isUnsignedIntSetCC(CC)) return SDValue(); // Only do this combine if the extension will be fully consumed by the setcc. EVT N00VT = N0.getOperand(0).getValueType(); EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); if (Size != MatchingVecType.getSizeInBits()) return SDValue(); SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); if (N->getOpcode() == ISD::ZERO_EXTEND) Res = DAG.getZeroExtendInReg(Res, dl, N0.getValueType().getScalarType()); return Res; } static SDValue combineSext(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT InVT = N0.getValueType(); SDLoc DL(N); if (SDValue NewCMov = combineToExtendCMOV(N, DAG)) return NewCMov; if (!DCI.isBeforeLegalizeOps()) return SDValue(); if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) return V; if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR && isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) { // Invert and sign-extend a boolean is the same as zero-extend and subtract // 1 because 0 becomes -1 and 1 becomes 0. The subtract is efficiently // lowered with an LEA or a DEC. This is the same as: select Bool, 0, -1.
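// Both forms agree on the two boolean inputs: 0 maps to -1 and 1 maps to 0.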
// sext (xor Bool, -1) --> sub (zext Bool), 1 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT)); } if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget)) return V; if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; if (VT.isVector()) if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget)) return R; if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget)) return NewAdd; return SDValue(); } static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { if (NegMul) { switch (Opcode) { default: llvm_unreachable("Unexpected opcode"); case ISD::FMA: Opcode = X86ISD::FNMADD; break; case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break; case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break; case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break; case X86ISD::FNMADD: Opcode = ISD::FMA; break; case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break; case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break; case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break; } } if (NegAcc) { switch (Opcode) { default: llvm_unreachable("Unexpected opcode"); case ISD::FMA: Opcode = X86ISD::FMSUB; break; case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break; case X86ISD::FMSUB: Opcode = ISD::FMA; break; case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break; case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break; case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break; case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; } } return Opcode; } static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc dl(N); EVT VT = N->getValueType(0); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); EVT ScalarVT = VT.getScalarType(); if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA()) return SDValue(); SDValue A = N->getOperand(0); SDValue B = N->getOperand(1); SDValue C = N->getOperand(2); auto invertIfNegative = [&DAG](SDValue &V) { if (SDValue NegVal = isFNEG(DAG, V.getNode())) { V = DAG.getBitcast(V.getValueType(), NegVal); return true; } // Look through extract_vector_elts. If it comes from an FNEG, create a // new extract from the FNEG input. if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isNullConstant(V.getOperand(1))) { if (SDValue NegVal = isFNEG(DAG, V.getOperand(0).getNode())) { NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal); V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(), NegVal, V.getOperand(1)); return true; } } return false; }; // Do not convert the passthru input of scalar intrinsics. // FIXME: We could allow negations of the lower element only. 
bool NegA = invertIfNegative(A); bool NegB = invertIfNegative(B); bool NegC = invertIfNegative(C); if (!NegA && !NegB && !NegC) return SDValue(); unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); if (N->getNumOperands() == 4) return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3)); return DAG.getNode(NewOpcode, dl, VT, A, B, C); } // Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C) static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc dl(N); EVT VT = N->getValueType(0); SDValue NegVal = isFNEG(DAG, N->getOperand(2).getNode()); if (!NegVal) return SDValue(); unsigned NewOpcode; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode!"); case X86ISD::FMADDSUB: NewOpcode = X86ISD::FMSUBADD; break; case X86ISD::FMADDSUB_RND: NewOpcode = X86ISD::FMSUBADD_RND; break; case X86ISD::FMSUBADD: NewOpcode = X86ISD::FMADDSUB; break; case X86ISD::FMSUBADD_RND: NewOpcode = X86ISD::FMADDSUB_RND; break; } if (N->getNumOperands() == 4) return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), NegVal, N->getOperand(3)); return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), NegVal); } static SDValue combineZext(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { // (i32 zext (and (i8 x86isd::setcc_carry), 1)) -> // (and (i32 x86isd::setcc_carry), 1) // This eliminates the zext. This transformation is necessary because // ISD::SETCC is always legalized to i8. SDLoc dl(N); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == X86ISD::SETCC_CARRY) { if (!isOneConstant(N0.getOperand(1))) return SDValue(); return DAG.getNode(ISD::AND, dl, VT, DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N00.getOperand(0), N00.getOperand(1)), DAG.getConstant(1, dl, VT)); } } if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == X86ISD::SETCC_CARRY) { return DAG.getNode(ISD::AND, dl, VT, DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N00.getOperand(0), N00.getOperand(1)), DAG.getConstant(1, dl, VT)); } } if (SDValue NewCMov = combineToExtendCMOV(N, DAG)) return NewCMov; if (DCI.isBeforeLegalizeOps()) if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) return V; if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget)) return V; if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; if (VT.isVector()) if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget)) return R; if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget)) return NewAdd; if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget)) return R; return SDValue(); } /// Try to map a 128-bit or larger integer comparison to vector instructions /// before type legalization splits it up into chunks. static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, const X86Subtarget &Subtarget) { ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate"); // We're looking for an oversized integer equality comparison.
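// That is, a seteq/setne of two integer operands of 128 bits or more, as // typically produced by memcmp expansion.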
SDValue X = SetCC->getOperand(0); SDValue Y = SetCC->getOperand(1); EVT OpVT = X.getValueType(); unsigned OpSize = OpVT.getSizeInBits(); if (!OpVT.isScalarInteger() || OpSize < 128) return SDValue(); // Ignore a comparison with zero because that gets special treatment in // EmitTest(). But make an exception for the special case of a pair of // logically-combined vector-sized operands compared to zero. This pattern may // be generated by the memcmp expansion pass with oversized integer compares // (see PR33325). bool IsOrXorXorCCZero = isNullConstant(Y) && X.getOpcode() == ISD::OR && X.getOperand(0).getOpcode() == ISD::XOR && X.getOperand(1).getOpcode() == ISD::XOR; if (isNullConstant(Y) && !IsOrXorXorCCZero) return SDValue(); // Bail out if we know that this is not really just an oversized integer. if (peekThroughBitcasts(X).getValueType() == MVT::f128 || peekThroughBitcasts(Y).getValueType() == MVT::f128) return SDValue(); // TODO: Use PXOR + PTEST for SSE4.1 or later? EVT VT = SetCC->getValueType(0); SDLoc DL(SetCC); if ((OpSize == 128 && Subtarget.hasSSE2()) || (OpSize == 256 && Subtarget.hasAVX2()) || (OpSize == 512 && Subtarget.useAVX512Regs())) { EVT VecVT = OpSize == 512 ? MVT::v16i32 : OpSize == 256 ? MVT::v32i8 : MVT::v16i8; EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT; SDValue Cmp; if (IsOrXorXorCCZero) { // This is a bitwise-combined equality comparison of 2 pairs of vectors: // setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne // Use 2 vector equality compares and 'and' the results before doing a // MOVMSK. SDValue A = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(0)); SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1)); SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0)); SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1)); SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ); Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2); } else { SDValue VecX = DAG.getBitcast(VecVT, X); SDValue VecY = DAG.getBitcast(VecVT, Y); Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ); } // For 512-bits we want to emit a setcc that will lower to kortest. if (OpSize == 512) return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp), DAG.getConstant(0xFFFF, DL, MVT::i16), CC); // If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality. // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne // setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq // setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp); SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 
0xFFFF : 0xFFFFFFFF, DL, MVT::i32); return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC); } return SDValue(); } static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); EVT VT = N->getValueType(0); EVT OpVT = LHS.getValueType(); SDLoc DL(N); if (CC == ISD::SETNE || CC == ISD::SETEQ) { // 0-x == y --> x+y == 0 // 0-x != y --> x+y != 0 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) && LHS.hasOneUse()) { SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, RHS, LHS.getOperand(1)); return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC); } // x == 0-y --> x+y == 0 // x != 0-y --> x+y != 0 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) && RHS.hasOneUse()) { SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1)); return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC); } if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget)) return V; } if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) { // Put build_vectors on the right. if (LHS.getOpcode() == ISD::BUILD_VECTOR) { std::swap(LHS, RHS); CC = ISD::getSetCCSwappedOperands(CC); } bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) && (LHS.getOperand(0).getValueType().getVectorElementType() == MVT::i1); bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode()); if (IsSEXT0 && IsVZero1) { assert(VT == LHS.getOperand(0).getValueType() && "Unexpected operand type"); if (CC == ISD::SETGT) return DAG.getConstant(0, DL, VT); if (CC == ISD::SETLE) return DAG.getConstant(1, DL, VT); if (CC == ISD::SETEQ || CC == ISD::SETGE) return DAG.getNOT(DL, LHS.getOperand(0), VT); assert((CC == ISD::SETNE || CC == ISD::SETLT) && "Unexpected condition code!"); return LHS.getOperand(0); } } // If we have AVX512 but not BWI and this is a vXi16/vXi8 setcc, just // pre-promote its result type since vXi1 vectors don't get promoted // during type legalization. // NOTE: The element count check is to ignore operand types that need to // go through type promotion to a 128-bit vector. if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() && VT.getVectorElementType() == MVT::i1 && (ExperimentalVectorWideningLegalization || VT.getVectorNumElements() > 4) && (OpVT.getVectorElementType() == MVT::i8 || OpVT.getVectorElementType() == MVT::i16)) { SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS, N->getOperand(2)); return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc); } // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early // to avoid scalarization via legalization because v4i32 is not a legal type. if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 && LHS.getValueType() == MVT::v4f32) return LowerVSETCC(SDValue(N, 0), Subtarget, DAG); return SDValue(); } static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { SDValue Src = N->getOperand(0); MVT SrcVT = Src.getSimpleValueType(); MVT VT = N->getSimpleValueType(0); // Perform constant folding.
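// (movmsk of an all-constant vector: bit Idx of the result is set iff element // Idx has its sign bit set.)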
if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) { assert(VT == MVT::i32 && "Unexpected result type"); APInt Imm(32, 0); for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) { SDValue In = Src.getOperand(Idx); if (!In.isUndef() && cast<ConstantSDNode>(In)->getAPIntValue().isNegative()) Imm.setBit(Idx); } return DAG.getConstant(Imm, SDLoc(N), VT); } // Look through int->fp bitcasts that don't change the element width. if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse() && SrcVT.isFloatingPoint() && Src.getOperand(0).getValueType() == EVT(SrcVT).changeVectorElementTypeToInteger()) Src = Src.getOperand(0); // Simplify the inputs. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits())); if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) return SDValue(N, 0); // Combine (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)). // Only do this when the setcc input and output types are the same and the // setcc and the 'and' node have a single use. // FIXME: Support 256-bits with AVX1. The movmsk is split, but the and isn't. APInt SplatVal; if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() && Src.getOperand(0).getValueType() == Src.getValueType() && cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETNE && ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) && Src.getOperand(0).getOpcode() == ISD::AND) { SDValue And = Src.getOperand(0); if (And.hasOneUse() && ISD::isConstantSplatVector(And.getOperand(1).getNode(), SplatVal) && SplatVal.isPowerOf2()) { MVT VT = Src.getSimpleValueType(); unsigned BitWidth = VT.getScalarSizeInBits(); unsigned ShAmt = BitWidth - SplatVal.logBase2() - 1; SDLoc DL(And); SDValue X = And.getOperand(0); // If the element type is i8, we need to bitcast to i16 to use a legal // shift. If we wait until lowering we end up with an extra AND to keep // bits from crossing the 8-bit elements, but we don't care about that here. if (VT.getVectorElementType() == MVT::i8) { VT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); X = DAG.getBitcast(VT, X); } SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT)); SDValue Cast = DAG.getBitcast(SrcVT, Shl); return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), N->getValueType(0), Cast); } } return SDValue(); } static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDLoc DL(N); if (DCI.isBeforeLegalizeOps()) { SDValue Index = N->getOperand(4); // Remove any sign extends from 32 or smaller to larger than 32. // Only do this before LegalizeOps in case we need the sign extend for // legalization. if (Index.getOpcode() == ISD::SIGN_EXTEND) { if (Index.getScalarValueSizeInBits() > 32 && Index.getOperand(0).getScalarValueSizeInBits() <= 32) { SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); NewOps[4] = Index.getOperand(0); SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); if (Res == N) { // The original sign extend has fewer users; add it back to the // worklist in case it needs to be removed. DCI.AddToWorklist(Index.getNode()); DCI.AddToWorklist(N); } return SDValue(Res, 0); } } // Make sure the index is either i32 or i64. unsigned ScalarSize = Index.getScalarValueSizeInBits(); if (ScalarSize != 32 && ScalarSize != 64) { MVT EltVT = ScalarSize > 32 ?
MVT::i64 : MVT::i32; EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT, Index.getValueType().getVectorNumElements()); Index = DAG.getSExtOrTrunc(Index, DL, IndexVT); SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); NewOps[4] = Index; SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); if (Res == N) DCI.AddToWorklist(N); return SDValue(Res, 0); } // Try to remove zero extends from 32->64 if we know the sign bit of // the input is zero. if (Index.getOpcode() == ISD::ZERO_EXTEND && Index.getScalarValueSizeInBits() == 64 && Index.getOperand(0).getScalarValueSizeInBits() == 32) { if (DAG.SignBitIsZero(Index.getOperand(0))) { SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); NewOps[4] = Index.getOperand(0); SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); if (Res == N) { // The original zero extend has fewer users; add it back to the // worklist in case it needs to be removed. DCI.AddToWorklist(Index.getNode()); DCI.AddToWorklist(N); } return SDValue(Res, 0); } } } // With AVX2 we only demand the upper bit of the mask. if (!Subtarget.hasAVX512()) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Mask = N->getOperand(2); APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) return SDValue(N, 0); } return SDValue(); } // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc DL(N); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); SDValue EFLAGS = N->getOperand(1); // Try to simplify the EFLAGS and condition code operands. if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) return getSETCC(CC, Flags, DL, DAG); return SDValue(); } /// Optimize branch condition evaluation. static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc DL(N); SDValue EFLAGS = N->getOperand(3); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); // Try to simplify the EFLAGS and condition code operands. // Make sure to not keep references to operands, as combineSetCCEFLAGS can // RAUW them under us. if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) { SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0), N->getOperand(1), Cond, Flags); } return SDValue(); } static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N, SelectionDAG &DAG) { // Take advantage of vector comparisons producing 0 or -1 in each lane to // optimize away operation when it's from a constant. // // The general transformation is: // UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> // AND(VECTOR_CMP(x,y), constant2) // constant2 = UNARYOP(constant) // Early exit if this isn't a vector operation, the operand of the // unary operation isn't a bitwise AND, or if the sizes of the operations // aren't the same. EVT VT = N->getValueType(0); if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND || N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC || VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits()) return SDValue(); // Now check that the other operand of the AND is a constant. We could // make the transformation for non-constant splats as well, but it's unclear // that would be a benefit as it would not eliminate any operations, just // perform one more step in scalar code before moving to the vector unit.
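// For example, uint_to_fp(AND(VECTOR_CMP(x,y), <0,5,0,5>)) becomes // AND(VECTOR_CMP(x,y), bitcast(<0.0,5.0,0.0,5.0>)): each compare lane is all-ones // or all-zeros, and converting the zero lanes still yields all-zero bits.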
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) { // Bail out if the vector isn't a constant. if (!BV->isConstant()) return SDValue(); // Everything checks out. Build up the new and improved node. SDLoc DL(N); EVT IntVT = BV->getValueType(0); // Create a new constant of the appropriate type for the transformed // DAG. SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); // The AND node needs bitcasts to/from an integer vector type around it. SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst); SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, N->getOperand(0)->getOperand(0), MaskConst); SDValue Res = DAG.getBitcast(VT, NewAnd); return Res; } return SDValue(); } static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue Op0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT InVT = Op0.getValueType(); // UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32)) // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32)) // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32)) if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) { SDLoc dl(N); EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, InVT.getVectorNumElements()); SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); // UINT_TO_FP isn't legal without AVX512 so use SINT_TO_FP. return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); } // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // the optimization here. if (DAG.SignBitIsZero(Op0)) return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0); return SDValue(); } static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // First try to optimize away the conversion entirely when it's // conditionally from a constant. Vectors only. if (SDValue Res = combineVectorCompareAndMaskUnaryOp(N, DAG)) return Res; // Now move on to more general possibilities. SDValue Op0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT InVT = Op0.getValueType(); // SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32)) // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32)) // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32)) if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) { SDLoc dl(N); EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, InVT.getVectorNumElements()); SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0); return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); } // Without AVX512DQ we only support i64 to float scalar conversion. For both // vectors and scalars, see if we know that the upper bits are all the sign // bit, in which case we can truncate the input to i32 and convert from that. if (InVT.getScalarSizeInBits() > 32 && !Subtarget.hasDQI()) { unsigned BitWidth = InVT.getScalarSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op0); if (NumSignBits >= (BitWidth - 31)) { EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32); if (InVT.isVector()) TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, InVT.getVectorNumElements()); SDLoc dl(N); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); } } // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have // a 32-bit target where SSE doesn't support i64->FP operations.
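// (x87 FILD can load a 64-bit integer directly, so an i64 load feeding the // conversion is folded into BuildFILD below.)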
if (!Subtarget.useSoftFloat() && Subtarget.hasX87() && Op0.getOpcode() == ISD::LOAD) { LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode()); EVT LdVT = Ld->getValueType(0); // This transformation is not supported if the result type is f16 or f128. if (VT == MVT::f16 || VT == MVT::f128) return SDValue(); // If we have AVX512DQ we can use packed conversion instructions unless // the VT is f80. if (Subtarget.hasDQI() && VT != MVT::f80) return SDValue(); if (!Ld->isVolatile() && !VT.isVector() && ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() && !Subtarget.is64Bit() && LdVT == MVT::i64) { SDValue FILDChain = Subtarget.getTargetLowering()->BuildFILD( SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG); DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1)); return FILDChain; } } return SDValue(); } static bool needCarryOrOverflowFlag(SDValue Flags) { assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); UI != UE; ++UI) { SDNode *User = *UI; X86::CondCode CC; switch (User->getOpcode()) { default: // Be conservative. return true; case X86ISD::SETCC: case X86ISD::SETCC_CARRY: CC = (X86::CondCode)User->getConstantOperandVal(0); break; case X86ISD::BRCOND: CC = (X86::CondCode)User->getConstantOperandVal(2); break; case X86ISD::CMOV: CC = (X86::CondCode)User->getConstantOperandVal(2); break; } switch (CC) { default: break; case X86::COND_A: case X86::COND_AE: case X86::COND_B: case X86::COND_BE: case X86::COND_O: case X86::COND_NO: case X86::COND_G: case X86::COND_GE: case X86::COND_L: case X86::COND_LE: return true; } } return false; } static bool onlyZeroFlagUsed(SDValue Flags) { assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); UI != UE; ++UI) { SDNode *User = *UI; unsigned CCOpNo; switch (User->getOpcode()) { default: // Be conservative. return false; case X86ISD::SETCC: CCOpNo = 0; break; case X86ISD::SETCC_CARRY: CCOpNo = 0; break; case X86ISD::BRCOND: CCOpNo = 2; break; case X86ISD::CMOV: CCOpNo = 2; break; } X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo); if (CC != X86::COND_E && CC != X86::COND_NE) return false; } return true; } static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) { // Only handle test patterns. if (!isNullConstant(N->getOperand(1))) return SDValue(); // If we have a CMP of a truncated binop, see if we can make a smaller binop // and use its flags directly. // TODO: Maybe we should try promoting compares that only use the zero flag // first if we can prove the upper bits with computeKnownBits? SDLoc dl(N); SDValue Op = N->getOperand(0); EVT VT = Op.getValueType(); // If we have a constant logical shift that's only used in a comparison // against zero turn it into an equivalent AND. This allows turning it into // a TEST instruction later. if ((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) && Op.hasOneUse() && isa<ConstantSDNode>(Op.getOperand(1)) && onlyZeroFlagUsed(SDValue(N, 0))) { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); unsigned ShAmt = Op.getConstantOperandVal(1); if (ShAmt < BitWidth) { // Avoid undefined shifts. APInt Mask = Op.getOpcode() == ISD::SRL ?
APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt) : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt); if (Mask.isSignedIntN(32)) { Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), DAG.getConstant(Mask, dl, VT)); return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, dl, VT)); } } } // Look for a truncate with a single use. if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse()) return SDValue(); Op = Op.getOperand(0); // Arithmetic op can only have one use. if (!Op.hasOneUse()) return SDValue(); unsigned NewOpc; switch (Op.getOpcode()) { default: return SDValue(); case ISD::AND: // Skip and with constant. We have special handling for and with immediate // during isel to generate test instructions. if (isa<ConstantSDNode>(Op.getOperand(1))) return SDValue(); NewOpc = X86ISD::AND; break; case ISD::OR: NewOpc = X86ISD::OR; break; case ISD::XOR: NewOpc = X86ISD::XOR; break; case ISD::ADD: // If the carry or overflow flag is used, we can't truncate. if (needCarryOrOverflowFlag(SDValue(N, 0))) return SDValue(); NewOpc = X86ISD::ADD; break; case ISD::SUB: // If the carry or overflow flag is used, we can't truncate. if (needCarryOrOverflowFlag(SDValue(N, 0))) return SDValue(); NewOpc = X86ISD::SUB; break; } // We found an op we can narrow. Truncate its inputs. SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0)); SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1)); // Use a X86 specific opcode to avoid DAG combine messing with it. SDVTList VTs = DAG.getVTList(VT, MVT::i32); Op = DAG.getNode(NewOpc, dl, VTs, Op0, Op1); // For AND, keep a CMP so that we can match the test pattern. if (NewOpc == X86ISD::AND) return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, dl, VT)); // Return the flags. return Op.getValue(1); } static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) { if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) { MVT VT = N->getSimpleValueType(0); SDVTList VTs = DAG.getVTList(VT, MVT::i32); return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, N->getOperand(0), N->getOperand(1), Flags); } return SDValue(); } // Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS static SDValue combineADC(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { // If the LHS and RHS of the ADC node are zero, then it can't overflow and // the result is either zero or one (depending on the input carry bit). // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1. if (X86::isZeroNode(N->getOperand(0)) && X86::isZeroNode(N->getOperand(1)) && // We don't have a good way to replace an EFLAGS use, so only do this when // dead right now. SDValue(N, 1).use_empty()) { SDLoc DL(N); EVT VT = N->getValueType(0); SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1)); SDValue Res1 = DAG.getNode(ISD::AND, DL, VT, DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, DAG.getConstant(X86::COND_B, DL, MVT::i8), N->getOperand(2)), DAG.getConstant(1, DL, VT)); return DCI.CombineTo(N, Res1, CarryOut); } if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) { MVT VT = N->getSimpleValueType(0); SDVTList VTs = DAG.getVTList(VT, MVT::i32); return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, N->getOperand(0), N->getOperand(1), Flags); } return SDValue(); } /// If this is an add or subtract where one operand is produced by a cmp+setcc, /// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB} /// with CMP+{ADC, SBB}.
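/// For example, one matched case (taken from the comments in the body below):
///   X + (SETB after CMP A, B)  -->  ADC X, 0   ; consumes CF from the CMP
///   X - (SETB after CMP A, B)  -->  SBB X, 0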
static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { bool IsSub = N->getOpcode() == ISD::SUB; SDValue X = N->getOperand(0); SDValue Y = N->getOperand(1); // If this is an add, canonicalize a zext operand to the RHS. // TODO: Incomplete? What if both sides are zexts? if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND && Y.getOpcode() != ISD::ZERO_EXTEND) std::swap(X, Y); // Look through a one-use zext. bool PeekedThroughZext = false; if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) { Y = Y.getOperand(0); PeekedThroughZext = true; } // If this is an add, canonicalize a setcc operand to the RHS. // TODO: Incomplete? What if both sides are setcc? // TODO: Should we allow peeking through a zext of the other operand? if (!IsSub && !PeekedThroughZext && X.getOpcode() == X86ISD::SETCC && Y.getOpcode() != X86ISD::SETCC) std::swap(X, Y); if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse()) return SDValue(); SDLoc DL(N); EVT VT = N->getValueType(0); X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0); // If X is -1 or 0, then we have an opportunity to avoid constants required in // the general case below. auto *ConstantX = dyn_cast<ConstantSDNode>(X); if (ConstantX) { if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) || (IsSub && CC == X86::COND_B && ConstantX->isNullValue())) { // This is a complicated way to get -1 or 0 from the carry flag: // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, DAG.getConstant(X86::COND_B, DL, MVT::i8), Y.getOperand(1)); } if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) || (IsSub && CC == X86::COND_A && ConstantX->isNullValue())) { SDValue EFLAGS = Y->getOperand(1); if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && EFLAGS.getValueType().isInteger() && !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { // Swap the operands of a SUB, and we have the same pattern as above. // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB SDValue NewSub = DAG.getNode( X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), EFLAGS.getOperand(1), EFLAGS.getOperand(0)); SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, DAG.getConstant(X86::COND_B, DL, MVT::i8), NewEFLAGS); } } } if (CC == X86::COND_B) { // X + SETB Z --> adc X, 0 // X - SETB Z --> sbb X, 0 return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, DAG.getVTList(VT, MVT::i32), X, DAG.getConstant(0, DL, VT), Y.getOperand(1)); } if (CC == X86::COND_A) { SDValue EFLAGS = Y->getOperand(1); // Try to convert COND_A into COND_B in an attempt to facilitate // materializing "setb reg". // // Do not flip "e > c", where "c" is a constant, because Cmp instruction // cannot take an immediate as its first operand. // if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && EFLAGS.getValueType().isInteger() && !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), EFLAGS.getOperand(1), EFLAGS.getOperand(0)); SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); return DAG.getNode(IsSub ?
X86ISD::SBB : X86ISD::ADC, DL, DAG.getVTList(VT, MVT::i32), X, DAG.getConstant(0, DL, VT), NewEFLAGS); } } if (CC != X86::COND_E && CC != X86::COND_NE) return SDValue(); SDValue Cmp = Y.getOperand(1); if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() || !X86::isZeroNode(Cmp.getOperand(1)) || !Cmp.getOperand(0).getValueType().isInteger()) return SDValue(); SDValue Z = Cmp.getOperand(0); EVT ZVT = Z.getValueType(); // If X is -1 or 0, then we have an opportunity to avoid constants required in // the general case below. if (ConstantX) { // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with // fake operands: // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z) // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z) if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) || (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) { SDValue Zero = DAG.getConstant(0, DL, ZVT); SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z); return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, DAG.getConstant(X86::COND_B, DL, MVT::i8), SDValue(Neg.getNode(), 1)); } // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb' // with fake operands: // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1) // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1) if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) || (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) { SDValue One = DAG.getConstant(1, DL, ZVT); SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One); return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp1); } } // (cmp Z, 1) sets the carry flag if Z is 0. SDValue One = DAG.getConstant(1, DL, ZVT); SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One); // Add the flags type for ADC/SBB nodes. SDVTList VTs = DAG.getVTList(VT, MVT::i32); // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1) // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1) if (CC == X86::COND_NE) return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X, DAG.getConstant(-1ULL, DL, VT), Cmp1); // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1) // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1) return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X, DAG.getConstant(0, DL, VT), Cmp1); } static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (!Subtarget.hasSSE2()) return SDValue(); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); EVT VT = N->getValueType(0); // If the vector size is less than 128, or greater than the supported RegSize, // do not use PMADD. 
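// For reference, pmaddwd multiplies adjacent pairs of i16 elements and adds
// each pair into one i32 lane, e.g. per 128-bit lane:
//   pmaddwd <a0,..,a7>, <b0,..,b7> =
//     <a0*b0 + a1*b1, a2*b2 + a3*b3, a4*b4 + a5*b5, a6*b6 + a7*b7>
// which is why a reduction add over a widened i16 multiply can use it.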
if (!VT.isVector() || VT.getVectorNumElements() < 8) return SDValue(); if (Op0.getOpcode() != ISD::MUL) std::swap(Op0, Op1); if (Op0.getOpcode() != ISD::MUL) return SDValue(); ShrinkMode Mode; if (!canReduceVMulWidth(Op0.getNode(), DAG, Mode) || Mode == MULU16) return SDValue(); SDLoc DL(N); EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements()); EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, VT.getVectorNumElements() / 2); // Madd vector size is half of the original vector size. auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef<SDValue> Ops) { MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32); return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops); }; auto BuildPMADDWD = [&](SDValue Mul) { // Shrink the operands of mul. SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(0)); SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(1)); SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 }, PMADDWDBuilder); // Fill the rest of the output with 0 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, DAG.getConstant(0, DL, MAddVT)); }; Op0 = BuildPMADDWD(Op0); // It's possible that Op1 is also a mul we can reduce. if (Op1.getOpcode() == ISD::MUL && canReduceVMulWidth(Op1.getNode(), DAG, Mode) && Mode != MULU16) { Op1 = BuildPMADDWD(Op1); } return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); } static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (!Subtarget.hasSSE2()) return SDValue(); SDLoc DL(N); EVT VT = N->getValueType(0); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); // TODO: There's nothing special about i32, any integer type above i16 should // work just as well. if (!VT.isVector() || !VT.isSimple() || !(VT.getVectorElementType() == MVT::i32)) return SDValue(); unsigned RegSize = 128; if (Subtarget.useBWIRegs()) RegSize = 512; else if (Subtarget.hasAVX()) RegSize = 256; // We only handle v16i32 for SSE2 / v32i32 for AVX / v64i32 for AVX512. // TODO: We should be able to handle larger vectors by splitting them before // feeding them into several SADs, and then reducing over those. if (VT.getSizeInBits() / 4 > RegSize) return SDValue(); // We know N is a reduction add, which means one of its operands is a phi. // To match SAD, we need the other operand to be a vector select. if (Op0.getOpcode() != ISD::VSELECT) std::swap(Op0, Op1); if (Op0.getOpcode() != ISD::VSELECT) return SDValue(); auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) { // SAD pattern detected. Now build a SAD instruction and an addition for // reduction. Note that the number of elements of the result of SAD is less // than the number of elements of its input. Therefore, we could only update // part of elements in the reduction vector. SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget); // The output of PSADBW is a vector of i64. // We need to turn the vector of i64 into a vector of i32. // If the reduction vector is at least as wide as the psadbw result, just // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero // anyway. MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32); if (VT.getSizeInBits() >= ResVT.getSizeInBits()) Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad); else Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad); if (VT.getSizeInBits() > ResVT.getSizeInBits()) { // Fill the upper elements with zero to match the add width.
SDValue Zero = DAG.getConstant(0, DL, VT); Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad, DAG.getIntPtrConstant(0, DL)); } return Sad; }; // Check whether we have an abs-diff pattern feeding into the select. SDValue SadOp0, SadOp1; if (!detectZextAbsDiff(Op0, SadOp0, SadOp1)) return SDValue(); Op0 = BuildPSADBW(SadOp0, SadOp1); // It's possible we have a sad on the other side too. if (Op1.getOpcode() == ISD::VSELECT && detectZextAbsDiff(Op1, SadOp0, SadOp1)) { Op1 = BuildPSADBW(SadOp0, SadOp1); } return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); } /// Convert vector increment or decrement to sub/add with an all-ones constant: /// add X, <1, 1...> --> sub X, <-1, -1...> /// sub X, <1, 1...> --> add X, <-1, -1...> /// The all-ones vector constant can be materialized using a pcmpeq instruction /// that is commonly recognized as an idiom (has no register dependency), so /// that's better/smaller than loading a splat 1 constant. static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) { assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Unexpected opcode for increment/decrement transform"); // Pseudo-legality check: getOnesVector() expects one of these types, so bail // out and wait for legalization if we have an unsupported vector length. EVT VT = N->getValueType(0); if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) return SDValue(); APInt SplatVal; if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue()) return SDValue(); SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N)); unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec); } static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1, const SDLoc &DL, EVT VT, const X86Subtarget &Subtarget) { // Example of pattern we try to detect: // t := (v8i32 mul (sext (v8i16 x0), (sext (v8i16 x1)))) //(add (build_vector (extract_elt t, 0), // (extract_elt t, 2), // (extract_elt t, 4), // (extract_elt t, 6)), // (build_vector (extract_elt t, 1), // (extract_elt t, 3), // (extract_elt t, 5), // (extract_elt t, 7))) if (!Subtarget.hasSSE2()) return SDValue(); if (Op0.getOpcode() != ISD::BUILD_VECTOR || Op1.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 || VT.getVectorNumElements() < 4 || !isPowerOf2_32(VT.getVectorNumElements())) return SDValue(); // Check if one of Op0,Op1 is of the form: // (build_vector (extract_elt Mul, 0), // (extract_elt Mul, 2), // (extract_elt Mul, 4), // ... // the other is of the form: // (build_vector (extract_elt Mul, 1), // (extract_elt Mul, 3), // (extract_elt Mul, 5), // ... // and identify Mul. SDValue Mul; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; i += 2) { SDValue Op0L = Op0->getOperand(i), Op1L = Op1->getOperand(i), Op0H = Op0->getOperand(i + 1), Op1H = Op1->getOperand(i + 1); // TODO: Be more tolerant to undefs. 
if (Op0L.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Op1L.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Op0H.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Op1H.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); auto *Const0L = dyn_cast<ConstantSDNode>(Op0L->getOperand(1)); auto *Const1L = dyn_cast<ConstantSDNode>(Op1L->getOperand(1)); auto *Const0H = dyn_cast<ConstantSDNode>(Op0H->getOperand(1)); auto *Const1H = dyn_cast<ConstantSDNode>(Op1H->getOperand(1)); if (!Const0L || !Const1L || !Const0H || !Const1H) return SDValue(); unsigned Idx0L = Const0L->getZExtValue(), Idx1L = Const1L->getZExtValue(), Idx0H = Const0H->getZExtValue(), Idx1H = Const1H->getZExtValue(); // Commutativity of mul allows factors of a product to reorder. if (Idx0L > Idx1L) std::swap(Idx0L, Idx1L); if (Idx0H > Idx1H) std::swap(Idx0H, Idx1H); // Commutativity of add allows pairs of factors to reorder. if (Idx0L > Idx0H) { std::swap(Idx0L, Idx0H); std::swap(Idx1L, Idx1H); } if (Idx0L != 2 * i || Idx1L != 2 * i + 1 || Idx0H != 2 * i + 2 || Idx1H != 2 * i + 3) return SDValue(); if (!Mul) { // First time an extract_elt's source vector is visited. Must be a MUL // with 2X number of vector elements than the BUILD_VECTOR. // Both extracts must be from same MUL. Mul = Op0L->getOperand(0); if (Mul->getOpcode() != ISD::MUL || Mul.getValueType().getVectorNumElements() != 2 * e) return SDValue(); } // Check that the extract is from the same MUL previously seen. if (Mul != Op0L->getOperand(0) || Mul != Op1L->getOperand(0) || Mul != Op0H->getOperand(0) || Mul != Op1H->getOperand(0)) return SDValue(); } // Check if the Mul source can be safely shrunk. ShrinkMode Mode; if (!canReduceVMulWidth(Mul.getNode(), DAG, Mode) || Mode == MULU16) return SDValue(); auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef<SDValue> Ops) { // Shrink by adding truncate nodes and let DAGCombine fold with the // sources. EVT InVT = Ops[0].getValueType(); assert(InVT.getScalarType() == MVT::i32 && "Unexpected scalar element type"); assert(InVT == Ops[1].getValueType() && "Operands' types mismatch"); EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, InVT.getVectorNumElements() / 2); EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, InVT.getVectorNumElements()); return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[0]), DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[1])); }; return SplitOpsAndApply(DAG, Subtarget, DL, VT, { Mul.getOperand(0), Mul.getOperand(1) }, PMADDBuilder); } // Try to turn (add (umax X, C), -C) into (psubus X, C) static SDValue combineAddToSUBUS(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (!Subtarget.hasSSE2()) return SDValue(); EVT VT = N->getValueType(0); // psubus is available in SSE2 for i8 and i16 vectors. if (!VT.isVector() || VT.getVectorNumElements() < 2 || !isPowerOf2_32(VT.getVectorNumElements()) || !(VT.getVectorElementType() == MVT::i8 || VT.getVectorElementType() == MVT::i16)) return SDValue(); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() != ISD::UMAX) return SDValue(); // The add should have a constant that is the negative of the max. // TODO: Handle build_vectors with undef elements. auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) { return Max->getAPIntValue() == (-Op->getAPIntValue()); }; if (!ISD::matchBinaryPredicate(Op0.getOperand(1), Op1, MatchUSUBSAT)) return SDValue(); SDLoc DL(N); return DAG.getNode(ISD::USUBSAT, DL, VT, Op0.getOperand(0), Op0.getOperand(1)); } // Attempt to turn this pattern into PMADDWD.
// (mul (add (zext (build_vector)), (zext (build_vector))), // (add (zext (build_vector)), (zext (build_vector)))) static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1, const SDLoc &DL, EVT VT, const X86Subtarget &Subtarget) { if (!Subtarget.hasSSE2()) return SDValue(); if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL) return SDValue(); if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 || VT.getVectorNumElements() < 4 || !isPowerOf2_32(VT.getVectorNumElements())) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); SDValue N10 = N1.getOperand(0); SDValue N11 = N1.getOperand(1); // All inputs need to be sign extends. // TODO: Support ZERO_EXTEND from known positive? if (N00.getOpcode() != ISD::SIGN_EXTEND || N01.getOpcode() != ISD::SIGN_EXTEND || N10.getOpcode() != ISD::SIGN_EXTEND || N11.getOpcode() != ISD::SIGN_EXTEND) return SDValue(); // Peek through the extends. N00 = N00.getOperand(0); N01 = N01.getOperand(0); N10 = N10.getOperand(0); N11 = N11.getOperand(0); // Must be extending from vXi16. EVT InVT = N00.getValueType(); if (InVT.getVectorElementType() != MVT::i16 || N01.getValueType() != InVT || N10.getValueType() != InVT || N11.getValueType() != InVT) return SDValue(); // All inputs should be build_vectors. if (N00.getOpcode() != ISD::BUILD_VECTOR || N01.getOpcode() != ISD::BUILD_VECTOR || N10.getOpcode() != ISD::BUILD_VECTOR || N11.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); // For each element, we need to ensure we have an odd element from one vector // multiplied by the odd element of another vector and the even element from // one of the same vectors being multiplied by the even element from the // other vector. So we need to make sure for each element i, this operator // is being performed: // A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1] SDValue In0, In1; for (unsigned i = 0; i != N00.getNumOperands(); ++i) { SDValue N00Elt = N00.getOperand(i); SDValue N01Elt = N01.getOperand(i); SDValue N10Elt = N10.getOperand(i); SDValue N11Elt = N11.getOperand(i); // TODO: Be more tolerant to undefs. if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1)); auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1)); auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1)); auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1)); if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt) return SDValue(); unsigned IdxN00 = ConstN00Elt->getZExtValue(); unsigned IdxN01 = ConstN01Elt->getZExtValue(); unsigned IdxN10 = ConstN10Elt->getZExtValue(); unsigned IdxN11 = ConstN11Elt->getZExtValue(); // Add is commutative so indices can be reordered. if (IdxN00 > IdxN10) { std::swap(IdxN00, IdxN10); std::swap(IdxN01, IdxN11); } // N0 indices must be the even element. N1 indices must be the next odd element. if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i || IdxN11 != 2 * i + 1) return SDValue(); SDValue N00In = N00Elt.getOperand(0); SDValue N01In = N01Elt.getOperand(0); SDValue N10In = N10Elt.getOperand(0); SDValue N11In = N11Elt.getOperand(0); // First time we find an input capture it. if (!In0) { In0 = N00In; In1 = N01In; } // Mul is commutative so the input vectors can be in any order. // Canonicalize to make the compares easier.
if (In0 != N00In) std::swap(N00In, N01In); if (In0 != N10In) std::swap(N10In, N11In); if (In0 != N00In || In1 != N01In || In0 != N10In || In1 != N11In) return SDValue(); } auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef<SDValue> Ops) { // Shrink by adding truncate nodes and let DAGCombine fold with the // sources. EVT InVT = Ops[0].getValueType(); assert(InVT.getScalarType() == MVT::i16 && "Unexpected scalar element type"); assert(InVT == Ops[1].getValueType() && "Operands' types mismatch"); EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, InVT.getVectorNumElements() / 2); return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]); }; return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 }, PMADDBuilder); } static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { const SDNodeFlags Flags = N->getFlags(); if (Flags.hasVectorReduction()) { if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget)) return Sad; if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget)) return MAdd; } EVT VT = N->getValueType(0); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, SDLoc(N), VT, Subtarget)) return MAdd; if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, SDLoc(N), VT, Subtarget)) return MAdd; // Try to synthesize horizontal adds from adds of shuffles. if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true) && shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) { auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef<SDValue> Ops) { return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops); }; return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, HADDBuilder); } if (SDValue V = combineIncDecVector(N, DAG)) return V; if (SDValue V = combineAddToSUBUS(N, DAG, Subtarget)) return V; return combineAddOrSubToADCOrSBB(N, DAG); } static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); EVT VT = N->getValueType(0); // PSUBUS is supported, starting from SSE2, but truncation for v8i32 // is only worth it with SSSE3 (PSHUFB). if (!(Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) && !(Subtarget.hasSSSE3() && (VT == MVT::v8i32 || VT == MVT::v8i64)) && !(Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)) && !(Subtarget.useBWIRegs() && (VT == MVT::v64i8 || VT == MVT::v32i16 || VT == MVT::v16i32 || VT == MVT::v8i64))) return SDValue(); SDValue SubusLHS, SubusRHS; // Try to find umax(a,b) - b or a - umin(a,b) patterns // they may be converted to subus(a,b). // TODO: Need to add IR canonicalization for this code. if (Op0.getOpcode() == ISD::UMAX) { SubusRHS = Op1; SDValue MaxLHS = Op0.getOperand(0); SDValue MaxRHS = Op0.getOperand(1); if (MaxLHS == Op1) SubusLHS = MaxRHS; else if (MaxRHS == Op1) SubusLHS = MaxLHS; else return SDValue(); } else if (Op1.getOpcode() == ISD::UMIN) { SubusLHS = Op0; SDValue MinLHS = Op1.getOperand(0); SDValue MinRHS = Op1.getOperand(1); if (MinLHS == Op0) SubusRHS = MinRHS; else if (MinRHS == Op0) SubusRHS = MinLHS; else return SDValue(); } else return SDValue(); auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef<SDValue> Ops) { return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops); }; // PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with // special preprocessing in some cases.
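// Sketch of that preprocessing (value names are examples): when the LHS is
// known zero-extended from 16 bits, e.g. for v8i32,
//   sub (zext v8i16 %a to v8i32), %b
// becomes
//   zext (usubsat %a, (trunc (umin %b, 65535))) to v8i32
// since clamping the RHS first preserves the unsigned-saturating result.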
if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64) return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { SubusLHS, SubusRHS }, USUBSATBuilder); // This special preprocessing can only be applied if the value was zero // extended from 16 bits, so we require the first 16 bits to be zero for // 32-bit values, or the first 48 bits for 64-bit values. KnownBits Known = DAG.computeKnownBits(SubusLHS); unsigned NumZeros = Known.countMinLeadingZeros(); if ((VT == MVT::v8i64 && NumZeros < 48) || NumZeros < 16) return SDValue(); EVT ExtType = SubusLHS.getValueType(); EVT ShrinkedType; if (VT == MVT::v8i32 || VT == MVT::v8i64) ShrinkedType = MVT::v8i16; else ShrinkedType = NumZeros >= 24 ? MVT::v16i8 : MVT::v16i16; // If SubusLHS is zero extended, truncate SubusRHS to its size: // SubusRHS = umin(0xFFF.., SubusRHS). SDValue SaturationConst = DAG.getConstant(APInt::getLowBitsSet(ExtType.getScalarSizeInBits(), ShrinkedType.getScalarSizeInBits()), SDLoc(SubusLHS), ExtType); SDValue UMin = DAG.getNode(ISD::UMIN, SDLoc(SubusLHS), ExtType, SubusRHS, SaturationConst); SDValue NewSubusLHS = DAG.getZExtOrTrunc(SubusLHS, SDLoc(SubusLHS), ShrinkedType); SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType); SDValue Psubus = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType, { NewSubusLHS, NewSubusRHS }, USUBSATBuilder); // Zero extend the result; it may be used somewhere as 32 bit. If not, the // zext and the following trunc will shrink. return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType); } static SDValue combineSub(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); // X86 can't encode an immediate LHS of a sub. See if we can push the // negation into a preceding instruction. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) { // If the RHS of the sub is a XOR with one use and a constant, invert the // immediate. Then add one to the LHS of the sub so we can turn // X-Y -> X+~Y+1, saving one register. if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR && isa<ConstantSDNode>(Op1.getOperand(1))) { APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue(); EVT VT = Op0.getValueType(); SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0), DAG.getConstant(~XorC, SDLoc(Op1), VT)); return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor, DAG.getConstant(C->getAPIntValue() + 1, SDLoc(N), VT)); } } // Try to synthesize horizontal subs from subs of shuffles.
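// e.g. for v4i32: hsub X, Y = <x0-x1, x2-x3, y0-y1, y2-y3>, so a sub whose
// operands shuffle the even and odd elements of the same inputs can be
// matched to PHSUB* / HSUBP*.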
EVT VT = N->getValueType(0); if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false) && shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) { auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef<SDValue> Ops) { return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops); }; return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, HSUBBuilder); } if (SDValue V = combineIncDecVector(N, DAG)) return V; // Try to create PSUBUS if SUB's argument is max/min if (SDValue V = combineSubToSubus(N, DAG, Subtarget)) return V; return combineAddOrSubToADCOrSBB(N, DAG); } static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = N->getSimpleValueType(0); SDLoc DL(N); if (N->getOperand(0) == N->getOperand(1)) { if (N->getOpcode() == X86ISD::PCMPEQ) return DAG.getConstant(-1, DL, VT); if (N->getOpcode() == X86ISD::PCMPGT) return DAG.getConstant(0, DL, VT); } return SDValue(); } static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); MVT OpVT = N->getSimpleValueType(0); bool IsI1Vector = OpVT.getVectorElementType() == MVT::i1; SDLoc dl(N); SDValue Vec = N->getOperand(0); SDValue SubVec = N->getOperand(1); unsigned IdxVal = N->getConstantOperandVal(2); MVT SubVecVT = SubVec.getSimpleValueType(); if (ISD::isBuildVectorAllZeros(Vec.getNode())) { // Inserting zeros into zeros is a nop. if (ISD::isBuildVectorAllZeros(SubVec.getNode())) return getZeroVector(OpVT, Subtarget, DAG, dl); // If we're inserting into a zero vector and then into a larger zero vector, // just insert into the larger zero vector directly. if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR && ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) { unsigned Idx2Val = SubVec.getConstantOperandVal(2); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, getZeroVector(OpVT, Subtarget, DAG, dl), SubVec.getOperand(1), DAG.getIntPtrConstant(IdxVal + Idx2Val, dl)); } // If we're inserting into a zero vector and our input was extracted from an // insert into a zero vector of the same type and the extraction was at // least as large as the original insertion. Just insert the original // subvector into a zero vector. if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 && SubVec.getConstantOperandVal(1) == 0 && SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) { SDValue Ins = SubVec.getOperand(0); if (Ins.getConstantOperandVal(2) == 0 && ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) && Ins.getOperand(1).getValueSizeInBits() <= SubVecVT.getSizeInBits()) return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, getZeroVector(OpVT, Subtarget, DAG, dl), Ins.getOperand(1), N->getOperand(2)); } // If we're inserting a bitcast into zeros, rewrite the insert and move the // bitcast to the other side. This helps with detecting zero extending // during isel. // TODO: Is this useful for other indices than 0?
if (!IsI1Vector && SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) { MVT CastVT = SubVec.getOperand(0).getSimpleValueType(); unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits(); MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems); SDValue Insert = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, DAG.getBitcast(NewVT, Vec), SubVec.getOperand(0), N->getOperand(2)); return DAG.getBitcast(OpVT, Insert); } } // Stop here if this is an i1 vector. if (IsI1Vector) return SDValue(); // If this is an insert of an extract, combine to a shuffle. Don't do this // if the insert or extract can be represented with a subregister operation. if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && SubVec.getOperand(0).getSimpleValueType() == OpVT && (IdxVal != 0 || !Vec.isUndef())) { int ExtIdxVal = SubVec.getConstantOperandVal(1); if (ExtIdxVal != 0) { int VecNumElts = OpVT.getVectorNumElements(); int SubVecNumElts = SubVecVT.getVectorNumElements(); SmallVector<int, 64> Mask(VecNumElts); // First create an identity shuffle mask. for (int i = 0; i != VecNumElts; ++i) Mask[i] = i; // Now insert the extracted portion. for (int i = 0; i != SubVecNumElts; ++i) Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts; return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask); } } // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte // load: // (insert_subvector (insert_subvector undef, (load16 addr), 0), // (load16 addr + 16), Elts/2) // --> load32 addr // or: // (insert_subvector (insert_subvector undef, (load32 addr), 0), // (load32 addr + 32), Elts/2) // --> load64 addr // or a 16-byte or 32-byte broadcast: // (insert_subvector (insert_subvector undef, (load16 addr), 0), // (load16 addr), Elts/2) // --> X86SubVBroadcast(load16 addr) // or: // (insert_subvector (insert_subvector undef, (load32 addr), 0), // (load32 addr), Elts/2) // --> X86SubVBroadcast(load32 addr) if ((IdxVal == OpVT.getVectorNumElements() / 2) && Vec.getOpcode() == ISD::INSERT_SUBVECTOR && OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) { if (isNullConstant(Vec.getOperand(2))) { SDValue SubVec2 = Vec.getOperand(1); // If needed, look through bitcasts to get to the load. if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) { bool Fast; unsigned Alignment = FirstLd->getAlignment(); unsigned AS = FirstLd->getAddressSpace(); const X86TargetLowering *TLI = Subtarget.getTargetLowering(); if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), OpVT, AS, Alignment, &Fast) && Fast) { SDValue Ops[] = {SubVec2, SubVec}; if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, Subtarget, false)) return Ld; } } // If lower/upper loads are the same and there's no other use of the lower // load, then splat the loaded value with a broadcast. if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && Vec.hasOneUse()) return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec); // If this is subv_broadcast insert into both halves, use a larger // subv_broadcast. if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec.getOperand(0)); // If we're inserting all zeros into the upper half, change this to // an insert into an all zeros vector. We will match this to a move // with implicit upper bit zeroing during isel.
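// e.g. (insert_subvector (insert_subvector undef, X, 0), zeros, N/2)
//        --> (insert_subvector zeros, X, 0)
// since, for example, a VEX-encoded 128-bit move of X already zeros the
// upper lanes of the destination register.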
if (ISD::isBuildVectorAllZeros(SubVec.getNode())) return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, getZeroVector(OpVT, Subtarget, DAG, dl), SubVec2, Vec.getOperand(2)); // If we are inserting into both halves of the vector, the starting // vector should be undef. If it isn't, make it so. Only do this if the // early insert has no other uses. // TODO: Should this be a generic DAG combine? if (!Vec.getOperand(0).isUndef() && Vec.hasOneUse()) { Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT), SubVec2, Vec.getOperand(2)); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec, N->getOperand(2)); } } } return SDValue(); } static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { // For AVX1 only, if we are extracting from a 256-bit and+not (which will // eventually get combined/lowered into ANDNP) with a concatenated operand, // split the 'and' into 128-bit ops to avoid the concatenate and extract. // We let generic combining take over from there to simplify the // insert/extract and 'not'. // This pattern emerges during AVX1 legalization. We handle it before lowering // to avoid complications like splitting constant vector loads. // Capture the original wide type in the likely case that we need to bitcast // back to this type. EVT VT = N->getValueType(0); EVT WideVecVT = N->getOperand(0).getValueType(); SDValue WideVec = peekThroughBitcasts(N->getOperand(0)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && TLI.isTypeLegal(WideVecVT) && WideVecVT.getSizeInBits() == 256 && WideVec.getOpcode() == ISD::AND) { auto isConcatenatedNot = [] (SDValue V) { V = peekThroughBitcasts(V); if (!isBitwiseNot(V)) return false; SDValue NotOp = V->getOperand(0); return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS; }; if (isConcatenatedNot(WideVec.getOperand(0)) || isConcatenatedNot(WideVec.getOperand(1))) { // extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1 SDValue Concat = split256IntArith(WideVec, DAG); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, DAG.getBitcast(WideVecVT, Concat), N->getOperand(1)); } } if (DCI.isBeforeLegalizeOps()) return SDValue(); MVT OpVT = N->getSimpleValueType(0); SDValue InVec = N->getOperand(0); unsigned IdxVal = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); if (ISD::isBuildVectorAllZeros(InVec.getNode())) return getZeroVector(OpVT, Subtarget, DAG, SDLoc(N)); if (ISD::isBuildVectorAllOnes(InVec.getNode())) { if (OpVT.getScalarType() == MVT::i1) return DAG.getConstant(1, SDLoc(N), OpVT); return getOnesVector(OpVT, DAG, SDLoc(N)); } if (InVec.getOpcode() == ISD::BUILD_VECTOR) return DAG.getBuildVector( OpVT, SDLoc(N), InVec.getNode()->ops().slice(IdxVal, OpVT.getVectorNumElements())); // If we're extracting the lowest subvector and we're the only user, // we may be able to perform this with a smaller vector width. if (IdxVal == 0 && InVec.hasOneUse()) { unsigned InOpcode = InVec.getOpcode(); if (OpVT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) { // v2f64 CVTDQ2PD(v4i32). if (InOpcode == ISD::SINT_TO_FP && InVec.getOperand(0).getValueType() == MVT::v4i32) { return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), OpVT, InVec.getOperand(0)); } // v2f64 CVTPS2PD(v4f32).
if (InOpcode == ISD::FP_EXTEND && InVec.getOperand(0).getValueType() == MVT::v4f32) { return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), OpVT, InVec.getOperand(0)); } } if ((InOpcode == ISD::ZERO_EXTEND || InOpcode == ISD::SIGN_EXTEND) && OpVT.is128BitVector() && InVec.getOperand(0).getSimpleValueType().is128BitVector()) { unsigned ExtOp = InOpcode == ISD::ZERO_EXTEND ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::SIGN_EXTEND_VECTOR_INREG; return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0)); } if ((InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG || InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) && OpVT.is128BitVector() && InVec.getOperand(0).getSimpleValueType().is128BitVector()) { return DAG.getNode(InOpcode, SDLoc(N), OpVT, InVec.getOperand(0)); } if (InOpcode == ISD::BITCAST) { // TODO - do this for target shuffles in general. SDValue InVecBC = peekThroughOneUseBitcasts(InVec); if (InVecBC.getOpcode() == X86ISD::PSHUFB && OpVT.is128BitVector()) { SDLoc DL(N); SDValue SubPSHUFB = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, extract128BitVector(InVecBC.getOperand(0), 0, DAG, DL), extract128BitVector(InVecBC.getOperand(1), 0, DAG, DL)); return DAG.getBitcast(OpVT, SubPSHUFB); } } } return SDValue(); } static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDValue Src = N->getOperand(0); // If this is a scalar to vector to v1i1 from an AND with 1, bypass the and. // This occurs frequently in our masked scalar intrinsic code and our // floating point select lowering with AVX512. // TODO: SimplifyDemandedBits instead? if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse()) if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1))) if (C->getAPIntValue().isOneValue()) return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1, Src.getOperand(0)); // Combine scalar_to_vector of an extract_vector_elt into an extract_subvec. if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() && Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1) if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1))) if (C->isNullValue()) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, Src.getOperand(0), Src.getOperand(1)); return SDValue(); } // Simplify PMULDQ and PMULUDQ operations. static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); // Canonicalize constant to RHS. if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) && !DAG.isConstantIntBuildVectorOrConstantInt(RHS)) return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS); // Multiply by zero. if (ISD::isBuildVectorAllZeros(RHS.getNode())) return RHS; // Aggressively peek through ops to get at the demanded low bits. APInt DemandedMask = APInt::getLowBitsSet(64, 32); SDValue DemandedLHS = DAG.GetDemandedBits(LHS, DemandedMask); SDValue DemandedRHS = DAG.GetDemandedBits(RHS, DemandedMask); if (DemandedLHS || DemandedRHS) return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), DemandedLHS ? DemandedLHS : LHS, DemandedRHS ? DemandedRHS : RHS); // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
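// e.g. pmuludq <a0,a1 x i64>, <b0,b1 x i64>
//        = <zext(lo32(a0)) * zext(lo32(b0)), zext(lo32(a1)) * zext(lo32(b1))>
// so only the low 32 bits of each input element are demanded below.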
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI)) return SDValue(N, 0); return SDValue(); } SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { default: break; case ISD::SCALAR_TO_VECTOR: return combineScalarToVector(N, DAG); case ISD::EXTRACT_VECTOR_ELT: case X86ISD::PEXTRW: case X86ISD::PEXTRB: return combineExtractVectorElt(N, DAG, DCI, Subtarget); case ISD::INSERT_SUBVECTOR: return combineInsertSubvector(N, DAG, DCI, Subtarget); case ISD::EXTRACT_SUBVECTOR: return combineExtractSubvector(N, DAG, DCI, Subtarget); case ISD::VSELECT: case ISD::SELECT: case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget); case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); case X86ISD::CMP: return combineCMP(N, DAG); case ISD::ADD: return combineAdd(N, DAG, Subtarget); case ISD::SUB: return combineSub(N, DAG, Subtarget); case X86ISD::SBB: return combineSBB(N, DAG); case X86ISD::ADC: return combineADC(N, DAG, DCI); case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget); case ISD::SHL: case ISD::SRA: case ISD::SRL: return combineShift(N, DAG, DCI, Subtarget); case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); case X86ISD::BEXTR: return combineBEXTR(N, DAG, DCI, Subtarget); case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget); case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); case ISD::STORE: return combineStore(N, DAG, Subtarget); case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget); case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget); case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget); case ISD::FADD: case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); case ISD::FNEG: return combineFneg(N, DAG, Subtarget); case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget); case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return combineFOr(N, DAG, Subtarget); case X86ISD::FMIN: case X86ISD::FMAX: return combineFMinFMax(N, DAG); case ISD::FMINNUM: case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget); case X86ISD::CVTSI2P: case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI); case X86ISD::BT: return combineBT(N, DAG, DCI); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget); case ISD::SETCC: return combineSetCC(N, DAG, Subtarget); case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget); case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget); case X86ISD::PACKSS: case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget); case X86ISD::VSHL: case X86ISD::VSRA: case X86ISD::VSRL: return combineVectorShiftVar(N, DAG, DCI, Subtarget); case X86ISD::VSHLI: case X86ISD::VSRAI: case X86ISD::VSRLI: return combineVectorShiftImm(N, DAG, DCI, Subtarget); case X86ISD::PINSRB: case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target 
specific shuffles case X86ISD::INSERTPS: case X86ISD::EXTRQI: case X86ISD::INSERTQI: case X86ISD::PALIGNR: case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: case X86ISD::BLENDI: case X86ISD::UNPCKH: case X86ISD::UNPCKL: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFB: case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::MOVSHDUP: case X86ISD::MOVSLDUP: case X86ISD::MOVDDUP: case X86ISD::MOVSS: case X86ISD::MOVSD: case X86ISD::VBROADCAST: case X86ISD::VPPERM: case X86ISD::VPERMI: case X86ISD::VPERMV: case X86ISD::VPERMV3: case X86ISD::VPERMIL2: case X86ISD::VPERMILPI: case X86ISD::VPERMILPV: case X86ISD::VPERM2X128: case X86ISD::SHUF128: case X86ISD::VZEXT_MOVL: case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI, Subtarget); case X86ISD::FMADD_RND: case X86ISD::FMSUB: case X86ISD::FMSUB_RND: case X86ISD::FNMADD: case X86ISD::FNMADD_RND: case X86ISD::FNMSUB: case X86ISD::FNMSUB_RND: case ISD::FMA: return combineFMA(N, DAG, Subtarget); case X86ISD::FMADDSUB_RND: case X86ISD::FMSUBADD_RND: case X86ISD::FMADDSUB: case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, Subtarget); case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI); case X86ISD::MGATHER: case X86ISD::MSCATTER: case ISD::MGATHER: case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI, Subtarget); case X86ISD::PCMPEQ: case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget); case X86ISD::PMULDQ: case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI); } return SDValue(); } bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { if (!isTypeLegal(VT)) return false; // There are no vXi8 shifts. if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8) return false; // 8-bit multiply is probably not much cheaper than 32-bit multiply, and // we have specializations to turn 32-bit multiply into LEA or other ops. // Also, see the comment in "IsDesirableToPromoteOp" - where we additionally // check for a constant operand to the multiply. if (Opc == ISD::MUL && VT == MVT::i8) return false; // i16 instruction encodings are longer and some i16 instructions are slow, // so those are not desirable. if (VT == MVT::i16) { switch (Opc) { default: break; case ISD::LOAD: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: case ISD::SHL: case ISD::SRL: case ISD::SUB: case ISD::ADD: case ISD::MUL: case ISD::AND: case ISD::OR: case ISD::XOR: return false; } } // Any legal type not explicitly accounted for above here is desirable. return true; } SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl, SDValue Value, SDValue Addr, SelectionDAG &DAG) const { const Module *M = DAG.getMachineFunction().getMMI().getModule(); Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); if (IsCFProtectionSupported) { // In case control-flow branch protection is enabled, we need to add // notrack prefix to the indirect branch. // In order to do that we create NT_BRIND SDNode. // Upon ISEL, the pattern will convert it to jmp with NoTrack prefix. return DAG.getNode(X86ISD::NT_BRIND, dl, MVT::Other, Value, Addr); } return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG); } bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { EVT VT = Op.getValueType(); bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL && isa<ConstantSDNode>(Op.getOperand(1)); // i16 is legal, but undesirable since i16 instruction encodings are longer // and some i16 instructions are slow.
// 8-bit multiply-by-constant can usually be expanded to something cheaper // using LEA and/or other ALU ops. if (VT != MVT::i16 && !Is8BitMulByConstant) return false; auto IsFoldableRMW = [](SDValue Load, SDValue Op) { if (!Op.hasOneUse()) return false; SDNode *User = *Op->use_begin(); if (!ISD::isNormalStore(User)) return false; auto *Ld = cast<LoadSDNode>(Load); auto *St = cast<StoreSDNode>(User); return Ld->getBasePtr() == St->getBasePtr(); }; auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) { if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD) return false; if (!Op.hasOneUse()) return false; SDNode *User = *Op->use_begin(); if (User->getOpcode() != ISD::ATOMIC_STORE) return false; auto *Ld = cast<AtomicSDNode>(Load); auto *St = cast<AtomicSDNode>(User); return Ld->getBasePtr() == St->getBasePtr(); }; bool Commute = false; switch (Op.getOpcode()) { default: return false; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: break; case ISD::SHL: case ISD::SRL: { SDValue N0 = Op.getOperand(0); // Look out for (store (shl (load), x)). if (MayFoldLoad(N0) && IsFoldableRMW(N0, Op)) return false; break; } case ISD::ADD: case ISD::MUL: case ISD::AND: case ISD::OR: case ISD::XOR: Commute = true; LLVM_FALLTHROUGH; case ISD::SUB: { SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); // Avoid disabling potential load folding opportunities. if (MayFoldLoad(N1) && (!Commute || !isa<ConstantSDNode>(N0) || (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op)))) return false; if (MayFoldLoad(N0) && ((Commute && !isa<ConstantSDNode>(N1)) || (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op)))) return false; if (IsFoldableAtomicRMW(N0, Op) || (Commute && IsFoldableAtomicRMW(N1, Op))) return false; } } PVT = MVT::i32; return true; } bool X86TargetLowering:: isDesirableToCombineBuildVectorToShuffleTruncate( ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const { assert(SrcVT.getVectorNumElements() == ShuffleMask.size() && "Element count mismatch"); assert( Subtarget.getTargetLowering()->isShuffleMaskLegal(ShuffleMask, SrcVT) && "Shuffle Mask expected to be legal"); // For 32-bit elements VPERMD is better than shuffle+truncate. // TODO: After we improve lowerBuildVector, add an exception for VPERMW. if (SrcVT.getScalarSizeInBits() == 32 || !Subtarget.hasAVX2()) return false; if (is128BitLaneCrossingShuffleMask(SrcVT.getSimpleVT(), ShuffleMask)) return false; return true; } //===----------------------------------------------------------------------===// // X86 Inline Assembly Support //===----------------------------------------------------------------------===// // Helper to match a string separated by whitespace. static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) { S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace. for (StringRef Piece : Pieces) { if (!S.startswith(Piece)) // Check if the piece matches. return false; S = S.substr(Piece.size()); StringRef::size_type Pos = S.find_first_not_of(" \t"); if (Pos == 0) // We matched a prefix.
return false; S = S.substr(Pos); } return S.empty(); } static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) { if (AsmPieces.size() == 3 || AsmPieces.size() == 4) { if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") && std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") && std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) { if (AsmPieces.size() == 3) return true; else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}")) return true; } } return false; } bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); const std::string &AsmStr = IA->getAsmString(); IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty || Ty->getBitWidth() % 16 != 0) return false; // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" SmallVector<StringRef, 4> AsmPieces; SplitString(AsmStr, AsmPieces, ";\n"); switch (AsmPieces.size()) { default: return false; case 1: // FIXME: this should verify that we are targeting a 486 or better. If not, // we will turn this bswap into something that will be lowered to logical // ops instead of emitting the bswap asm. For now, we don't support 486 or // lower so don't worry about this. // bswap $0 if (matchAsm(AsmPieces[0], {"bswap", "$0"}) || matchAsm(AsmPieces[0], {"bswapl", "$0"}) || matchAsm(AsmPieces[0], {"bswapq", "$0"}) || matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) || matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) || matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) { // No need to check constraints, nothing other than the equivalent of // "=r,0" would be valid here. return IntrinsicLowering::LowerToByteSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 if (CI->getType()->isIntegerTy(16) && IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) || matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) { AsmPieces.clear(); StringRef ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (clobbersFlagRegisters(AsmPieces)) return IntrinsicLowering::LowerToByteSwap(CI); } break; case 3: if (CI->getType()->isIntegerTy(32) && IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) && matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) && matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) { AsmPieces.clear(); StringRef ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (clobbersFlagRegisters(AsmPieces)) return IntrinsicLowering::LowerToByteSwap(CI); } if (CI->getType()->isIntegerTy(64)) { InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints(); if (Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) && matchAsm(AsmPieces[1], {"bswap", "%edx"}) && matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"})) return IntrinsicLowering::LowerToByteSwap(CI); } } break; } return false; } /// Given a constraint letter, return the type of constraint for this target.
X86TargetLowering::ConstraintType X86TargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'R': case 'q': case 'Q': case 'f': case 't': case 'u': case 'y': case 'x': case 'v': case 'Y': case 'l': case 'k': // AVX512 masking registers. return C_RegisterClass; case 'a': case 'b': case 'c': case 'd': case 'S': case 'D': case 'A': return C_Register; case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'G': case 'C': case 'e': case 'Z': return C_Other; default: break; } } else if (Constraint.size() == 2) { switch (Constraint[0]) { default: break; case 'Y': switch (Constraint[1]) { default: break; case 'z': case '0': return C_Register; case 'i': case 'm': case 'k': case 't': case '2': return C_RegisterClass; } } } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight X86TargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); LLVM_FALLTHROUGH; case 'R': case 'q': case 'Q': case 'a': case 'b': case 'c': case 'd': case 'S': case 'D': case 'A': if (CallOperandVal->getType()->isIntegerTy()) weight = CW_SpecificReg; break; case 'f': case 't': case 'u': if (type->isFloatingPointTy()) weight = CW_SpecificReg; break; case 'y': if (type->isX86_MMXTy() && Subtarget.hasMMX()) weight = CW_SpecificReg; break; case 'Y': { unsigned Size = StringRef(constraint).size(); // Pick 'i' as the next char as 'Yi' and 'Y' are synonymous, when matching 'Y' char NextChar = Size == 2 ? constraint[1] : 'i'; if (Size > 2) break; switch (NextChar) { default: return CW_Invalid; // XMM0 case 'z': case '0': if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) return CW_SpecificReg; return CW_Invalid; // Conditional OpMask regs (AVX512) case 'k': if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512()) return CW_Register; return CW_Invalid; // Any MMX reg case 'm': if (type->isX86_MMXTy() && Subtarget.hasMMX()) return weight; return CW_Invalid; // Any SSE reg when ISA >= SSE2, same as 'Y' case 'i': case 't': case '2': if (!Subtarget.hasSSE2()) return CW_Invalid; break; } // Fall through (handle "Y" constraint). LLVM_FALLTHROUGH; } case 'v': if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()) weight = CW_Register; LLVM_FALLTHROUGH; case 'x': if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) || ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX())) weight = CW_Register; break; case 'k': // Enable conditional vector operations using %k<#> registers. 
if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512()) weight = CW_Register; break; case 'I': if (ConstantInt *C = dyn_cast(info.CallOperandVal)) { if (C->getZExtValue() <= 31) weight = CW_Constant; } break; case 'J': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() <= 63) weight = CW_Constant; } break; case 'K': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f)) weight = CW_Constant; } break; case 'L': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff)) weight = CW_Constant; } break; case 'M': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() <= 3) weight = CW_Constant; } break; case 'N': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() <= 0xff) weight = CW_Constant; } break; case 'G': case 'C': if (isa(CallOperandVal)) { weight = CW_Constant; } break; case 'e': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if ((C->getSExtValue() >= -0x80000000LL) && (C->getSExtValue() <= 0x7fffffffLL)) weight = CW_Constant; } break; case 'Z': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() <= 0xffffffff) weight = CW_Constant; } break; } return weight; } /// Try to replace an X constraint, which matches anything, with another that /// has more specific requirements based on the type of the corresponding /// operand. const char *X86TargetLowering:: LowerXConstraint(EVT ConstraintVT) const { // FP X constraints get lowered to SSE1/2 registers if available, otherwise // 'f' like normal targets. if (ConstraintVT.isFloatingPoint()) { if (Subtarget.hasSSE2()) return "Y"; if (Subtarget.hasSSE1()) return "x"; } return TargetLowering::LowerXConstraint(ConstraintVT); } /// Lower the specified operand into the Ops vector. /// If it is invalid, don't add anything to Ops. void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { SDValue Result; // Only support length 1 constraints for now. 
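// For illustration, the 'I' case below accepts immediates in [0, 31], so a
// (hypothetical) use such as
//   asm("shll %1, %0" : "+r"(x) : "I"(5));
// materializes 5 as a target constant, while an out-of-range value adds
// nothing to Ops and is rejected.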
if (Constraint.length() > 1) return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; case 'I': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 31) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'J': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 63) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'K': if (ConstantSDNode *C = dyn_cast(Op)) { if (isInt<8>(C->getSExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'L': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff || (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) { Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'M': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 3) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'N': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 255) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'O': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 127) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'e': { // 32-bit signed value if (ConstantSDNode *C = dyn_cast(Op)) { if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), C->getSExtValue())) { // Widen to 64 bits here to get it sign extended. Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64); break; } // FIXME gcc accepts some relocatable values here too, but only in certain // memory models; it's complicated. } return; } case 'Z': { // 32-bit unsigned value if (ConstantSDNode *C = dyn_cast(Op)) { if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), C->getZExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } // FIXME gcc accepts some relocatable values here too, but only in certain // memory models; it's complicated. return; } case 'i': { // Literal immediates are always ok. if (ConstantSDNode *CST = dyn_cast(Op)) { // Widen to 64 bits here to get it sign extended. Result = DAG.getTargetConstant(CST->getSExtValue(), SDLoc(Op), MVT::i64); break; } // In any sort of PIC mode addresses need to be computed at runtime by // adding in a register or some sort of table lookup. These can't // be used as immediates. if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC()) return; // If we are in non-pic codegen mode, we allow the address of a global (with // an optional displacement) to be used with 'i'. GlobalAddressSDNode *GA = nullptr; int64_t Offset = 0; // Match either (GA), (GA+C), (GA+C1+C2), etc. while (1) { if ((GA = dyn_cast(Op))) { Offset += GA->getOffset(); break; } else if (Op.getOpcode() == ISD::ADD) { if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { Offset += C->getZExtValue(); Op = Op.getOperand(0); continue; } } else if (Op.getOpcode() == ISD::SUB) { if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { Offset += -C->getZExtValue(); Op = Op.getOperand(0); continue; } } // Otherwise, this isn't something we can handle, reject it. 
return; } const GlobalValue *GV = GA->getGlobal(); // If we require an extra load to get this address, as in PIC mode, we // can't accept it. if (isGlobalStubReference(Subtarget.classifyGlobalReference(GV))) return; Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), GA->getValueType(0), Offset); break; } } if (Result.getNode()) { Ops.push_back(Result); return; } return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } /// Check if \p RC is a general purpose register class. /// I.e., GR* or one of their variant. static bool isGRClass(const TargetRegisterClass &RC) { return RC.hasSuperClassEq(&X86::GR8RegClass) || RC.hasSuperClassEq(&X86::GR16RegClass) || RC.hasSuperClassEq(&X86::GR32RegClass) || RC.hasSuperClassEq(&X86::GR64RegClass) || RC.hasSuperClassEq(&X86::LOW32_ADDR_ACCESS_RBPRegClass); } /// Check if \p RC is a vector register class. /// I.e., FR* / VR* or one of their variant. static bool isFRClass(const TargetRegisterClass &RC) { return RC.hasSuperClassEq(&X86::FR32XRegClass) || RC.hasSuperClassEq(&X86::FR64XRegClass) || RC.hasSuperClassEq(&X86::VR128XRegClass) || RC.hasSuperClassEq(&X86::VR256XRegClass) || RC.hasSuperClassEq(&X86::VR512RegClass); } std::pair X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { // First, see if this is a constraint that directly corresponds to an LLVM // register class. if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { default: break; // TODO: Slight differences here in allocation order and leaving // RIP in the class. Do they matter any more here than they do // in the normal allocation? case 'k': if (Subtarget.hasAVX512()) { // Only supported in AVX512 or later. switch (VT.SimpleTy) { default: break; case MVT::i32: return std::make_pair(0U, &X86::VK32RegClass); case MVT::i16: return std::make_pair(0U, &X86::VK16RegClass); case MVT::i8: return std::make_pair(0U, &X86::VK8RegClass); case MVT::i1: return std::make_pair(0U, &X86::VK1RegClass); case MVT::i64: return std::make_pair(0U, &X86::VK64RegClass); } } break; case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. 
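// Illustrative note: in 32-bit mode "q" falls through to the a/b/c/d
// (byte-addressable) classes below, e.g. for a hypothetical
//   asm("movb %b1, %0" : "=m"(dst) : "q"(src));
// whereas in 64-bit mode any general purpose register qualifies.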
if (Subtarget.is64Bit()) { if (VT == MVT::i32 || VT == MVT::f32) return std::make_pair(0U, &X86::GR32RegClass); if (VT == MVT::i16) return std::make_pair(0U, &X86::GR16RegClass); if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8RegClass); if (VT == MVT::i64 || VT == MVT::f64) return std::make_pair(0U, &X86::GR64RegClass); break; } LLVM_FALLTHROUGH; // 32-bit fallthrough case 'Q': // Q_REGS if (VT == MVT::i32 || VT == MVT::f32) return std::make_pair(0U, &X86::GR32_ABCDRegClass); if (VT == MVT::i16) return std::make_pair(0U, &X86::GR16_ABCDRegClass); if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8_ABCD_LRegClass); if (VT == MVT::i64) return std::make_pair(0U, &X86::GR64_ABCDRegClass); break; case 'r': // GENERAL_REGS case 'l': // INDEX_REGS if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8RegClass); if (VT == MVT::i16) return std::make_pair(0U, &X86::GR16RegClass); if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit()) return std::make_pair(0U, &X86::GR32RegClass); return std::make_pair(0U, &X86::GR64RegClass); case 'R': // LEGACY_REGS if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8_NOREXRegClass); if (VT == MVT::i16) return std::make_pair(0U, &X86::GR16_NOREXRegClass); if (VT == MVT::i32 || !Subtarget.is64Bit()) return std::make_pair(0U, &X86::GR32_NOREXRegClass); return std::make_pair(0U, &X86::GR64_NOREXRegClass); case 'f': // FP Stack registers. // If SSE is enabled for this VT, use f80 to ensure the isel moves the // value to the correct fpstack register class. if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT)) return std::make_pair(0U, &X86::RFP32RegClass); if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT)) return std::make_pair(0U, &X86::RFP64RegClass); return std::make_pair(0U, &X86::RFP80RegClass); case 'y': // MMX_REGS if MMX allowed. if (!Subtarget.hasMMX()) break; return std::make_pair(0U, &X86::VR64RegClass); case 'Y': // SSE_REGS if SSE2 allowed if (!Subtarget.hasSSE2()) break; LLVM_FALLTHROUGH; case 'v': case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed if (!Subtarget.hasSSE1()) break; bool VConstraint = (Constraint[0] == 'v'); switch (VT.SimpleTy) { default: break; // Scalar SSE types. case MVT::f32: case MVT::i32: if (VConstraint && Subtarget.hasAVX512() && Subtarget.hasVLX()) return std::make_pair(0U, &X86::FR32XRegClass); return std::make_pair(0U, &X86::FR32RegClass); case MVT::f64: case MVT::i64: if (VConstraint && Subtarget.hasVLX()) return std::make_pair(0U, &X86::FR64XRegClass); return std::make_pair(0U, &X86::FR64RegClass); // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. // Vector types. case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: if (VConstraint && Subtarget.hasVLX()) return std::make_pair(0U, &X86::VR128XRegClass); return std::make_pair(0U, &X86::VR128RegClass); // AVX types. 
case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32: case MVT::v4f64: if (VConstraint && Subtarget.hasVLX()) return std::make_pair(0U, &X86::VR256XRegClass); return std::make_pair(0U, &X86::VR256RegClass); case MVT::v8f64: case MVT::v16f32: case MVT::v16i32: case MVT::v8i64: return std::make_pair(0U, &X86::VR512RegClass); } break; } } else if (Constraint.size() == 2 && Constraint[0] == 'Y') { switch (Constraint[1]) { default: break; case 'i': case 't': case '2': return getRegForInlineAsmConstraint(TRI, "Y", VT); case 'm': if (!Subtarget.hasMMX()) break; return std::make_pair(0U, &X86::VR64RegClass); case 'z': case '0': if (!Subtarget.hasSSE1()) break; return std::make_pair(X86::XMM0, &X86::VR128RegClass); case 'k': // This register class doesn't allocate k0 for masked vector operation. if (Subtarget.hasAVX512()) { // Only supported in AVX512. switch (VT.SimpleTy) { default: break; case MVT::i32: return std::make_pair(0U, &X86::VK32WMRegClass); case MVT::i16: return std::make_pair(0U, &X86::VK16WMRegClass); case MVT::i8: return std::make_pair(0U, &X86::VK8WMRegClass); case MVT::i1: return std::make_pair(0U, &X86::VK1WMRegClass); case MVT::i64: return std::make_pair(0U, &X86::VK64WMRegClass); } } break; } } // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. std::pair Res; Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // Not found as a standard register? if (!Res.second) { // Map st(0) -> st(7) -> ST0 if (Constraint.size() == 7 && Constraint[0] == '{' && tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' && Constraint[3] == '(' && (Constraint[4] >= '0' && Constraint[4] <= '7') && Constraint[5] == ')' && Constraint[6] == '}') { // st(7) is not allocatable and thus not a member of RFP80. Return // singleton class in cases where we have a reference to it. if (Constraint[4] == '7') return std::make_pair(X86::FP7, &X86::RFP80_7RegClass); return std::make_pair(X86::FP0 + Constraint[4] - '0', &X86::RFP80RegClass); } // GCC allows "st(0)" to be called just plain "st". if (StringRef("{st}").equals_lower(Constraint)) return std::make_pair(X86::FP0, &X86::RFP80RegClass); // flags -> EFLAGS if (StringRef("{flags}").equals_lower(Constraint)) return std::make_pair(X86::EFLAGS, &X86::CCRRegClass); // dirflag -> DF if (StringRef("{dirflag}").equals_lower(Constraint)) return std::make_pair(X86::DF, &X86::DFCCRRegClass); // fpsr -> FPSW if (StringRef("{fpsr}").equals_lower(Constraint)) return std::make_pair(X86::FPSW, &X86::FPCCRRegClass); // 'A' means [ER]AX + [ER]DX. if (Constraint == "A") { if (Subtarget.is64Bit()) return std::make_pair(X86::RAX, &X86::GR64_ADRegClass); assert((Subtarget.is32Bit() || Subtarget.is16Bit()) && "Expecting 64, 32 or 16 bit subtarget"); return std::make_pair(X86::EAX, &X86::GR32_ADRegClass); } return Res; } // Make sure it isn't a register that requires 64-bit mode. if (!Subtarget.is64Bit() && (isFRClass(*Res.second) || isGRClass(*Res.second)) && TRI->getEncodingValue(Res.first) >= 8) { // Register requires REX prefix, but we're in 32-bit mode. return std::make_pair(0, nullptr); } // Make sure it isn't a register that requires AVX512. if (!Subtarget.hasAVX512() && isFRClass(*Res.second) && TRI->getEncodingValue(Res.first) & 0x10) { // Register requires EVEX prefix. return std::make_pair(0, nullptr); } // Otherwise, check to see if this is a register class of the wrong value // type. 
For example, we want to map "{ax},i32" -> {eax}, we don't want it to // turn into {ax},{dx}. // MVT::Other is used to specify clobber names. if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other) return Res; // Correct type already, nothing to do. // Get a matching integer of the correct size. i.e. "ax" with MVT::32 should // return "eax". This should even work for things like getting 64bit integer // registers when given an f64 type. const TargetRegisterClass *Class = Res.second; // The generic code will match the first register class that contains the // given register. Thus, based on the ordering of the tablegened file, // the "plain" GR classes might not come first. // Therefore, use a helper method. if (isGRClass(*Class)) { unsigned Size = VT.getSizeInBits(); if (Size == 1) Size = 8; unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size); if (DestReg > 0) { bool is64Bit = Subtarget.is64Bit(); const TargetRegisterClass *RC = Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass) : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass) : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass) : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr) : nullptr; if (Size == 64 && !is64Bit) { // Model GCC's behavior here and select a fixed pair of 32-bit // registers. switch (Res.first) { case X86::EAX: return std::make_pair(X86::EAX, &X86::GR32_ADRegClass); case X86::EDX: return std::make_pair(X86::EDX, &X86::GR32_DCRegClass); case X86::ECX: return std::make_pair(X86::ECX, &X86::GR32_CBRegClass); case X86::EBX: return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass); case X86::ESI: return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass); case X86::EDI: return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass); case X86::EBP: return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass); default: return std::make_pair(0, nullptr); } } if (RC && RC->contains(DestReg)) return std::make_pair(DestReg, RC); return Res; } // No register found/type mismatch. return std::make_pair(0, nullptr); } else if (isFRClass(*Class)) { // Handle references to XMM physical registers that got mapped into the // wrong class. This can happen with constraints like {xmm0} where the // target independent register mapper will just pick the first match it can // find, ignoring the required type. // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. if (VT == MVT::f32 || VT == MVT::i32) Res.second = &X86::FR32RegClass; else if (VT == MVT::f64 || VT == MVT::i64) Res.second = &X86::FR64RegClass; else if (TRI->isTypeLegalForClass(X86::VR128RegClass, VT)) Res.second = &X86::VR128RegClass; else if (TRI->isTypeLegalForClass(X86::VR256RegClass, VT)) Res.second = &X86::VR256RegClass; else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT)) Res.second = &X86::VR512RegClass; else { // Type mismatch and not a clobber: Return an error; Res.first = 0; Res.second = nullptr; } } return Res; } int X86TargetLowering::getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { // Scaling factors are not free at all. // An indexed folded instruction, i.e., inst (reg1, reg2, scale), // will take 2 allocations in the out of order engine instead of 1 // for plain addressing mode, i.e. inst (reg1). 
// E.g., // vaddps (%rsi,%rdx), %ymm0, %ymm1 // Requires two allocations (one for the load, one for the computation) // whereas: // vaddps (%rsi), %ymm0, %ymm1 // Requires just 1 allocation, i.e., freeing allocations for other operations // and having less micro operations to execute. // // For some X86 architectures, this is even worse because for instance for // stores, the complex addressing mode forces the instruction to use the // "load" ports instead of the dedicated "store" port. // E.g., on Haswell: // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3. // vmovaps %ymm1, (%r8) can use port 2, 3, or 7. if (isLegalAddressingMode(DL, AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 // as soon as we use a second register. return AM.Scale != 0; return -1; } bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { // Integer division on x86 is expensive. However, when aggressively optimizing // for code size, we prefer to use a div instruction, as it is usually smaller // than the alternative sequence. // The exception to this is vector division. Since x86 doesn't have vector // integer division, leaving the division as-is is a loss even in terms of // size, because it will have to be scalarized, while the alternative code // sequence can be performed in vector form. bool OptSize = Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize); return OptSize && !VT.isVector(); } void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { if (!Subtarget.is64Bit()) return; // Update IsSplitCSR in X86MachineFunctionInfo. X86MachineFunctionInfo *AFI = Entry->getParent()->getInfo(); AFI->setIsSplitCSR(true); } void X86TargetLowering::insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const { const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); if (!IStart) return; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (X86::GR64RegClass.contains(*I)) RC = &X86::GR64RegClass; else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); unsigned NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. assert(Entry->getParent()->getFunction().hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } bool X86TargetLowering::supportSwiftError() const { return Subtarget.is64Bit(); } /// Returns the name of the symbol used to emit stack probes or the empty /// string if not applicable. StringRef X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const { // If the function specifically requests stack probes, emit them. 
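// For illustration, IR such as (the probe symbol name is hypothetical):
//   define void @f() "probe-stack"="__my_probe" { ... }
// makes this hook return "__my_probe" regardless of the platform defaults
// handled below.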
if (MF.getFunction().hasFnAttribute("probe-stack")) return MF.getFunction().getFnAttribute("probe-stack").getValueAsString(); // Generally, if we aren't on Windows, the platform ABI does not include // support for stack probes, so don't emit them. if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO() || MF.getFunction().hasFnAttribute("no-stack-arg-probe")) return ""; // We need a stack probe to conform to the Windows ABI. Choose the right // symbol. if (Subtarget.is64Bit()) return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk"; return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk"; } Index: projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (revision 344547) +++ projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (revision 344548) @@ -1,3537 +1,3538 @@ //===- InstructionCombining.cpp - Combine multiple instructions -----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // InstructionCombining - Combine instructions to form fewer, simple // instructions. This pass does not modify the CFG. This pass is where // algebraic simplification happens. // // This pass combines things like: // %Y = add i32 %X, 1 // %Z = add i32 %Y, 1 // into: // %Z = add i32 %X, 2 // // This is a simple worklist driven algorithm. // // This pass guarantees that the following canonicalizations are performed on // the program: // 1. If a binary operator has a constant operand, it is moved to the RHS // 2. Bitwise operators with constant operands are always grouped so that // shifts are performed first, then or's, then and's, then xor's. // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible // 4. All cmp instructions on boolean values are replaced with logical ops // 5. add X, X is represented as (X*2) => (X << 1) // 6. Multiplies with a power-of-two constant argument are transformed into // shifts. // ... etc. 
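// For instance, rules 1 and 6 together canonicalize
//    %r = mul i32 8, %X
// into
//    %r = shl i32 %X, 3
// by moving the constant to the RHS and then strength-reducing the
// power-of-two multiply into a shift.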
// //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" #include "llvm-c/Initialization.h" #include "llvm-c/Transforms/InstCombine.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/Utils/Local.h" #include #include #include #include #include #include using namespace llvm; using namespace llvm::PatternMatch; #define DEBUG_TYPE "instcombine" STATISTIC(NumCombined , "Number of insts combined"); STATISTIC(NumConstProp, "Number of constant folds"); STATISTIC(NumDeadInst , "Number of dead inst eliminated"); STATISTIC(NumSunkInst , "Number of instructions sunk"); STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc , "Number of reassociations"); DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited"); static cl::opt EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true)); static cl::opt EnableExpensiveCombines("expensive-combines", cl::desc("Enable expensive instruction combines")); static cl::opt MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine")); // FIXME: Remove this flag when it is no longer necessary to convert // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false // increases variable availability at the cost of accuracy. 
Variables that // cannot be promoted by mem2reg or SROA will be described as living in memory // for their entire lifetime. However, passes like DSE and instcombine can // delete stores to the alloca, leading to misleading and inaccurate debug // information. This flag can be removed when those passes are fixed. static cl::opt ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true)); Value *InstCombiner::EmitGEPOffset(User *GEP) { return llvm::EmitGEPOffset(&Builder, DL, GEP); } /// Return true if it is desirable to convert an integer computation from a /// given bit width to a new bit width. /// We don't want to convert from a legal to an illegal type or from a smaller /// to a larger illegal type. A width of '1' is always treated as a legal type /// because i1 is a fundamental type in IR, and there are many specialized /// optimizations for i1 types. Widths of 8, 16 or 32 are equally treated as /// legal to convert to, in order to open up more combining opportunities. /// NOTE: this treats i8, i16 and i32 specially, due to them being so common /// from frontend languages. bool InstCombiner::shouldChangeType(unsigned FromWidth, unsigned ToWidth) const { bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth); bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth); // Convert to widths of 8, 16 or 32 even if they are not legal types. Only // shrink types, to prevent infinite loops. if (ToWidth < FromWidth && (ToWidth == 8 || ToWidth == 16 || ToWidth == 32)) return true; // If this is a legal integer from type, and the result would be an illegal // type, don't do the transformation. if (FromLegal && !ToLegal) return false; // Otherwise, if both are illegal, do not increase the size of the result. We // do allow things like i160 -> i64, but not i64 -> i160. if (!FromLegal && !ToLegal && ToWidth > FromWidth) return false; return true; } /// Return true if it is desirable to convert a computation from 'From' to 'To'. /// We don't want to convert from a legal to an illegal type or from a smaller /// to a larger illegal type. i1 is always treated as a legal type because it is /// a fundamental type in IR, and there are many specialized optimizations for /// i1 types. bool InstCombiner::shouldChangeType(Type *From, Type *To) const { // TODO: This could be extended to allow vectors. Datalayout changes might be // needed to properly support that. if (!From->isIntegerTy() || !To->isIntegerTy()) return false; unsigned FromWidth = From->getPrimitiveSizeInBits(); unsigned ToWidth = To->getPrimitiveSizeInBits(); return shouldChangeType(FromWidth, ToWidth); } // Return true, if No Signed Wrap should be maintained for I. // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", // where both B and C should be ConstantInts, results in a constant that does // not overflow. This function only handles the Add and Sub opcodes. For // all other opcodes, the function conservatively returns false. static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { OverflowingBinaryOperator *OBO = dyn_cast(&I); if (!OBO || !OBO->hasNoSignedWrap()) return false; // We reason about Add and Sub Only. 
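// Illustrative example: when reassociating
//    %t = add nsw i32 %X, 3
//    %r = add nsw i32 %t, 4
// into "add nsw i32 %X, 7", this returns true for B = 3, C = 4 because
// 3 + 4 does not overflow, so the nsw flag may be kept.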
Instruction::BinaryOps Opcode = I.getOpcode(); if (Opcode != Instruction::Add && Opcode != Instruction::Sub) return false; const APInt *BVal, *CVal; if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal))) return false; bool Overflow = false; if (Opcode == Instruction::Add) (void)BVal->sadd_ov(*CVal, Overflow); else (void)BVal->ssub_ov(*CVal, Overflow); return !Overflow; } /// Conservatively clears subclassOptionalData after a reassociation or /// commutation. We preserve fast-math flags when applicable as they can be /// preserved. static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { FPMathOperator *FPMO = dyn_cast(&I); if (!FPMO) { I.clearSubclassOptionalData(); return; } FastMathFlags FMF = I.getFastMathFlags(); I.clearSubclassOptionalData(); I.setFastMathFlags(FMF); } /// Combine constant operands of associative operations either before or after a /// cast to eliminate one of the associative operations: /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2))) /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2)) static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1) { auto *Cast = dyn_cast(BinOp1->getOperand(0)); if (!Cast || !Cast->hasOneUse()) return false; // TODO: Enhance logic for other casts and remove this check. auto CastOpcode = Cast->getOpcode(); if (CastOpcode != Instruction::ZExt) return false; // TODO: Enhance logic for other BinOps and remove this check. if (!BinOp1->isBitwiseLogicOp()) return false; auto AssocOpcode = BinOp1->getOpcode(); auto *BinOp2 = dyn_cast(Cast->getOperand(0)); if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode) return false; Constant *C1, *C2; if (!match(BinOp1->getOperand(1), m_Constant(C1)) || !match(BinOp2->getOperand(1), m_Constant(C2))) return false; // TODO: This assumes a zext cast. // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2 // to the destination type might lose bits. // Fold the constants together in the destination type: // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC) Type *DestTy = C1->getType(); Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy); Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2); Cast->setOperand(0, BinOp2->getOperand(0)); BinOp1->setOperand(1, FoldedC); return true; } /// This performs a few simplifications for operators that are associative or /// commutative: /// /// Commutative operators: /// /// 1. Order operands such that they are listed from right (least complex) to /// left (most complex). This puts constants before unary operators before /// binary operators. /// /// Associative operators: /// /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. /// /// Associative and commutative operators: /// /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" /// if C1 and C2 are constants. bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Instruction::BinaryOps Opcode = I.getOpcode(); bool Changed = false; do { // Order operands such that they are listed from right (least complex) to // left (most complex). This puts constants before unary operators before // binary operators. 
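// E.g. "add i32 7, %x" is swapped to "add i32 %x, 7", so later pattern
// matching only needs to look for constants on the RHS.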
if (I.isCommutative() && getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) Changed = !I.swapOperands(); BinaryOperator *Op0 = dyn_cast(I.getOperand(0)); BinaryOperator *Op1 = dyn_cast(I.getOperand(1)); if (I.isAssociative()) { // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. if (Op0 && Op0->getOpcode() == Opcode) { Value *A = Op0->getOperand(0); Value *B = Op0->getOperand(1); Value *C = I.getOperand(1); // Does "B op C" simplify? if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "A op V". I.setOperand(0, A); I.setOperand(1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. if (MaintainNoSignedWrap(I, B, C) && (!Op0 || (isa(Op0) && Op0->hasNoSignedWrap()))) { // Note: this is only valid because SimplifyBinOp doesn't look at // the operands to Op0. I.clearSubclassOptionalData(); I.setHasNoSignedWrap(true); } else { ClearSubclassDataAfterReassociation(I); } Changed = true; ++NumReassoc; continue; } } // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. if (Op1 && Op1->getOpcode() == Opcode) { Value *A = I.getOperand(0); Value *B = Op1->getOperand(0); Value *C = Op1->getOperand(1); // Does "A op B" simplify? if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op C". I.setOperand(0, V); I.setOperand(1, C); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; } } } if (I.isAssociative() && I.isCommutative()) { if (simplifyAssocCastAssoc(&I)) { Changed = true; ++NumReassoc; continue; } // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. if (Op0 && Op0->getOpcode() == Opcode) { Value *A = Op0->getOperand(0); Value *B = Op0->getOperand(1); Value *C = I.getOperand(1); // Does "C op A" simplify? if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op B". I.setOperand(0, V); I.setOperand(1, B); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; } } // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. if (Op1 && Op1->getOpcode() == Opcode) { Value *A = I.getOperand(0); Value *B = Op1->getOperand(0); Value *C = Op1->getOperand(1); // Does "C op A" simplify? if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "B op V". I.setOperand(0, B); I.setOperand(1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; } } // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" // if C1 and C2 are constants. 
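// Illustrative example, assuming each inner add has a single use:
//    %t0 = add i32 %x, 3
//    %t1 = add i32 %y, 5
//    %r  = add i32 %t0, %t1
// becomes "%r = add i32 %s, 8" with "%s = add i32 %x, %y".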
Value *A, *B; Constant *C1, *C2; if (Op0 && Op1 && Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) && match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) { BinaryOperator *NewBO = BinaryOperator::Create(Opcode, A, B); if (isa(NewBO)) { FastMathFlags Flags = I.getFastMathFlags(); Flags &= Op0->getFastMathFlags(); Flags &= Op1->getFastMathFlags(); NewBO->setFastMathFlags(Flags); } InsertNewInstWith(NewBO, I); NewBO->takeName(Op1); I.setOperand(0, NewBO); I.setOperand(1, ConstantExpr::get(Opcode, C1, C2)); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. ClearSubclassDataAfterReassociation(I); Changed = true; continue; } } // No further simplifications. return Changed; } while (true); } /// Return whether "X LOp (Y ROp Z)" is always equal to /// "(X LOp Y) ROp (X LOp Z)". static bool leftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp) { // X & (Y | Z) <--> (X & Y) | (X & Z) // X & (Y ^ Z) <--> (X & Y) ^ (X & Z) if (LOp == Instruction::And) return ROp == Instruction::Or || ROp == Instruction::Xor; // X | (Y & Z) <--> (X | Y) & (X | Z) if (LOp == Instruction::Or) return ROp == Instruction::And; // X * (Y + Z) <--> (X * Y) + (X * Z) // X * (Y - Z) <--> (X * Y) - (X * Z) if (LOp == Instruction::Mul) return ROp == Instruction::Add || ROp == Instruction::Sub; return false; } /// Return whether "(X LOp Y) ROp Z" is always equal to /// "(X ROp Z) LOp (Y ROp Z)". static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp) { if (Instruction::isCommutative(ROp)) return leftDistributesOverRight(ROp, LOp); // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts. return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp); // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", // but this requires knowing that the addition does not overflow and other // such subtleties. } /// This function returns identity value for given opcode, which can be used to /// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1). static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) { if (isa(V)) return nullptr; return ConstantExpr::getBinOpIdentity(Opcode, V->getType()); } /// This function predicates factorization using distributive laws. By default, /// it just returns the 'Op' inputs. But for special-cases like /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to /// allow more factorization opportunities. static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS) { assert(Op && "Expected a binary operator"); LHS = Op->getOperand(0); RHS = Op->getOperand(1); if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) { Constant *C; if (match(Op, m_Shl(m_Value(), m_Constant(C)))) { // X << C --> X * (1 << C) RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C); return Instruction::Mul; } // TODO: We can add other conversions e.g. shr => div etc. } return Op->getOpcode(); } /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). 
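/// For illustration, with A == C and single-use inner ops:
///    %t0 = mul i32 %a, %b
///    %t1 = mul i32 %a, %c
///    %r  = add i32 %t0, %t1
/// is rebuilt as "%s = add i32 %b, %c" followed by "%r = mul i32 %a, %s".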
Value *InstCombiner::tryFactorization(BinaryOperator &I, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D) { assert(A && B && C && D && "All values must be provided"); Value *V = nullptr; Value *SimplifiedInst = nullptr; Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // Does "X op' Y" always equal "Y op' X"? bool InnerCommutative = Instruction::isCommutative(InnerOpcode); // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) // Does the instruction have the form "(A op' B) op (A op' D)" or, in the // commutative case, "(A op' B) op (C op' A)"? if (A == C || (InnerCommutative && A == D)) { if (A != C) std::swap(C, D); // Consider forming "A op' (B op D)". // If "B op D" simplifies then it can be formed with no cost. V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I)); // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); if (V) { SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V); } } // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? if (!SimplifiedInst && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) // Does the instruction have the form "(A op' B) op (C op' B)" or, in the // commutative case, "(A op' B) op (B op' D)"? if (B == D || (InnerCommutative && B == C)) { if (B != D) std::swap(C, D); // Consider forming "(A op C) op' B". // If "A op C" simplifies then it can be formed with no cost. V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); if (V) { SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B); } } if (SimplifiedInst) { ++NumFactor; SimplifiedInst->takeName(&I); // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag. // TODO: Check for NUW. if (BinaryOperator *BO = dyn_cast(SimplifiedInst)) { if (isa(SimplifiedInst)) { bool HasNSW = false; if (isa(&I)) HasNSW = I.hasNoSignedWrap(); if (auto *LOBO = dyn_cast(LHS)) HasNSW &= LOBO->hasNoSignedWrap(); if (auto *ROBO = dyn_cast(RHS)) HasNSW &= ROBO->hasNoSignedWrap(); // We can propagate 'nsw' if we know that // %Y = mul nsw i16 %X, C // %Z = add nsw i16 %Y, %X // => // %Z = mul nsw i16 %X, C+1 // // iff C+1 isn't INT_MIN const APInt *CInt; if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue()) BO->setHasNoSignedWrap(HasNSW); } } } return SimplifiedInst; } /// This tries to simplify binary operations which some other binary operation /// distributes over either by factorizing out common terms /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win). /// Returns the simplified value, or null if it didn't simplify. Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); { // Factorization. 
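// Illustrative sketch of the identity-value cases below: "(%a * %b) + %a"
// factorizes as "%a * (%b + 1)" by treating the bare %a as "%a * 1".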
Value *A, *B, *C, *D; Instruction::BinaryOps LHSOpcode, RHSOpcode; if (Op0) LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B); if (Op1) RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D); // The instruction has the form "(A op' B) op (C op' D)". Try to factorize // a common term. if (Op0 && Op1 && LHSOpcode == RHSOpcode) if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D)) return V; // The instruction has the form "(A op' B) op (C)". Try to factorize common // term. if (Op0) if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident)) return V; // The instruction has the form "(B) op (C op' D)". Try to factorize common // term. if (Op1) if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D)) return V; } // Expansion. if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { // The instruction has the form "(A op' B) op C". See if expanding it out // to "(A op C) op' (B op C)" results in simplifications. Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I)); // Do "A op C" and "B op C" both simplify? if (L && R) { // They do! Return "L op' R". ++NumExpand; C = Builder.CreateBinOp(InnerOpcode, L, R); C->takeName(&I); return C; } // Does "A op C" simplify to the identity value for the inner opcode? if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) { // They do! Return "B op C". ++NumExpand; C = Builder.CreateBinOp(TopLevelOpcode, B, C); C->takeName(&I); return C; } // Does "B op C" simplify to the identity value for the inner opcode? if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) { // They do! Return "A op C". ++NumExpand; C = Builder.CreateBinOp(TopLevelOpcode, A, C); C->takeName(&I); return C; } } if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) { // The instruction has the form "A op (B op' C)". See if expanding it out // to "(A op B) op' (A op C)" results in simplifications. Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I)); Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); // Do "A op B" and "A op C" both simplify? if (L && R) { // They do! Return "L op' R". ++NumExpand; A = Builder.CreateBinOp(InnerOpcode, L, R); A->takeName(&I); return A; } // Does "A op B" simplify to the identity value for the inner opcode? if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) { // They do! Return "A op C". ++NumExpand; A = Builder.CreateBinOp(TopLevelOpcode, A, C); A->takeName(&I); return A; } // Does "A op C" simplify to the identity value for the inner opcode? if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) { // They do! Return "A op B". 
++NumExpand; A = Builder.CreateBinOp(TopLevelOpcode, A, B); A->takeName(&I); return A; } } return SimplifySelectsFeedingBinaryOp(I, LHS, RHS); } Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS) { Instruction::BinaryOps Opcode = I.getOpcode(); // (op (select (a, b, c)), (select (a, d, e))) -> (select (a, (op b, d), (op // c, e))) Value *A, *B, *C, *D, *E; Value *SI = nullptr; if (match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C))) && match(RHS, m_Select(m_Specific(A), m_Value(D), m_Value(E)))) { bool SelectsHaveOneUse = LHS->hasOneUse() && RHS->hasOneUse(); BuilderTy::FastMathFlagGuard Guard(Builder); if (isa(&I)) Builder.setFastMathFlags(I.getFastMathFlags()); Value *V1 = SimplifyBinOp(Opcode, C, E, SQ.getWithInstruction(&I)); Value *V2 = SimplifyBinOp(Opcode, B, D, SQ.getWithInstruction(&I)); if (V1 && V2) SI = Builder.CreateSelect(A, V2, V1); else if (V2 && SelectsHaveOneUse) SI = Builder.CreateSelect(A, V2, Builder.CreateBinOp(Opcode, C, E)); else if (V1 && SelectsHaveOneUse) SI = Builder.CreateSelect(A, Builder.CreateBinOp(Opcode, B, D), V1); if (SI) SI->takeName(&I); } return SI; } /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a /// constant zero (which is the 'negate' form). Value *InstCombiner::dyn_castNegVal(Value *V) const { Value *NegV; if (match(V, m_Neg(m_Value(NegV)))) return NegV; // Constants can be considered to be negated values if they can be folded. if (ConstantInt *C = dyn_cast(V)) return ConstantExpr::getNeg(C); if (ConstantDataVector *C = dyn_cast(V)) if (C->getType()->getElementType()->isIntegerTy()) return ConstantExpr::getNeg(C); if (ConstantVector *CV = dyn_cast(V)) { for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { Constant *Elt = CV->getAggregateElement(i); if (!Elt) return nullptr; if (isa(Elt)) continue; if (!isa(Elt)) return nullptr; } return ConstantExpr::getNeg(CV); } return nullptr; } static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner::BuilderTy &Builder) { if (auto *Cast = dyn_cast(&I)) return Builder.CreateCast(Cast->getOpcode(), SO, I.getType()); assert(I.isBinaryOp() && "Unexpected opcode for select folding"); // Figure out if the constant is the left or the right argument. bool ConstIsRHS = isa(I.getOperand(1)); Constant *ConstOperand = cast(I.getOperand(ConstIsRHS)); if (auto *SOC = dyn_cast(SO)) { if (ConstIsRHS) return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); } Value *Op0 = SO, *Op1 = ConstOperand; if (!ConstIsRHS) std::swap(Op0, Op1); auto *BO = cast(&I); Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1, SO->getName() + ".op"); auto *FPInst = dyn_cast(RI); if (FPInst && isa(FPInst)) FPInst->copyFastMathFlags(BO); return RI; } Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { // Don't modify shared select instructions. if (!SI->hasOneUse()) return nullptr; Value *TV = SI->getTrueValue(); Value *FV = SI->getFalseValue(); if (!(isa(TV) || isa(FV))) return nullptr; // Bool selects with constant operands can be folded to logical ops. if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; // If it's a bitcast involving vectors, make sure it has the same number of // elements on both sides. if (auto *BC = dyn_cast(&Op)) { VectorType *DestTy = dyn_cast(BC->getDestTy()); VectorType *SrcTy = dyn_cast(BC->getSrcTy()); // Verify that either both or neither are vectors. 
if ((SrcTy == nullptr) != (DestTy == nullptr)) return nullptr; // If vectors, verify that they have the same number of elements. if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements()) return nullptr; } // Test if a CmpInst instruction is used exclusively by a select as // part of a minimum or maximum operation. If so, refrain from doing // any other folding. This helps out other analyses which understand // non-obfuscated minimum and maximum idioms, such as ScalarEvolution // and CodeGen. And in this case, at least one of the comparison // operands has at least one user besides the compare (the select), // which would often largely negate the benefit of folding anyway. if (auto *CI = dyn_cast(SI->getCondition())) { if (CI->hasOneUse()) { Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) return nullptr; } } Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, InstCombiner::BuilderTy &Builder) { bool ConstIsRHS = isa(I->getOperand(1)); Constant *C = cast(I->getOperand(ConstIsRHS)); if (auto *InC = dyn_cast(InV)) { if (ConstIsRHS) return ConstantExpr::get(I->getOpcode(), InC, C); return ConstantExpr::get(I->getOpcode(), C, InC); } Value *Op0 = InV, *Op1 = C; if (!ConstIsRHS) std::swap(Op0, Op1); Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp"); auto *FPInst = dyn_cast(RI); if (FPInst && isa(FPInst)) FPInst->copyFastMathFlags(I); return RI; } Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { unsigned NumPHIValues = PN->getNumIncomingValues(); if (NumPHIValues == 0) return nullptr; // We normally only transform phis with a single use. However, if a PHI has // multiple uses and they are all the same operation, we can fold *all* of the // uses into the PHI. if (!PN->hasOneUse()) { // Walk the use list for the instruction, comparing them to I. for (User *U : PN->users()) { Instruction *UI = cast(U); if (UI != &I && !I.isIdenticalTo(UI)) return nullptr; } // Otherwise, we can replace *all* users with the new PHI we form. } // Check to see if all of the operands of the PHI are simple constants // (constantint/constantfp/undef). If there is one non-constant value, // remember the BB it is in. If there is more than one or if *it* is a PHI, // bail out. We don't do arbitrary constant expressions here because moving // their computation can be expensive without a cost model. BasicBlock *NonConstBB = nullptr; for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InVal = PN->getIncomingValue(i); if (isa(InVal) && !isa(InVal)) continue; if (isa(InVal)) return nullptr; // Itself a phi. if (NonConstBB) return nullptr; // More than one non-const value. NonConstBB = PN->getIncomingBlock(i); // If the InVal is an invoke at the end of the pred block, then we can't // insert a computation after it without breaking the edge. if (InvokeInst *II = dyn_cast(InVal)) if (II->getParent() == NonConstBB) return nullptr; // If the incoming non-constant value is in I's block, we will remove one // instruction, but insert another equivalent one, leading to infinite // instcombine. 
if (isPotentiallyReachable(I.getParent(), NonConstBB, &DT, LI)) return nullptr; } // If there is exactly one non-constant value, we can insert a copy of the // operation in that block. However, if this is a critical edge, we would be // inserting the computation on some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. if (NonConstBB != nullptr) { BranchInst *BI = dyn_cast(NonConstBB->getTerminator()); if (!BI || !BI->isUnconditional()) return nullptr; } // Okay, we can do the transformation: create the new PHI node. PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues()); InsertNewInstBefore(NewPN, *PN); NewPN->takeName(PN); // If we are going to have to insert a new computation, do so right before the // predecessor's terminator. if (NonConstBB) Builder.SetInsertPoint(NonConstBB->getTerminator()); // Next, add all of the operands to the PHI. if (SelectInst *SI = dyn_cast(&I)) { // We only currently try to fold the condition of a select when it is a phi, // not the true/false values. Value *TrueV = SI->getTrueValue(); Value *FalseV = SI->getFalseValue(); BasicBlock *PhiTransBB = PN->getParent(); for (unsigned i = 0; i != NumPHIValues; ++i) { BasicBlock *ThisBB = PN->getIncomingBlock(i); Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); Value *InV = nullptr; // Beware of ConstantExpr: it may eventually evaluate to getNullValue, // even if currently isNullValue gives false. Constant *InC = dyn_cast(PN->getIncomingValue(i)); // For vector constants, we cannot use isNullValue to fold into // FalseVInPred versus TrueVInPred. When we have individual nonzero // elements in the vector, we will incorrectly fold InC to // `TrueVInPred`. if (InC && !isa(InC) && isa(InC)) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; else { // Generate the select in the same block as PN's current incoming block. // Note: ThisBB need not be the NonConstBB because vector constants // which are constants by definition are handled here. // FIXME: This can lead to an increase in IR generation because we might // generate selects for vector constant phi operand, that could not be // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For // non-vector phis, this transformation was always profitable because // the select would be generated exactly once in the NonConstBB. 
Builder.SetInsertPoint(ThisBB->getTerminator()); InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred, FalseVInPred, "phitmp"); } NewPN->addIncoming(InV, ThisBB); } } else if (CmpInst *CI = dyn_cast(&I)) { Constant *C = cast(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV = nullptr; if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else if (isa(CI)) InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), C, "phitmp"); else InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else if (auto *BO = dyn_cast(&I)) { for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), Builder); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { CastInst *CI = cast(&I); Type *RetTy = CI->getType(); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV; if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); else InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i), I.getType(), "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { Instruction *User = cast(*UI++); if (User == &I) continue; replaceInstUsesWith(*User, NewPN); eraseInstFromFunction(*User); } return replaceInstUsesWith(I, NewPN); } Instruction *InstCombiner::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { if (!isa(I.getOperand(1))) return nullptr; if (auto *Sel = dyn_cast(I.getOperand(0))) { if (Instruction *NewSel = FoldOpIntoSelect(I, Sel)) return NewSel; } else if (auto *PN = dyn_cast(I.getOperand(0))) { if (Instruction *NewPhi = foldOpIntoPhi(I, PN)) return NewPhi; } return nullptr; } /// Given a pointer type and a constant offset, determine whether or not there /// is a sequence of GEP indices into the pointed type that will land us at the /// specified offset. If so, fill them into NewIndices and return the resultant /// element type, otherwise return null. Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset, SmallVectorImpl &NewIndices) { Type *Ty = PtrTy->getElementType(); if (!Ty->isSized()) return nullptr; // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] Type *IndexTy = DL.getIndexType(PtrTy); int64_t FirstIdx = 0; if (int64_t TySize = DL.getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; Offset -= FirstIdx*TySize; // Handle hosts where % returns negative instead of values [0..TySize). if (Offset < 0) { --FirstIdx; Offset += TySize; assert(Offset >= 0); } assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); } NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx)); // Index into the types. If we fail, set OrigBase to null. while (Offset) { // Indexing into tail padding between struct/array elements. 
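// Worked example (types chosen for illustration): for
//   %T = type { i32, [2 x i16] }
// and Offset == 6, the loop below produces indices [0, 1, 1], i.e. the
// second i16 of the array member.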
    if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
      return nullptr;

    if (StructType *STy = dyn_cast<StructType>(Ty)) {
      const StructLayout *SL = DL.getStructLayout(STy);
      assert(Offset < (int64_t)SL->getSizeInBytes() &&
             "Offset must stay within the indexed type");

      unsigned Elt = SL->getElementContainingOffset(Offset);
      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
                                            Elt));

      Offset -= SL->getElementOffset(Elt);
      Ty = STy->getElementType(Elt);
    } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
      uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
      assert(EltSize && "Cannot index into a zero-sized array");
      NewIndices.push_back(ConstantInt::get(IndexTy, Offset/EltSize));
      Offset %= EltSize;
      Ty = AT->getElementType();
    } else {
      // Otherwise, we can't index into the middle of this atomic type, bail.
      return nullptr;
    }
  }

  return Ty;
}

static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
  // If this GEP has only 0 indices, it is the same pointer as
  // Src. If Src is not a trivial GEP too, don't combine
  // the indices.
  if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
      !Src.hasOneUse())
    return false;
  return true;
}

/// Return a value X such that Val = X * Scale, or null if none.
/// If the multiplication is known not to overflow, then NoSignedWrap is set.
Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
  assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
  assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
         Scale.getBitWidth() && "Scale not compatible with value!");

  // If Val is zero or Scale is one then Val = Val * Scale.
  if (match(Val, m_Zero()) || Scale == 1) {
    NoSignedWrap = true;
    return Val;
  }

  // If Scale is zero then it does not divide Val.
  if (Scale.isMinValue())
    return nullptr;

  // Look through chains of multiplications, searching for a constant that is
  // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
  // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8)
  // by a factor of 4 will produce X*(Y*2). The principle of operation is to
  // bore down from Val:
  //
  //     Val = M1 * X    ||   Analysis starts here and works down
  //      M1 = M2 * Y    ||   Doesn't descend into terms with more
  //      M2 =  Z * 4    \/   than one use
  //
  // Then to modify a term at the bottom:
  //
  //     Val = M1 * X
  //      M1 =  Z * Y    ||   Replaced M2 with Z
  //
  // Then to work back up correcting nsw flags.

  // Op - the term we are currently analyzing. Starts at Val then drills down.
  // Replaced with its descaled value before exiting from the drill down loop.
  Value *Op = Val;

  // Parent - initially null, but after drilling down notes where Op came
  // from. In the example above, Parent is (Val, 0) when Op is M1, because M1
  // is the 0'th operand of Val.
  std::pair<Instruction *, unsigned> Parent;

  // Set if the transform requires a descaling at deeper levels that doesn't
  // overflow.
  bool RequireNoSignedWrap = false;

  // Log base 2 of the scale. Negative if not a power of 2.
  int32_t logScale = Scale.exactLogBase2();

  for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
      // If Op is a constant divisible by Scale then descale to the quotient.
      APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
      APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
      if (!Remainder.isMinValue())
        // Not divisible by Scale.
        return nullptr;
      // Replace with the quotient in the parent.
Op = ConstantInt::get(CI->getType(), Quotient); NoSignedWrap = true; break; } if (BinaryOperator *BO = dyn_cast(Op)) { if (BO->getOpcode() == Instruction::Mul) { // Multiplication. NoSignedWrap = BO->hasNoSignedWrap(); if (RequireNoSignedWrap && !NoSignedWrap) return nullptr; // There are three cases for multiplication: multiplication by exactly // the scale, multiplication by a constant different to the scale, and // multiplication by something else. Value *LHS = BO->getOperand(0); Value *RHS = BO->getOperand(1); if (ConstantInt *CI = dyn_cast(RHS)) { // Multiplication by a constant. if (CI->getValue() == Scale) { // Multiplication by exactly the scale, replace the multiplication // by its left-hand side in the parent. Op = LHS; break; } // Otherwise drill down into the constant. if (!Op->hasOneUse()) return nullptr; Parent = std::make_pair(BO, 1); continue; } // Multiplication by something else. Drill down into the left-hand side // since that's where the reassociate pass puts the good stuff. if (!Op->hasOneUse()) return nullptr; Parent = std::make_pair(BO, 0); continue; } if (logScale > 0 && BO->getOpcode() == Instruction::Shl && isa(BO->getOperand(1))) { // Multiplication by a power of 2. NoSignedWrap = BO->hasNoSignedWrap(); if (RequireNoSignedWrap && !NoSignedWrap) return nullptr; Value *LHS = BO->getOperand(0); int32_t Amt = cast(BO->getOperand(1))-> getLimitedValue(Scale.getBitWidth()); // Op = LHS << Amt. if (Amt == logScale) { // Multiplication by exactly the scale, replace the multiplication // by its left-hand side in the parent. Op = LHS; break; } if (Amt < logScale || !Op->hasOneUse()) return nullptr; // Multiplication by more than the scale. Reduce the multiplying amount // by the scale in the parent. Parent = std::make_pair(BO, 1); Op = ConstantInt::get(BO->getType(), Amt - logScale); break; } } if (!Op->hasOneUse()) return nullptr; if (CastInst *Cast = dyn_cast(Op)) { if (Cast->getOpcode() == Instruction::SExt) { // Op is sign-extended from a smaller type, descale in the smaller type. unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); APInt SmallScale = Scale.trunc(SmallSize); // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to // descale Op as (sext Y) * Scale. In order to have // sext (Y * SmallScale) = (sext Y) * Scale // some conditions need to hold however: SmallScale must sign-extend to // Scale and the multiplication Y * SmallScale should not overflow. if (SmallScale.sext(Scale.getBitWidth()) != Scale) // SmallScale does not sign-extend to Scale. return nullptr; assert(SmallScale.exactLogBase2() == logScale); // Require that Y * SmallScale must not overflow. RequireNoSignedWrap = true; // Drill down through the cast. Parent = std::make_pair(Cast, 0); Scale = SmallScale; continue; } if (Cast->getOpcode() == Instruction::Trunc) { // Op is truncated from a larger type, descale in the larger type. // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then // trunc (Y * sext Scale) = (trunc Y) * Scale // always holds. However (trunc Y) * Scale may overflow even if // trunc (Y * sext Scale) does not, so nsw flags need to be cleared // from this point up in the expression (see later). if (RequireNoSignedWrap) return nullptr; // Drill down through the cast. 
unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); Parent = std::make_pair(Cast, 0); Scale = Scale.sext(LargeSize); if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits()) logScale = -1; assert(Scale.exactLogBase2() == logScale); continue; } } // Unsupported expression, bail out. return nullptr; } // If Op is zero then Val = Op * Scale. if (match(Op, m_Zero())) { NoSignedWrap = true; return Op; } // We know that we can successfully descale, so from here on we can safely // modify the IR. Op holds the descaled version of the deepest term in the // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known // not to overflow. if (!Parent.first) // The expression only had one term. return Op; // Rewrite the parent using the descaled version of its operand. assert(Parent.first->hasOneUse() && "Drilled down when more than one use!"); assert(Op != Parent.first->getOperand(Parent.second) && "Descaling was a no-op?"); Parent.first->setOperand(Parent.second, Op); Worklist.Add(Parent.first); // Now work back up the expression correcting nsw flags. The logic is based // on the following observation: if X * Y is known not to overflow as a signed // multiplication, and Y is replaced by a value Z with smaller absolute value, // then X * Z will not overflow as a signed multiplication either. As we work // our way up, having NoSignedWrap 'true' means that the descaled value at the // current level has strictly smaller absolute value than the original. Instruction *Ancestor = Parent.first; do { if (BinaryOperator *BO = dyn_cast(Ancestor)) { // If the multiplication wasn't nsw then we can't say anything about the // value of the descaled multiplication, and we have to clear nsw flags // from this point on up. bool OpNoSignedWrap = BO->hasNoSignedWrap(); NoSignedWrap &= OpNoSignedWrap; if (NoSignedWrap != OpNoSignedWrap) { BO->setHasNoSignedWrap(NoSignedWrap); Worklist.Add(Ancestor); } } else if (Ancestor->getOpcode() == Instruction::Trunc) { // The fact that the descaled input to the trunc has smaller absolute // value than the original input doesn't tell us anything useful about // the absolute values of the truncations. NoSignedWrap = false; } assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) && "Failed to keep proper track of nsw flags while drilling down?"); if (Ancestor == Val) // Got to the top, all done! return Val; // Move up one level in the expression. assert(Ancestor->hasOneUse() && "Drilled down when more than one use!"); Ancestor = Ancestor->user_back(); } while (true); } Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) { if (!Inst.getType()->isVectorTy()) return nullptr; BinaryOperator::BinaryOps Opcode = Inst.getOpcode(); unsigned NumElts = cast(Inst.getType())->getNumElements(); Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); assert(cast(LHS->getType())->getNumElements() == NumElts); assert(cast(RHS->getType())->getNumElements() == NumElts); // If both operands of the binop are vector concatenations, then perform the // narrow binop on each pair of the source operands followed by concatenation // of the results. 
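  // Illustrative sketch (editorial, hypothetical IR names): if both operands
  // concatenate two <2 x i32> halves with the same mask,
  //   %lhs = shufflevector <2 x i32> %l0, <2 x i32> %l1,
  //          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %rhs = shufflevector <2 x i32> %r0, <2 x i32> %r1,
  //          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %r   = add <4 x i32> %lhs, %rhs
  // can become two narrow adds (%l0+%r0 and %l1+%r1) re-concatenated by the
  // same mask, operating on exactly the same elements as before.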
  Value *L0, *L1, *R0, *R1;
  Constant *Mask;
  if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
      match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
      LHS->hasOneUse() && RHS->hasOneUse() &&
-     cast<ShuffleVectorInst>(LHS)->isConcat()) {
+     cast<ShuffleVectorInst>(LHS)->isConcat() &&
+     cast<ShuffleVectorInst>(RHS)->isConcat()) {
    // This transform does not have the speculative execution constraint as
    // below because the shuffle is a concatenation. The new binops are
    // operating on exactly the same elements as the existing binop.
    // TODO: We could ease the mask requirement to allow different undef
    //       lanes, but that requires an analysis of the binop-with-undef
    //       output value.
    Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
    if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
      BO->copyIRFlags(&Inst);
    Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
    if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
      BO->copyIRFlags(&Inst);
    return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
  }

  // It may not be safe to reorder shuffles and things like div, urem, etc.
  // because we may trap when executing those ops on unknown vector elements.
  // See PR20059.
  if (!isSafeToSpeculativelyExecute(&Inst))
    return nullptr;

  auto createBinOpShuffle = [&](Value *X, Value *Y, Constant *M) {
    Value *XY = Builder.CreateBinOp(Opcode, X, Y);
    if (auto *BO = dyn_cast<BinaryOperator>(XY))
      BO->copyIRFlags(&Inst);
    return new ShuffleVectorInst(XY, UndefValue::get(XY->getType()), M);
  };

  // If both arguments of the binary operation are shuffles that use the same
  // mask and shuffle within a single vector, move the shuffle after the binop.
  Value *V1, *V2;
  if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))) &&
      match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(Mask))) &&
      V1->getType() == V2->getType() &&
      (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
    // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
    return createBinOpShuffle(V1, V2, Mask);
  }

  // If one argument is a shuffle within one vector and the other is a
  // constant, try moving the shuffle after the binary operation. This
  // canonicalization intends to move shuffles closer to other shuffles and
  // binops closer to other binops, so they can be folded. It may also enable
  // demanded elements transforms.
  Constant *C;
  if (match(&Inst, m_c_BinOp(
          m_OneUse(m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))),
          m_Constant(C))) &&
      V1->getType()->getVectorNumElements() <= NumElts) {
    assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
           "Shuffle should not change scalar type");

    // Find constant NewC that has property:
    //   shuffle(NewC, ShMask) = C
    // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
    // reorder is not possible. A 1-to-1 mapping is not required. Example:
    // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef, 5, 6, undef>
    bool ConstOp1 = isa<Constant>(RHS);
    SmallVector<int, 16> ShMask;
    ShuffleVectorInst::getShuffleMask(Mask, ShMask);
    unsigned SrcVecNumElts = V1->getType()->getVectorNumElements();
    UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
    SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
    bool MayChange = true;
    for (unsigned I = 0; I < NumElts; ++I) {
      Constant *CElt = C->getAggregateElement(I);
      if (ShMask[I] >= 0) {
        assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
        Constant *NewCElt = NewVecC[ShMask[I]];
        // Bail out if:
        // 1. The constant vector contains a constant expression.
        // 2. The shuffle needs an element of the constant vector that can't
        //    be mapped to a new constant vector.
        // 3. This is a widening shuffle that copies elements of V1 into the
        //    extended elements (extending with undef is allowed).
        if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
            I >= SrcVecNumElts) {
          MayChange = false;
          break;
        }
        NewVecC[ShMask[I]] = CElt;
      }
      // If this is a widening shuffle, we must be able to extend with undef
      // elements. If the original binop does not produce an undef in the high
      // lanes, then this transform is not safe.
      // TODO: We could shuffle those non-undef constant values into the
      //       result by using a constant vector (rather than an undef vector)
      //       as operand 1 of the new binop, but that might be too aggressive
      //       for target-independent shuffle creation.
      if (I >= SrcVecNumElts) {
        Constant *MaybeUndef =
            ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
                     : ConstantExpr::get(Opcode, CElt, UndefScalar);
        if (!isa<UndefValue>(MaybeUndef)) {
          MayChange = false;
          break;
        }
      }
    }
    if (MayChange) {
      Constant *NewC = ConstantVector::get(NewVecC);
      // It may not be safe to execute a binop on a vector with undef elements
      // because the entire instruction can be folded to undef or create
      // poison that did not exist in the original code.
      if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
        NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);

      // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
      // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
      Value *NewLHS = ConstOp1 ? V1 : NewC;
      Value *NewRHS = ConstOp1 ? NewC : V1;
      return createBinOpShuffle(NewLHS, NewRHS, Mask);
    }
  }

  return nullptr;
}

/// Try to narrow the width of a binop if at least 1 operand is an extend of
/// a value. This requires a potentially expensive known bits check to make
/// sure the narrow op does not overflow.
Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
  // We need at least one extended operand.
  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);

  // If this is a sub, we swap the operands since we always want an extension
  // on the RHS. The LHS can be an extension or a constant.
  if (BO.getOpcode() == Instruction::Sub)
    std::swap(Op0, Op1);

  Value *X;
  bool IsSext = match(Op0, m_SExt(m_Value(X)));
  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
    return nullptr;

  // If both operands are the same extension from the same source type and we
  // can eliminate at least one (hasOneUse), this might work.
  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
  Value *Y;
  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
        cast<CastInst>(Op1)->getOpcode() == CastOpc &&
        (Op0->hasOneUse() || Op1->hasOneUse()))) {
    // If that did not match, see if we have a suitable constant operand.
    // Truncating and extending must produce the same constant.
    Constant *WideC;
    if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
      return nullptr;
    Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
    if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
      return nullptr;
    Y = NarrowC;
  }

  // Swap back now that we found our operands.
  if (BO.getOpcode() == Instruction::Sub)
    std::swap(X, Y);

  // Both operands have narrow versions. Last step: the math must not
  // overflow in the narrow width.
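  // Worked example (editorial, hypothetical values): if the known-bits check
  // below proves that an i8 add cannot wrap, then
  //   %wx = sext i8 %x to i32
  //   %r  = add i32 %wx, 7
  // narrows to
  //   %n = add nsw i8 %x, 7
  //   %r = sext i8 %n to i32
  // Here trunc(i32 7) == i8 7 sign-extends back to the original constant, so
  // the constant-operand path above accepts it.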
if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext)) return nullptr; // bo (ext X), (ext Y) --> ext (bo X, Y) // bo (ext X), C --> ext (bo X, C') Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow"); if (auto *NewBinOp = dyn_cast(NarrowBO)) { if (IsSext) NewBinOp->setHasNoSignedWrap(); else NewBinOp->setHasNoUnsignedWrap(); } return CastInst::Create(CastOpc, NarrowBO, BO.getType()); } Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.op_begin(), GEP.op_end()); Type *GEPType = GEP.getType(); Type *GEPEltType = GEP.getSourceElementType(); if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP))) return replaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); // Eliminate unneeded casts for indices, and replace indices which displace // by multiples of a zero size type with zero. bool MadeChange = false; // Index width may not be the same width as pointer width. // Data layout chooses the right type based on supported integer types. Type *NewScalarIndexTy = DL.getIndexType(GEP.getPointerOperandType()->getScalarType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; ++I, ++GTI) { // Skip indices into struct types. if (GTI.isStruct()) continue; Type *IndexTy = (*I)->getType(); Type *NewIndexType = IndexTy->isVectorTy() ? VectorType::get(NewScalarIndexTy, IndexTy->getVectorNumElements()) : NewScalarIndexTy; // If the element type has zero size then any index over it is equivalent // to an index of zero, so replace it with zero if it is not zero already. Type *EltTy = GTI.getIndexedType(); if (EltTy->isSized() && DL.getTypeAllocSize(EltTy) == 0) if (!isa(*I) || !cast(*I)->isNullValue()) { *I = Constant::getNullValue(NewIndexType); MadeChange = true; } if (IndexTy != NewIndexType) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. *I = Builder.CreateIntCast(*I, NewIndexType, true); MadeChange = true; } } if (MadeChange) return &GEP; // Check to see if the inputs to the PHI node are getelementptr instructions. if (auto *PN = dyn_cast(PtrOp)) { auto *Op1 = dyn_cast(PN->getOperand(0)); if (!Op1) return nullptr; // Don't fold a GEP into itself through a PHI node. This can only happen // through the back-edge of a loop. Folding a GEP into itself means that // the value of the previous iteration needs to be stored in the meantime, // thus requiring an additional register variable to be live, but not // actually achieving anything (the GEP still needs to be executed once per // loop iteration). if (Op1 == &GEP) return nullptr; int DI = -1; for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { auto *Op2 = dyn_cast(*I); if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) return nullptr; // As for Op1 above, don't try to fold a GEP into itself. if (Op2 == &GEP) return nullptr; // Keep track of the type as we walk the GEP. Type *CurTy = nullptr; for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType()) return nullptr; if (Op1->getOperand(J) != Op2->getOperand(J)) { if (DI == -1) { // We have not seen any differences yet in the GEPs feeding the // PHI yet, so we record this one if it is allowed to be a // variable. 
// The first two arguments can vary for any GEP, the rest have to be // static for struct slots if (J > 1 && CurTy->isStructTy()) return nullptr; DI = J; } else { // The GEP is different by more than one input. While this could be // extended to support GEPs that vary by more than one variable it // doesn't make sense since it greatly increases the complexity and // would result in an R+R+R addressing mode which no backend // directly supports and would need to be broken into several // simpler instructions anyway. return nullptr; } } // Sink down a layer of the type for the next iteration. if (J > 0) { if (J == 1) { CurTy = Op1->getSourceElementType(); } else if (auto *CT = dyn_cast(CurTy)) { CurTy = CT->getTypeAtIndex(Op1->getOperand(J)); } else { CurTy = nullptr; } } } } // If not all GEPs are identical we'll have to create a new PHI node. // Check that the old PHI node has only one use so that it will get // removed. if (DI != -1 && !PN->hasOneUse()) return nullptr; auto *NewGEP = cast(Op1->clone()); if (DI == -1) { // All the GEPs feeding the PHI are identical. Clone one down into our // BB so that it can be merged with the current GEP. GEP.getParent()->getInstList().insert( GEP.getParent()->getFirstInsertionPt(), NewGEP); } else { // All the GEPs feeding the PHI differ at a single offset. Clone a GEP // into the current block so it can be merged, and create a new PHI to // set that index. PHINode *NewPN; { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.SetInsertPoint(PN); NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(), PN->getNumOperands()); } for (auto &I : PN->operands()) NewPN->addIncoming(cast(I)->getOperand(DI), PN->getIncomingBlock(I)); NewGEP->setOperand(DI, NewPN); GEP.getParent()->getInstList().insert( GEP.getParent()->getFirstInsertionPt(), NewGEP); NewGEP->setOperand(DI, NewPN); } GEP.setOperand(0, NewGEP); PtrOp = NewGEP; } // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. if (auto *Src = dyn_cast(PtrOp)) { if (!shouldMergeGEPs(*cast(&GEP), *Src)) return nullptr; // Try to reassociate loop invariant GEP chains to enable LICM. if (LI && Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 && Src->hasOneUse()) { if (Loop *L = LI->getLoopFor(GEP.getParent())) { Value *GO1 = GEP.getOperand(1); Value *SO1 = Src->getOperand(1); // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is // invariant: this breaks the dependence between GEPs and allows LICM // to hoist the invariant part out of the loop. if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) { // We have to be careful here. // We have something like: // %src = getelementptr , * %base, %idx // %gep = getelementptr , * %src, %idx2 // If we just swap idx & idx2 then we could inadvertantly // change %src from a vector to a scalar, or vice versa. // Cases: // 1) %base a scalar & idx a scalar & idx2 a vector // => Swapping idx & idx2 turns %src into a vector type. // 2) %base a scalar & idx a vector & idx2 a scalar // => Swapping idx & idx2 turns %src in a scalar type // 3) %base, %idx, and %idx2 are scalars // => %src & %gep are scalars // => swapping idx & idx2 is safe // 4) %base a vector // => %src is a vector // => swapping idx & idx2 is safe. 
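        // Sketch of the intended effect (editorial, hypothetical IR): inside
        // a loop where %inv is loop-invariant and %iv varies,
        //   %src = getelementptr i8, i8* %base, i64 %iv
        //   %gep = getelementptr i8, i8* %src, i64 %inv
        // is reassociated (indices swapped) to
        //   %src = getelementptr i8, i8* %base, i64 %inv   ; now hoistable
        //   %gep = getelementptr i8, i8* %src, i64 %iv
        // so LICM can hoist the invariant GEP out of the loop.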
auto *SO0 = Src->getOperand(0); auto *SO0Ty = SO0->getType(); if (!isa(GEPType) || // case 3 isa(SO0Ty)) { // case 4 Src->setOperand(1, GO1); GEP.setOperand(1, SO1); return &GEP; } else { // Case 1 or 2 // -- have to recreate %src & %gep // put NewSrc at same location as %src Builder.SetInsertPoint(cast(PtrOp)); auto *NewSrc = cast( Builder.CreateGEP(SO0, GO1, Src->getName())); NewSrc->setIsInBounds(Src->isInBounds()); auto *NewGEP = GetElementPtrInst::Create(nullptr, NewSrc, {SO1}); NewGEP->setIsInBounds(GEP.isInBounds()); return NewGEP; } } } } // Note that if our source is a gep chain itself then we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. if (auto *SrcGEP = dyn_cast(Src->getOperand(0))) if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) return nullptr; // Wait until our source is folded to completion. SmallVector Indices; // Find out whether the last index in the source GEP is a sequential idx. bool EndsWithSequential = false; for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); I != E; ++I) EndsWithSequential = I.isSequential(); // Can we combine the two pointer arithmetics offsets? if (EndsWithSequential) { // Replace: gep (gep %P, long B), long A, ... // With: T = long A+B; gep %P, T, ... Value *SO1 = Src->getOperand(Src->getNumOperands()-1); Value *GO1 = GEP.getOperand(1); // If they aren't the same type, then the input hasn't been processed // by the loop above yet (which canonicalizes sequential index types to // intptr_t). Just avoid transforming this until the input has been // normalized. if (SO1->getType() != GO1->getType()) return nullptr; Value *Sum = SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP)); // Only do the combine when we are sure the cost after the // merge is never more than that before the merge. if (Sum == nullptr) return nullptr; // Update the GEP in place if possible. if (Src->getNumOperands() == 2) { GEP.setOperand(0, Src->getOperand(0)); GEP.setOperand(1, Sum); return &GEP; } Indices.append(Src->op_begin()+1, Src->op_end()-1); Indices.push_back(Sum); Indices.append(GEP.op_begin()+2, GEP.op_end()); } else if (isa(*GEP.idx_begin()) && cast(*GEP.idx_begin())->isNullValue() && Src->getNumOperands() != 1) { // Otherwise we can do the fold if the first index of the GEP is a zero Indices.append(Src->op_begin()+1, Src->op_end()); Indices.append(GEP.idx_begin()+1, GEP.idx_end()); } if (!Indices.empty()) return GEP.isInBounds() && Src->isInBounds() ? 
GetElementPtrInst::CreateInBounds( Src->getSourceElementType(), Src->getOperand(0), Indices, GEP.getName()) : GetElementPtrInst::Create(Src->getSourceElementType(), Src->getOperand(0), Indices, GEP.getName()); } if (GEP.getNumIndices() == 1) { unsigned AS = GEP.getPointerAddressSpace(); if (GEP.getOperand(1)->getType()->getScalarSizeInBits() == DL.getIndexSizeInBits(AS)) { uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType); bool Matched = false; uint64_t C; Value *V = nullptr; if (TyAllocSize == 1) { V = GEP.getOperand(1); Matched = true; } else if (match(GEP.getOperand(1), m_AShr(m_Value(V), m_ConstantInt(C)))) { if (TyAllocSize == 1ULL << C) Matched = true; } else if (match(GEP.getOperand(1), m_SDiv(m_Value(V), m_ConstantInt(C)))) { if (TyAllocSize == C) Matched = true; } if (Matched) { // Canonicalize (gep i8* X, -(ptrtoint Y)) // to (inttoptr (sub (ptrtoint X), (ptrtoint Y))) // The GEP pattern is emitted by the SCEV expander for certain kinds of // pointer arithmetic. if (match(V, m_Neg(m_PtrToInt(m_Value())))) { Operator *Index = cast(V); Value *PtrToInt = Builder.CreatePtrToInt(PtrOp, Index->getType()); Value *NewSub = Builder.CreateSub(PtrToInt, Index->getOperand(1)); return CastInst::Create(Instruction::IntToPtr, NewSub, GEPType); } // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) // to (bitcast Y) Value *Y; if (match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(GEP.getOperand(0)))))) return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType); } } } // We do not handle pointer-vector geps here. if (GEPType->isVectorTy()) return nullptr; // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); PointerType *StrippedPtrTy = cast(StrippedPtr->getType()); if (StrippedPtr != PtrOp) { bool HasZeroPointerIndex = false; if (auto *C = dyn_cast(GEP.getOperand(1))) HasZeroPointerIndex = C->isZero(); // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... // into : GEP [10 x i8]* X, i32 0, ... // // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... // into : GEP i8* X, ... // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { if (auto *CATy = dyn_cast(GEPEltType)) { // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... SmallVector Idx(GEP.idx_begin()+1, GEP.idx_end()); GetElementPtrInst *Res = GetElementPtrInst::Create( StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) return Res; // Insert Res, and create an addrspacecast. // e.g., // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ... // -> // %0 = GEP i8 addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* return new AddrSpaceCastInst(Builder.Insert(Res), GEPType); } if (auto *XATy = dyn_cast(StrippedPtrTy->getElementType())) { // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == XATy->getElementType()) { // -> GEP [10 x i8]* X, i32 0, ... // At this point, we know that the cast source type is a pointer // to an array of the same type as the destination pointer // array. Because the array type is never stepped over (there // is a leading zero) we can fold the cast into this GEP. 
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) { GEP.setOperand(0, StrippedPtr); GEP.setSourceElementType(XATy); return &GEP; } // Cannot replace the base pointer directly because StrippedPtr's // address space is different. Instead, create a new GEP followed by // an addrspacecast. // e.g., // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*), // i32 0, ... // -> // %0 = GEP [10 x i8] addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector Idx(GEP.idx_begin(), GEP.idx_end()); Value *NewGEP = GEP.isInBounds() ? Builder.CreateInBoundsGEP( nullptr, StrippedPtr, Idx, GEP.getName()) : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); return new AddrSpaceCastInst(NewGEP, GEPType); } } } } else if (GEP.getNumOperands() == 2) { // Transform things like: // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast Type *SrcEltTy = StrippedPtrTy->getElementType(); if (SrcEltTy->isArrayTy() && DL.getTypeAllocSize(SrcEltTy->getArrayElementType()) == DL.getTypeAllocSize(GEPEltType)) { Type *IdxType = DL.getIndexType(GEPType); Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx, GEP.getName()) : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType); } // Transform things like: // %V = mul i64 %N, 4 // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast if (GEPEltType->isSized() && SrcEltTy->isSized()) { // Check that changing the type amounts to dividing the index by a scale // factor. uint64_t ResSize = DL.getTypeAllocSize(GEPEltType); uint64_t SrcSize = DL.getTypeAllocSize(SrcEltTy); if (ResSize && SrcSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); uint64_t Scale = SrcSize / ResSize; // Earlier transforms ensure that the index has the right type // according to Data Layout, which considerably simplifies the // logic by eliminating implicit casts. assert(Idx->getType() == DL.getIndexType(GEPType) && "Index type does not match the Data Layout preferences"); bool NSW; if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) { // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". Value *NewGEP = GEP.isInBounds() && NSW ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, GEP.getName()) : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType); } } } // Similarly, transform things like: // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast if (GEPEltType->isSized() && SrcEltTy->isSized() && SrcEltTy->isArrayTy()) { // Check that changing to the array element type amounts to dividing the // index by a scale factor. 
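      // Worked numbers for the example above (editorial): with an i8 result
      // type, ResSize = 1; a double array element gives ArrayEltSize = 8, so
      // Scale = 8 / 1 = 8, and Descale(%tmp, 8) recovers %tmp2 from
      // %tmp = mul i32 %tmp2, 8.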
uint64_t ResSize = DL.getTypeAllocSize(GEPEltType); uint64_t ArrayEltSize = DL.getTypeAllocSize(SrcEltTy->getArrayElementType()); if (ResSize && ArrayEltSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); uint64_t Scale = ArrayEltSize / ResSize; // Earlier transforms ensure that the index has the right type // according to the Data Layout, which considerably simplifies // the logic by eliminating implicit casts. assert(Idx->getType() == DL.getIndexType(GEPType) && "Index type does not match the Data Layout preferences"); bool NSW; if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) { // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". Type *IndTy = DL.getIndexType(GEPType); Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx}; Value *NewGEP = GEP.isInBounds() && NSW ? Builder.CreateInBoundsGEP( SrcEltTy, StrippedPtr, Off, GEP.getName()) : Builder.CreateGEP(SrcEltTy, StrippedPtr, Off, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType); } } } } } // addrspacecast between types is canonicalized as a bitcast, then an // addrspacecast. To take advantage of the below bitcast + struct GEP, look // through the addrspacecast. Value *ASCStrippedPtrOp = PtrOp; if (auto *ASC = dyn_cast(PtrOp)) { // X = bitcast A addrspace(1)* to B addrspace(1)* // Y = addrspacecast A addrspace(1)* to B addrspace(2)* // Z = gep Y, <...constant indices...> // Into an addrspacecasted GEP of the struct. if (auto *BC = dyn_cast(ASC->getOperand(0))) ASCStrippedPtrOp = BC; } if (auto *BCI = dyn_cast(ASCStrippedPtrOp)) { Value *SrcOp = BCI->getOperand(0); PointerType *SrcType = cast(BCI->getSrcTy()); Type *SrcEltType = SrcType->getElementType(); // GEP directly using the source operand if this GEP is accessing an element // of a bitcasted pointer to vector or array of the same dimensions: // gep (bitcast * X to [c x ty]*), Y, Z --> gep X, Y, Z // gep (bitcast [c x ty]* X to *), Y, Z --> gep X, Y, Z auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy) { return ArrTy->getArrayElementType() == VecTy->getVectorElementType() && ArrTy->getArrayNumElements() == VecTy->getVectorNumElements(); }; if (GEP.getNumOperands() == 3 && ((GEPEltType->isArrayTy() && SrcEltType->isVectorTy() && areMatchingArrayAndVecTypes(GEPEltType, SrcEltType)) || (GEPEltType->isVectorTy() && SrcEltType->isArrayTy() && areMatchingArrayAndVecTypes(SrcEltType, GEPEltType)))) { // Create a new GEP here, as using `setOperand()` followed by // `setSourceElementType()` won't actually update the type of the // existing GEP Value. Causing issues if this Value is accessed when // constructing an AddrSpaceCastInst Value *NGEP = GEP.isInBounds() ? Builder.CreateInBoundsGEP(nullptr, SrcOp, {Ops[1], Ops[2]}) : Builder.CreateGEP(nullptr, SrcOp, {Ops[1], Ops[2]}); NGEP->takeName(&GEP); // Preserve GEP address space to satisfy users if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace()) return new AddrSpaceCastInst(NGEP, GEPType); return replaceInstUsesWith(GEP, NGEP); } // See if we can simplify: // X = bitcast A* to B* // Y = gep X, <...constant indices...> // into a gep of the original struct. This is important for SROA and alias // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. 
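    // Worked example (editorial, hypothetical types): with %p of type
    // %struct.S* where %struct.S = type { i32, i32 },
    //   %x = bitcast %struct.S* %p to i8*
    //   %y = getelementptr i8, i8* %x, i64 4
    // accumulates constant offset 4, and FindElementAtOffset recovers
    //   %y = getelementptr %struct.S, %struct.S* %p, i64 0, i32 1
    // followed by a bitcast of the result back to i8*.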
unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEPType); APInt Offset(OffsetBits, 0); if (!isa(SrcOp) && GEP.accumulateConstantOffset(DL, Offset)) { // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. if (!Offset) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. if (isa(SrcOp) || isAllocationFn(SrcOp, &TLI)) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { I->takeName(BCI); BCI->getParent()->getInstList().insert(BCI->getIterator(), I); replaceInstUsesWith(*BCI, I); } return &GEP; } } if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace()) return new AddrSpaceCastInst(SrcOp, GEPType); return new BitCastInst(SrcOp, GEPType); } // Otherwise, if the offset is non-zero, we need to find out if there is a // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. SmallVector NewIndices; if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() ? Builder.CreateInBoundsGEP(nullptr, SrcOp, NewIndices) : Builder.CreateGEP(nullptr, SrcOp, NewIndices); if (NGEP->getType() == GEPType) return replaceInstUsesWith(GEP, NGEP); NGEP->takeName(&GEP); if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace()) return new AddrSpaceCastInst(NGEP, GEPType); return new BitCastInst(NGEP, GEPType); } } } if (!GEP.isInBounds()) { unsigned IdxWidth = DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace()); APInt BasePtrOffset(IdxWidth, 0); Value *UnderlyingPtrOp = PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset); if (auto *AI = dyn_cast(UnderlyingPtrOp)) { if (GEP.accumulateConstantOffset(DL, BasePtrOffset) && BasePtrOffset.isNonNegative()) { APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType())); if (BasePtrOffset.ule(AllocSize)) { return GetElementPtrInst::CreateInBounds( PtrOp, makeArrayRef(Ops).slice(1), GEP.getName()); } } } } return nullptr; } static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI, Instruction *AI) { if (isa(V)) return true; if (auto *LI = dyn_cast(V)) return isa(LI->getPointerOperand()); // Two distinct allocations will never be equal. // We rely on LookThroughBitCast in isAllocLikeFn being false, since looking // through bitcasts of V can cause // the result statement below to be true, even when AI and V (ex: // i8* ->i32* ->i8* of AI) are the same allocations. return isAllocLikeFn(V, TLI) && V != AI; } static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users, const TargetLibraryInfo *TLI) { SmallVector Worklist; Worklist.push_back(AI); do { Instruction *PI = Worklist.pop_back_val(); for (User *U : PI->users()) { Instruction *I = cast(U); switch (I->getOpcode()) { default: // Give up the moment we see something we can't handle. return false; case Instruction::AddrSpaceCast: case Instruction::BitCast: case Instruction::GetElementPtr: Users.emplace_back(I); Worklist.push_back(I); continue; case Instruction::ICmp: { ICmpInst *ICI = cast(I); // We can fold eq/ne comparisons with null to false/true, respectively. // We also fold comparisons in some conditions provided the alloc has // not escaped (see isNeverEqualToUnescapedAlloc). if (!ICI->isEquality()) return false; unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 
1 : 0; if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI)) return false; Users.emplace_back(I); continue; } case Instruction::Call: // Ignore no-op and store intrinsics. if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: return false; case Intrinsic::memmove: case Intrinsic::memcpy: case Intrinsic::memset: { MemIntrinsic *MI = cast(II); if (MI->isVolatile() || MI->getRawDest() != PI) return false; LLVM_FALLTHROUGH; } case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::objectsize: Users.emplace_back(I); continue; } } if (isFreeCall(I, TLI)) { Users.emplace_back(I); continue; } return false; case Instruction::Store: { StoreInst *SI = cast(I); if (SI->isVolatile() || SI->getPointerOperand() != PI) return false; Users.emplace_back(I); continue; } } llvm_unreachable("missing a return?"); } } while (!Worklist.empty()); return true; } Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons to // null and free calls, delete the calls and replace the comparisons with true // or false as appropriate. // This is based on the principle that we can substitute our own allocation // function (which will never return null) rather than knowledge of the // specific function being called. In some sense this can change the permitted // outputs of a program (when we convert a malloc to an alloca, the fact that // the allocation is now on the stack is potentially visible, for example), // but we believe in a permissible manner. SmallVector Users; // If we are removing an alloca with a dbg.declare, insert dbg.value calls // before each store. TinyPtrVector DIIs; std::unique_ptr DIB; if (isa(MI)) { DIIs = FindDbgAddrUses(&MI); DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false)); } if (isAllocSiteRemovable(&MI, Users, &TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { // Lowering all @llvm.objectsize calls first because they may // use a bitcast/GEP of the alloca we are removing. if (!Users[i]) continue; Instruction *I = cast(&*Users[i]); if (IntrinsicInst *II = dyn_cast(I)) { if (II->getIntrinsicID() == Intrinsic::objectsize) { ConstantInt *Result = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true); replaceInstUsesWith(*I, Result); eraseInstFromFunction(*I); Users[i] = nullptr; // Skip examining in the next loop. } } } for (unsigned i = 0, e = Users.size(); i != e; ++i) { if (!Users[i]) continue; Instruction *I = cast(&*Users[i]); if (ICmpInst *C = dyn_cast(I)) { replaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()), C->isFalseWhenEqual())); } else if (isa(I) || isa(I) || isa(I)) { replaceInstUsesWith(*I, UndefValue::get(I->getType())); } else if (auto *SI = dyn_cast(I)) { for (auto *DII : DIIs) ConvertDebugDeclareToDebugValue(DII, SI, *DIB); } eraseInstFromFunction(*I); } if (InvokeInst *II = dyn_cast(&MI)) { // Replace invoke with a NOP intrinsic to maintain the original CFG Module *M = II->getModule(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), None, "", II->getParent()); } for (auto *DII : DIIs) eraseInstFromFunction(*DII); return eraseInstFromFunction(MI); } return nullptr; } /// Move the call to free before a NULL test. /// /// Check if this free is accessed after its argument has been test /// against NULL (property 0). 
/// If yes, it is legal to move this call in its predecessor block. /// /// The move is performed only if the block containing the call to free /// will be removed, i.e.: /// 1. it has only one predecessor P, and P has two successors /// 2. it contains the call, noops, and an unconditional branch /// 3. its successor is the same as its predecessor's successor /// /// The profitability is out-of concern here and this function should /// be called only if the caller knows this transformation would be /// profitable (e.g., for code size). static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL) { Value *Op = FI.getArgOperand(0); BasicBlock *FreeInstrBB = FI.getParent(); BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); // Validate part of constraint #1: Only one predecessor // FIXME: We can extend the number of predecessor, but in that case, we // would duplicate the call to free in each predecessor and it may // not be profitable even for code size. if (!PredBB) return nullptr; // Validate constraint #2: Does this block contains only the call to // free, noops, and an unconditional branch? BasicBlock *SuccBB; Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator(); if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB))) return nullptr; // If there are only 2 instructions in the block, at this point, // this is the call to free and unconditional. // If there are more than 2 instructions, check that they are noops // i.e., they won't hurt the performance of the generated code. if (FreeInstrBB->size() != 2) { for (const Instruction &Inst : *FreeInstrBB) { if (&Inst == &FI || &Inst == FreeInstrBBTerminator) continue; auto *Cast = dyn_cast(&Inst); if (!Cast || !Cast->isNoopCast(DL)) return nullptr; } } // Validate the rest of constraint #1 by matching on the pred branch. Instruction *TI = PredBB->getTerminator(); BasicBlock *TrueBB, *FalseBB; ICmpInst::Predicate Pred; if (!match(TI, m_Br(m_ICmp(Pred, m_CombineOr(m_Specific(Op), m_Specific(Op->stripPointerCasts())), m_Zero()), TrueBB, FalseBB))) return nullptr; if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) return nullptr; // Validate constraint #3: Ensure the null case just falls through. if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) return nullptr; assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && "Broken CFG: missing edge from predecessor to successor"); // At this point, we know that everything in FreeInstrBB can be moved // before TI. for (BasicBlock::iterator It = FreeInstrBB->begin(), End = FreeInstrBB->end(); It != End;) { Instruction &Instr = *It++; if (&Instr == FreeInstrBBTerminator) break; Instr.moveBefore(TI); } assert(FreeInstrBB->size() == 1 && "Only the branch instruction should remain"); return &FI; } Instruction *InstCombiner::visitFree(CallInst &FI) { Value *Op = FI.getArgOperand(0); // free undef -> unreachable. if (isa(Op)) { // Insert a new store to null because we cannot modify the CFG here. Builder.CreateStore(ConstantInt::getTrue(FI.getContext()), UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); return eraseInstFromFunction(FI); } // If we have 'free null' delete the instruction. This can happen in stl code // when lots of inlining happens. if (isa(Op)) return eraseInstFromFunction(FI); // If we optimize for code size, try to move the call to free before the null // test so that simplify cfg can remove the empty block and dead code // elimination the branch. 
  // I.e., helps to turn something like:
  //   if (foo) free(foo);
  // into
  //   free(foo);
  if (MinimizeSize)
    if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
      return I;

  return nullptr;
}

Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
  if (RI.getNumOperands() == 0) // ret void
    return nullptr;

  Value *ResultOp = RI.getOperand(0);
  Type *VTy = ResultOp->getType();
  if (!VTy->isIntegerTy())
    return nullptr;

  // There might be assume intrinsics dominating this return that completely
  // determine the value. If so, constant fold it.
  KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
  if (Known.isConstant())
    RI.setOperand(0, Constant::getIntegerValue(VTy, Known.getConstant()));

  return nullptr;
}

Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
  // Change br (not X), label True, label False to: br X, label False, True
  Value *X = nullptr;
  BasicBlock *TrueDest;
  BasicBlock *FalseDest;
  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
      !isa<Constant>(X)) {
    // Swap Destinations and condition...
    BI.setCondition(X);
    BI.swapSuccessors();
    return &BI;
  }

  // If the condition is irrelevant, remove the use so that other
  // transforms on the condition become more effective.
  if (BI.isConditional() && !isa<ConstantInt>(BI.getCondition()) &&
      BI.getSuccessor(0) == BI.getSuccessor(1)) {
    BI.setCondition(ConstantInt::getFalse(BI.getCondition()->getType()));
    return &BI;
  }

  // Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_oeq.
  CmpInst::Predicate Pred;
  if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), TrueDest,
                      FalseDest)) &&
      !isCanonicalPredicate(Pred)) {
    // Swap destinations and condition.
    CmpInst *Cond = cast<CmpInst>(BI.getCondition());
    Cond->setPredicate(CmpInst::getInversePredicate(Pred));
    BI.swapSuccessors();
    Worklist.Add(Cond);
    return &BI;
  }

  return nullptr;
}

Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
  Value *Cond = SI.getCondition();
  Value *Op0;
  ConstantInt *AddRHS;
  if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
    // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
    for (auto Case : SI.cases()) {
      Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
      assert(isa<ConstantInt>(NewCase) &&
             "Result of expression should be constant");
      Case.setValue(cast<ConstantInt>(NewCase));
    }
    SI.setCondition(Op0);
    return &SI;
  }

  KnownBits Known = computeKnownBits(Cond, 0, &SI);
  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();

  // Compute the number of leading bits we can ignore.
  // TODO: A better way to determine this would use ComputeNumSignBits().
  for (auto &C : SI.cases()) {
    LeadingKnownZeros = std::min(
        LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
    LeadingKnownOnes = std::min(
        LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
  }

  unsigned NewWidth = Known.getBitWidth() -
                      std::max(LeadingKnownZeros, LeadingKnownOnes);

  // Shrink the condition operand if the new type is smaller than the old
  // type. But do not shrink to a non-standard type, because the backend
  // can't generate good code for that yet.
  // TODO: We can make it aggressive again after fixing PR39569.
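  // Illustrative example (editorial): if %cond is i64 but known bits show
  // the top 32 bits are zero, and every case value also fits in 32 bits,
  //   switch i64 %cond, label %def [ i64 5, label %a ]
  // shrinks to
  //   %trunc = trunc i64 %cond to i32
  //   switch i32 %trunc, label %def [ i32 5, label %a ]
  // provided i32 is a standard type for the target (shouldChangeType).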
if (NewWidth > 0 && NewWidth < Known.getBitWidth() && shouldChangeType(Known.getBitWidth(), NewWidth)) { IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth); Builder.SetInsertPoint(&SI); Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc"); SI.setCondition(NewCond); for (auto Case : SI.cases()) { APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth); Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase)); } return &SI; } return nullptr; } Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { Value *Agg = EV.getAggregateOperand(); if (!EV.hasIndices()) return replaceInstUsesWith(EV, Agg); if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), SQ.getWithInstruction(&EV))) return replaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast(Agg)) { // We're extracting from an insertvalue instruction, compare the indices const unsigned *exti, *exte, *insi, *inse; for (exti = EV.idx_begin(), insi = IV->idx_begin(), exte = EV.idx_end(), inse = IV->idx_end(); exti != exte && insi != inse; ++exti, ++insi) { if (*insi != *exti) // The insert and extract both reference distinctly different elements. // This means the extract is not influenced by the insert, and we can // replace the aggregate operand of the extract with the aggregate // operand of the insert. i.e., replace // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 // %E = extractvalue { i32, { i32 } } %I, 0 // with // %E = extractvalue { i32, { i32 } } %A, 0 return ExtractValueInst::Create(IV->getAggregateOperand(), EV.getIndices()); } if (exti == exte && insi == inse) // Both iterators are at the end: Index lists are identical. Replace // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 // %C = extractvalue { i32, { i32 } } %B, 1, 0 // with "i32 42" return replaceInstUsesWith(EV, IV->getInsertedValueOperand()); if (exti == exte) { // The extract list is a prefix of the insert list. i.e. replace // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 // %E = extractvalue { i32, { i32 } } %I, 1 // with // %X = extractvalue { i32, { i32 } } %A, 1 // %E = insertvalue { i32 } %X, i32 42, 0 // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(), EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), makeArrayRef(insi, inse)); } if (insi == inse) // The insert list is a prefix of the extract list // We can simply remove the common indices from the extract and make it // operate on the inserted value instead of the insertvalue result. // i.e., replace // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 // %E = extractvalue { i32, { i32 } } %I, 1, 0 // with // %E extractvalue { i32 } { i32 42 }, 0 return ExtractValueInst::Create(IV->getInsertedValueOperand(), makeArrayRef(exti, exte)); } if (IntrinsicInst *II = dyn_cast(Agg)) { // We're extracting from an intrinsic, see if we're the only user, which // allows us to simplify multiple result intrinsics to simpler things that // just get one value. if (II->hasOneUse()) { // Check if we're grabbing the overflow bit or the result of a 'with // overflow' intrinsic. If it's the latter we can remove the intrinsic // and replace it with a traditional binary instruction. switch (II->getIntrinsicID()) { case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. 
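          // (Editorial example, hypothetical IR: with a single user,
          //    %agg = call { i32, i1 }
          //           @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
          //    %sum = extractvalue { i32, i1 } %agg, 0
          //  simplifies to a plain 'add i32 %a, %b' and the intrinsic call
          //  is erased.)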
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); replaceInstUsesWith(*II, UndefValue::get(II->getType())); eraseInstFromFunction(*II); return BinaryOperator::CreateAdd(LHS, RHS); } // If the normal result of the add is dead, and the RHS is a constant, // we can transform this into a range comparison. // overflow = uadd a, -4 --> overflow = icmp ugt a, 3 if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow) if (ConstantInt *CI = dyn_cast(II->getArgOperand(1))) return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0), ConstantExpr::getNot(CI)); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); replaceInstUsesWith(*II, UndefValue::get(II->getType())); eraseInstFromFunction(*II); return BinaryOperator::CreateSub(LHS, RHS); } break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); replaceInstUsesWith(*II, UndefValue::get(II->getType())); eraseInstFromFunction(*II); return BinaryOperator::CreateMul(LHS, RHS); } break; default: break; } } } if (LoadInst *L = dyn_cast(Agg)) // If the (non-volatile) load only has one use, we can rewrite this to a // load from a GEP. This reduces the size of the load. If a load is used // only by extractvalue instructions then this either must have been // optimized before, or it is a struct with padding, in which case we // don't want to do the transformation as it loses padding knowledge. if (L->isSimple() && L->hasOneUse()) { // extractvalue has integer indices, getelementptr has Value*s. Convert. SmallVector Indices; // Prefix an i32 0 since we need the first element. Indices.push_back(Builder.getInt32(0)); for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); I != E; ++I) Indices.push_back(Builder.getInt32(*I)); // We need to insert these at the location of the old load, not at that of // the extractvalue. Builder.SetInsertPoint(L); Value *GEP = Builder.CreateInBoundsGEP(L->getType(), L->getPointerOperand(), Indices); Instruction *NL = Builder.CreateLoad(GEP); // Whatever aliasing information we had for the orignal load must also // hold for the smaller load, so propagate the annotations. AAMDNodes Nodes; L->getAAMetadata(Nodes); NL->setAAMetadata(Nodes); // Returning the load directly will cause the main loop to insert it in // the wrong spot, so use replaceInstUsesWith(). return replaceInstUsesWith(EV, NL); } // We could simplify extracts from other values. Note that nested extracts may // already be simplified implicitly by the above: extract (extract (insert) ) // will be translated into extract ( insert ( extract ) ) first and then just // the value inserted, if appropriate. Similarly for extracts from single-use // loads: extract (extract (load)) will be translated to extract (load (gep)) // and if again single-use then via load (gep (gep)) to load (gep). // However, double extracts from e.g. function arguments or return values // aren't handled yet. return nullptr; } /// Return 'true' if the given typeinfo will match anything. static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { switch (Personality) { case EHPersonality::GNU_C: case EHPersonality::GNU_C_SjLj: case EHPersonality::Rust: // The GCC C EH and Rust personality only exists to support cleanups, so // it's not clear what the semantics of catch clauses are. 
    return false;
  case EHPersonality::Unknown:
    return false;
  case EHPersonality::GNU_Ada:
    // While __gnat_all_others_value will match any Ada exception, it doesn't
    // match foreign exceptions (or didn't, before gcc-4.7).
    return false;
  case EHPersonality::GNU_CXX:
  case EHPersonality::GNU_CXX_SjLj:
  case EHPersonality::GNU_ObjC:
  case EHPersonality::MSVC_X86SEH:
  case EHPersonality::MSVC_Win64SEH:
  case EHPersonality::MSVC_CXX:
  case EHPersonality::CoreCLR:
  case EHPersonality::Wasm_CXX:
    return TypeInfo->isNullValue();
  }
  llvm_unreachable("invalid enum");
}

static bool shorter_filter(const Value *LHS, const Value *RHS) {
  return cast<ArrayType>(LHS->getType())->getNumElements() <
         cast<ArrayType>(RHS->getType())->getNumElements();
}

Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
  // The logic here should be correct for any real-world personality function.
  // However if that turns out not to be true, the offending logic can always
  // be conditioned on the personality function, like the catch-all logic is.
  EHPersonality Personality =
      classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());

  // Simplify the list of clauses, e.g. by removing repeated catch clauses
  // (these are often created by inlining).
  bool MakeNewInstruction = false; // If true, recreate using the following:
  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
  bool CleanupFlag = LI.isCleanup();      // - The new instruction is a cleanup.

  SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
  for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
    bool isLastClause = i + 1 == e;
    if (LI.isCatch(i)) {
      // A catch clause.
      Constant *CatchClause = LI.getClause(i);
      Constant *TypeInfo = CatchClause->stripPointerCasts();

      // If we already saw this clause, there is no point in having a second
      // copy of it.
      if (AlreadyCaught.insert(TypeInfo).second) {
        // This catch clause was not already seen.
        NewClauses.push_back(CatchClause);
      } else {
        // Repeated catch clause - drop the redundant copy.
        MakeNewInstruction = true;
      }

      // If this is a catch-all then there is no point in keeping any
      // following clauses or marking the landingpad as having a cleanup.
      if (isCatchAll(Personality, TypeInfo)) {
        if (!isLastClause)
          MakeNewInstruction = true;
        CleanupFlag = false;
        break;
      }
    } else {
      // A filter clause. If any of the filter elements were already caught
      // then they can be dropped from the filter. It is tempting to try to
      // exploit the filter further by saying that any typeinfo that does not
      // occur in the filter can't be caught later (and thus can be dropped).
      // However this would be wrong, since typeinfos can match without being
      // equal (for example if one represents a C++ class, and the other some
      // class derived from it).
      assert(LI.isFilter(i) && "Unsupported landingpad clause!");
      Constant *FilterClause = LI.getClause(i);
      ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
      unsigned NumTypeInfos = FilterType->getNumElements();

      // An empty filter catches everything, so there is no point in keeping
      // any following clauses or marking the landingpad as having a cleanup.
      // By dealing with this case here the following code is made a bit
      // simpler.
      if (!NumTypeInfos) {
        NewClauses.push_back(FilterClause);
        if (!isLastClause)
          MakeNewInstruction = true;
        CleanupFlag = false;
        break;
      }

      bool MakeNewFilter = false; // If true, make a new filter.
      SmallVector<Constant *, 16> NewFilterElts; // New elements.
      if (isa<ConstantAggregateZero>(FilterClause)) {
        // Not an empty filter - it contains at least one null typeinfo.
assert(NumTypeInfos > 0 && "Should have handled empty filter already!"); Constant *TypeInfo = Constant::getNullValue(FilterType->getElementType()); // If this typeinfo is a catch-all then the filter can never match. if (isCatchAll(Personality, TypeInfo)) { // Throw the filter away. MakeNewInstruction = true; continue; } // There is no point in having multiple copies of this typeinfo, so // discard all but the first copy if there is more than one. NewFilterElts.push_back(TypeInfo); if (NumTypeInfos > 1) MakeNewFilter = true; } else { ConstantArray *Filter = cast(FilterClause); SmallPtrSet SeenInFilter; // For uniquing the elements. NewFilterElts.reserve(NumTypeInfos); // Remove any filter elements that were already caught or that already // occurred in the filter. While there, see if any of the elements are // catch-alls. If so, the filter can be discarded. bool SawCatchAll = false; for (unsigned j = 0; j != NumTypeInfos; ++j) { Constant *Elt = Filter->getOperand(j); Constant *TypeInfo = Elt->stripPointerCasts(); if (isCatchAll(Personality, TypeInfo)) { // This element is a catch-all. Bail out, noting this fact. SawCatchAll = true; break; } // Even if we've seen a type in a catch clause, we don't want to // remove it from the filter. An unexpected type handler may be // set up for a call site which throws an exception of the same // type caught. In order for the exception thrown by the unexpected // handler to propagate correctly, the filter must be correctly // described for the call site. // // Example: // // void unexpected() { throw 1;} // void foo() throw (int) { // std::set_unexpected(unexpected); // try { // throw 2.0; // } catch (int i) {} // } // There is no point in having multiple copies of the same typeinfo in // a filter, so only add it if we didn't already. if (SeenInFilter.insert(TypeInfo).second) NewFilterElts.push_back(cast(Elt)); } // A filter containing a catch-all cannot match anything by definition. if (SawCatchAll) { // Throw the filter away. MakeNewInstruction = true; continue; } // If we dropped something from the filter, make a new one. if (NewFilterElts.size() < NumTypeInfos) MakeNewFilter = true; } if (MakeNewFilter) { FilterType = ArrayType::get(FilterType->getElementType(), NewFilterElts.size()); FilterClause = ConstantArray::get(FilterType, NewFilterElts); MakeNewInstruction = true; } NewClauses.push_back(FilterClause); // If the new filter is empty then it will catch everything so there is // no point in keeping any following clauses or marking the landingpad // as having a cleanup. The case of the original filter being empty was // already handled above. if (MakeNewFilter && !NewFilterElts.size()) { assert(MakeNewInstruction && "New filter but not a new instruction!"); CleanupFlag = false; break; } } } // If several filters occur in a row then reorder them so that the shortest // filters come first (those with the smallest number of elements). This is // advantageous because shorter filters are more likely to match, speeding up // unwinding, but mostly because it increases the effectiveness of the other // filter optimizations below. for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { unsigned j; // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. for (j = i; j != e; ++j) if (!isa(NewClauses[j]->getType())) break; // Check whether the filters are already sorted by length. We need to know // if sorting them is actually going to do anything so that we only make a // new landingpad instruction if it does. 
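// Example for the filter reordering below (hypothetical clauses): given the
// consecutive filters
//   filter [3 x i8*] [@_ZTIi, @_ZTId, @_ZTIf], filter [1 x i8*] [@_ZTIi]
// the one-element filter is moved first. As the comment above notes, this
// both speeds up unwinding and feeds the subset elimination further below.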
for (unsigned k = i; k + 1 < j; ++k) if (shorter_filter(NewClauses[k+1], NewClauses[k])) { // Not sorted, so sort the filters now. Doing an unstable sort would be // correct too but reordering filters pointlessly might confuse users. std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j, shorter_filter); MakeNewInstruction = true; break; } // Look for the next batch of filters. i = j + 1; } // If typeinfos matched if and only if equal, then the elements of a filter L // that occurs later than a filter F could be replaced by the intersection of // the elements of F and L. In reality two typeinfos can match without being // equal (for example if one represents a C++ class, and the other some class // derived from it) so it would be wrong to perform this transform in general. // However the transform is correct and useful if F is a subset of L. In that // case L can be replaced by F, and thus removed altogether since repeating a // filter is pointless. So here we look at all pairs of filters F and L where // L follows F in the list of clauses, and remove L if every element of F is // an element of L. This can occur when inlining C++ functions with exception // specifications. for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { // Examine each filter in turn. Value *Filter = NewClauses[i]; ArrayType *FTy = dyn_cast(Filter->getType()); if (!FTy) // Not a filter - skip it. continue; unsigned FElts = FTy->getNumElements(); // Examine each filter following this one. Doing this backwards means that // we don't have to worry about filters disappearing under us when removed. for (unsigned j = NewClauses.size() - 1; j != i; --j) { Value *LFilter = NewClauses[j]; ArrayType *LTy = dyn_cast(LFilter->getType()); if (!LTy) // Not a filter - skip it. continue; // If Filter is a subset of LFilter, i.e. every element of Filter is also // an element of LFilter, then discard LFilter. SmallVectorImpl::iterator J = NewClauses.begin() + j; // If Filter is empty then it is a subset of LFilter. if (!FElts) { // Discard LFilter. NewClauses.erase(J); MakeNewInstruction = true; // Move on to the next filter. continue; } unsigned LElts = LTy->getNumElements(); // If Filter is longer than LFilter then it cannot be a subset of it. if (FElts > LElts) // Move on to the next filter. continue; // At this point we know that LFilter has at least one element. if (isa(LFilter)) { // LFilter only contains zeros. // Filter is a subset of LFilter iff Filter contains only zeros (as we // already know that Filter is not longer than LFilter). if (isa(Filter)) { assert(FElts <= LElts && "Should have handled this case earlier!"); // Discard LFilter. NewClauses.erase(J); MakeNewInstruction = true; } // Move on to the next filter. continue; } ConstantArray *LArray = cast(LFilter); if (isa(Filter)) { // Filter only contains zeros. // Since Filter is non-empty and contains only zeros, it is a subset of // LFilter iff LFilter contains a zero. assert(FElts > 0 && "Should have eliminated the empty filter earlier!"); for (unsigned l = 0; l != LElts; ++l) if (LArray->getOperand(l)->isNullValue()) { // LFilter contains a zero - discard it. NewClauses.erase(J); MakeNewInstruction = true; break; } // Move on to the next filter. continue; } // At this point we know that both filters are ConstantArrays. Loop over // operands to see whether every element of Filter is also an element of // LFilter. Since filters tend to be short this is probably faster than // using a method that scales nicely. 
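// Example for the subset rule above (hypothetical C++ with pre-C++17
// dynamic exception specifications): after inlining `g` into `f`, where
//   void g() throw(int);          // filter: [@_ZTIi]
//   void f() throw(int, double);  // filter: [@_ZTIi, @_ZTId]
// the landingpad can carry both filters. If the one-element filter precedes
// the two-element one, every element of the former occurs in the latter, so
// the longer filter is redundant and is removed.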
ConstantArray *FArray = cast(Filter); bool AllFound = true; for (unsigned f = 0; f != FElts; ++f) { Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts(); AllFound = false; for (unsigned l = 0; l != LElts; ++l) { Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts(); if (LTypeInfo == FTypeInfo) { AllFound = true; break; } } if (!AllFound) break; } if (AllFound) { // Discard LFilter. NewClauses.erase(J); MakeNewInstruction = true; } // Move on to the next filter. } } // If we changed any of the clauses, replace the old landingpad instruction // with a new one. if (MakeNewInstruction) { LandingPadInst *NLI = LandingPadInst::Create(LI.getType(), NewClauses.size()); for (unsigned i = 0, e = NewClauses.size(); i != e; ++i) NLI->addClause(NewClauses[i]); // A landing pad with no clauses must have the cleanup flag set. It is // theoretically possible, though highly unlikely, that we eliminated all // clauses. If so, force the cleanup flag to true. if (NewClauses.empty()) CleanupFlag = true; NLI->setCleanup(CleanupFlag); return NLI; } // Even if none of the clauses changed, we may nonetheless have understood // that the cleanup flag is pointless. Clear it if so. if (LI.isCleanup() != CleanupFlag) { assert(!CleanupFlag && "Adding a cleanup, not removing one?!"); LI.setCleanup(CleanupFlag); return &LI; } return nullptr; } /// Try to move the specified instruction from its current block into the /// beginning of DestBlock, which can only happen if it's safe to move the /// instruction past all of the instructions between it and the end of its /// block. static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { assert(I->hasOneUse() && "Invariants didn't hold!"); BasicBlock *SrcBlock = I->getParent(); // Cannot move control-flow-involving, volatile loads, vaarg, etc. if (isa(I) || I->isEHPad() || I->mayHaveSideEffects() || I->isTerminator()) return false; // Do not sink static or dynamic alloca instructions. Static allocas must // remain in the entry block, and dynamic allocas must not be sunk in between // a stacksave / stackrestore pair, which would incorrectly shorten its // lifetime. if (isa(I)) return false; // Do not sink into catchswitch blocks. if (isa(DestBlock->getTerminator())) return false; // Do not sink convergent call instructions. if (auto *CI = dyn_cast(I)) { if (CI->isConvergent()) return false; } // We can only sink load instructions if there is nothing between the load and // the end of block that could change the value. if (I->mayReadFromMemory()) { for (BasicBlock::iterator Scan = I->getIterator(), E = I->getParent()->end(); Scan != E; ++Scan) if (Scan->mayWriteToMemory()) return false; } BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); I->moveBefore(&*InsertPos); ++NumSunkInst; // Also sink all related debug uses from the source basic block. Otherwise we // get debug use before the def. SmallVector DbgUsers; findDbgUsers(DbgUsers, I); for (auto *DII : DbgUsers) { if (DII->getParent() == SrcBlock) { DII->moveBefore(&*InsertPos); LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n'); } } return true; } bool InstCombiner::run() { while (!Worklist.isEmpty()) { Instruction *I = Worklist.RemoveOne(); if (I == nullptr) continue; // skip null values. // Check to see if we can DCE the instruction. 
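// Example for TryToSinkInstruction above (hypothetical C++): if the only
// use of a load is in a successor block that has this block as its unique
// predecessor, and nothing in between may write memory, the load is sunk
// into that successor, off the path where it is unused.
//
//   int V = *P;        //  becomes:   if (Cond)
//   if (Cond)          //               Use(*P);  // load sunk here
//     Use(V);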
if (isInstructionTriviallyDead(I, &TLI)) { LLVM_DEBUG(dbgs() << "IC: DCE: " << *I << '\n'); eraseInstFromFunction(*I); ++NumDeadInst; MadeIRChange = true; continue; } if (!DebugCounter::shouldExecute(VisitCounter)) continue; // Instruction isn't dead, see if we can constant propagate it. if (!I->use_empty() && (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) { if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) { LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. replaceInstUsesWith(*I, C); ++NumConstProp; if (isInstructionTriviallyDead(I, &TLI)) eraseInstFromFunction(*I); MadeIRChange = true; continue; } } // In general, it is possible for computeKnownBits to determine all bits in // a value even when the operands are not all constants. Type *Ty = I->getType(); if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) { KnownBits Known = computeKnownBits(I, /*Depth*/0, I); if (Known.isConstant()) { Constant *C = ConstantInt::get(Ty, Known.getConstant()); LLVM_DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. replaceInstUsesWith(*I, C); ++NumConstProp; if (isInstructionTriviallyDead(I, &TLI)) eraseInstFromFunction(*I); MadeIRChange = true; continue; } } // See if we can trivially sink this instruction to a successor basic block. if (EnableCodeSinking && I->hasOneUse()) { BasicBlock *BB = I->getParent(); Instruction *UserInst = cast<Instruction>(*I->user_begin()); BasicBlock *UserParent; // Get the block the use occurs in. if (PHINode *PN = dyn_cast<PHINode>(UserInst)) UserParent = PN->getIncomingBlock(*I->use_begin()); else UserParent = UserInst->getParent(); if (UserParent != BB) { bool UserIsSuccessor = false; // See if the user is one of our successors. for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) if (*SI == UserParent) { UserIsSuccessor = true; break; } // If the user is one of our immediate successors, and if that successor // only has us as a predecessor (we'd have to split the critical edge // otherwise), we can keep going. if (UserIsSuccessor && UserParent->getUniquePredecessor()) { // Okay, the CFG is simple enough, try to sink this instruction. if (TryToSinkInstruction(I, UserParent)) { LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); MadeIRChange = true; // We'll add uses of the sunk instruction below, but since sinking // can expose opportunities for its *operands*, add them to the // worklist. for (Use &U : I->operands()) if (Instruction *OpI = dyn_cast<Instruction>(U.get())) Worklist.Add(OpI); } } } } // Now that we have an instruction, try combining it to simplify it. Builder.SetInsertPoint(I); Builder.SetCurrentDebugLocation(I->getDebugLoc()); #ifndef NDEBUG std::string OrigI; #endif LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); if (Instruction *Result = visit(*I)) { ++NumCombined; // Should we replace the old instruction with a new one? if (Result != I) { LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); if (I->getDebugLoc()) Result->setDebugLoc(I->getDebugLoc()); // Everything uses the new instruction now. I->replaceAllUsesWith(Result); // Move the name to the new instruction first. Result->takeName(I); // Push the new instruction and any users onto the worklist. Worklist.AddUsersToWorkList(*Result); Worklist.Add(Result); // Insert the new instruction into the basic block...
BasicBlock *InstParent = I->getParent(); BasicBlock::iterator InsertPos = I->getIterator(); // If we replace a PHI with something that isn't a PHI, fix up the // insertion point. if (!isa(Result) && isa(InsertPos)) InsertPos = InstParent->getFirstInsertionPt(); InstParent->getInstList().insert(InsertPos, Result); eraseInstFromFunction(*I); } else { LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' << " New = " << *I << '\n'); // If the instruction was modified, it's possible that it is now dead. // if so, remove it. if (isInstructionTriviallyDead(I, &TLI)) { eraseInstFromFunction(*I); } else { Worklist.AddUsersToWorkList(*I); Worklist.Add(I); } } MadeIRChange = true; } } Worklist.Zap(); return MadeIRChange; } /// Walk the function in depth-first order, adding all reachable code to the /// worklist. /// /// This has a couple of tricks to make the code faster and more powerful. In /// particular, we constant fold and DCE instructions as we go, to avoid adding /// them to the worklist (this significantly speeds up instcombine on code where /// many instructions are dead or constant). Additionally, if we find a branch /// whose condition is a known constant, we only visit the reachable successors. static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, SmallPtrSetImpl &Visited, InstCombineWorklist &ICWorklist, const TargetLibraryInfo *TLI) { bool MadeIRChange = false; SmallVector Worklist; Worklist.push_back(BB); SmallVector InstrsForInstCombineWorklist; DenseMap FoldedConstants; do { BB = Worklist.pop_back_val(); // We have now visited this block! If we've already been here, ignore it. if (!Visited.insert(BB).second) continue; for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *Inst = &*BBI++; // DCE instruction if trivially dead. if (isInstructionTriviallyDead(Inst, TLI)) { ++NumDeadInst; LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); salvageDebugInfo(*Inst); Inst->eraseFromParent(); MadeIRChange = true; continue; } // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && (Inst->getNumOperands() == 0 || isa(Inst->getOperand(0)))) if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) { LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst << '\n'); Inst->replaceAllUsesWith(C); ++NumConstProp; if (isInstructionTriviallyDead(Inst, TLI)) Inst->eraseFromParent(); MadeIRChange = true; continue; } // See if we can constant fold its operands. for (Use &U : Inst->operands()) { if (!isa(U) && !isa(U)) continue; auto *C = cast(U); Constant *&FoldRes = FoldedConstants[C]; if (!FoldRes) FoldRes = ConstantFoldConstant(C, DL, TLI); if (!FoldRes) FoldRes = C; if (FoldRes != C) { LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst << "\n Old = " << *C << "\n New = " << *FoldRes << '\n'); U = FoldRes; MadeIRChange = true; } } // Skip processing debug intrinsics in InstCombine. Processing these call instructions // consumes non-trivial amount of time and provides no value for the optimization. if (!isa(Inst)) InstrsForInstCombineWorklist.push_back(Inst); } // Recursively visit successors. If this is a branch or switch on a // constant, only visit the reachable successor. 
Instruction *TI = BB->getTerminator(); if (BranchInst *BI = dyn_cast(TI)) { if (BI->isConditional() && isa(BI->getCondition())) { bool CondVal = cast(BI->getCondition())->getZExtValue(); BasicBlock *ReachableBB = BI->getSuccessor(!CondVal); Worklist.push_back(ReachableBB); continue; } } else if (SwitchInst *SI = dyn_cast(TI)) { if (ConstantInt *Cond = dyn_cast(SI->getCondition())) { Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor()); continue; } } for (BasicBlock *SuccBB : successors(TI)) Worklist.push_back(SuccBB); } while (!Worklist.empty()); // Once we've found all of the instructions to add to instcombine's worklist, // add them in reverse order. This way instcombine will visit from the top // of the function down. This jives well with the way that it adds all uses // of instructions to the worklist after doing a transformation, thus avoiding // some N^2 behavior in pathological cases. ICWorklist.AddInitialGroup(InstrsForInstCombineWorklist); return MadeIRChange; } /// Populate the IC worklist from a function, and prune any dead basic /// blocks discovered in the process. /// /// This also does basic constant propagation and other forward fixing to make /// the combiner itself run much faster. static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, TargetLibraryInfo *TLI, InstCombineWorklist &ICWorklist) { bool MadeIRChange = false; // Do a depth-first traversal of the function, populate the worklist with // the reachable instructions. Ignore blocks that are not reachable. Keep // track of which blocks we visit. SmallPtrSet Visited; MadeIRChange |= AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI); // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents // the instcombine code from having to deal with some bad special cases. for (BasicBlock &BB : F) { if (Visited.count(&BB)) continue; unsigned NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(&BB); MadeIRChange |= NumDeadInstInBB > 0; NumDeadInst += NumDeadInstInBB; } return MadeIRChange; } static bool combineInstructionsOverFunction( Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true, LoopInfo *LI = nullptr) { auto &DL = F.getParent()->getDataLayout(); ExpensiveCombines |= EnableExpensiveCombines; /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. IRBuilder Builder( F.getContext(), TargetFolder(DL), IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) { Worklist.Add(I); if (match(I, m_Intrinsic())) AC.registerAssumption(cast(I)); })); // Lower dbg.declare intrinsics otherwise their value may be clobbered // by instcombiner. bool MadeIRChange = false; if (ShouldLowerDbgDeclare) MadeIRChange = LowerDbgDeclare(F); // Iterate while there is work to do. 
int Iteration = 0; while (true) { ++Iteration; LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA, AC, TLI, DT, ORE, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; if (!IC.run()) break; } return MadeIRChange || Iteration > 1; } PreservedAnalyses InstCombinePass::run(Function &F, FunctionAnalysisManager &AM) { auto &AC = AM.getResult(F); auto &DT = AM.getResult(F); auto &TLI = AM.getResult(F); auto &ORE = AM.getResult(F); auto *LI = AM.getCachedResult(F); auto *AA = &AM.getResult(F); if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, ExpensiveCombines, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); // Mark all the analyses that instcombine updates as preserved. PreservedAnalyses PA; PA.preserveSet(); PA.preserve(); PA.preserve(); PA.preserve(); return PA; } void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); } bool InstructionCombiningPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; // Required analyses. auto AA = &getAnalysis().getAAResults(); auto &AC = getAnalysis().getAssumptionCache(F); auto &TLI = getAnalysis().getTLI(); auto &DT = getAnalysis().getDomTree(); auto &ORE = getAnalysis().getORE(); // Optional analyses. auto *LIWP = getAnalysisIfAvailable(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, ExpensiveCombines, LI); } char InstructionCombiningPass::ID = 0; INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) // Initialization Routines void llvm::initializeInstCombine(PassRegistry &Registry) { initializeInstructionCombiningPassPass(Registry); } void LLVMInitializeInstCombine(LLVMPassRegistryRef R) { initializeInstructionCombiningPassPass(*unwrap(R)); } FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines) { return new InstructionCombiningPass(ExpensiveCombines); } void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createInstructionCombiningPass()); } Index: projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp =================================================================== --- projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp (revision 344547) +++ projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp (revision 344548) @@ -1,827 +1,870 @@ //===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// // // This pass turns chains of integer comparisons into memcmp (the memcmp is // later typically inlined as a chain of efficient hardware comparisons). This // typically benefits C++ member or nonmember operator==(). // -// The basic idea is to replace a larger chain of integer comparisons loaded -// from contiguous memory locations into a smaller chain of such integer +// The basic idea is to replace a longer chain of integer comparisons loaded +// from contiguous memory locations into a shorter chain of larger integer // comparisons. The benefits are twofold: // - There are fewer jumps, and therefore fewer opportunities for mispredictions // and I-cache misses. // - Code size is smaller, both because jumps are removed and because the // encoding of a 2*n byte compare is smaller than that of two n-byte // compares. - +// +// Example: +// +// struct S { +// int a; +// char b; +// char c; +// uint16_t d; +// bool operator==(const S& o) const { +// return a == o.a && b == o.b && c == o.c && d == o.d; +// } +// }; +// +// Is optimized as: +// +// bool S::operator==(const S& o) const { +// return memcmp(this, &o, 8) == 0; +// } +// +// Which will later be expanded (by ExpandMemCmp) into a single 8-byte icmp. +// //===----------------------------------------------------------------------===// -#include <algorithm> -#include <numeric> -#include <utility> -#include <vector> #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include <algorithm> +#include <numeric> +#include <utility> +#include <vector> using namespace llvm; namespace { #define DEBUG_TYPE "mergeicmps" // Returns true if the instruction is a simple load or a simple store. static bool isSimpleLoadOrStore(const Instruction *I) { if (const LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->isSimple(); if (const StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->isSimple(); return false; } -// A BCE atom. +// A BCE atom ("Binary Compare Expression Atom") represents an integer load +// that is a constant offset from a base value, e.g. `a` or `o.c` in the example +// at the top. struct BCEAtom { - BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {} + BCEAtom() = default; + BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset) + : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(Offset) {} - const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; } - + // We want to order BCEAtoms by (Base, Offset). However we cannot use + // the pointer values for Base because these are non-deterministic. + // To make sure that the sort order is stable, we first assign to each atom + // base value an index based on its order of appearance in the chain of + // comparisons. We call this index `BaseOrdering`. For example, for: + // b[3] == c[2] && a[1] == d[1] && b[4] == c[3] + // | block 1 | | block 2 | | block 3 | + // b gets assigned index 0 and a index 1, because b appears as LHS in block 1, + // which is before block 2. + // We then sort by (BaseOrdering[LHS.Base()], LHS.Offset), which is stable. bool operator<(const BCEAtom &O) const { - assert(Base() && "invalid atom"); - assert(O.Base() && "invalid atom"); - // Just ordering by (Base(), Offset) is sufficient.
However because this - // means that the ordering will depend on the addresses of the base - // values, which are not reproducible from run to run. To guarantee - // stability, we use the names of the values if they exist; we sort by: - // (Base.getName(), Base(), Offset). - const int NameCmp = Base()->getName().compare(O.Base()->getName()); - if (NameCmp == 0) { - if (Base() == O.Base()) { - return Offset.slt(O.Offset); - } - return Base() < O.Base(); - } - return NameCmp < 0; + return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset); } - GetElementPtrInst *GEP; - LoadInst *LoadI; + GetElementPtrInst *GEP = nullptr; + LoadInst *LoadI = nullptr; + unsigned BaseId = 0; APInt Offset; }; +// A class that assigns increasing ids to values in the order in which they are +// seen. See comment in `BCEAtom::operator<()``. +class BaseIdentifier { +public: + // Returns the id for value `Base`, after assigning one if `Base` has not been + // seen before. + int getBaseId(const Value *Base) { + assert(Base && "invalid base"); + const auto Insertion = BaseToIndex.try_emplace(Base, Order); + if (Insertion.second) + ++Order; + return Insertion.first->second; + } + +private: + unsigned Order = 1; + DenseMap BaseToIndex; +}; + // If this value is a load from a constant offset w.r.t. a base address, and // there are no other users of the load or address, returns the base address and // the offset. -BCEAtom visitICmpLoadOperand(Value *const Val) { - BCEAtom Result; - if (auto *const LoadI = dyn_cast(Val)) { - LLVM_DEBUG(dbgs() << "load\n"); - if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) { - LLVM_DEBUG(dbgs() << "used outside of block\n"); - return {}; - } - // Do not optimize atomic loads to non-atomic memcmp - if (!LoadI->isSimple()) { - LLVM_DEBUG(dbgs() << "volatile or atomic\n"); - return {}; - } - Value *const Addr = LoadI->getOperand(0); - if (auto *const GEP = dyn_cast(Addr)) { - LLVM_DEBUG(dbgs() << "GEP\n"); - if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) { - LLVM_DEBUG(dbgs() << "used outside of block\n"); - return {}; - } - const auto &DL = GEP->getModule()->getDataLayout(); - if (!isDereferenceablePointer(GEP, DL)) { - LLVM_DEBUG(dbgs() << "not dereferenceable\n"); - // We need to make sure that we can do comparison in any order, so we - // require memory to be unconditionnally dereferencable. - return {}; - } - Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0); - if (GEP->accumulateConstantOffset(DL, Result.Offset)) { - Result.GEP = GEP; - Result.LoadI = LoadI; - } - } +BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) { + auto *const LoadI = dyn_cast(Val); + if (!LoadI) + return {}; + LLVM_DEBUG(dbgs() << "load\n"); + if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) { + LLVM_DEBUG(dbgs() << "used outside of block\n"); + return {}; } - return Result; + // Do not optimize atomic loads to non-atomic memcmp + if (!LoadI->isSimple()) { + LLVM_DEBUG(dbgs() << "volatile or atomic\n"); + return {}; + } + Value *const Addr = LoadI->getOperand(0); + auto *const GEP = dyn_cast(Addr); + if (!GEP) + return {}; + LLVM_DEBUG(dbgs() << "GEP\n"); + if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) { + LLVM_DEBUG(dbgs() << "used outside of block\n"); + return {}; + } + const auto &DL = GEP->getModule()->getDataLayout(); + if (!isDereferenceablePointer(GEP, DL)) { + LLVM_DEBUG(dbgs() << "not dereferenceable\n"); + // We need to make sure that we can do comparison in any order, so we + // require memory to be unconditionnally dereferencable. 
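// Example of an atom this function accepts (hypothetical C++ and the IR it
// lowers to, assuming a 4-byte int and no padding): for `s.b` with
//   struct S { int a; int b; };
// the operand pattern is
//   %g = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 1
//   %v = load i32, i32* %g
// which yields a BCEAtom with base %s and constant offset 4.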
+ return {}; + } + APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0); + if (!GEP->accumulateConstantOffset(DL, Offset)) + return {}; + return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()), + Offset); } -// A basic block with a comparison between two BCE atoms. +// A basic block with a comparison between two BCE atoms, e.g. `a == o.a` in the +// example at the top. // The block might do extra work besides the atom comparison, in which case // doesOtherWork() returns true. Under some conditions, the block can be // split into the atom comparison part and the "other work" part // (see canSplit()). // Note: the terminology is misleading: the comparison is symmetric, so there // is no real {l/r}hs. What we want though is to have the same base on the // left (resp. right), so that we can detect consecutive loads. To ensure this // we put the smallest atom on the left. class BCECmpBlock { public: BCECmpBlock() {} BCECmpBlock(BCEAtom L, BCEAtom R, int SizeBits) : Lhs_(L), Rhs_(R), SizeBits_(SizeBits) { if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_); } - bool IsValid() const { - return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr; - } + bool IsValid() const { return Lhs_.BaseId != 0 && Rhs_.BaseId != 0; } // Assert the block is consistent: If valid, it should also have // non-null members besides Lhs_ and Rhs_. void AssertConsistent() const { if (IsValid()) { assert(BB); assert(CmpI); assert(BranchI); } } const BCEAtom &Lhs() const { return Lhs_; } const BCEAtom &Rhs() const { return Rhs_; } int SizeBits() const { return SizeBits_; } // Returns true if the block does other work besides the comparison. bool doesOtherWork() const; // Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp // instructions in the block. bool canSplit(AliasAnalysis *AA) const; // Return true if all the relevant instructions in the BCE-cmp-block can // be sunk below this instruction. By doing this, we know we can separate the // BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the // block. bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &, AliasAnalysis *AA) const; // We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block // instructions. Split the old block and move all non-BCE-cmp-insts into the // new parent block. void split(BasicBlock *NewParent, AliasAnalysis *AA) const; // The basic block where this comparison happens. BasicBlock *BB = nullptr; // The ICMP for this comparison. ICmpInst *CmpI = nullptr; // The terminating branch. BranchInst *BranchI = nullptr; // The block requires splitting. bool RequireSplit = false; private: BCEAtom Lhs_; BCEAtom Rhs_; int SizeBits_ = 0; }; bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst, DenseSet<Instruction *> &BlockInsts, AliasAnalysis *AA) const { // If this instruction has side effects and it's in the middle of the BCE cmp // block instructions, then bail for now. if (Inst->mayHaveSideEffects()) { // Bail if this is not a simple load or store. if (!isSimpleLoadOrStore(Inst)) return false; // Disallow stores that might alias the BCE operands. MemoryLocation LLoc = MemoryLocation::get(Lhs_.LoadI); MemoryLocation RLoc = MemoryLocation::get(Rhs_.LoadI); if (isModSet(AA->getModRefInfo(Inst, LLoc)) || isModSet(AA->getModRefInfo(Inst, RLoc))) return false; } // Make sure this instruction does not use any of the BCE cmp block // instructions as operand.
for (auto BI : BlockInsts) { if (is_contained(Inst->operands(), BI)) return false; } return true; } void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const { DenseSet<Instruction *> BlockInsts( {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI}); llvm::SmallVector<Instruction *, 4> OtherInsts; for (Instruction &Inst : *BB) { if (BlockInsts.count(&Inst)) continue; assert(canSinkBCECmpInst(&Inst, BlockInsts, AA) && "Split unsplittable block"); // This is a non-BCE-cmp-block instruction. And it can be separated // from the BCE-cmp-block instructions. OtherInsts.push_back(&Inst); } // Do the actual splitting. for (Instruction *Inst : reverse(OtherInsts)) { Inst->moveBefore(&*NewParent->begin()); } } bool BCECmpBlock::canSplit(AliasAnalysis *AA) const { DenseSet<Instruction *> BlockInsts( {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI}); for (Instruction &Inst : *BB) { if (!BlockInsts.count(&Inst)) { if (!canSinkBCECmpInst(&Inst, BlockInsts, AA)) return false; } } return true; } bool BCECmpBlock::doesOtherWork() const { AssertConsistent(); // All the instructions we care about in the BCE cmp block. DenseSet<Instruction *> BlockInsts( {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI}); // TODO(courbet): Can we allow some other things? This is very conservative. // We might be able to get away with anything that does not have side // effects outside of the basic block. // Note: The GEPs and/or loads are not necessarily in the same block. for (const Instruction &Inst : *BB) { if (!BlockInsts.count(&Inst)) return true; } return false; } // Visit the given comparison. If this is a comparison between two valid // BCE atoms, returns the comparison. BCECmpBlock visitICmp(const ICmpInst *const CmpI, - const ICmpInst::Predicate ExpectedPredicate) { + const ICmpInst::Predicate ExpectedPredicate, + BaseIdentifier &BaseId) { // The comparison can only be used once: // - For intermediate blocks, as a branch condition. // - For the final block, as an incoming value for the Phi. // If there are any other uses of the comparison, we cannot merge it with // other comparisons as we would create an orphan use of the value. if (!CmpI->hasOneUse()) { LLVM_DEBUG(dbgs() << "cmp has several uses\n"); return {}; } - if (CmpI->getPredicate() == ExpectedPredicate) { - LLVM_DEBUG(dbgs() << "cmp " - << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne") - << "\n"); - auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0)); - if (!Lhs.Base()) return {}; - auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1)); - if (!Rhs.Base()) return {}; - const auto &DL = CmpI->getModule()->getDataLayout(); - return BCECmpBlock(std::move(Lhs), std::move(Rhs), - DL.getTypeSizeInBits(CmpI->getOperand(0)->getType())); - } - return {}; + if (CmpI->getPredicate() != ExpectedPredicate) + return {}; + LLVM_DEBUG(dbgs() << "cmp " + << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne") + << "\n"); + auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId); + if (!Lhs.BaseId) + return {}; + auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId); + if (!Rhs.BaseId) + return {}; + const auto &DL = CmpI->getModule()->getDataLayout(); + return BCECmpBlock(std::move(Lhs), std::move(Rhs), + DL.getTypeSizeInBits(CmpI->getOperand(0)->getType())); } // Visit the given comparison block. If this is a comparison between two valid // BCE atoms, returns the comparison.
BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block, - const BasicBlock *const PhiBlock) { + const BasicBlock *const PhiBlock, + BaseIdentifier &BaseId) { if (Block->empty()) return {}; auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator()); if (!BranchI) return {}; LLVM_DEBUG(dbgs() << "branch\n"); if (BranchI->isUnconditional()) { // In this case, we expect an incoming value which is the result of the // comparison. This is the last link in the chain of comparisons (note // that this does not mean that this is the last incoming value, blocks // can be reordered). auto *const CmpI = dyn_cast<ICmpInst>(Val); if (!CmpI) return {}; LLVM_DEBUG(dbgs() << "icmp\n"); - auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ); + auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ, BaseId); Result.CmpI = CmpI; Result.BranchI = BranchI; return Result; } else { // In this case, we expect a constant incoming value (the comparison is // chained). const auto *const Const = dyn_cast<ConstantInt>(Val); LLVM_DEBUG(dbgs() << "const\n"); if (!Const->isZero()) return {}; LLVM_DEBUG(dbgs() << "false\n"); auto *const CmpI = dyn_cast<ICmpInst>(BranchI->getCondition()); if (!CmpI) return {}; LLVM_DEBUG(dbgs() << "icmp\n"); assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch"); BasicBlock *const FalseBlock = BranchI->getSuccessor(1); auto Result = visitICmp( - CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE); + CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, + BaseId); Result.CmpI = CmpI; Result.BranchI = BranchI; return Result; } return {}; } static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons, BCECmpBlock &Comparison) { LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName() << "': Found cmp of " << Comparison.SizeBits() - << " bits between " << Comparison.Lhs().Base() << " + " + << " bits between " << Comparison.Lhs().BaseId << " + " << Comparison.Lhs().Offset << " and " - << Comparison.Rhs().Base() << " + " + << Comparison.Rhs().BaseId << " + " << Comparison.Rhs().Offset << "\n"); LLVM_DEBUG(dbgs() << "\n"); Comparisons.push_back(Comparison); } // A chain of comparisons. class BCECmpChain { public: BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi, AliasAnalysis *AA); int size() const { return Comparisons_.size(); } #ifdef MERGEICMPS_DOT_ON void dump() const; #endif // MERGEICMPS_DOT_ON bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA); private: static bool IsContiguous(const BCECmpBlock &First, const BCECmpBlock &Second) { - return First.Lhs().Base() == Second.Lhs().Base() && - First.Rhs().Base() == Second.Rhs().Base() && + return First.Lhs().BaseId == Second.Lhs().BaseId && + First.Rhs().BaseId == Second.Rhs().BaseId && First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset && First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset; } // Merges the given comparison blocks into one memcmp block and updates // branches. Comparisons are assumed to be contiguous. If NextBBInChain is // null, the merged block will link to the phi block. void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons, BasicBlock *const NextBBInChain, PHINode &Phi, const TargetLibraryInfo *const TLI, AliasAnalysis *AA); PHINode &Phi_; std::vector<BCECmpBlock> Comparisons_; // The original entry block (before sorting). BasicBlock *EntryBlock_; }; BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi, AliasAnalysis *AA) : Phi_(Phi) { assert(!Blocks.empty() && "a chain should have at least one block"); // Now look inside blocks to check for BCE comparisons.
std::vector Comparisons; + BaseIdentifier BaseId; for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) { BasicBlock *const Block = Blocks[BlockIdx]; assert(Block && "invalid block"); BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block), - Block, Phi.getParent()); + Block, Phi.getParent(), BaseId); Comparison.BB = Block; if (!Comparison.IsValid()) { LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n"); return; } if (Comparison.doesOtherWork()) { LLVM_DEBUG(dbgs() << "block '" << Comparison.BB->getName() << "' does extra work besides compare\n"); if (Comparisons.empty()) { // This is the initial block in the chain, in case this block does other // work, we can try to split the block and move the irrelevant // instructions to the predecessor. // // If this is not the initial block in the chain, splitting it wont // work. // // As once split, there will still be instructions before the BCE cmp // instructions that do other work in program order, i.e. within the // chain before sorting. Unless we can abort the chain at this point // and start anew. // // NOTE: we only handle block with single predecessor for now. if (Comparison.canSplit(AA)) { LLVM_DEBUG(dbgs() << "Split initial block '" << Comparison.BB->getName() << "' that does extra work besides compare\n"); Comparison.RequireSplit = true; enqueueBlock(Comparisons, Comparison); } else { LLVM_DEBUG(dbgs() << "ignoring initial block '" << Comparison.BB->getName() << "' that does extra work besides compare\n"); } continue; } // TODO(courbet): Right now we abort the whole chain. We could be // merging only the blocks that don't do other work and resume the // chain from there. For example: // if (a[0] == b[0]) { // bb1 // if (a[1] == b[1]) { // bb2 // some_value = 3; //bb3 // if (a[2] == b[2]) { //bb3 // do a ton of stuff //bb4 // } // } // } // // This is: // // bb1 --eq--> bb2 --eq--> bb3* -eq--> bb4 --+ // \ \ \ \ // ne ne ne \ // \ \ \ v // +------------+-----------+----------> bb_phi // // We can only merge the first two comparisons, because bb3* does // "other work" (setting some_value to 3). // We could still merge bb1 and bb2 though. return; } enqueueBlock(Comparisons, Comparison); } // It is possible we have no suitable comparison to merge. if (Comparisons.empty()) { LLVM_DEBUG(dbgs() << "chain with no BCE basic blocks, no merge\n"); return; } EntryBlock_ = Comparisons[0].BB; Comparisons_ = std::move(Comparisons); #ifdef MERGEICMPS_DOT_ON errs() << "BEFORE REORDERING:\n\n"; dump(); #endif // MERGEICMPS_DOT_ON // Reorder blocks by LHS. We can do that without changing the // semantics because we are only accessing dereferencable memory. 
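// Example for the reordering below and the later merge (hypothetical
// chain, assuming both sides are laid out identically): if the 32-bit
// comparisons arrive as offsets 8, 0, 4 of the same two bases, sorting by
// (BaseId, Offset) yields 0, 4, 8; IsContiguous() then accepts 0+4 == 4
// and 4+4 == 8, and the three blocks collapse into one 12-byte memcmp.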
- llvm::sort(Comparisons_, [](const BCECmpBlock &a, const BCECmpBlock &b) { - return a.Lhs() < b.Lhs(); - }); + llvm::sort(Comparisons_, + [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) { + return LhsBlock.Lhs() < RhsBlock.Lhs(); + }); #ifdef MERGEICMPS_DOT_ON errs() << "AFTER REORDERING:\n\n"; dump(); #endif // MERGEICMPS_DOT_ON } #ifdef MERGEICMPS_DOT_ON void BCECmpChain::dump() const { errs() << "digraph dag {\n"; errs() << " graph [bgcolor=transparent];\n"; errs() << " node [color=black,style=filled,fillcolor=lightyellow];\n"; errs() << " edge [color=black];\n"; for (size_t I = 0; I < Comparisons_.size(); ++I) { const auto &Comparison = Comparisons_[I]; errs() << " \"" << I << "\" [label=\"%" << Comparison.Lhs().Base()->getName() << " + " << Comparison.Lhs().Offset << " == %" << Comparison.Rhs().Base()->getName() << " + " << Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8) << " bytes)\"];\n"; const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB); if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n"; errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n"; } errs() << " \"Phi\" [label=\"Phi\"];\n"; errs() << "}\n\n"; } #endif // MERGEICMPS_DOT_ON bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA) { // First pass to check if there is at least one merge. If not, we don't do // anything and we keep analysis passes intact. { bool AtLeastOneMerged = false; for (size_t I = 1; I < Comparisons_.size(); ++I) { if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) { AtLeastOneMerged = true; break; } } if (!AtLeastOneMerged) return false; } // Remove phi references to comparison blocks, they will be rebuilt as we // merge the blocks. for (const auto &Comparison : Comparisons_) { Phi_.removeIncomingValue(Comparison.BB, false); } // If entry block is part of the chain, we need to make the first block // of the chain the new entry block of the function. BasicBlock *Entry = &Comparisons_[0].BB->getParent()->getEntryBlock(); for (size_t I = 1; I < Comparisons_.size(); ++I) { if (Entry == Comparisons_[I].BB) { BasicBlock *NEntryBB = BasicBlock::Create(Entry->getContext(), "", Entry->getParent(), Entry); BranchInst::Create(Entry, NEntryBB); break; } } // Point the predecessors of the chain to the first comparison block (which is // the new entry point) and update the entry block of the chain. if (EntryBlock_ != Comparisons_[0].BB) { EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB); EntryBlock_ = Comparisons_[0].BB; } // Effectively merge blocks. int NumMerged = 1; for (size_t I = 1; I < Comparisons_.size(); ++I) { if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) { ++NumMerged; } else { // Merge all previous comparisons and start a new merge block. mergeComparisons( makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged), Comparisons_[I].BB, Phi_, TLI, AA); NumMerged = 1; } } mergeComparisons(makeArrayRef(Comparisons_) .slice(Comparisons_.size() - NumMerged, NumMerged), nullptr, Phi_, TLI, AA); return true; } void BCECmpChain::mergeComparisons(ArrayRef Comparisons, BasicBlock *const NextBBInChain, PHINode &Phi, const TargetLibraryInfo *const TLI, AliasAnalysis *AA) { assert(!Comparisons.empty()); const auto &FirstComparison = *Comparisons.begin(); BasicBlock *const BB = FirstComparison.BB; LLVMContext &Context = BB->getContext(); if (Comparisons.size() >= 2) { // If there is one block that requires splitting, we do it now, i.e. // just before we know we will collapse the chain. 
The instructions // can be executed before any of the instructions in the chain. auto C = std::find_if(Comparisons.begin(), Comparisons.end(), [](const BCECmpBlock &B) { return B.RequireSplit; }); if (C != Comparisons.end()) C->split(EntryBlock_, AA); LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n"); const auto TotalSize = std::accumulate(Comparisons.begin(), Comparisons.end(), 0, [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); }) / 8; // Incoming edges do not need to be updated, and both GEPs are already // computing the right address, we just need to: // - replace the two loads and the icmp with the memcmp // - update the branch // - update the incoming values in the phi. FirstComparison.BranchI->eraseFromParent(); FirstComparison.CmpI->eraseFromParent(); FirstComparison.Lhs().LoadI->eraseFromParent(); FirstComparison.Rhs().LoadI->eraseFromParent(); IRBuilder<> Builder(BB); const auto &DL = Phi.getModule()->getDataLayout(); Value *const MemCmpCall = emitMemCmp( FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP, ConstantInt::get(DL.getIntPtrType(Context), TotalSize), Builder, DL, TLI); Value *const MemCmpIsZero = Builder.CreateICmpEQ( MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0)); // Add a branch to the next basic block in the chain. if (NextBBInChain) { Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent()); Phi.addIncoming(ConstantInt::getFalse(Context), BB); } else { Builder.CreateBr(Phi.getParent()); Phi.addIncoming(MemCmpIsZero, BB); } // Delete merged blocks. for (size_t I = 1; I < Comparisons.size(); ++I) { BasicBlock *CBB = Comparisons[I].BB; CBB->replaceAllUsesWith(BB); CBB->eraseFromParent(); } } else { assert(Comparisons.size() == 1); // There are no blocks to merge, but we still need to update the branches. LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); if (NextBBInChain) { if (FirstComparison.BranchI->isConditional()) { LLVM_DEBUG(dbgs() << "conditional -> conditional\n"); // Just update the "true" target, the "false" target should already be // the phi block. assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent()); FirstComparison.BranchI->setSuccessor(0, NextBBInChain); Phi.addIncoming(ConstantInt::getFalse(Context), BB); } else { LLVM_DEBUG(dbgs() << "unconditional -> conditional\n"); // Replace the unconditional branch by a conditional one. FirstComparison.BranchI->eraseFromParent(); IRBuilder<> Builder(BB); Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain, Phi.getParent()); Phi.addIncoming(FirstComparison.CmpI, BB); } } else { if (FirstComparison.BranchI->isConditional()) { LLVM_DEBUG(dbgs() << "conditional -> unconditional\n"); // Replace the conditional branch by an unconditional one. FirstComparison.BranchI->eraseFromParent(); IRBuilder<> Builder(BB); Builder.CreateBr(Phi.getParent()); Phi.addIncoming(FirstComparison.CmpI, BB); } else { LLVM_DEBUG(dbgs() << "unconditional -> unconditional\n"); Phi.addIncoming(FirstComparison.CmpI, BB); } } } } std::vector getOrderedBlocks(PHINode &Phi, BasicBlock *const LastBlock, int NumBlocks) { // Walk up from the last block to find other blocks. std::vector Blocks(NumBlocks); assert(LastBlock && "invalid last block"); BasicBlock *CurBlock = LastBlock; for (int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) { if (CurBlock->hasAddressTaken()) { // Somebody is jumping to the block through an address, all bets are // off. 
LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex << " has its address taken\n"); return {}; } Blocks[BlockIndex] = CurBlock; auto *SinglePredecessor = CurBlock->getSinglePredecessor(); if (!SinglePredecessor) { // The block has two or more predecessors. LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex << " has two or more predecessors\n"); return {}; } if (Phi.getBasicBlockIndex(SinglePredecessor) < 0) { // The block does not link back to the phi. LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex << " does not link back to the phi\n"); return {}; } CurBlock = SinglePredecessor; } Blocks[0] = CurBlock; return Blocks; } bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI, AliasAnalysis *AA) { LLVM_DEBUG(dbgs() << "processPhi()\n"); if (Phi.getNumIncomingValues() <= 1) { LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n"); return false; } // We are looking for something that has the following structure: // bb1 --eq--> bb2 --eq--> bb3 --eq--> bb4 --+ // \ \ \ \ // ne ne ne \ // \ \ \ v // +------------+-----------+----------> bb_phi // // - The last basic block (bb4 here) must branch unconditionally to bb_phi. // It's the only block that contributes a non-constant value to the Phi. // - All other blocks (b1, b2, b3) must have exactly two successors, one of // them being the phi block. // - All intermediate blocks (bb2, bb3) must have only one predecessor. // - Blocks cannot do other work besides the comparison, see doesOtherWork() // The blocks are not necessarily ordered in the phi, so we start from the // last block and reconstruct the order. BasicBlock *LastBlock = nullptr; for (unsigned I = 0; I < Phi.getNumIncomingValues(); ++I) { if (isa(Phi.getIncomingValue(I))) continue; if (LastBlock) { // There are several non-constant values. LLVM_DEBUG(dbgs() << "skip: several non-constant values\n"); return false; } if (!isa(Phi.getIncomingValue(I)) || cast(Phi.getIncomingValue(I))->getParent() != Phi.getIncomingBlock(I)) { // Non-constant incoming value is not from a cmp instruction or not // produced by the last block. We could end up processing the value // producing block more than once. // // This is an uncommon case, so we bail. LLVM_DEBUG( dbgs() << "skip: non-constant value not from cmp or not from last block.\n"); return false; } LastBlock = Phi.getIncomingBlock(I); } if (!LastBlock) { // There is no non-constant block. 
LLVM_DEBUG(dbgs() << "skip: no non-constant block\n"); return false; } if (LastBlock->getSingleSuccessor() != Phi.getParent()) { LLVM_DEBUG(dbgs() << "skip: last block non-phi successor\n"); return false; } const auto Blocks = getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues()); if (Blocks.empty()) return false; BCECmpChain CmpChain(Blocks, Phi, AA); if (CmpChain.size() < 2) { LLVM_DEBUG(dbgs() << "skip: only one compare block\n"); return false; } return CmpChain.simplify(TLI, AA); } class MergeICmps : public FunctionPass { public: static char ID; MergeICmps() : FunctionPass(ID) { initializeMergeICmpsPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; const auto &TLI = getAnalysis().getTLI(); const auto &TTI = getAnalysis().getTTI(F); AliasAnalysis *AA = &getAnalysis().getAAResults(); auto PA = runImpl(F, &TLI, &TTI, AA); return !PA.areAllPreserved(); } private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addRequired(); } PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, AliasAnalysis *AA); }; PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, AliasAnalysis *AA) { LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n"); // We only try merging comparisons if the target wants to expand memcmp later. // The rationale is to avoid turning small chains into memcmp calls. if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all(); // If we don't have memcmp avaiable we can't emit calls to it. if (!TLI->has(LibFunc_memcmp)) return PreservedAnalyses::all(); bool MadeChange = false; for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) { // A Phi operation is always first in a basic block. if (auto *const Phi = dyn_cast(&*BBIt->begin())) MadeChange |= processPhi(*Phi, TLI, AA); } if (MadeChange) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } } // namespace char MergeICmps::ID = 0; INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); } Index: projects/clang800-import/contrib/llvm/tools/clang/include/clang/AST/Expr.h =================================================================== --- projects/clang800-import/contrib/llvm/tools/clang/include/clang/AST/Expr.h (revision 344547) +++ projects/clang800-import/contrib/llvm/tools/clang/include/clang/AST/Expr.h (revision 344548) @@ -1,5590 +1,5595 @@ //===--- Expr.h - Classes for representing expressions ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the Expr interface and subclasses. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_AST_EXPR_H #define LLVM_CLANG_AST_EXPR_H #include "clang/AST/APValue.h" #include "clang/AST/ASTVector.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclAccessPair.h" #include "clang/AST/OperationKinds.h" #include "clang/AST/Stmt.h" #include "clang/AST/TemplateBase.h" #include "clang/AST/Type.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SyncScope.h" #include "clang/Basic/TypeTraits.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/TrailingObjects.h" namespace clang { class APValue; class ASTContext; class BlockDecl; class CXXBaseSpecifier; class CXXMemberCallExpr; class CXXOperatorCallExpr; class CastExpr; class Decl; class IdentifierInfo; class MaterializeTemporaryExpr; class NamedDecl; class ObjCPropertyRefExpr; class OpaqueValueExpr; class ParmVarDecl; class StringLiteral; class TargetInfo; class ValueDecl; /// A simple array of base specifiers. typedef SmallVector CXXCastPath; /// An adjustment to be made to the temporary created when emitting a /// reference binding, which accesses a particular subobject of that temporary. struct SubobjectAdjustment { enum { DerivedToBaseAdjustment, FieldAdjustment, MemberPointerAdjustment } Kind; struct DTB { const CastExpr *BasePath; const CXXRecordDecl *DerivedClass; }; struct P { const MemberPointerType *MPT; Expr *RHS; }; union { struct DTB DerivedToBase; FieldDecl *Field; struct P Ptr; }; SubobjectAdjustment(const CastExpr *BasePath, const CXXRecordDecl *DerivedClass) : Kind(DerivedToBaseAdjustment) { DerivedToBase.BasePath = BasePath; DerivedToBase.DerivedClass = DerivedClass; } SubobjectAdjustment(FieldDecl *Field) : Kind(FieldAdjustment) { this->Field = Field; } SubobjectAdjustment(const MemberPointerType *MPT, Expr *RHS) : Kind(MemberPointerAdjustment) { this->Ptr.MPT = MPT; this->Ptr.RHS = RHS; } }; /// This represents one expression. Note that Expr's are subclasses of Stmt. /// This allows an expression to be transparently used any place a Stmt is /// required. class Expr : public Stmt { QualType TR; protected: Expr(StmtClass SC, QualType T, ExprValueKind VK, ExprObjectKind OK, bool TD, bool VD, bool ID, bool ContainsUnexpandedParameterPack) : Stmt(SC) { ExprBits.TypeDependent = TD; ExprBits.ValueDependent = VD; ExprBits.InstantiationDependent = ID; ExprBits.ValueKind = VK; ExprBits.ObjectKind = OK; assert(ExprBits.ObjectKind == OK && "truncated kind"); ExprBits.ContainsUnexpandedParameterPack = ContainsUnexpandedParameterPack; setType(T); } /// Construct an empty expression. explicit Expr(StmtClass SC, EmptyShell) : Stmt(SC) { } public: QualType getType() const { return TR; } void setType(QualType t) { // In C++, the type of an expression is always adjusted so that it // will not have reference type (C++ [expr]p6). Use // QualType::getNonReferenceType() to retrieve the non-reference // type. Additionally, inspect Expr::isLvalue to determine whether // an expression that is adjusted in this manner should be // considered an lvalue. assert((t.isNull() || !t->isReferenceType()) && "Expressions can't have reference type"); TR = t; } /// isValueDependent - Determines whether this expression is /// value-dependent (C++ [temp.dep.constexpr]). 
For example, the /// array bound of "Chars" in the following example is /// value-dependent. /// @code /// template<int Size, char (&Chars)[Size]> struct meta_string; /// @endcode bool isValueDependent() const { return ExprBits.ValueDependent; } /// Set whether this expression is value-dependent or not. void setValueDependent(bool VD) { ExprBits.ValueDependent = VD; } /// isTypeDependent - Determines whether this expression is /// type-dependent (C++ [temp.dep.expr]), which means that its type /// could change from one template instantiation to the next. For /// example, the expressions "x" and "x + y" are type-dependent in /// the following code, but "y" is not type-dependent: /// @code /// template<typename T> /// void add(T x, int y) { /// x + y; /// } /// @endcode bool isTypeDependent() const { return ExprBits.TypeDependent; } /// Set whether this expression is type-dependent or not. void setTypeDependent(bool TD) { ExprBits.TypeDependent = TD; } /// Whether this expression is instantiation-dependent, meaning that /// it depends in some way on a template parameter, even if neither its type /// nor (constant) value can change due to the template instantiation. /// /// In the following example, the expression \c sizeof(sizeof(T() + T())) is /// instantiation-dependent (since it involves a template parameter \c T), but /// is neither type- nor value-dependent, since the type of the inner /// \c sizeof is known (\c std::size_t) and therefore the size of the outer /// \c sizeof is known. /// /// \code /// template<typename T> /// void f(T x, T y) { /// sizeof(sizeof(T() + T())); /// } /// \endcode /// bool isInstantiationDependent() const { return ExprBits.InstantiationDependent; } /// Set whether this expression is instantiation-dependent or not. void setInstantiationDependent(bool ID) { ExprBits.InstantiationDependent = ID; } /// Whether this expression contains an unexpanded parameter /// pack (for C++11 variadic templates). /// /// Given the following function template: /// /// \code /// template<typename F, typename ...Types> /// void forward(const F &f, Types &&...args) { /// f(static_cast<Types&&>(args)...); /// } /// \endcode /// /// The expressions \c args and \c static_cast<Types&&>(args) both /// contain parameter packs. bool containsUnexpandedParameterPack() const { return ExprBits.ContainsUnexpandedParameterPack; } /// Set the bit that describes whether this expression /// contains an unexpanded parameter pack. void setContainsUnexpandedParameterPack(bool PP = true) { ExprBits.ContainsUnexpandedParameterPack = PP; } /// getExprLoc - Return the preferred location for the arrow when diagnosing /// a problem with a generic expression. SourceLocation getExprLoc() const LLVM_READONLY; /// isUnusedResultAWarning - Return true if this immediate expression should /// be warned about if the result is unused. If so, fill in expr, location, /// and ranges with expr to warn on and source locations/ranges appropriate /// for a warning. bool isUnusedResultAWarning(const Expr *&WarnExpr, SourceLocation &Loc, SourceRange &R1, SourceRange &R2, ASTContext &Ctx) const; /// isLValue - True if this expression is an "l-value" according to /// the rules of the current language. C and C++ give somewhat /// different rules for this concept, but in general, the result of /// an l-value expression identifies a specific object whereas the /// result of an r-value expression is a value detached from any /// specific storage.
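///
/// For illustration (editorial example, mirroring the definitions above):
/// \code
///   int n = 0;
///   n = 1;    // "n" is an l-value: it names a specific object.
///   n + 1;    // "n + 1" is an r-value: a value with no storage of its own.
/// \endcode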
/// /// C++11 divides the concept of "r-value" into pure r-values /// ("pr-values") and so-called expiring values ("x-values"), which /// identify specific objects that can be safely cannibalized for /// their resources. This is an unfortunate abuse of terminology on /// the part of the C++ committee. In Clang, when we say "r-value", /// we generally mean a pr-value. bool isLValue() const { return getValueKind() == VK_LValue; } bool isRValue() const { return getValueKind() == VK_RValue; } bool isXValue() const { return getValueKind() == VK_XValue; } bool isGLValue() const { return getValueKind() != VK_RValue; } enum LValueClassification { LV_Valid, LV_NotObjectType, LV_IncompleteVoidType, LV_DuplicateVectorComponents, LV_InvalidExpression, LV_InvalidMessageExpression, LV_MemberFunction, LV_SubObjCPropertySetting, LV_ClassTemporary, LV_ArrayTemporary }; /// Reasons why an expression might not be an l-value. LValueClassification ClassifyLValue(ASTContext &Ctx) const; enum isModifiableLvalueResult { MLV_Valid, MLV_NotObjectType, MLV_IncompleteVoidType, MLV_DuplicateVectorComponents, MLV_InvalidExpression, MLV_LValueCast, // Specialized form of MLV_InvalidExpression. MLV_IncompleteType, MLV_ConstQualified, MLV_ConstQualifiedField, MLV_ConstAddrSpace, MLV_ArrayType, MLV_NoSetterProperty, MLV_MemberFunction, MLV_SubObjCPropertySetting, MLV_InvalidMessageExpression, MLV_ClassTemporary, MLV_ArrayTemporary }; /// isModifiableLvalue - C99 6.3.2.1: an lvalue that does not have array type, /// does not have an incomplete type, does not have a const-qualified type, /// and if it is a structure or union, does not have any member (including, /// recursively, any member or element of all contained aggregates or unions) /// with a const-qualified type. /// /// \param Loc [in,out] - A source location which *may* be filled /// in with the location of the expression making this a /// non-modifiable lvalue, if specified. isModifiableLvalueResult isModifiableLvalue(ASTContext &Ctx, SourceLocation *Loc = nullptr) const; /// The return type of classify(). Represents the C++11 expression /// taxonomy. class Classification { public: /// The various classification results. Most of these mean prvalue. enum Kinds { CL_LValue, CL_XValue, CL_Function, // Functions cannot be lvalues in C. CL_Void, // Void cannot be an lvalue in C. CL_AddressableVoid, // Void expression whose address can be taken in C. CL_DuplicateVectorComponents, // A vector shuffle with dupes. CL_MemberFunction, // An expression referring to a member function CL_SubObjCPropertySetting, CL_ClassTemporary, // A temporary of class type, or subobject thereof. CL_ArrayTemporary, // A temporary of array type. CL_ObjCMessageRValue, // ObjC message is an rvalue CL_PRValue // A prvalue for any other reason, of any other type }; /// The results of modification testing. enum ModifiableType { CM_Untested, // testModifiable was false. 
CM_Modifiable, CM_RValue, // Not modifiable because it's an rvalue CM_Function, // Not modifiable because it's a function; C++ only CM_LValueCast, // Same as CM_RValue, but indicates GCC cast-as-lvalue ext CM_NoSetterProperty,// Implicit assignment to ObjC property without setter CM_ConstQualified, CM_ConstQualifiedField, CM_ConstAddrSpace, CM_ArrayType, CM_IncompleteType }; private: friend class Expr; unsigned short Kind; unsigned short Modifiable; explicit Classification(Kinds k, ModifiableType m) : Kind(k), Modifiable(m) {} public: Classification() {} Kinds getKind() const { return static_cast<Kinds>(Kind); } ModifiableType getModifiable() const { assert(Modifiable != CM_Untested && "Did not test for modifiability."); return static_cast<ModifiableType>(Modifiable); } bool isLValue() const { return Kind == CL_LValue; } bool isXValue() const { return Kind == CL_XValue; } bool isGLValue() const { return Kind <= CL_XValue; } bool isPRValue() const { return Kind >= CL_Function; } bool isRValue() const { return Kind >= CL_XValue; } bool isModifiable() const { return getModifiable() == CM_Modifiable; } /// Create a simple, modifiable lvalue. static Classification makeSimpleLValue() { return Classification(CL_LValue, CM_Modifiable); } }; /// Classify - Classify this expression according to the C++11 /// expression taxonomy. /// /// C++11 defines ([basic.lval]) a new taxonomy of expressions to replace the /// old lvalue vs rvalue. This function determines the type of expression this /// is. There are three expression types: /// - lvalues are classical lvalues as in C++03. /// - prvalues are equivalent to rvalues in C++03. /// - xvalues are expressions yielding unnamed rvalue references, e.g. a /// function returning an rvalue reference. /// lvalues and xvalues are collectively referred to as glvalues, while /// prvalues and xvalues together form rvalues. Classification Classify(ASTContext &Ctx) const { return ClassifyImpl(Ctx, nullptr); } /// ClassifyModifiable - Classify this expression according to the /// C++11 expression taxonomy, and see if it is valid on the left side /// of an assignment. /// /// This function extends classify in that it also tests whether the /// expression is modifiable (C99 6.3.2.1p1). /// \param Loc A source location that might be filled with a relevant location /// if the expression is not modifiable. Classification ClassifyModifiable(ASTContext &Ctx, SourceLocation &Loc) const{ return ClassifyImpl(Ctx, &Loc); } /// getValueKindForType - Given a formal return or parameter type, /// give its value kind. static ExprValueKind getValueKindForType(QualType T) { if (const ReferenceType *RT = T->getAs<ReferenceType>()) return (isa<LValueReferenceType>(RT) ? VK_LValue : (RT->getPointeeType()->isFunctionType() ? VK_LValue : VK_XValue)); return VK_RValue; } /// getValueKind - The value kind that this expression produces. ExprValueKind getValueKind() const { return static_cast<ExprValueKind>(ExprBits.ValueKind); } /// getObjectKind - The object kind that this expression produces. /// Object kinds are meaningful only for expressions that yield an /// l-value or x-value. ExprObjectKind getObjectKind() const { return static_cast<ExprObjectKind>(ExprBits.ObjectKind); } bool isOrdinaryOrBitFieldObject() const { ExprObjectKind OK = getObjectKind(); return (OK == OK_Ordinary || OK == OK_BitField); } /// setValueKind - Set the value kind produced by this expression. void setValueKind(ExprValueKind Cat) { ExprBits.ValueKind = Cat; } /// setObjectKind - Set the object kind produced by this expression.
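///
/// For illustration (editorial example): a member access on a bit-field
/// yields an l-value whose object kind is OK_BitField:
/// \code
///   struct S { int b : 3; } s;
///   s.b = 1;  // "s.b" is an l-value with object kind OK_BitField.
/// \endcode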
void setObjectKind(ExprObjectKind Cat) { ExprBits.ObjectKind = Cat; } private: Classification ClassifyImpl(ASTContext &Ctx, SourceLocation *Loc) const; public: /// Returns true if this expression is a gl-value that /// potentially refers to a bit-field. /// /// In C++, whether a gl-value refers to a bitfield is essentially /// an aspect of the value-kind type system. bool refersToBitField() const { return getObjectKind() == OK_BitField; } /// If this expression refers to a bit-field, retrieve the /// declaration of that bit-field. /// /// Note that this returns a non-null pointer in subtly different /// places than refersToBitField returns true. In particular, this can /// return a non-null pointer even for r-values loaded from /// bit-fields, but it will return null for a conditional bit-field. FieldDecl *getSourceBitField(); const FieldDecl *getSourceBitField() const { return const_cast<Expr*>(this)->getSourceBitField(); } Decl *getReferencedDeclOfCallee(); const Decl *getReferencedDeclOfCallee() const { return const_cast<Expr*>(this)->getReferencedDeclOfCallee(); } /// If this expression is an l-value for an Objective C /// property, find the underlying property reference expression. const ObjCPropertyRefExpr *getObjCProperty() const; /// Check if this expression is the ObjC 'self' implicit parameter. bool isObjCSelfExpr() const; /// Returns whether this expression refers to a vector element. bool refersToVectorElement() const; /// Returns whether this expression refers to a global register /// variable. bool refersToGlobalRegisterVar() const; /// Returns whether this expression has a placeholder type. bool hasPlaceholderType() const { return getType()->isPlaceholderType(); } /// Returns whether this expression has a specific placeholder type. bool hasPlaceholderType(BuiltinType::Kind K) const { assert(BuiltinType::isPlaceholderTypeKind(K)); if (const BuiltinType *BT = dyn_cast<BuiltinType>(getType())) return BT->getKind() == K; return false; } /// isKnownToHaveBooleanValue - Return true if this is an integer expression /// that is known to return 0 or 1. This happens for _Bool/bool expressions /// but also int expressions which are produced by things like comparisons in /// C. bool isKnownToHaveBooleanValue() const; /// isIntegerConstantExpr - Return true if this expression is a valid integer /// constant expression, and, if so, return its value in Result. If not a /// valid i-c-e, return false and fill in Loc (if specified) with the location /// of the invalid expression. /// /// Note: This does not perform the implicit conversions required by C++11 /// [expr.const]p5. bool isIntegerConstantExpr(llvm::APSInt &Result, const ASTContext &Ctx, SourceLocation *Loc = nullptr, bool isEvaluated = true) const; bool isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc = nullptr) const; /// isCXX98IntegralConstantExpr - Return true if this expression is an /// integral constant expression in C++98. Can only be used in C++. bool isCXX98IntegralConstantExpr(const ASTContext &Ctx) const; /// isCXX11ConstantExpr - Return true if this expression is a constant /// expression in C++11. Can only be used in C++. /// /// Note: This does not perform the implicit conversions required by C++11 /// [expr.const]p5. bool isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result = nullptr, SourceLocation *Loc = nullptr) const; /// isPotentialConstantExpr - Return true if this function's definition /// might be usable in a constant expression in C++11, if it were marked /// constexpr.
Return false if the function can never produce a constant /// expression, along with diagnostics describing why not. static bool isPotentialConstantExpr(const FunctionDecl *FD, SmallVectorImpl<PartialDiagnosticAt> &Diags); /// isPotentialConstantExprUnevaluated - Return true if this expression might /// be usable in a constant expression in C++11 in an unevaluated context, if /// it were in function FD marked constexpr. Return false if the function can /// never produce a constant expression, along with diagnostics describing /// why not. static bool isPotentialConstantExprUnevaluated(Expr *E, const FunctionDecl *FD, SmallVectorImpl<PartialDiagnosticAt> &Diags); /// isConstantInitializer - Returns true if this expression can be emitted to /// IR as a constant, and thus can be used as a constant initializer in C. /// If this expression is not constant and Culprit is non-null, /// it is used to store the address of the first non-constant expr. bool isConstantInitializer(ASTContext &Ctx, bool ForRef, const Expr **Culprit = nullptr) const; /// EvalStatus is a struct with detailed info about an evaluation in progress. struct EvalStatus { /// Whether the evaluated expression has side effects. /// For example, (f() && 0) can be folded, but it still has side effects. bool HasSideEffects; /// Whether the evaluation hit undefined behavior. /// For example, 1.0 / 0.0 can be folded to Inf, but has undefined behavior. /// Likewise, INT_MAX + 1 can be folded to INT_MIN, but has UB. bool HasUndefinedBehavior; /// Diag - If this is non-null, it will be filled in with a stack of notes /// indicating why evaluation failed (or why it failed to produce a constant /// expression). /// If the expression is unfoldable, the notes will indicate why it's not /// foldable. If the expression is foldable, but not a constant expression, /// the notes will describe why it isn't a constant expression. If the /// expression *is* a constant expression, no notes will be produced. SmallVectorImpl<PartialDiagnosticAt> *Diag; EvalStatus() : HasSideEffects(false), HasUndefinedBehavior(false), Diag(nullptr) {} // hasSideEffects - Return true if the evaluated expression has // side effects. bool hasSideEffects() const { return HasSideEffects; } }; /// EvalResult is a struct with detailed info about an evaluated expression. struct EvalResult : EvalStatus { /// Val - This is the value the expression can be folded to. APValue Val; // isGlobalLValue - Return true if the evaluated lvalue expression // is global. bool isGlobalLValue() const; }; /// EvaluateAsRValue - Return true if this is a constant which we can fold to /// an rvalue using any crazy technique (that has nothing to do with language /// standards) that we want to, even if the expression has side-effects. If /// this function returns true, it returns the folded constant in Result. If /// the expression is a glvalue, an lvalue-to-rvalue conversion will be /// applied. bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext = false) const; /// EvaluateAsBooleanCondition - Return true if this is a constant /// which we can fold and convert to a boolean condition using /// any crazy technique that we want to, even if the expression has /// side-effects. bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx) const; enum SideEffectsKind { SE_NoSideEffects, ///< Strictly evaluate the expression. SE_AllowUndefinedBehavior, ///< Allow UB that we can give a value, but not ///< arbitrary unmodeled side effects.
SE_AllowSideEffects ///< Allow any unmodeled side effect. }; /// EvaluateAsInt - Return true if this is a constant which we can fold and /// convert to an integer, using any crazy technique that we want to. bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects = SE_NoSideEffects) const; /// EvaluateAsFloat - Return true if this is a constant which we can fold and /// convert to a floating point value, using any crazy technique that we /// want to. bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects = SE_NoSideEffects) const; /// isEvaluatable - Call EvaluateAsRValue to see if this expression can be /// constant folded without side-effects, but discard the result. bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects = SE_NoSideEffects) const; /// HasSideEffects - This routine returns true for all those expressions /// which have any effect other than producing a value. Example is a function /// call, volatile variable read, or throwing an exception. If /// IncludePossibleEffects is false, this call treats certain expressions with /// potential side effects (such as function call-like expressions, /// instantiation-dependent expressions, or invocations from a macro) as not /// having side effects. bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects = true) const; /// Determine whether this expression involves a call to any function /// that is not trivial. bool hasNonTrivialCall(const ASTContext &Ctx) const; /// EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded /// integer. This must be called on an expression that constant folds to an /// integer. llvm::APSInt EvaluateKnownConstInt( const ASTContext &Ctx, SmallVectorImpl<PartialDiagnosticAt> *Diag = nullptr) const; llvm::APSInt EvaluateKnownConstIntCheckOverflow( const ASTContext &Ctx, SmallVectorImpl<PartialDiagnosticAt> *Diag = nullptr) const; void EvaluateForOverflow(const ASTContext &Ctx) const; /// EvaluateAsLValue - Evaluate an expression to see if we can fold it to an /// lvalue with link time known address, with no side-effects. bool EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx) const; /// EvaluateAsInitializer - Evaluate an expression as if it were the /// initializer of the given declaration. Returns true if the initializer /// can be folded to a constant, and produces any relevant notes. In C++11, /// notes will be produced if the expression is not a constant expression. bool EvaluateAsInitializer(APValue &Result, const ASTContext &Ctx, const VarDecl *VD, SmallVectorImpl<PartialDiagnosticAt> &Notes) const; /// EvaluateWithSubstitution - Evaluate an expression as if from the context /// of a call to the given function with the given arguments, inside an /// unevaluated context. Returns true if the expression could be folded to a /// constant. bool EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx, const FunctionDecl *Callee, ArrayRef<const Expr *> Args, const Expr *This = nullptr) const; /// Indicates how the constant expression will be used. enum ConstExprUsage { EvaluateForCodeGen, EvaluateForMangling }; /// Evaluate an expression that is required to be a constant expression. bool EvaluateAsConstantExpr(EvalResult &Result, ConstExprUsage Usage, const ASTContext &Ctx) const; /// If the current Expr is a pointer, this will try to statically /// determine the number of bytes available where the pointer is pointing. /// Returns true if all of the above holds and we were able to figure out the /// size, false otherwise.
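///
/// For illustration (editorial example): this is the evaluation that backs
/// __builtin_object_size, e.g.
/// \code
///   char buf[8];
///   unsigned n = __builtin_object_size(buf, 0);  // folds to 8
/// \endcode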
/// /// \param Type - How to evaluate the size of the Expr, as defined by the /// "type" parameter of __builtin_object_size bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const; /// Enumeration used to describe the kind of Null pointer constant /// returned from \c isNullPointerConstant(). enum NullPointerConstantKind { /// Expression is not a Null pointer constant. NPCK_NotNull = 0, /// Expression is a Null pointer constant built from a zero integer /// expression that is not a simple, possibly parenthesized, zero literal. /// C++ Core Issue 903 will classify these expressions as "not pointers" /// once it is adopted. /// http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#903 NPCK_ZeroExpression, /// Expression is a Null pointer constant built from a literal zero. NPCK_ZeroLiteral, /// Expression is a C++11 nullptr. NPCK_CXX11_nullptr, /// Expression is a GNU-style __null constant. NPCK_GNUNull }; /// Enumeration used to describe how \c isNullPointerConstant() /// should cope with value-dependent expressions. enum NullPointerConstantValueDependence { /// Specifies that the expression should never be value-dependent. NPC_NeverValueDependent = 0, /// Specifies that a value-dependent expression of integral or /// dependent type should be considered a null pointer constant. NPC_ValueDependentIsNull, /// Specifies that a value-dependent expression should be considered /// to never be a null pointer constant. NPC_ValueDependentIsNotNull }; /// isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to /// a Null pointer constant. The return value can further distinguish the /// kind of NULL pointer constant that was detected. NullPointerConstantKind isNullPointerConstant( ASTContext &Ctx, NullPointerConstantValueDependence NPC) const; /// isOBJCGCCandidate - Return true if this expression may be used in a read/ /// write barrier. bool isOBJCGCCandidate(ASTContext &Ctx) const; /// Returns true if this expression is a bound member function. bool isBoundMemberFunction(ASTContext &Ctx) const; /// Given an expression of bound-member type, find the type /// of the member. Returns null if this is an *overloaded* bound /// member expression. static QualType findBoundMemberType(const Expr *expr); /// IgnoreImpCasts - Skip past any implicit casts which might /// surround this expression. Only skips ImplicitCastExprs. Expr *IgnoreImpCasts() LLVM_READONLY; /// IgnoreImplicit - Skip past any implicit AST nodes which might /// surround this expression. Expr *IgnoreImplicit() LLVM_READONLY { return cast<Expr>(Stmt::IgnoreImplicit()); } const Expr *IgnoreImplicit() const LLVM_READONLY { return const_cast<Expr*>(this)->IgnoreImplicit(); } /// IgnoreParens - Ignore parentheses. If this Expr is a ParenExpr, return /// its subexpression. If that subexpression is also a ParenExpr, /// then this method recursively returns its subexpression, and so forth. /// Otherwise, the method returns the current Expr. Expr *IgnoreParens() LLVM_READONLY; /// IgnoreParenCasts - Ignore parentheses and casts. Strip off any ParenExpr /// or CastExprs, returning their operand. Expr *IgnoreParenCasts() LLVM_READONLY; /// Ignore casts. Strip off any CastExprs, returning their operand. Expr *IgnoreCasts() LLVM_READONLY; /// IgnoreParenImpCasts - Ignore parentheses and implicit casts. Strip off /// any ParenExpr or ImplicitCastExprs, returning their operand. Expr *IgnoreParenImpCasts() LLVM_READONLY; /// IgnoreConversionOperator - Ignore conversion operator.
If this Expr is a /// call to a conversion operator, return the argument. Expr *IgnoreConversionOperator() LLVM_READONLY; const Expr *IgnoreConversionOperator() const LLVM_READONLY { return const_cast<Expr*>(this)->IgnoreConversionOperator(); } const Expr *IgnoreParenImpCasts() const LLVM_READONLY { return const_cast<Expr*>(this)->IgnoreParenImpCasts(); } /// Ignore parentheses and lvalue casts. Strip off any ParenExpr and /// CastExprs that represent lvalue casts, returning their operand. Expr *IgnoreParenLValueCasts() LLVM_READONLY; const Expr *IgnoreParenLValueCasts() const LLVM_READONLY { return const_cast<Expr*>(this)->IgnoreParenLValueCasts(); } /// IgnoreParenNoopCasts - Ignore parentheses and casts that do not change the /// value (including ptr->int casts of the same size). Strip off any /// ParenExpr or CastExprs, returning their operand. Expr *IgnoreParenNoopCasts(ASTContext &Ctx) LLVM_READONLY; /// Ignore parentheses and derived-to-base casts. Expr *ignoreParenBaseCasts() LLVM_READONLY; const Expr *ignoreParenBaseCasts() const LLVM_READONLY { return const_cast<Expr*>(this)->ignoreParenBaseCasts(); } /// Determine whether this expression is a default function argument. /// /// Default arguments are implicitly generated in the abstract syntax tree /// by semantic analysis for function calls, object constructions, etc. in /// C++. Default arguments are represented by \c CXXDefaultArgExpr nodes; /// this routine also looks through any implicit casts to determine whether /// the expression is a default argument. bool isDefaultArgument() const; /// Determine whether the result of this expression is a /// temporary object of the given class type. bool isTemporaryObject(ASTContext &Ctx, const CXXRecordDecl *TempTy) const; /// Whether this expression is an implicit reference to 'this' in C++. bool isImplicitCXXThis() const; const Expr *IgnoreImpCasts() const LLVM_READONLY { return const_cast<Expr*>(this)->IgnoreImpCasts(); } const Expr *IgnoreParens() const LLVM_READONLY { return const_cast<Expr*>(this)->IgnoreParens(); } const Expr *IgnoreParenCasts() const LLVM_READONLY { return const_cast<Expr*>(this)->IgnoreParenCasts(); } /// Strip off casts, but keep parentheses. const Expr *IgnoreCasts() const LLVM_READONLY { return const_cast<Expr*>(this)->IgnoreCasts(); } const Expr *IgnoreParenNoopCasts(ASTContext &Ctx) const LLVM_READONLY { return const_cast<Expr*>(this)->IgnoreParenNoopCasts(Ctx); } static bool hasAnyTypeDependentArguments(ArrayRef<Expr *> Exprs); /// For an expression of class type or pointer to class type, /// return the most derived class decl the expression is known to refer to. /// /// If this expression is a cast, this method looks through it to find the /// most derived decl that can be inferred from the expression. /// This is valid because derived-to-base conversions have undefined /// behavior if the object isn't dynamically of the derived type. const CXXRecordDecl *getBestDynamicClassType() const; /// Get the inner expression that determines the best dynamic class. /// If this is a prvalue, we guarantee that it is of the most-derived type /// for the object itself. const Expr *getBestDynamicClassTypeExpr() const; /// Walk outwards from an expression we want to bind a reference to and /// find the expression whose lifetime needs to be extended. Record /// the LHSs of comma expressions and adjustments needed along the path.
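///
/// For illustration (editorial sketch of a hypothetical caller):
/// \code
///   SmallVector<const Expr *, 2> CommaLHSs;
///   SmallVector<SubobjectAdjustment, 2> Adjustments;
///   const Expr *Init = E->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments);
/// \endcode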
const Expr *skipRValueSubobjectAdjustments( SmallVectorImpl<const Expr *> &CommaLHS, SmallVectorImpl<SubobjectAdjustment> &Adjustments) const; const Expr *skipRValueSubobjectAdjustments() const { SmallVector<const Expr *, 8> CommaLHSs; SmallVector<SubobjectAdjustment, 8> Adjustments; return skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); } static bool classof(const Stmt *T) { return T->getStmtClass() >= firstExprConstant && T->getStmtClass() <= lastExprConstant; } }; //===----------------------------------------------------------------------===// // Wrapper Expressions. //===----------------------------------------------------------------------===// /// FullExpr - Represents a "full-expression" node. class FullExpr : public Expr { protected: Stmt *SubExpr; FullExpr(StmtClass SC, Expr *subexpr) : Expr(SC, subexpr->getType(), subexpr->getValueKind(), subexpr->getObjectKind(), subexpr->isTypeDependent(), subexpr->isValueDependent(), subexpr->isInstantiationDependent(), subexpr->containsUnexpandedParameterPack()), SubExpr(subexpr) {} FullExpr(StmtClass SC, EmptyShell Empty) : Expr(SC, Empty) {} public: const Expr *getSubExpr() const { return cast<Expr>(SubExpr); } Expr *getSubExpr() { return cast<Expr>(SubExpr); } /// As with any mutator of the AST, be very careful when modifying an /// existing AST to preserve its invariants. void setSubExpr(Expr *E) { SubExpr = E; } static bool classof(const Stmt *T) { return T->getStmtClass() >= firstFullExprConstant && T->getStmtClass() <= lastFullExprConstant; } }; /// ConstantExpr - An expression that occurs in a constant context. class ConstantExpr : public FullExpr { ConstantExpr(Expr *subexpr) : FullExpr(ConstantExprClass, subexpr) {} public: static ConstantExpr *Create(const ASTContext &Context, Expr *E) { assert(!isa<ConstantExpr>(E)); return new (Context) ConstantExpr(E); } /// Build an empty constant expression wrapper. explicit ConstantExpr(EmptyShell Empty) : FullExpr(ConstantExprClass, Empty) {} SourceLocation getBeginLoc() const LLVM_READONLY { return SubExpr->getBeginLoc(); } SourceLocation getEndLoc() const LLVM_READONLY { return SubExpr->getEndLoc(); } static bool classof(const Stmt *T) { return T->getStmtClass() == ConstantExprClass; } // Iterators child_range children() { return child_range(&SubExpr, &SubExpr+1); } const_child_range children() const { return const_child_range(&SubExpr, &SubExpr + 1); } }; //===----------------------------------------------------------------------===// // Primary Expressions. //===----------------------------------------------------------------------===// /// OpaqueValueExpr - An expression referring to an opaque object of a /// fixed type and value class. These don't correspond to concrete /// syntax; instead they're used to express operations (usually copy /// operations) on values whose source is generally obvious from /// context. class OpaqueValueExpr : public Expr { friend class ASTStmtReader; Expr *SourceExpr; public: OpaqueValueExpr(SourceLocation Loc, QualType T, ExprValueKind VK, ExprObjectKind OK = OK_Ordinary, Expr *SourceExpr = nullptr) : Expr(OpaqueValueExprClass, T, VK, OK, T->isDependentType() || (SourceExpr && SourceExpr->isTypeDependent()), T->isDependentType() || (SourceExpr && SourceExpr->isValueDependent()), T->isInstantiationDependentType() || (SourceExpr && SourceExpr->isInstantiationDependent()), false), SourceExpr(SourceExpr) { setIsUnique(false); OpaqueValueExprBits.Loc = Loc; } /// Given an expression which invokes a copy constructor --- i.e.
a /// CXXConstructExpr, possibly wrapped in an ExprWithCleanups --- /// find the OpaqueValueExpr that's the source of the construction. static const OpaqueValueExpr *findInCopyConstruct(const Expr *expr); explicit OpaqueValueExpr(EmptyShell Empty) : Expr(OpaqueValueExprClass, Empty) {} /// Retrieve the location of this expression. SourceLocation getLocation() const { return OpaqueValueExprBits.Loc; } SourceLocation getBeginLoc() const LLVM_READONLY { return SourceExpr ? SourceExpr->getBeginLoc() : getLocation(); } SourceLocation getEndLoc() const LLVM_READONLY { return SourceExpr ? SourceExpr->getEndLoc() : getLocation(); } SourceLocation getExprLoc() const LLVM_READONLY { return SourceExpr ? SourceExpr->getExprLoc() : getLocation(); } child_range children() { return child_range(child_iterator(), child_iterator()); } const_child_range children() const { return const_child_range(const_child_iterator(), const_child_iterator()); } /// The source expression of an opaque value expression is the /// expression which originally generated the value. This is /// provided as a convenience for analyses that don't wish to /// precisely model the execution behavior of the program. /// /// The source expression is typically set when building the /// expression which binds the opaque value expression in the first /// place. Expr *getSourceExpr() const { return SourceExpr; } void setIsUnique(bool V) { assert((!V || SourceExpr) && "unique OVEs are expected to have source expressions"); OpaqueValueExprBits.IsUnique = V; } bool isUnique() const { return OpaqueValueExprBits.IsUnique; } static bool classof(const Stmt *T) { return T->getStmtClass() == OpaqueValueExprClass; } }; /// A reference to a declared variable, function, enum, etc. /// [C99 6.5.1p2] /// /// This encodes all the information about how a declaration is referenced /// within an expression. /// /// There are several optional constructs attached to DeclRefExprs only when /// they apply in order to conserve memory. These are laid out past the end of /// the object, and flags in the DeclRefExprBitfield track whether they exist: /// /// DeclRefExprBits.HasQualifier: /// Specifies when this declaration reference expression has a C++ /// nested-name-specifier. /// DeclRefExprBits.HasFoundDecl: /// Specifies when this declaration reference expression has a record of /// a NamedDecl (different from the referenced ValueDecl) which was found /// during name lookup and/or overload resolution. /// DeclRefExprBits.HasTemplateKWAndArgsInfo: /// Specifies when this declaration reference expression has an explicit /// C++ template keyword and/or template argument list. /// DeclRefExprBits.RefersToEnclosingVariableOrCapture /// Specifies when this declaration reference expression (validly) /// refers to an enclosed local or a captured variable. class DeclRefExpr final : public Expr, private llvm::TrailingObjects<DeclRefExpr, NestedNameSpecifierLoc, NamedDecl *, ASTTemplateKWAndArgsInfo, TemplateArgumentLoc> { friend class ASTStmtReader; friend class ASTStmtWriter; friend TrailingObjects; /// The declaration that we are referencing. ValueDecl *D; /// Provides source/type location info for the declaration name /// embedded in D. DeclarationNameLoc DNLoc; size_t numTrailingObjects(OverloadToken<NestedNameSpecifierLoc>) const { return hasQualifier(); } size_t numTrailingObjects(OverloadToken<NamedDecl *>) const { return hasFoundDecl(); } size_t numTrailingObjects(OverloadToken<ASTTemplateKWAndArgsInfo>) const { return hasTemplateKWAndArgsInfo(); } /// Test whether there is a distinct FoundDecl attached to the end of /// this DRE.
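///
/// For illustration (editorial example): given
/// \code
///   namespace N { int v; }
///   using N::v;
///   int x = v;  // references N::v, but the found decl is the
///               // UsingShadowDecl introduced by "using N::v".
/// \endcode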
bool hasFoundDecl() const { return DeclRefExprBits.HasFoundDecl; } DeclRefExpr(const ASTContext &Ctx, NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, ValueDecl *D, bool RefersToEnclosingVariableOrCapture, const DeclarationNameInfo &NameInfo, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs, QualType T, ExprValueKind VK); /// Construct an empty declaration reference expression. explicit DeclRefExpr(EmptyShell Empty) : Expr(DeclRefExprClass, Empty) {} /// Computes the type- and value-dependence flags for this /// declaration reference expression. void computeDependence(const ASTContext &Ctx); public: DeclRefExpr(const ASTContext &Ctx, ValueDecl *D, bool RefersToEnclosingVariableOrCapture, QualType T, ExprValueKind VK, SourceLocation L, const DeclarationNameLoc &LocInfo = DeclarationNameLoc()); static DeclRefExpr * Create(const ASTContext &Context, NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, ValueDecl *D, bool RefersToEnclosingVariableOrCapture, SourceLocation NameLoc, QualType T, ExprValueKind VK, NamedDecl *FoundD = nullptr, const TemplateArgumentListInfo *TemplateArgs = nullptr); static DeclRefExpr * Create(const ASTContext &Context, NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, ValueDecl *D, bool RefersToEnclosingVariableOrCapture, const DeclarationNameInfo &NameInfo, QualType T, ExprValueKind VK, NamedDecl *FoundD = nullptr, const TemplateArgumentListInfo *TemplateArgs = nullptr); /// Construct an empty declaration reference expression. static DeclRefExpr *CreateEmpty(const ASTContext &Context, bool HasQualifier, bool HasFoundDecl, bool HasTemplateKWAndArgsInfo, unsigned NumTemplateArgs); ValueDecl *getDecl() { return D; } const ValueDecl *getDecl() const { return D; } void setDecl(ValueDecl *NewD) { D = NewD; } DeclarationNameInfo getNameInfo() const { return DeclarationNameInfo(getDecl()->getDeclName(), getLocation(), DNLoc); } SourceLocation getLocation() const { return DeclRefExprBits.Loc; } void setLocation(SourceLocation L) { DeclRefExprBits.Loc = L; } SourceLocation getBeginLoc() const LLVM_READONLY; SourceLocation getEndLoc() const LLVM_READONLY; /// Determine whether this declaration reference was preceded by a /// C++ nested-name-specifier, e.g., \c N::foo. bool hasQualifier() const { return DeclRefExprBits.HasQualifier; } /// If the name was qualified, retrieves the nested-name-specifier /// that precedes the name, with source-location information. NestedNameSpecifierLoc getQualifierLoc() const { if (!hasQualifier()) return NestedNameSpecifierLoc(); return *getTrailingObjects<NestedNameSpecifierLoc>(); } /// If the name was qualified, retrieves the nested-name-specifier /// that precedes the name. Otherwise, returns NULL. NestedNameSpecifier *getQualifier() const { return getQualifierLoc().getNestedNameSpecifier(); } /// Get the NamedDecl through which this reference occurred. /// /// This Decl may be different from the ValueDecl actually referred to in the /// presence of using declarations, etc. It always returns non-NULL, and may /// simply return the ValueDecl when appropriate. NamedDecl *getFoundDecl() { return hasFoundDecl() ? *getTrailingObjects<NamedDecl *>() : D; } /// Get the NamedDecl through which this reference occurred. /// See non-const variant. const NamedDecl *getFoundDecl() const { return hasFoundDecl() ? *getTrailingObjects<NamedDecl *>() : D; } bool hasTemplateKWAndArgsInfo() const { return DeclRefExprBits.HasTemplateKWAndArgsInfo; } /// Retrieve the location of the template keyword preceding /// this name, if any.
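///
/// For illustration (editorial example): in
/// \code
///   template <class T> T id(T v);
///   int x = id<int>(3);  // "id<int>" carries an explicit argument list.
/// \endcode
/// the reference has valid left/right angle bracket locations but, here, no
/// template keyword location.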
SourceLocation getTemplateKeywordLoc() const { if (!hasTemplateKWAndArgsInfo()) return SourceLocation(); return getTrailingObjects<ASTTemplateKWAndArgsInfo>()->TemplateKWLoc; } /// Retrieve the location of the left angle bracket starting the /// explicit template argument list following the name, if any. SourceLocation getLAngleLoc() const { if (!hasTemplateKWAndArgsInfo()) return SourceLocation(); return getTrailingObjects<ASTTemplateKWAndArgsInfo>()->LAngleLoc; } /// Retrieve the location of the right angle bracket ending the /// explicit template argument list following the name, if any. SourceLocation getRAngleLoc() const { if (!hasTemplateKWAndArgsInfo()) return SourceLocation(); return getTrailingObjects<ASTTemplateKWAndArgsInfo>()->RAngleLoc; } /// Determines whether the name in this declaration reference /// was preceded by the template keyword. bool hasTemplateKeyword() const { return getTemplateKeywordLoc().isValid(); } /// Determines whether this declaration reference was followed by an /// explicit template argument list. bool hasExplicitTemplateArgs() const { return getLAngleLoc().isValid(); } /// Copies the template arguments (if present) into the given /// structure. void copyTemplateArgumentsInto(TemplateArgumentListInfo &List) const { if (hasExplicitTemplateArgs()) getTrailingObjects<ASTTemplateKWAndArgsInfo>()->copyInto( getTrailingObjects<TemplateArgumentLoc>(), List); } /// Retrieve the template arguments provided as part of this /// template-id. const TemplateArgumentLoc *getTemplateArgs() const { if (!hasExplicitTemplateArgs()) return nullptr; return getTrailingObjects<TemplateArgumentLoc>(); } /// Retrieve the number of template arguments provided as part of this /// template-id. unsigned getNumTemplateArgs() const { if (!hasExplicitTemplateArgs()) return 0; return getTrailingObjects<ASTTemplateKWAndArgsInfo>()->NumTemplateArgs; } ArrayRef<TemplateArgumentLoc> template_arguments() const { return {getTemplateArgs(), getNumTemplateArgs()}; } /// Returns true if this expression refers to a function that /// was resolved from an overloaded set having size greater than 1. bool hadMultipleCandidates() const { return DeclRefExprBits.HadMultipleCandidates; } /// Sets the flag telling whether this expression refers to /// a function that was resolved from an overloaded set having size /// greater than 1. void setHadMultipleCandidates(bool V = true) { DeclRefExprBits.HadMultipleCandidates = V; } /// Does this DeclRefExpr refer to an enclosing local or a captured /// variable? bool refersToEnclosingVariableOrCapture() const { return DeclRefExprBits.RefersToEnclosingVariableOrCapture; } static bool classof(const Stmt *T) { return T->getStmtClass() == DeclRefExprClass; } // Iterators child_range children() { return child_range(child_iterator(), child_iterator()); } const_child_range children() const { return const_child_range(const_child_iterator(), const_child_iterator()); } }; /// Used by IntegerLiteral/FloatingLiteral to store the numeric value without /// leaking memory. /// /// For large floats/integers, APFloat/APInt will allocate memory from the heap /// to represent these numbers. Unfortunately, when we use a BumpPtrAllocator /// to allocate IntegerLiteral/FloatingLiteral nodes the memory associated with /// the APFloat/APInt values will never get freed. APNumericStorage uses /// ASTContext's allocator for memory allocation. class APNumericStorage { union { uint64_t VAL; ///< Used to store the <= 64 bits integer value. uint64_t *pVal; ///< Used to store the >64 bits integer value.
}; unsigned BitWidth; bool hasAllocation() const { return llvm::APInt::getNumWords(BitWidth) > 1; } APNumericStorage(const APNumericStorage &) = delete; void operator=(const APNumericStorage &) = delete; protected: APNumericStorage() : VAL(0), BitWidth(0) { } llvm::APInt getIntValue() const { unsigned NumWords = llvm::APInt::getNumWords(BitWidth); if (NumWords > 1) return llvm::APInt(BitWidth, NumWords, pVal); else return llvm::APInt(BitWidth, VAL); } void setIntValue(const ASTContext &C, const llvm::APInt &Val); }; class APIntStorage : private APNumericStorage { public: llvm::APInt getValue() const { return getIntValue(); } void setValue(const ASTContext &C, const llvm::APInt &Val) { setIntValue(C, Val); } }; class APFloatStorage : private APNumericStorage { public: llvm::APFloat getValue(const llvm::fltSemantics &Semantics) const { return llvm::APFloat(Semantics, getIntValue()); } void setValue(const ASTContext &C, const llvm::APFloat &Val) { setIntValue(C, Val.bitcastToAPInt()); } }; class IntegerLiteral : public Expr, public APIntStorage { SourceLocation Loc; /// Construct an empty integer literal. explicit IntegerLiteral(EmptyShell Empty) : Expr(IntegerLiteralClass, Empty) { } public: // type should be IntTy, LongTy, LongLongTy, UnsignedIntTy, UnsignedLongTy, // or UnsignedLongLongTy IntegerLiteral(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l); /// Returns a new integer literal with value 'V' and type 'type'. /// \param type - either IntTy, LongTy, LongLongTy, UnsignedIntTy, /// UnsignedLongTy, or UnsignedLongLongTy which should match the size of V /// \param V - the value that the returned integer literal contains. static IntegerLiteral *Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l); /// Returns a new empty integer literal. static IntegerLiteral *Create(const ASTContext &C, EmptyShell Empty); SourceLocation getBeginLoc() const LLVM_READONLY { return Loc; } SourceLocation getEndLoc() const LLVM_READONLY { return Loc; } /// Retrieve the location of the literal. SourceLocation getLocation() const { return Loc; } void setLocation(SourceLocation Location) { Loc = Location; } static bool classof(const Stmt *T) { return T->getStmtClass() == IntegerLiteralClass; } // Iterators child_range children() { return child_range(child_iterator(), child_iterator()); } const_child_range children() const { return const_child_range(const_child_iterator(), const_child_iterator()); } }; class FixedPointLiteral : public Expr, public APIntStorage { SourceLocation Loc; unsigned Scale; /// \brief Construct an empty integer literal. explicit FixedPointLiteral(EmptyShell Empty) : Expr(FixedPointLiteralClass, Empty) {} public: FixedPointLiteral(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l, unsigned Scale); // Store the int as is without any bit shifting. static FixedPointLiteral *CreateFromRawInt(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l, unsigned Scale); SourceLocation getBeginLoc() const LLVM_READONLY { return Loc; } SourceLocation getEndLoc() const LLVM_READONLY { return Loc; } /// \brief Retrieve the location of the literal. 
SourceLocation getLocation() const { return Loc; } void setLocation(SourceLocation Location) { Loc = Location; } static bool classof(const Stmt *T) { return T->getStmtClass() == FixedPointLiteralClass; } std::string getValueAsString(unsigned Radix) const; // Iterators child_range children() { return child_range(child_iterator(), child_iterator()); } const_child_range children() const { return const_child_range(const_child_iterator(), const_child_iterator()); } }; class CharacterLiteral : public Expr { public: enum CharacterKind { Ascii, Wide, UTF8, UTF16, UTF32 }; private: unsigned Value; SourceLocation Loc; public: // type should be IntTy CharacterLiteral(unsigned value, CharacterKind kind, QualType type, SourceLocation l) : Expr(CharacterLiteralClass, type, VK_RValue, OK_Ordinary, false, false, false, false), Value(value), Loc(l) { CharacterLiteralBits.Kind = kind; } /// Construct an empty character literal. CharacterLiteral(EmptyShell Empty) : Expr(CharacterLiteralClass, Empty) { } SourceLocation getLocation() const { return Loc; } CharacterKind getKind() const { return static_cast<CharacterKind>(CharacterLiteralBits.Kind); } SourceLocation getBeginLoc() const LLVM_READONLY { return Loc; } SourceLocation getEndLoc() const LLVM_READONLY { return Loc; } unsigned getValue() const { return Value; } void setLocation(SourceLocation Location) { Loc = Location; } void setKind(CharacterKind kind) { CharacterLiteralBits.Kind = kind; } void setValue(unsigned Val) { Value = Val; } static bool classof(const Stmt *T) { return T->getStmtClass() == CharacterLiteralClass; } // Iterators child_range children() { return child_range(child_iterator(), child_iterator()); } const_child_range children() const { return const_child_range(const_child_iterator(), const_child_iterator()); } }; class FloatingLiteral : public Expr, private APFloatStorage { SourceLocation Loc; FloatingLiteral(const ASTContext &C, const llvm::APFloat &V, bool isexact, QualType Type, SourceLocation L); /// Construct an empty floating-point literal. explicit FloatingLiteral(const ASTContext &C, EmptyShell Empty); public: static FloatingLiteral *Create(const ASTContext &C, const llvm::APFloat &V, bool isexact, QualType Type, SourceLocation L); static FloatingLiteral *Create(const ASTContext &C, EmptyShell Empty); llvm::APFloat getValue() const { return APFloatStorage::getValue(getSemantics()); } void setValue(const ASTContext &C, const llvm::APFloat &Val) { assert(&getSemantics() == &Val.getSemantics() && "Inconsistent semantics"); APFloatStorage::setValue(C, Val); } /// Get a raw enumeration value representing the floating-point semantics of /// this literal (32-bit IEEE, x87, ...), suitable for serialisation. APFloatSemantics getRawSemantics() const { return static_cast<APFloatSemantics>(FloatingLiteralBits.Semantics); } /// Set the raw enumeration value representing the floating-point semantics of /// this literal (32-bit IEEE, x87, ...), suitable for serialisation. void setRawSemantics(APFloatSemantics Sem) { FloatingLiteralBits.Semantics = Sem; } /// Return the APFloat semantics this literal uses. const llvm::fltSemantics &getSemantics() const; /// Set the APFloat semantics this literal uses. void setSemantics(const llvm::fltSemantics &Sem); bool isExact() const { return FloatingLiteralBits.IsExact; } void setExact(bool E) { FloatingLiteralBits.IsExact = E; } /// getValueAsApproximateDouble - This returns the value as an inaccurate /// double. Note that this may cause loss of precision, but is useful for /// debugging dumps, etc.
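///
/// For illustration (editorial note): for the literal 0.1f this yields a
/// double close to, but not exactly, 0.1.
/// \code
///   double d = FL->getValueAsApproximateDouble();  // FL is "0.1f"
/// \endcode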
double getValueAsApproximateDouble() const; SourceLocation getLocation() const { return Loc; } void setLocation(SourceLocation L) { Loc = L; } SourceLocation getBeginLoc() const LLVM_READONLY { return Loc; } SourceLocation getEndLoc() const LLVM_READONLY { return Loc; } static bool classof(const Stmt *T) { return T->getStmtClass() == FloatingLiteralClass; } // Iterators child_range children() { return child_range(child_iterator(), child_iterator()); } const_child_range children() const { return const_child_range(const_child_iterator(), const_child_iterator()); } }; /// ImaginaryLiteral - We support imaginary integer and floating point literals, /// like "1.0i". We represent these as a wrapper around FloatingLiteral and /// IntegerLiteral classes. Instances of this class always have a Complex type /// whose element type matches the subexpression. /// class ImaginaryLiteral : public Expr { Stmt *Val; public: ImaginaryLiteral(Expr *val, QualType Ty) : Expr(ImaginaryLiteralClass, Ty, VK_RValue, OK_Ordinary, false, false, false, false), Val(val) {} /// Build an empty imaginary literal. explicit ImaginaryLiteral(EmptyShell Empty) : Expr(ImaginaryLiteralClass, Empty) { } const Expr *getSubExpr() const { return cast<Expr>(Val); } Expr *getSubExpr() { return cast<Expr>(Val); } void setSubExpr(Expr *E) { Val = E; } SourceLocation getBeginLoc() const LLVM_READONLY { return Val->getBeginLoc(); } SourceLocation getEndLoc() const LLVM_READONLY { return Val->getEndLoc(); } static bool classof(const Stmt *T) { return T->getStmtClass() == ImaginaryLiteralClass; } // Iterators child_range children() { return child_range(&Val, &Val+1); } const_child_range children() const { return const_child_range(&Val, &Val + 1); } }; /// StringLiteral - This represents a string literal expression, e.g. "foo" /// or L"bar" (wide strings). The actual string data can be obtained with /// getBytes() and is NOT null-terminated. The length of the string data is /// determined by calling getByteLength(). /// /// The C type for a string is always a ConstantArrayType. In C++, the char /// type is const qualified, in C it is not. /// /// Note that strings in C can be formed by concatenation of multiple string /// literal pptokens in translation phase #6. This keeps track of the locations /// of each of these pieces. /// /// Strings in C can also be truncated and extended by assigning into arrays, /// e.g. with constructs like: /// char X[2] = "foobar"; /// In this case, getByteLength() will return 6, but the string literal will /// have type "char[2]". class StringLiteral final : public Expr, private llvm::TrailingObjects<StringLiteral, unsigned, SourceLocation, char> { friend class ASTStmtReader; friend TrailingObjects; /// StringLiteral is followed by several trailing objects. They are in order: /// /// * A single unsigned storing the length in characters of this string. The /// length in bytes is this length times the width of a single character. /// Always present and stored as a trailing object because storing it in /// StringLiteral would increase the size of StringLiteral by sizeof(void *) /// due to alignment requirements. If you add some data to StringLiteral, /// consider moving it inside StringLiteral. /// /// * An array of getNumConcatenated() SourceLocation, one for each of the /// tokens this string is made of. /// /// * An array of getByteLength() char used to store the string data.
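///
/// Editorial sketch of the resulting layout, assuming the description above:
///
///   [StringLiteral][unsigned Length][SourceLocation x NumConcatenated]
///   [char x (Length * CharByteWidth)]
///
/// e.g. the concatenation "foo" "bar" stores Length == 6, two
/// SourceLocations, and six bytes of character data.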
public: enum StringKind { Ascii, Wide, UTF8, UTF16, UTF32 }; private: unsigned numTrailingObjects(OverloadToken<unsigned>) const { return 1; } unsigned numTrailingObjects(OverloadToken<SourceLocation>) const { return getNumConcatenated(); } unsigned numTrailingObjects(OverloadToken<char>) const { return getByteLength(); } char *getStrDataAsChar() { return getTrailingObjects<char>(); } const char *getStrDataAsChar() const { return getTrailingObjects<char>(); } const uint16_t *getStrDataAsUInt16() const { return reinterpret_cast<const uint16_t *>(getTrailingObjects<char>()); } const uint32_t *getStrDataAsUInt32() const { return reinterpret_cast<const uint32_t *>(getTrailingObjects<char>()); } /// Build a string literal. StringLiteral(const ASTContext &Ctx, StringRef Str, StringKind Kind, bool Pascal, QualType Ty, const SourceLocation *Loc, unsigned NumConcatenated); /// Build an empty string literal. StringLiteral(EmptyShell Empty, unsigned NumConcatenated, unsigned Length, unsigned CharByteWidth); /// Map a target and string kind to the appropriate character width. static unsigned mapCharByteWidth(TargetInfo const &Target, StringKind SK); /// Set the location of one of the string literal tokens. void setStrTokenLoc(unsigned TokNum, SourceLocation L) { assert(TokNum < getNumConcatenated() && "Invalid tok number"); getTrailingObjects<SourceLocation>()[TokNum] = L; } public: /// This is the "fully general" constructor that allows representation of /// strings formed from multiple concatenated tokens. static StringLiteral *Create(const ASTContext &Ctx, StringRef Str, StringKind Kind, bool Pascal, QualType Ty, const SourceLocation *Loc, unsigned NumConcatenated); /// Simple constructor for string literals made from one token. static StringLiteral *Create(const ASTContext &Ctx, StringRef Str, StringKind Kind, bool Pascal, QualType Ty, SourceLocation Loc) { return Create(Ctx, Str, Kind, Pascal, Ty, &Loc, 1); } /// Construct an empty string literal. static StringLiteral *CreateEmpty(const ASTContext &Ctx, unsigned NumConcatenated, unsigned Length, unsigned CharByteWidth); StringRef getString() const { assert(getCharByteWidth() == 1 && "This function is used in places that assume strings use char"); return StringRef(getStrDataAsChar(), getByteLength()); } /// Allow access to clients that need the byte representation, such as /// ASTWriterStmt::VisitStringLiteral(). StringRef getBytes() const { // FIXME: StringRef may not be the right type to use as a result for this.
return StringRef(getStrDataAsChar(), getByteLength()); } void outputString(raw_ostream &OS) const; uint32_t getCodeUnit(size_t i) const { assert(i < getLength() && "out of bounds access"); switch (getCharByteWidth()) { case 1: return static_cast<unsigned char>(getStrDataAsChar()[i]); case 2: return getStrDataAsUInt16()[i]; case 4: return getStrDataAsUInt32()[i]; } llvm_unreachable("Unsupported character width!"); } unsigned getByteLength() const { return getCharByteWidth() * getLength(); } unsigned getLength() const { return *getTrailingObjects<unsigned>(); } unsigned getCharByteWidth() const { return StringLiteralBits.CharByteWidth; } StringKind getKind() const { return static_cast<StringKind>(StringLiteralBits.Kind); } bool isAscii() const { return getKind() == Ascii; } bool isWide() const { return getKind() == Wide; } bool isUTF8() const { return getKind() == UTF8; } bool isUTF16() const { return getKind() == UTF16; } bool isUTF32() const { return getKind() == UTF32; } bool isPascal() const { return StringLiteralBits.IsPascal; } bool containsNonAscii() const { for (auto c : getString()) if (!isASCII(c)) return true; return false; } bool containsNonAsciiOrNull() const { for (auto c : getString()) if (!isASCII(c) || !c) return true; return false; } /// getNumConcatenated - Get the number of string literal tokens that were /// concatenated in translation phase #6 to form this string literal. unsigned getNumConcatenated() const { return StringLiteralBits.NumConcatenated; } /// Get the location of one of the string literal tokens. SourceLocation getStrTokenLoc(unsigned TokNum) const { assert(TokNum < getNumConcatenated() && "Invalid tok number"); return getTrailingObjects<SourceLocation>()[TokNum]; } /// getLocationOfByte - Return a source location that points to the specified /// byte of this string literal. /// /// Strings are amazingly complex. They can be formed from multiple tokens /// and can have escape sequences in them in addition to the usual trigraph /// and escaped newline business. This routine handles this complexity. /// SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, const TargetInfo &Target, unsigned *StartToken = nullptr, unsigned *StartTokenByteOffset = nullptr) const; typedef const SourceLocation *tokloc_iterator; tokloc_iterator tokloc_begin() const { return getTrailingObjects<SourceLocation>(); } tokloc_iterator tokloc_end() const { return getTrailingObjects<SourceLocation>() + getNumConcatenated(); } SourceLocation getBeginLoc() const LLVM_READONLY { return *tokloc_begin(); } SourceLocation getEndLoc() const LLVM_READONLY { return *(tokloc_end() - 1); } static bool classof(const Stmt *T) { return T->getStmtClass() == StringLiteralClass; } // Iterators child_range children() { return child_range(child_iterator(), child_iterator()); } const_child_range children() const { return const_child_range(const_child_iterator(), const_child_iterator()); } }; /// [C99 6.4.2.2] - A predefined identifier such as __func__. class PredefinedExpr final : public Expr, private llvm::TrailingObjects<PredefinedExpr, Stmt *> { friend class ASTStmtReader; friend TrailingObjects; // PredefinedExpr is optionally followed by a single trailing // "Stmt *" for the predefined identifier. It is present if and only if // hasFunctionName() is true and is always a "StringLiteral *". public: enum IdentKind { Func, Function, LFunction, // Same as Function, but as wide string. FuncDName, FuncSig, LFuncSig, // Same as FuncSig, but as a wide string PrettyFunction, /// The same as PrettyFunction, except that the /// 'virtual' keyword is omitted for virtual member functions.
PrettyFunctionNoVirtual }; private: PredefinedExpr(SourceLocation L, QualType FNTy, IdentKind IK, StringLiteral *SL); explicit PredefinedExpr(EmptyShell Empty, bool HasFunctionName); /// True if this PredefinedExpr has storage for a function name. bool hasFunctionName() const { return PredefinedExprBits.HasFunctionName; } void setFunctionName(StringLiteral *SL) { assert(hasFunctionName() && "This PredefinedExpr has no storage for a function name!"); *getTrailingObjects() = SL; } public: /// Create a PredefinedExpr. static PredefinedExpr *Create(const ASTContext &Ctx, SourceLocation L, QualType FNTy, IdentKind IK, StringLiteral *SL); /// Create an empty PredefinedExpr. static PredefinedExpr *CreateEmpty(const ASTContext &Ctx, bool HasFunctionName); IdentKind getIdentKind() const { return static_cast(PredefinedExprBits.Kind); } SourceLocation getLocation() const { return PredefinedExprBits.Loc; } void setLocation(SourceLocation L) { PredefinedExprBits.Loc = L; } StringLiteral *getFunctionName() { return hasFunctionName() ? static_cast(*getTrailingObjects()) : nullptr; } const StringLiteral *getFunctionName() const { return hasFunctionName() ? static_cast(*getTrailingObjects()) : nullptr; } static StringRef getIdentKindName(IdentKind IK); static std::string ComputeName(IdentKind IK, const Decl *CurrentDecl); SourceLocation getBeginLoc() const { return getLocation(); } SourceLocation getEndLoc() const { return getLocation(); } static bool classof(const Stmt *T) { return T->getStmtClass() == PredefinedExprClass; } // Iterators child_range children() { return child_range(getTrailingObjects(), getTrailingObjects() + hasFunctionName()); } }; /// ParenExpr - This represents a parethesized expression, e.g. "(1)". This /// AST node is only formed if full location information is requested. class ParenExpr : public Expr { SourceLocation L, R; Stmt *Val; public: ParenExpr(SourceLocation l, SourceLocation r, Expr *val) : Expr(ParenExprClass, val->getType(), val->getValueKind(), val->getObjectKind(), val->isTypeDependent(), val->isValueDependent(), val->isInstantiationDependent(), val->containsUnexpandedParameterPack()), L(l), R(r), Val(val) {} /// Construct an empty parenthesized expression. explicit ParenExpr(EmptyShell Empty) : Expr(ParenExprClass, Empty) { } const Expr *getSubExpr() const { return cast(Val); } Expr *getSubExpr() { return cast(Val); } void setSubExpr(Expr *E) { Val = E; } SourceLocation getBeginLoc() const LLVM_READONLY { return L; } SourceLocation getEndLoc() const LLVM_READONLY { return R; } /// Get the location of the left parentheses '('. SourceLocation getLParen() const { return L; } void setLParen(SourceLocation Loc) { L = Loc; } /// Get the location of the right parentheses ')'. SourceLocation getRParen() const { return R; } void setRParen(SourceLocation Loc) { R = Loc; } static bool classof(const Stmt *T) { return T->getStmtClass() == ParenExprClass; } // Iterators child_range children() { return child_range(&Val, &Val+1); } const_child_range children() const { return const_child_range(&Val, &Val + 1); } }; /// UnaryOperator - This represents the unary-expression's (except sizeof and /// alignof), the postinc/postdec operators from postfix-expression, and various /// extensions. /// /// Notes on various nodes: /// /// Real/Imag - These return the real/imag part of a complex operand. If /// applied to a non-complex value, the former returns its operand and the /// later returns zero in the type of the operand. 
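/// Example (illustrative sketch, not part of the original header): the
/// StringLiteral accessors above are width-agnostic, since getCodeUnit()
/// widens every element to uint32_t whatever the stored character width.
/// The helper name below is hypothetical.
/// \code{.cpp}
///   #include "clang/AST/Expr.h"
///   #include "llvm/Support/raw_ostream.h"
///
///   // Print each code unit of SL as a number, whatever its byte width.
///   void dumpCodeUnits(const clang::StringLiteral *SL) {
///     for (unsigned I = 0, N = SL->getLength(); I != N; ++I)
///       llvm::outs() << SL->getCodeUnit(I) << ' ';
///     llvm::outs() << '\n';
///   }
/// \endcode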
/// class UnaryOperator : public Expr { Stmt *Val; public: typedef UnaryOperatorKind Opcode; UnaryOperator(Expr *input, Opcode opc, QualType type, ExprValueKind VK, ExprObjectKind OK, SourceLocation l, bool CanOverflow) : Expr(UnaryOperatorClass, type, VK, OK, input->isTypeDependent() || type->isDependentType(), input->isValueDependent(), (input->isInstantiationDependent() || type->isInstantiationDependentType()), input->containsUnexpandedParameterPack()), Val(input) { UnaryOperatorBits.Opc = opc; UnaryOperatorBits.CanOverflow = CanOverflow; UnaryOperatorBits.Loc = l; } /// Build an empty unary operator. explicit UnaryOperator(EmptyShell Empty) : Expr(UnaryOperatorClass, Empty) { UnaryOperatorBits.Opc = UO_AddrOf; } Opcode getOpcode() const { return static_cast(UnaryOperatorBits.Opc); } void setOpcode(Opcode Opc) { UnaryOperatorBits.Opc = Opc; } Expr *getSubExpr() const { return cast(Val); } void setSubExpr(Expr *E) { Val = E; } /// getOperatorLoc - Return the location of the operator. SourceLocation getOperatorLoc() const { return UnaryOperatorBits.Loc; } void setOperatorLoc(SourceLocation L) { UnaryOperatorBits.Loc = L; } /// Returns true if the unary operator can cause an overflow. For instance, /// signed int i = INT_MAX; i++; /// signed char c = CHAR_MAX; c++; /// Due to integer promotions, c++ is promoted to an int before the postfix /// increment, and the result is an int that cannot overflow. However, i++ /// can overflow. bool canOverflow() const { return UnaryOperatorBits.CanOverflow; } void setCanOverflow(bool C) { UnaryOperatorBits.CanOverflow = C; } /// isPostfix - Return true if this is a postfix operation, like x++. static bool isPostfix(Opcode Op) { return Op == UO_PostInc || Op == UO_PostDec; } /// isPrefix - Return true if this is a prefix operation, like --x. static bool isPrefix(Opcode Op) { return Op == UO_PreInc || Op == UO_PreDec; } bool isPrefix() const { return isPrefix(getOpcode()); } bool isPostfix() const { return isPostfix(getOpcode()); } static bool isIncrementOp(Opcode Op) { return Op == UO_PreInc || Op == UO_PostInc; } bool isIncrementOp() const { return isIncrementOp(getOpcode()); } static bool isDecrementOp(Opcode Op) { return Op == UO_PreDec || Op == UO_PostDec; } bool isDecrementOp() const { return isDecrementOp(getOpcode()); } static bool isIncrementDecrementOp(Opcode Op) { return Op <= UO_PreDec; } bool isIncrementDecrementOp() const { return isIncrementDecrementOp(getOpcode()); } static bool isArithmeticOp(Opcode Op) { return Op >= UO_Plus && Op <= UO_LNot; } bool isArithmeticOp() const { return isArithmeticOp(getOpcode()); } /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it /// corresponds to, e.g. "sizeof" or "[pre]++" static StringRef getOpcodeStr(Opcode Op); /// Retrieve the unary opcode that corresponds to the given /// overloaded operator. static Opcode getOverloadedOpcode(OverloadedOperatorKind OO, bool Postfix); /// Retrieve the overloaded operator kind that corresponds to /// the given unary opcode. static OverloadedOperatorKind getOverloadedOperator(Opcode Opc); SourceLocation getBeginLoc() const LLVM_READONLY { return isPostfix() ? Val->getBeginLoc() : getOperatorLoc(); } SourceLocation getEndLoc() const LLVM_READONLY { return isPostfix() ? 
getOperatorLoc() : Val->getEndLoc(); } SourceLocation getExprLoc() const { return getOperatorLoc(); } static bool classof(const Stmt *T) { return T->getStmtClass() == UnaryOperatorClass; } // Iterators child_range children() { return child_range(&Val, &Val+1); } const_child_range children() const { return const_child_range(&Val, &Val + 1); } }; /// Helper class for OffsetOfExpr. // __builtin_offsetof(type, identifier(.identifier|[expr])*) class OffsetOfNode { public: /// The kind of offsetof node we have. enum Kind { /// An index into an array. Array = 0x00, /// A field. Field = 0x01, /// A field in a dependent type, known only by its name. Identifier = 0x02, /// An implicit indirection through a C++ base class, when the /// field found is in a base class. Base = 0x03 }; private: enum { MaskBits = 2, Mask = 0x03 }; /// The source range that covers this part of the designator. SourceRange Range; /// The data describing the designator, which comes in three /// different forms, depending on the lower two bits. /// - An unsigned index into the array of Expr*'s stored after this node /// in memory, for [constant-expression] designators. /// - A FieldDecl*, for references to a known field. /// - An IdentifierInfo*, for references to a field with a given name /// when the class type is dependent. /// - A CXXBaseSpecifier*, for references that look at a field in a /// base class. uintptr_t Data; public: /// Create an offsetof node that refers to an array element. OffsetOfNode(SourceLocation LBracketLoc, unsigned Index, SourceLocation RBracketLoc) : Range(LBracketLoc, RBracketLoc), Data((Index << 2) | Array) {} /// Create an offsetof node that refers to a field. OffsetOfNode(SourceLocation DotLoc, FieldDecl *Field, SourceLocation NameLoc) : Range(DotLoc.isValid() ? DotLoc : NameLoc, NameLoc), Data(reinterpret_cast(Field) | OffsetOfNode::Field) {} /// Create an offsetof node that refers to an identifier. OffsetOfNode(SourceLocation DotLoc, IdentifierInfo *Name, SourceLocation NameLoc) : Range(DotLoc.isValid() ? DotLoc : NameLoc, NameLoc), Data(reinterpret_cast(Name) | Identifier) {} /// Create an offsetof node that refers into a C++ base class. explicit OffsetOfNode(const CXXBaseSpecifier *Base) : Range(), Data(reinterpret_cast(Base) | OffsetOfNode::Base) {} /// Determine what kind of offsetof node this is. Kind getKind() const { return static_cast(Data & Mask); } /// For an array element node, returns the index into the array /// of expressions. unsigned getArrayExprIndex() const { assert(getKind() == Array); return Data >> 2; } /// For a field offsetof node, returns the field. FieldDecl *getField() const { assert(getKind() == Field); return reinterpret_cast(Data & ~(uintptr_t)Mask); } /// For a field or identifier offsetof node, returns the name of /// the field. IdentifierInfo *getFieldName() const; /// For a base class node, returns the base specifier. CXXBaseSpecifier *getBase() const { assert(getKind() == Base); return reinterpret_cast(Data & ~(uintptr_t)Mask); } /// Retrieve the source range that covers this offsetof node. /// /// For an array element node, the source range contains the locations of /// the square brackets. For a field or identifier node, the source range /// contains the location of the period (if there is one) and the /// identifier. 
SourceRange getSourceRange() const LLVM_READONLY { return Range; } SourceLocation getBeginLoc() const LLVM_READONLY { return Range.getBegin(); } SourceLocation getEndLoc() const LLVM_READONLY { return Range.getEnd(); } }; /// OffsetOfExpr - [C99 7.17] - This represents an expression of the form /// offsetof(record-type, member-designator). For example, given: /// @code /// struct S { /// float f; /// double d; /// }; /// struct T { /// int i; /// struct S s[10]; /// }; /// @endcode /// we can represent and evaluate the expression @c offsetof(struct T, s[2].d). class OffsetOfExpr final : public Expr, private llvm::TrailingObjects { SourceLocation OperatorLoc, RParenLoc; // Base type; TypeSourceInfo *TSInfo; // Number of sub-components (i.e. instances of OffsetOfNode). unsigned NumComps; // Number of sub-expressions (i.e. array subscript expressions). unsigned NumExprs; size_t numTrailingObjects(OverloadToken) const { return NumComps; } OffsetOfExpr(const ASTContext &C, QualType type, SourceLocation OperatorLoc, TypeSourceInfo *tsi, ArrayRef comps, ArrayRef exprs, SourceLocation RParenLoc); explicit OffsetOfExpr(unsigned numComps, unsigned numExprs) : Expr(OffsetOfExprClass, EmptyShell()), TSInfo(nullptr), NumComps(numComps), NumExprs(numExprs) {} public: static OffsetOfExpr *Create(const ASTContext &C, QualType type, SourceLocation OperatorLoc, TypeSourceInfo *tsi, ArrayRef comps, ArrayRef exprs, SourceLocation RParenLoc); static OffsetOfExpr *CreateEmpty(const ASTContext &C, unsigned NumComps, unsigned NumExprs); /// getOperatorLoc - Return the location of the operator. SourceLocation getOperatorLoc() const { return OperatorLoc; } void setOperatorLoc(SourceLocation L) { OperatorLoc = L; } /// Return the location of the right parentheses. SourceLocation getRParenLoc() const { return RParenLoc; } void setRParenLoc(SourceLocation R) { RParenLoc = R; } TypeSourceInfo *getTypeSourceInfo() const { return TSInfo; } void setTypeSourceInfo(TypeSourceInfo *tsi) { TSInfo = tsi; } const OffsetOfNode &getComponent(unsigned Idx) const { assert(Idx < NumComps && "Subscript out of range"); return getTrailingObjects()[Idx]; } void setComponent(unsigned Idx, OffsetOfNode ON) { assert(Idx < NumComps && "Subscript out of range"); getTrailingObjects()[Idx] = ON; } unsigned getNumComponents() const { return NumComps; } Expr* getIndexExpr(unsigned Idx) { assert(Idx < NumExprs && "Subscript out of range"); return getTrailingObjects()[Idx]; } const Expr *getIndexExpr(unsigned Idx) const { assert(Idx < NumExprs && "Subscript out of range"); return getTrailingObjects()[Idx]; } void setIndexExpr(unsigned Idx, Expr* E) { assert(Idx < NumComps && "Subscript out of range"); getTrailingObjects()[Idx] = E; } unsigned getNumExpressions() const { return NumExprs; } SourceLocation getBeginLoc() const LLVM_READONLY { return OperatorLoc; } SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; } static bool classof(const Stmt *T) { return T->getStmtClass() == OffsetOfExprClass; } // Iterators child_range children() { Stmt **begin = reinterpret_cast(getTrailingObjects()); return child_range(begin, begin + NumExprs); } const_child_range children() const { Stmt *const *begin = reinterpret_cast(getTrailingObjects()); return const_child_range(begin, begin + NumExprs); } friend TrailingObjects; }; /// UnaryExprOrTypeTraitExpr - expression with either a type or (unevaluated) /// expression operand. Used for sizeof/alignof (C99 6.5.3.4) and /// vec_step (OpenCL 1.1 6.11.12). 
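/// Example (illustrative sketch, not part of the original header): walking an
/// offsetof designator with the OffsetOfExpr/OffsetOfNode API above. Array
/// components store only an index into the trailing Expr* array, so the index
/// expression is fetched separately. The helper name is hypothetical.
/// \code{.cpp}
///   #include "clang/AST/Expr.h"
///   #include "llvm/Support/raw_ostream.h"
///
///   void printDesignator(const clang::OffsetOfExpr *E) {
///     for (unsigned I = 0, N = E->getNumComponents(); I != N; ++I) {
///       const clang::OffsetOfNode &ON = E->getComponent(I);
///       switch (ON.getKind()) {
///       case clang::OffsetOfNode::Array: // index expr stored out of line
///         llvm::outs() << "[expr #" << ON.getArrayExprIndex() << ']';
///         break;
///       case clang::OffsetOfNode::Field:
///       case clang::OffsetOfNode::Identifier:
///         llvm::outs() << '.' << ON.getFieldName()->getName();
///         break;
///       case clang::OffsetOfNode::Base: // implicit base-class hop
///         llvm::outs() << "<base>";
///         break;
///       }
///     }
///     llvm::outs() << '\n';
///   }
/// \endcode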
class UnaryExprOrTypeTraitExpr : public Expr { union { TypeSourceInfo *Ty; Stmt *Ex; } Argument; SourceLocation OpLoc, RParenLoc; public: UnaryExprOrTypeTraitExpr(UnaryExprOrTypeTrait ExprKind, TypeSourceInfo *TInfo, QualType resultType, SourceLocation op, SourceLocation rp) : Expr(UnaryExprOrTypeTraitExprClass, resultType, VK_RValue, OK_Ordinary, false, // Never type-dependent (C++ [temp.dep.expr]p3). // Value-dependent if the argument is type-dependent. TInfo->getType()->isDependentType(), TInfo->getType()->isInstantiationDependentType(), TInfo->getType()->containsUnexpandedParameterPack()), OpLoc(op), RParenLoc(rp) { UnaryExprOrTypeTraitExprBits.Kind = ExprKind; UnaryExprOrTypeTraitExprBits.IsType = true; Argument.Ty = TInfo; } UnaryExprOrTypeTraitExpr(UnaryExprOrTypeTrait ExprKind, Expr *E, QualType resultType, SourceLocation op, SourceLocation rp); /// Construct an empty sizeof/alignof expression. explicit UnaryExprOrTypeTraitExpr(EmptyShell Empty) : Expr(UnaryExprOrTypeTraitExprClass, Empty) { } UnaryExprOrTypeTrait getKind() const { return static_cast(UnaryExprOrTypeTraitExprBits.Kind); } void setKind(UnaryExprOrTypeTrait K) { UnaryExprOrTypeTraitExprBits.Kind = K;} bool isArgumentType() const { return UnaryExprOrTypeTraitExprBits.IsType; } QualType getArgumentType() const { return getArgumentTypeInfo()->getType(); } TypeSourceInfo *getArgumentTypeInfo() const { assert(isArgumentType() && "calling getArgumentType() when arg is expr"); return Argument.Ty; } Expr *getArgumentExpr() { assert(!isArgumentType() && "calling getArgumentExpr() when arg is type"); return static_cast(Argument.Ex); } const Expr *getArgumentExpr() const { return const_cast(this)->getArgumentExpr(); } void setArgument(Expr *E) { Argument.Ex = E; UnaryExprOrTypeTraitExprBits.IsType = false; } void setArgument(TypeSourceInfo *TInfo) { Argument.Ty = TInfo; UnaryExprOrTypeTraitExprBits.IsType = true; } /// Gets the argument type, or the type of the argument expression, whichever /// is appropriate. QualType getTypeOfArgument() const { return isArgumentType() ? getArgumentType() : getArgumentExpr()->getType(); } SourceLocation getOperatorLoc() const { return OpLoc; } void setOperatorLoc(SourceLocation L) { OpLoc = L; } SourceLocation getRParenLoc() const { return RParenLoc; } void setRParenLoc(SourceLocation L) { RParenLoc = L; } SourceLocation getBeginLoc() const LLVM_READONLY { return OpLoc; } SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; } static bool classof(const Stmt *T) { return T->getStmtClass() == UnaryExprOrTypeTraitExprClass; } // Iterators child_range children(); const_child_range children() const; }; //===----------------------------------------------------------------------===// // Postfix Operators. //===----------------------------------------------------------------------===// /// ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting. 
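/// Example (illustrative sketch, not part of the original header):
/// getTypeOfArgument() abstracts over the type/expression operand duality
/// described above. The helper name is hypothetical.
/// \code{.cpp}
///   #include "clang/AST/Expr.h"
///   #include "llvm/Support/raw_ostream.h"
///
///   void printSizeofOperand(const clang::UnaryExprOrTypeTraitExpr *E) {
///     clang::QualType T = E->getTypeOfArgument(); // works for both forms
///     llvm::outs() << (E->isArgumentType() ? "type operand: "
///                                          : "expr operand of type: ")
///                  << T.getAsString() << '\n';
///   }
/// \endcode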
class ArraySubscriptExpr : public Expr {
  enum { LHS, RHS, END_EXPR };
  Stmt *SubExprs[END_EXPR];

  bool lhsIsBase() const { return getRHS()->getType()->isIntegerType(); }

public:
  ArraySubscriptExpr(Expr *lhs, Expr *rhs, QualType t, ExprValueKind VK,
                     ExprObjectKind OK, SourceLocation rbracketloc)
      : Expr(ArraySubscriptExprClass, t, VK, OK,
             lhs->isTypeDependent() || rhs->isTypeDependent(),
             lhs->isValueDependent() || rhs->isValueDependent(),
             (lhs->isInstantiationDependent() ||
              rhs->isInstantiationDependent()),
             (lhs->containsUnexpandedParameterPack() ||
              rhs->containsUnexpandedParameterPack())) {
    SubExprs[LHS] = lhs;
    SubExprs[RHS] = rhs;
    ArraySubscriptExprBits.RBracketLoc = rbracketloc;
  }

  /// Create an empty array subscript expression.
  explicit ArraySubscriptExpr(EmptyShell Shell)
      : Expr(ArraySubscriptExprClass, Shell) {}

  /// An array access can be written A[4] or 4[A] (both are equivalent).
  /// - getBase() and getIdx() always present the normalized view: A[4].
  ///   In this case getBase() returns "A" and getIdx() returns "4".
  /// - getLHS() and getRHS() present the syntactic view. e.g. for
  ///   4[A] getLHS() returns "4".
  /// Note: Because vector element access is also written A[4] we must
  /// predicate the format conversion in getBase and getIdx only on the
  /// type of the RHS, as it is possible for the LHS to be a vector of
  /// integer type.
  Expr *getLHS() { return cast<Expr>(SubExprs[LHS]); }
  const Expr *getLHS() const { return cast<Expr>(SubExprs[LHS]); }
  void setLHS(Expr *E) { SubExprs[LHS] = E; }

  Expr *getRHS() { return cast<Expr>(SubExprs[RHS]); }
  const Expr *getRHS() const { return cast<Expr>(SubExprs[RHS]); }
  void setRHS(Expr *E) { SubExprs[RHS] = E; }

  Expr *getBase() { return lhsIsBase() ? getLHS() : getRHS(); }
  const Expr *getBase() const { return lhsIsBase() ? getLHS() : getRHS(); }

  Expr *getIdx() { return lhsIsBase() ? getRHS() : getLHS(); }
  const Expr *getIdx() const { return lhsIsBase() ? getRHS() : getLHS(); }

  SourceLocation getBeginLoc() const LLVM_READONLY {
    return getLHS()->getBeginLoc();
  }
  SourceLocation getEndLoc() const { return getRBracketLoc(); }

  SourceLocation getRBracketLoc() const {
    return ArraySubscriptExprBits.RBracketLoc;
  }
  void setRBracketLoc(SourceLocation L) {
    ArraySubscriptExprBits.RBracketLoc = L;
  }

  SourceLocation getExprLoc() const LLVM_READONLY {
    return getBase()->getExprLoc();
  }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == ArraySubscriptExprClass;
  }

  // Iterators
  child_range children() {
    return child_range(&SubExprs[0], &SubExprs[0] + END_EXPR);
  }
  const_child_range children() const {
    return const_child_range(&SubExprs[0], &SubExprs[0] + END_EXPR);
  }
};

/// CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
/// CallExpr itself represents a normal function call, e.g., "f(x, 2)",
/// while its subclasses may represent alternative syntax that (semantically)
/// results in a function call. For example, CXXOperatorCallExpr is
/// a subclass for overloaded operator calls that use operator syntax, e.g.,
/// "str1 + str2" to resolve to a function call.
class CallExpr : public Expr {
  enum { FN = 0, PREARGS_START = 1 };

  /// The number of arguments in the call expression.
  unsigned NumArgs;

  /// The location of the right parenthesis. This has a different meaning for
  /// the derived classes of CallExpr.
  SourceLocation RParenLoc;

  void updateDependenciesFromArg(Expr *Arg);

  // CallExpr stores some data in trailing objects. However, since CallExpr
  // is used as a base of other expression classes we cannot use
  // llvm::TrailingObjects.
Instead we manually perform the pointer arithmetic // and casts. // // The trailing objects are in order: // // * A single "Stmt *" for the callee expression. // // * An array of getNumPreArgs() "Stmt *" for the pre-argument expressions. // // * An array of getNumArgs() "Stmt *" for the argument expressions. // // Note that we store the offset in bytes from the this pointer to the start // of the trailing objects. It would be perfectly possible to compute it // based on the dynamic kind of the CallExpr. However 1.) we have plenty of // space in the bit-fields of Stmt. 2.) It was benchmarked to be faster to // compute this once and then load the offset from the bit-fields of Stmt, // instead of re-computing the offset each time the trailing objects are // accessed. /// Return a pointer to the start of the trailing array of "Stmt *". Stmt **getTrailingStmts() { return reinterpret_cast(reinterpret_cast(this) + CallExprBits.OffsetToTrailingObjects); } Stmt *const *getTrailingStmts() const { return const_cast(this)->getTrailingStmts(); } /// Map a statement class to the appropriate offset in bytes from the /// this pointer to the trailing objects. static unsigned offsetToTrailingObjects(StmtClass SC); public: enum class ADLCallKind : bool { NotADL, UsesADL }; static constexpr ADLCallKind NotADL = ADLCallKind::NotADL; static constexpr ADLCallKind UsesADL = ADLCallKind::UsesADL; protected: /// Build a call expression, assuming that appropriate storage has been /// allocated for the trailing objects. CallExpr(StmtClass SC, Expr *Fn, ArrayRef PreArgs, ArrayRef Args, QualType Ty, ExprValueKind VK, SourceLocation RParenLoc, unsigned MinNumArgs, ADLCallKind UsesADL); /// Build an empty call expression, for deserialization. CallExpr(StmtClass SC, unsigned NumPreArgs, unsigned NumArgs, EmptyShell Empty); /// Return the size in bytes needed for the trailing objects. /// Used by the derived classes to allocate the right amount of storage. static unsigned sizeOfTrailingObjects(unsigned NumPreArgs, unsigned NumArgs) { return (1 + NumPreArgs + NumArgs) * sizeof(Stmt *); } Stmt *getPreArg(unsigned I) { assert(I < getNumPreArgs() && "Prearg access out of range!"); return getTrailingStmts()[PREARGS_START + I]; } const Stmt *getPreArg(unsigned I) const { assert(I < getNumPreArgs() && "Prearg access out of range!"); return getTrailingStmts()[PREARGS_START + I]; } void setPreArg(unsigned I, Stmt *PreArg) { assert(I < getNumPreArgs() && "Prearg access out of range!"); getTrailingStmts()[PREARGS_START + I] = PreArg; } unsigned getNumPreArgs() const { return CallExprBits.NumPreArgs; } public: /// Create a call expression. Fn is the callee expression, Args is the /// argument array, Ty is the type of the call expression (which is *not* /// the return type in general), VK is the value kind of the call expression /// (lvalue, rvalue, ...), and RParenLoc is the location of the right /// parenthese in the call expression. MinNumArgs specifies the minimum /// number of arguments. The actual number of arguments will be the greater /// of Args.size() and MinNumArgs. This is used in a few places to allocate /// enough storage for the default arguments. UsesADL specifies whether the /// callee was found through argument-dependent lookup. /// /// Note that you can use CreateTemporary if you need a temporary call /// expression on the stack. 
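/// Example (illustrative arithmetic only, mirroring sizeOfTrailingObjects()
/// above; not an allocation recipe): for a plain CallExpr "f(a, b, c)" with
/// no pre-arguments, the trailing storage is four "Stmt *" slots laid out
/// directly after the object.
/// \code{.cpp}
///   unsigned NumPreArgs = 0, NumArgs = 3;
///   unsigned TrailingBytes =
///       (1 /*callee*/ + NumPreArgs + NumArgs) * sizeof(clang::Stmt *);
///   // Memory layout: [CallExpr][callee][pre-args...][arg0][arg1][arg2]
/// \endcode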
static CallExpr *Create(const ASTContext &Ctx, Expr *Fn, ArrayRef Args, QualType Ty, ExprValueKind VK, SourceLocation RParenLoc, unsigned MinNumArgs = 0, ADLCallKind UsesADL = NotADL); /// Create a temporary call expression with no arguments in the memory /// pointed to by Mem. Mem must points to at least sizeof(CallExpr) /// + sizeof(Stmt *) bytes of storage, aligned to alignof(CallExpr): /// /// \code{.cpp} /// llvm::AlignedCharArray Buffer; /// CallExpr *TheCall = CallExpr::CreateTemporary(Buffer.buffer, etc); /// \endcode static CallExpr *CreateTemporary(void *Mem, Expr *Fn, QualType Ty, ExprValueKind VK, SourceLocation RParenLoc, ADLCallKind UsesADL = NotADL); /// Create an empty call expression, for deserialization. static CallExpr *CreateEmpty(const ASTContext &Ctx, unsigned NumArgs, EmptyShell Empty); Expr *getCallee() { return cast(getTrailingStmts()[FN]); } const Expr *getCallee() const { return cast(getTrailingStmts()[FN]); } void setCallee(Expr *F) { getTrailingStmts()[FN] = F; } ADLCallKind getADLCallKind() const { return static_cast(CallExprBits.UsesADL); } void setADLCallKind(ADLCallKind V = UsesADL) { CallExprBits.UsesADL = static_cast(V); } bool usesADL() const { return getADLCallKind() == UsesADL; } Decl *getCalleeDecl() { return getCallee()->getReferencedDeclOfCallee(); } const Decl *getCalleeDecl() const { return getCallee()->getReferencedDeclOfCallee(); } /// If the callee is a FunctionDecl, return it. Otherwise return null. FunctionDecl *getDirectCallee() { return dyn_cast_or_null(getCalleeDecl()); } const FunctionDecl *getDirectCallee() const { return dyn_cast_or_null(getCalleeDecl()); } /// getNumArgs - Return the number of actual arguments to this call. unsigned getNumArgs() const { return NumArgs; } /// Retrieve the call arguments. Expr **getArgs() { return reinterpret_cast(getTrailingStmts() + PREARGS_START + getNumPreArgs()); } const Expr *const *getArgs() const { return reinterpret_cast( getTrailingStmts() + PREARGS_START + getNumPreArgs()); } /// getArg - Return the specified argument. Expr *getArg(unsigned Arg) { assert(Arg < getNumArgs() && "Arg access out of range!"); return getArgs()[Arg]; } const Expr *getArg(unsigned Arg) const { assert(Arg < getNumArgs() && "Arg access out of range!"); return getArgs()[Arg]; } /// setArg - Set the specified argument. void setArg(unsigned Arg, Expr *ArgExpr) { assert(Arg < getNumArgs() && "Arg access out of range!"); getArgs()[Arg] = ArgExpr; } /// Reduce the number of arguments in this call expression. This is used for /// example during error recovery to drop extra arguments. There is no way /// to perform the opposite because: 1.) We don't track how much storage /// we have for the argument array 2.) This would potentially require growing /// the argument array, something we cannot support since the arguments are /// stored in a trailing array. void shrinkNumArgs(unsigned NewNumArgs) { assert((NewNumArgs <= getNumArgs()) && "shrinkNumArgs cannot increase the number of arguments!"); NumArgs = NewNumArgs; } + /// Bluntly set a new number of arguments without doing any checks whatsoever. + /// Only used during construction of a CallExpr in a few places in Sema. + /// FIXME: Find a way to remove it. 
+ void setNumArgsUnsafe(unsigned NewNumArgs) { NumArgs = NewNumArgs; } + typedef ExprIterator arg_iterator; typedef ConstExprIterator const_arg_iterator; typedef llvm::iterator_range arg_range; typedef llvm::iterator_range const_arg_range; arg_range arguments() { return arg_range(arg_begin(), arg_end()); } const_arg_range arguments() const { return const_arg_range(arg_begin(), arg_end()); } arg_iterator arg_begin() { return getTrailingStmts() + PREARGS_START + getNumPreArgs(); } arg_iterator arg_end() { return arg_begin() + getNumArgs(); } const_arg_iterator arg_begin() const { return getTrailingStmts() + PREARGS_START + getNumPreArgs(); } const_arg_iterator arg_end() const { return arg_begin() + getNumArgs(); } /// This method provides fast access to all the subexpressions of /// a CallExpr without going through the slower virtual child_iterator /// interface. This provides efficient reverse iteration of the /// subexpressions. This is currently used for CFG construction. ArrayRef getRawSubExprs() { return llvm::makeArrayRef(getTrailingStmts(), PREARGS_START + getNumPreArgs() + getNumArgs()); } /// getNumCommas - Return the number of commas that must have been present in /// this function call. unsigned getNumCommas() const { return getNumArgs() ? getNumArgs() - 1 : 0; } /// getBuiltinCallee - If this is a call to a builtin, return the builtin ID /// of the callee. If not, return 0. unsigned getBuiltinCallee() const; /// Returns \c true if this is a call to a builtin which does not /// evaluate side-effects within its arguments. bool isUnevaluatedBuiltinCall(const ASTContext &Ctx) const; /// getCallReturnType - Get the return type of the call expr. This is not /// always the type of the expr itself, if the return type is a reference /// type. QualType getCallReturnType(const ASTContext &Ctx) const; /// Returns the WarnUnusedResultAttr that is either declared on the called /// function, or its return type declaration. const Attr *getUnusedResultAttr(const ASTContext &Ctx) const; /// Returns true if this call expression should warn on unused results. bool hasUnusedResultAttr(const ASTContext &Ctx) const { return getUnusedResultAttr(Ctx) != nullptr; } SourceLocation getRParenLoc() const { return RParenLoc; } void setRParenLoc(SourceLocation L) { RParenLoc = L; } SourceLocation getBeginLoc() const LLVM_READONLY; SourceLocation getEndLoc() const LLVM_READONLY; /// Return true if this is a call to __assume() or __builtin_assume() with /// a non-value-dependent constant parameter evaluating as false. bool isBuiltinAssumeFalse(const ASTContext &Ctx) const; bool isCallToStdMove() const { const FunctionDecl *FD = getDirectCallee(); return getNumArgs() == 1 && FD && FD->isInStdNamespace() && FD->getIdentifier() && FD->getIdentifier()->isStr("move"); } static bool classof(const Stmt *T) { return T->getStmtClass() >= firstCallExprConstant && T->getStmtClass() <= lastCallExprConstant; } // Iterators child_range children() { return child_range(getTrailingStmts(), getTrailingStmts() + PREARGS_START + getNumPreArgs() + getNumArgs()); } const_child_range children() const { return const_child_range(getTrailingStmts(), getTrailingStmts() + PREARGS_START + getNumPreArgs() + getNumArgs()); } }; /// Extra data stored in some MemberExpr objects. struct MemberExprNameQualifier { /// The nested-name-specifier that qualifies the name, including /// source-location information. NestedNameSpecifierLoc QualifierLoc; /// The DeclAccessPair through which the MemberDecl was found due to /// name qualifiers. 
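/// Example (illustrative sketch, not part of the original header): typical
/// read-only use of the CallExpr interface above. The helper name is
/// hypothetical and assumes a fully-built call.
/// \code{.cpp}
///   #include "clang/AST/Expr.h"
///   #include "llvm/Support/raw_ostream.h"
///
///   void printCallSummary(const clang::CallExpr *CE) {
///     if (const clang::FunctionDecl *FD = CE->getDirectCallee())
///       llvm::outs() << FD->getNameAsString() << " called with "
///                    << CE->getNumArgs() << " argument(s)\n";
///     for (const clang::Expr *Arg : CE->arguments())
///       Arg->dump(); // inspect each argument
///   }
/// \endcode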
DeclAccessPair FoundDecl; }; /// MemberExpr - [C99 6.5.2.3] Structure and Union Members. X->F and X.F. /// class MemberExpr final : public Expr, private llvm::TrailingObjects { friend class ASTReader; friend class ASTStmtWriter; friend TrailingObjects; /// Base - the expression for the base pointer or structure references. In /// X.F, this is "X". Stmt *Base; /// MemberDecl - This is the decl being referenced by the field/member name. /// In X.F, this is the decl referenced by F. ValueDecl *MemberDecl; /// MemberDNLoc - Provides source/type location info for the /// declaration name embedded in MemberDecl. DeclarationNameLoc MemberDNLoc; /// MemberLoc - This is the location of the member name. SourceLocation MemberLoc; size_t numTrailingObjects(OverloadToken) const { return hasQualifierOrFoundDecl(); } size_t numTrailingObjects(OverloadToken) const { return hasTemplateKWAndArgsInfo(); } bool hasQualifierOrFoundDecl() const { return MemberExprBits.HasQualifierOrFoundDecl; } bool hasTemplateKWAndArgsInfo() const { return MemberExprBits.HasTemplateKWAndArgsInfo; } public: MemberExpr(Expr *base, bool isarrow, SourceLocation operatorloc, ValueDecl *memberdecl, const DeclarationNameInfo &NameInfo, QualType ty, ExprValueKind VK, ExprObjectKind OK) : Expr(MemberExprClass, ty, VK, OK, base->isTypeDependent(), base->isValueDependent(), base->isInstantiationDependent(), base->containsUnexpandedParameterPack()), Base(base), MemberDecl(memberdecl), MemberDNLoc(NameInfo.getInfo()), MemberLoc(NameInfo.getLoc()) { assert(memberdecl->getDeclName() == NameInfo.getName()); MemberExprBits.IsArrow = isarrow; MemberExprBits.HasQualifierOrFoundDecl = false; MemberExprBits.HasTemplateKWAndArgsInfo = false; MemberExprBits.HadMultipleCandidates = false; MemberExprBits.OperatorLoc = operatorloc; } // NOTE: this constructor should be used only when it is known that // the member name can not provide additional syntactic info // (i.e., source locations for C++ operator names or type source info // for constructors, destructors and conversion operators). MemberExpr(Expr *base, bool isarrow, SourceLocation operatorloc, ValueDecl *memberdecl, SourceLocation l, QualType ty, ExprValueKind VK, ExprObjectKind OK) : Expr(MemberExprClass, ty, VK, OK, base->isTypeDependent(), base->isValueDependent(), base->isInstantiationDependent(), base->containsUnexpandedParameterPack()), Base(base), MemberDecl(memberdecl), MemberDNLoc(), MemberLoc(l) { MemberExprBits.IsArrow = isarrow; MemberExprBits.HasQualifierOrFoundDecl = false; MemberExprBits.HasTemplateKWAndArgsInfo = false; MemberExprBits.HadMultipleCandidates = false; MemberExprBits.OperatorLoc = operatorloc; } static MemberExpr *Create(const ASTContext &C, Expr *base, bool isarrow, SourceLocation OperatorLoc, NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, ValueDecl *memberdecl, DeclAccessPair founddecl, DeclarationNameInfo MemberNameInfo, const TemplateArgumentListInfo *targs, QualType ty, ExprValueKind VK, ExprObjectKind OK); void setBase(Expr *E) { Base = E; } Expr *getBase() const { return cast(Base); } /// Retrieve the member declaration to which this expression refers. /// /// The returned declaration will be a FieldDecl or (in C++) a VarDecl (for /// static data members), a CXXMethodDecl, or an EnumConstantDecl. ValueDecl *getMemberDecl() const { return MemberDecl; } void setMemberDecl(ValueDecl *D) { MemberDecl = D; } /// Retrieves the declaration found by lookup. 
DeclAccessPair getFoundDecl() const { if (!hasQualifierOrFoundDecl()) return DeclAccessPair::make(getMemberDecl(), getMemberDecl()->getAccess()); return getTrailingObjects()->FoundDecl; } /// Determines whether this member expression actually had /// a C++ nested-name-specifier prior to the name of the member, e.g., /// x->Base::foo. bool hasQualifier() const { return getQualifier() != nullptr; } /// If the member name was qualified, retrieves the /// nested-name-specifier that precedes the member name, with source-location /// information. NestedNameSpecifierLoc getQualifierLoc() const { if (!hasQualifierOrFoundDecl()) return NestedNameSpecifierLoc(); return getTrailingObjects()->QualifierLoc; } /// If the member name was qualified, retrieves the /// nested-name-specifier that precedes the member name. Otherwise, returns /// NULL. NestedNameSpecifier *getQualifier() const { return getQualifierLoc().getNestedNameSpecifier(); } /// Retrieve the location of the template keyword preceding /// the member name, if any. SourceLocation getTemplateKeywordLoc() const { if (!hasTemplateKWAndArgsInfo()) return SourceLocation(); return getTrailingObjects()->TemplateKWLoc; } /// Retrieve the location of the left angle bracket starting the /// explicit template argument list following the member name, if any. SourceLocation getLAngleLoc() const { if (!hasTemplateKWAndArgsInfo()) return SourceLocation(); return getTrailingObjects()->LAngleLoc; } /// Retrieve the location of the right angle bracket ending the /// explicit template argument list following the member name, if any. SourceLocation getRAngleLoc() const { if (!hasTemplateKWAndArgsInfo()) return SourceLocation(); return getTrailingObjects()->RAngleLoc; } /// Determines whether the member name was preceded by the template keyword. bool hasTemplateKeyword() const { return getTemplateKeywordLoc().isValid(); } /// Determines whether the member name was followed by an /// explicit template argument list. bool hasExplicitTemplateArgs() const { return getLAngleLoc().isValid(); } /// Copies the template arguments (if present) into the given /// structure. void copyTemplateArgumentsInto(TemplateArgumentListInfo &List) const { if (hasExplicitTemplateArgs()) getTrailingObjects()->copyInto( getTrailingObjects(), List); } /// Retrieve the template arguments provided as part of this /// template-id. const TemplateArgumentLoc *getTemplateArgs() const { if (!hasExplicitTemplateArgs()) return nullptr; return getTrailingObjects(); } /// Retrieve the number of template arguments provided as part of this /// template-id. unsigned getNumTemplateArgs() const { if (!hasExplicitTemplateArgs()) return 0; return getTrailingObjects()->NumTemplateArgs; } ArrayRef template_arguments() const { return {getTemplateArgs(), getNumTemplateArgs()}; } /// Retrieve the member declaration name info. DeclarationNameInfo getMemberNameInfo() const { return DeclarationNameInfo(MemberDecl->getDeclName(), MemberLoc, MemberDNLoc); } SourceLocation getOperatorLoc() const { return MemberExprBits.OperatorLoc; } bool isArrow() const { return MemberExprBits.IsArrow; } void setArrow(bool A) { MemberExprBits.IsArrow = A; } /// getMemberLoc - Return the location of the "member", in X->F, it is the /// location of 'F'. 
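/// Example (illustrative sketch, not part of the original header): the
/// trailing-object queries above make qualified and template-id member
/// references cheap to test for. The helper name is hypothetical.
/// \code{.cpp}
///   #include "clang/AST/Expr.h"
///   #include "llvm/Support/raw_ostream.h"
///
///   void printMemberAccess(const clang::MemberExpr *ME) {
///     llvm::outs() << (ME->isArrow() ? "->" : ".")
///                  << ME->getMemberDecl()->getNameAsString();
///     if (ME->hasQualifier())
///       llvm::outs() << " [qualified]";
///     if (ME->hasExplicitTemplateArgs())
///       llvm::outs() << " [" << ME->getNumTemplateArgs()
///                    << " template args]";
///     llvm::outs() << '\n';
///   }
/// \endcode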
SourceLocation getMemberLoc() const { return MemberLoc; } void setMemberLoc(SourceLocation L) { MemberLoc = L; } SourceLocation getBeginLoc() const LLVM_READONLY; SourceLocation getEndLoc() const LLVM_READONLY; SourceLocation getExprLoc() const LLVM_READONLY { return MemberLoc; } /// Determine whether the base of this explicit is implicit. bool isImplicitAccess() const { return getBase() && getBase()->isImplicitCXXThis(); } /// Returns true if this member expression refers to a method that /// was resolved from an overloaded set having size greater than 1. bool hadMultipleCandidates() const { return MemberExprBits.HadMultipleCandidates; } /// Sets the flag telling whether this expression refers to /// a method that was resolved from an overloaded set having size /// greater than 1. void setHadMultipleCandidates(bool V = true) { MemberExprBits.HadMultipleCandidates = V; } /// Returns true if virtual dispatch is performed. /// If the member access is fully qualified, (i.e. X::f()), virtual /// dispatching is not performed. In -fapple-kext mode qualified /// calls to virtual method will still go through the vtable. bool performsVirtualDispatch(const LangOptions &LO) const { return LO.AppleKext || !hasQualifier(); } static bool classof(const Stmt *T) { return T->getStmtClass() == MemberExprClass; } // Iterators child_range children() { return child_range(&Base, &Base+1); } const_child_range children() const { return const_child_range(&Base, &Base + 1); } }; /// CompoundLiteralExpr - [C99 6.5.2.5] /// class CompoundLiteralExpr : public Expr { /// LParenLoc - If non-null, this is the location of the left paren in a /// compound literal like "(int){4}". This can be null if this is a /// synthesized compound expression. SourceLocation LParenLoc; /// The type as written. This can be an incomplete array type, in /// which case the actual expression type will be different. /// The int part of the pair stores whether this expr is file scope. llvm::PointerIntPair TInfoAndScope; Stmt *Init; public: CompoundLiteralExpr(SourceLocation lparenloc, TypeSourceInfo *tinfo, QualType T, ExprValueKind VK, Expr *init, bool fileScope) : Expr(CompoundLiteralExprClass, T, VK, OK_Ordinary, tinfo->getType()->isDependentType(), init->isValueDependent(), (init->isInstantiationDependent() || tinfo->getType()->isInstantiationDependentType()), init->containsUnexpandedParameterPack()), LParenLoc(lparenloc), TInfoAndScope(tinfo, fileScope), Init(init) {} /// Construct an empty compound literal. explicit CompoundLiteralExpr(EmptyShell Empty) : Expr(CompoundLiteralExprClass, Empty) { } const Expr *getInitializer() const { return cast(Init); } Expr *getInitializer() { return cast(Init); } void setInitializer(Expr *E) { Init = E; } bool isFileScope() const { return TInfoAndScope.getInt(); } void setFileScope(bool FS) { TInfoAndScope.setInt(FS); } SourceLocation getLParenLoc() const { return LParenLoc; } void setLParenLoc(SourceLocation L) { LParenLoc = L; } TypeSourceInfo *getTypeSourceInfo() const { return TInfoAndScope.getPointer(); } void setTypeSourceInfo(TypeSourceInfo *tinfo) { TInfoAndScope.setPointer(tinfo); } SourceLocation getBeginLoc() const LLVM_READONLY { // FIXME: Init should never be null. if (!Init) return SourceLocation(); if (LParenLoc.isInvalid()) return Init->getBeginLoc(); return LParenLoc; } SourceLocation getEndLoc() const LLVM_READONLY { // FIXME: Init should never be null. 
if (!Init) return SourceLocation(); return Init->getEndLoc(); } static bool classof(const Stmt *T) { return T->getStmtClass() == CompoundLiteralExprClass; } // Iterators child_range children() { return child_range(&Init, &Init+1); } const_child_range children() const { return const_child_range(&Init, &Init + 1); } }; /// CastExpr - Base class for type casts, including both implicit /// casts (ImplicitCastExpr) and explicit casts that have some /// representation in the source code (ExplicitCastExpr's derived /// classes). class CastExpr : public Expr { Stmt *Op; bool CastConsistency() const; const CXXBaseSpecifier * const *path_buffer() const { return const_cast(this)->path_buffer(); } CXXBaseSpecifier **path_buffer(); protected: CastExpr(StmtClass SC, QualType ty, ExprValueKind VK, const CastKind kind, Expr *op, unsigned BasePathSize) : Expr(SC, ty, VK, OK_Ordinary, // Cast expressions are type-dependent if the type is // dependent (C++ [temp.dep.expr]p3). ty->isDependentType(), // Cast expressions are value-dependent if the type is // dependent or if the subexpression is value-dependent. ty->isDependentType() || (op && op->isValueDependent()), (ty->isInstantiationDependentType() || (op && op->isInstantiationDependent())), // An implicit cast expression doesn't (lexically) contain an // unexpanded pack, even if its target type does. ((SC != ImplicitCastExprClass && ty->containsUnexpandedParameterPack()) || (op && op->containsUnexpandedParameterPack()))), Op(op) { CastExprBits.Kind = kind; CastExprBits.PartOfExplicitCast = false; CastExprBits.BasePathSize = BasePathSize; assert((CastExprBits.BasePathSize == BasePathSize) && "BasePathSize overflow!"); assert(CastConsistency()); } /// Construct an empty cast. CastExpr(StmtClass SC, EmptyShell Empty, unsigned BasePathSize) : Expr(SC, Empty) { CastExprBits.PartOfExplicitCast = false; CastExprBits.BasePathSize = BasePathSize; assert((CastExprBits.BasePathSize == BasePathSize) && "BasePathSize overflow!"); } public: CastKind getCastKind() const { return (CastKind) CastExprBits.Kind; } void setCastKind(CastKind K) { CastExprBits.Kind = K; } static const char *getCastKindName(CastKind CK); const char *getCastKindName() const { return getCastKindName(getCastKind()); } Expr *getSubExpr() { return cast(Op); } const Expr *getSubExpr() const { return cast(Op); } void setSubExpr(Expr *E) { Op = E; } /// Retrieve the cast subexpression as it was written in the source /// code, looking through any implicit casts or other intermediate nodes /// introduced by semantic analysis. Expr *getSubExprAsWritten(); const Expr *getSubExprAsWritten() const { return const_cast(this)->getSubExprAsWritten(); } /// If this cast applies a user-defined conversion, retrieve the conversion /// function that it invokes. 
NamedDecl *getConversionFunction() const; typedef CXXBaseSpecifier **path_iterator; typedef const CXXBaseSpecifier *const *path_const_iterator; bool path_empty() const { return path_size() == 0; } unsigned path_size() const { return CastExprBits.BasePathSize; } path_iterator path_begin() { return path_buffer(); } path_iterator path_end() { return path_buffer() + path_size(); } path_const_iterator path_begin() const { return path_buffer(); } path_const_iterator path_end() const { return path_buffer() + path_size(); } const FieldDecl *getTargetUnionField() const { assert(getCastKind() == CK_ToUnion); return getTargetFieldForToUnionCast(getType(), getSubExpr()->getType()); } static const FieldDecl *getTargetFieldForToUnionCast(QualType unionType, QualType opType); static const FieldDecl *getTargetFieldForToUnionCast(const RecordDecl *RD, QualType opType); static bool classof(const Stmt *T) { return T->getStmtClass() >= firstCastExprConstant && T->getStmtClass() <= lastCastExprConstant; } // Iterators child_range children() { return child_range(&Op, &Op+1); } const_child_range children() const { return const_child_range(&Op, &Op + 1); } }; /// ImplicitCastExpr - Allows us to explicitly represent implicit type /// conversions, which have no direct representation in the original /// source code. For example: converting T[]->T*, void f()->void /// (*f)(), float->double, short->int, etc. /// /// In C, implicit casts always produce rvalues. However, in C++, an /// implicit cast whose result is being bound to a reference will be /// an lvalue or xvalue. For example: /// /// @code /// class Base { }; /// class Derived : public Base { }; /// Derived &&ref(); /// void f(Derived d) { /// Base& b = d; // initializer is an ImplicitCastExpr /// // to an lvalue of type Base /// Base&& r = ref(); // initializer is an ImplicitCastExpr /// // to an xvalue of type Base /// } /// @endcode class ImplicitCastExpr final : public CastExpr, private llvm::TrailingObjects { ImplicitCastExpr(QualType ty, CastKind kind, Expr *op, unsigned BasePathLength, ExprValueKind VK) : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, BasePathLength) { } /// Construct an empty implicit cast. explicit ImplicitCastExpr(EmptyShell Shell, unsigned PathSize) : CastExpr(ImplicitCastExprClass, Shell, PathSize) { } public: enum OnStack_t { OnStack }; ImplicitCastExpr(OnStack_t _, QualType ty, CastKind kind, Expr *op, ExprValueKind VK) : CastExpr(ImplicitCastExprClass, ty, VK, kind, op, 0) { } bool isPartOfExplicitCast() const { return CastExprBits.PartOfExplicitCast; } void setIsPartOfExplicitCast(bool PartOfExplicitCast) { CastExprBits.PartOfExplicitCast = PartOfExplicitCast; } static ImplicitCastExpr *Create(const ASTContext &Context, QualType T, CastKind Kind, Expr *Operand, const CXXCastPath *BasePath, ExprValueKind Cat); static ImplicitCastExpr *CreateEmpty(const ASTContext &Context, unsigned PathSize); SourceLocation getBeginLoc() const LLVM_READONLY { return getSubExpr()->getBeginLoc(); } SourceLocation getEndLoc() const LLVM_READONLY { return getSubExpr()->getEndLoc(); } static bool classof(const Stmt *T) { return T->getStmtClass() == ImplicitCastExprClass; } friend TrailingObjects; friend class CastExpr; }; inline Expr *Expr::IgnoreImpCasts() { Expr *e = this; while (true) if (ImplicitCastExpr *ice = dyn_cast(e)) e = ice->getSubExpr(); else if (FullExpr *fe = dyn_cast(e)) e = fe->getSubExpr(); else break; return e; } /// ExplicitCastExpr - An explicit cast written in the source /// code. 
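/// Example (illustrative sketch, not part of the original header): as defined
/// above, IgnoreImpCasts() strips both ImplicitCastExpr and FullExpr layers,
/// which is the usual first step when matching on the written form of an
/// expression. The helper name is hypothetical.
/// \code{.cpp}
///   #include "clang/AST/Expr.h"
///
///   bool isParenthesized(clang::Expr *E) {
///     E = E->IgnoreImpCasts(); // drop implicit casts / full-expr wrappers
///     return llvm::isa<clang::ParenExpr>(E);
///   }
/// \endcode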
/// /// This class is effectively an abstract class, because it provides /// the basic representation of an explicitly-written cast without /// specifying which kind of cast (C cast, functional cast, static /// cast, etc.) was written; specific derived classes represent the /// particular style of cast and its location information. /// /// Unlike implicit casts, explicit cast nodes have two different /// types: the type that was written into the source code, and the /// actual type of the expression as determined by semantic /// analysis. These types may differ slightly. For example, in C++ one /// can cast to a reference type, which indicates that the resulting /// expression will be an lvalue or xvalue. The reference type, however, /// will not be used as the type of the expression. class ExplicitCastExpr : public CastExpr { /// TInfo - Source type info for the (written) type /// this expression is casting to. TypeSourceInfo *TInfo; protected: ExplicitCastExpr(StmtClass SC, QualType exprTy, ExprValueKind VK, CastKind kind, Expr *op, unsigned PathSize, TypeSourceInfo *writtenTy) : CastExpr(SC, exprTy, VK, kind, op, PathSize), TInfo(writtenTy) {} /// Construct an empty explicit cast. ExplicitCastExpr(StmtClass SC, EmptyShell Shell, unsigned PathSize) : CastExpr(SC, Shell, PathSize) { } public: /// getTypeInfoAsWritten - Returns the type source info for the type /// that this expression is casting to. TypeSourceInfo *getTypeInfoAsWritten() const { return TInfo; } void setTypeInfoAsWritten(TypeSourceInfo *writtenTy) { TInfo = writtenTy; } /// getTypeAsWritten - Returns the type that this expression is /// casting to, as written in the source code. QualType getTypeAsWritten() const { return TInfo->getType(); } static bool classof(const Stmt *T) { return T->getStmtClass() >= firstExplicitCastExprConstant && T->getStmtClass() <= lastExplicitCastExprConstant; } }; /// CStyleCastExpr - An explicit cast in C (C99 6.5.4) or a C-style /// cast in C++ (C++ [expr.cast]), which uses the syntax /// (Type)expr. For example: @c (int)f. class CStyleCastExpr final : public ExplicitCastExpr, private llvm::TrailingObjects { SourceLocation LPLoc; // the location of the left paren SourceLocation RPLoc; // the location of the right paren CStyleCastExpr(QualType exprTy, ExprValueKind vk, CastKind kind, Expr *op, unsigned PathSize, TypeSourceInfo *writtenTy, SourceLocation l, SourceLocation r) : ExplicitCastExpr(CStyleCastExprClass, exprTy, vk, kind, op, PathSize, writtenTy), LPLoc(l), RPLoc(r) {} /// Construct an empty C-style explicit cast. explicit CStyleCastExpr(EmptyShell Shell, unsigned PathSize) : ExplicitCastExpr(CStyleCastExprClass, Shell, PathSize) { } public: static CStyleCastExpr *Create(const ASTContext &Context, QualType T, ExprValueKind VK, CastKind K, Expr *Op, const CXXCastPath *BasePath, TypeSourceInfo *WrittenTy, SourceLocation L, SourceLocation R); static CStyleCastExpr *CreateEmpty(const ASTContext &Context, unsigned PathSize); SourceLocation getLParenLoc() const { return LPLoc; } void setLParenLoc(SourceLocation L) { LPLoc = L; } SourceLocation getRParenLoc() const { return RPLoc; } void setRParenLoc(SourceLocation L) { RPLoc = L; } SourceLocation getBeginLoc() const LLVM_READONLY { return LPLoc; } SourceLocation getEndLoc() const LLVM_READONLY { return getSubExpr()->getEndLoc(); } static bool classof(const Stmt *T) { return T->getStmtClass() == CStyleCastExprClass; } friend TrailingObjects; friend class CastExpr; }; /// A builtin binary operation expression such as "x + y" or "x <= y". 
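/// Example (illustrative sketch, not part of the original header): observing
/// the written-type vs. semantic-type distinction documented above, e.g. for
/// casts to reference types. The helper name is hypothetical.
/// \code{.cpp}
///   #include "clang/AST/Expr.h"
///   #include "llvm/Support/raw_ostream.h"
///
///   void compareCastTypes(const clang::ExplicitCastExpr *CE) {
///     clang::QualType Written = CE->getTypeAsWritten(); // as spelled
///     clang::QualType Actual = CE->getType();           // semantic type
///     if (Written != Actual)
///       llvm::outs() << "written and semantic types differ\n";
///   }
/// \endcode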
/// /// This expression node kind describes a builtin binary operation, /// such as "x + y" for integer values "x" and "y". The operands will /// already have been converted to appropriate types (e.g., by /// performing promotions or conversions). /// /// In C++, where operators may be overloaded, a different kind of /// expression node (CXXOperatorCallExpr) is used to express the /// invocation of an overloaded operator with operator syntax. Within /// a C++ template, whether BinaryOperator or CXXOperatorCallExpr is /// used to store an expression "x + y" depends on the subexpressions /// for x and y. If neither x or y is type-dependent, and the "+" /// operator resolves to a built-in operation, BinaryOperator will be /// used to express the computation (x and y may still be /// value-dependent). If either x or y is type-dependent, or if the /// "+" resolves to an overloaded operator, CXXOperatorCallExpr will /// be used to express the computation. class BinaryOperator : public Expr { enum { LHS, RHS, END_EXPR }; Stmt *SubExprs[END_EXPR]; public: typedef BinaryOperatorKind Opcode; BinaryOperator(Expr *lhs, Expr *rhs, Opcode opc, QualType ResTy, ExprValueKind VK, ExprObjectKind OK, SourceLocation opLoc, FPOptions FPFeatures) : Expr(BinaryOperatorClass, ResTy, VK, OK, lhs->isTypeDependent() || rhs->isTypeDependent(), lhs->isValueDependent() || rhs->isValueDependent(), (lhs->isInstantiationDependent() || rhs->isInstantiationDependent()), (lhs->containsUnexpandedParameterPack() || rhs->containsUnexpandedParameterPack())) { BinaryOperatorBits.Opc = opc; BinaryOperatorBits.FPFeatures = FPFeatures.getInt(); BinaryOperatorBits.OpLoc = opLoc; SubExprs[LHS] = lhs; SubExprs[RHS] = rhs; assert(!isCompoundAssignmentOp() && "Use CompoundAssignOperator for compound assignments"); } /// Construct an empty binary operator. explicit BinaryOperator(EmptyShell Empty) : Expr(BinaryOperatorClass, Empty) { BinaryOperatorBits.Opc = BO_Comma; } SourceLocation getExprLoc() const { return getOperatorLoc(); } SourceLocation getOperatorLoc() const { return BinaryOperatorBits.OpLoc; } void setOperatorLoc(SourceLocation L) { BinaryOperatorBits.OpLoc = L; } Opcode getOpcode() const { return static_cast(BinaryOperatorBits.Opc); } void setOpcode(Opcode Opc) { BinaryOperatorBits.Opc = Opc; } Expr *getLHS() const { return cast(SubExprs[LHS]); } void setLHS(Expr *E) { SubExprs[LHS] = E; } Expr *getRHS() const { return cast(SubExprs[RHS]); } void setRHS(Expr *E) { SubExprs[RHS] = E; } SourceLocation getBeginLoc() const LLVM_READONLY { return getLHS()->getBeginLoc(); } SourceLocation getEndLoc() const LLVM_READONLY { return getRHS()->getEndLoc(); } /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it /// corresponds to, e.g. "<<=". static StringRef getOpcodeStr(Opcode Op); StringRef getOpcodeStr() const { return getOpcodeStr(getOpcode()); } /// Retrieve the binary opcode that corresponds to the given /// overloaded operator. static Opcode getOverloadedOpcode(OverloadedOperatorKind OO); /// Retrieve the overloaded operator kind that corresponds to /// the given binary opcode. static OverloadedOperatorKind getOverloadedOperator(Opcode Opc); /// predicates to categorize the respective opcodes. 
static bool isPtrMemOp(Opcode Opc) { return Opc == BO_PtrMemD || Opc == BO_PtrMemI; } bool isPtrMemOp() const { return isPtrMemOp(getOpcode()); } static bool isMultiplicativeOp(Opcode Opc) { return Opc >= BO_Mul && Opc <= BO_Rem; } bool isMultiplicativeOp() const { return isMultiplicativeOp(getOpcode()); } static bool isAdditiveOp(Opcode Opc) { return Opc == BO_Add || Opc==BO_Sub; } bool isAdditiveOp() const { return isAdditiveOp(getOpcode()); } static bool isShiftOp(Opcode Opc) { return Opc == BO_Shl || Opc == BO_Shr; } bool isShiftOp() const { return isShiftOp(getOpcode()); } static bool isBitwiseOp(Opcode Opc) { return Opc >= BO_And && Opc <= BO_Or; } bool isBitwiseOp() const { return isBitwiseOp(getOpcode()); } static bool isRelationalOp(Opcode Opc) { return Opc >= BO_LT && Opc<=BO_GE; } bool isRelationalOp() const { return isRelationalOp(getOpcode()); } static bool isEqualityOp(Opcode Opc) { return Opc == BO_EQ || Opc == BO_NE; } bool isEqualityOp() const { return isEqualityOp(getOpcode()); } static bool isComparisonOp(Opcode Opc) { return Opc >= BO_Cmp && Opc<=BO_NE; } bool isComparisonOp() const { return isComparisonOp(getOpcode()); } static Opcode negateComparisonOp(Opcode Opc) { switch (Opc) { default: llvm_unreachable("Not a comparison operator."); case BO_LT: return BO_GE; case BO_GT: return BO_LE; case BO_LE: return BO_GT; case BO_GE: return BO_LT; case BO_EQ: return BO_NE; case BO_NE: return BO_EQ; } } static Opcode reverseComparisonOp(Opcode Opc) { switch (Opc) { default: llvm_unreachable("Not a comparison operator."); case BO_LT: return BO_GT; case BO_GT: return BO_LT; case BO_LE: return BO_GE; case BO_GE: return BO_LE; case BO_EQ: case BO_NE: return Opc; } } static bool isLogicalOp(Opcode Opc) { return Opc == BO_LAnd || Opc==BO_LOr; } bool isLogicalOp() const { return isLogicalOp(getOpcode()); } static bool isAssignmentOp(Opcode Opc) { return Opc >= BO_Assign && Opc <= BO_OrAssign; } bool isAssignmentOp() const { return isAssignmentOp(getOpcode()); } static bool isCompoundAssignmentOp(Opcode Opc) { return Opc > BO_Assign && Opc <= BO_OrAssign; } bool isCompoundAssignmentOp() const { return isCompoundAssignmentOp(getOpcode()); } static Opcode getOpForCompoundAssignment(Opcode Opc) { assert(isCompoundAssignmentOp(Opc)); if (Opc >= BO_AndAssign) return Opcode(unsigned(Opc) - BO_AndAssign + BO_And); else return Opcode(unsigned(Opc) - BO_MulAssign + BO_Mul); } static bool isShiftAssignOp(Opcode Opc) { return Opc == BO_ShlAssign || Opc == BO_ShrAssign; } bool isShiftAssignOp() const { return isShiftAssignOp(getOpcode()); } // Return true if a binary operator using the specified opcode and operands // would match the 'p = (i8*)nullptr + n' idiom for casting a pointer-sized // integer to a pointer. static bool isNullPointerArithmeticExtension(ASTContext &Ctx, Opcode Opc, Expr *LHS, Expr *RHS); static bool classof(const Stmt *S) { return S->getStmtClass() >= firstBinaryOperatorConstant && S->getStmtClass() <= lastBinaryOperatorConstant; } // Iterators child_range children() { return child_range(&SubExprs[0], &SubExprs[0]+END_EXPR); } const_child_range children() const { return const_child_range(&SubExprs[0], &SubExprs[0] + END_EXPR); } // Set the FP contractability status of this operator. Only meaningful for // operations on floating point types. void setFPFeatures(FPOptions F) { BinaryOperatorBits.FPFeatures = F.getInt(); } FPOptions getFPFeatures() const { return FPOptions(BinaryOperatorBits.FPFeatures); } // Get the FP contractability status of this operator. 
Only meaningful for // operations on floating point types. bool isFPContractableWithinStatement() const { return getFPFeatures().allowFPContractWithinStatement(); } // Get the FENV_ACCESS status of this operator. Only meaningful for // operations on floating point types. bool isFEnvAccessOn() const { return getFPFeatures().allowFEnvAccess(); } protected: BinaryOperator(Expr *lhs, Expr *rhs, Opcode opc, QualType ResTy, ExprValueKind VK, ExprObjectKind OK, SourceLocation opLoc, FPOptions FPFeatures, bool dead2) : Expr(CompoundAssignOperatorClass, ResTy, VK, OK, lhs->isTypeDependent() || rhs->isTypeDependent(), lhs->isValueDependent() || rhs->isValueDependent(), (lhs->isInstantiationDependent() || rhs->isInstantiationDependent()), (lhs->containsUnexpandedParameterPack() || rhs->containsUnexpandedParameterPack())) { BinaryOperatorBits.Opc = opc; BinaryOperatorBits.FPFeatures = FPFeatures.getInt(); BinaryOperatorBits.OpLoc = opLoc; SubExprs[LHS] = lhs; SubExprs[RHS] = rhs; } BinaryOperator(StmtClass SC, EmptyShell Empty) : Expr(SC, Empty) { BinaryOperatorBits.Opc = BO_MulAssign; } }; /// CompoundAssignOperator - For compound assignments (e.g. +=), we keep /// track of the type the operation is performed in. Due to the semantics of /// these operators, the operands are promoted, the arithmetic performed, an /// implicit conversion back to the result type done, then the assignment takes /// place. This captures the intermediate type which the computation is done /// in. class CompoundAssignOperator : public BinaryOperator { QualType ComputationLHSType; QualType ComputationResultType; public: CompoundAssignOperator(Expr *lhs, Expr *rhs, Opcode opc, QualType ResType, ExprValueKind VK, ExprObjectKind OK, QualType CompLHSType, QualType CompResultType, SourceLocation OpLoc, FPOptions FPFeatures) : BinaryOperator(lhs, rhs, opc, ResType, VK, OK, OpLoc, FPFeatures, true), ComputationLHSType(CompLHSType), ComputationResultType(CompResultType) { assert(isCompoundAssignmentOp() && "Only should be used for compound assignments"); } /// Build an empty compound assignment operator expression. explicit CompoundAssignOperator(EmptyShell Empty) : BinaryOperator(CompoundAssignOperatorClass, Empty) { } // The two computation types are the type the LHS is converted // to for the computation and the type of the result; the two are // distinct in a few cases (specifically, int+=ptr and ptr-=ptr). QualType getComputationLHSType() const { return ComputationLHSType; } void setComputationLHSType(QualType T) { ComputationLHSType = T; } QualType getComputationResultType() const { return ComputationResultType; } void setComputationResultType(QualType T) { ComputationResultType = T; } static bool classof(const Stmt *S) { return S->getStmtClass() == CompoundAssignOperatorClass; } }; /// AbstractConditionalOperator - An abstract base class for /// ConditionalOperator and BinaryConditionalOperator. class AbstractConditionalOperator : public Expr { SourceLocation QuestionLoc, ColonLoc; friend class ASTStmtReader; protected: AbstractConditionalOperator(StmtClass SC, QualType T, ExprValueKind VK, ExprObjectKind OK, bool TD, bool VD, bool ID, bool ContainsUnexpandedParameterPack, SourceLocation qloc, SourceLocation cloc) : Expr(SC, T, VK, OK, TD, VD, ID, ContainsUnexpandedParameterPack), QuestionLoc(qloc), ColonLoc(cloc) {} AbstractConditionalOperator(StmtClass SC, EmptyShell Empty) : Expr(SC, Empty) { } public: // getCond - Return the expression representing the condition for // the ?: operator. 
  Expr *getCond() const;

  // getTrueExpr - Return the subexpression representing the value of
  // the expression if the condition evaluates to true.
  Expr *getTrueExpr() const;

  // getFalseExpr - Return the subexpression representing the value of
  // the expression if the condition evaluates to false. This is
  // the same as getRHS.
  Expr *getFalseExpr() const;

  SourceLocation getQuestionLoc() const { return QuestionLoc; }
  SourceLocation getColonLoc() const { return ColonLoc; }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == ConditionalOperatorClass ||
           T->getStmtClass() == BinaryConditionalOperatorClass;
  }
};

/// ConditionalOperator - The ?: ternary operator. The GNU "missing
/// middle" extension is a BinaryConditionalOperator.
class ConditionalOperator : public AbstractConditionalOperator {
  enum { COND, LHS, RHS, END_EXPR };
  Stmt* SubExprs[END_EXPR]; // Left/Middle/Right hand sides.

  friend class ASTStmtReader;
public:
  ConditionalOperator(Expr *cond, SourceLocation QLoc, Expr *lhs,
                      SourceLocation CLoc, Expr *rhs, QualType t,
                      ExprValueKind VK, ExprObjectKind OK)
    : AbstractConditionalOperator(ConditionalOperatorClass, t, VK, OK,
           // FIXME: the type of the conditional operator doesn't
           // depend on the type of the conditional, but the standard
           // seems to imply that it could. File a bug!
           (lhs->isTypeDependent() || rhs->isTypeDependent()),
           (cond->isValueDependent() || lhs->isValueDependent() ||
            rhs->isValueDependent()),
           (cond->isInstantiationDependent() ||
            lhs->isInstantiationDependent() ||
            rhs->isInstantiationDependent()),
           (cond->containsUnexpandedParameterPack() ||
            lhs->containsUnexpandedParameterPack() ||
            rhs->containsUnexpandedParameterPack()),
                                  QLoc, CLoc) {
    SubExprs[COND] = cond;
    SubExprs[LHS] = lhs;
    SubExprs[RHS] = rhs;
  }

  /// Build an empty conditional operator.
  explicit ConditionalOperator(EmptyShell Empty)
    : AbstractConditionalOperator(ConditionalOperatorClass, Empty) { }

  // getCond - Return the expression representing the condition for
  // the ?: operator.
  Expr *getCond() const { return cast<Expr>(SubExprs[COND]); }

  // getTrueExpr - Return the subexpression representing the value of
  // the expression if the condition evaluates to true.
  Expr *getTrueExpr() const { return cast<Expr>(SubExprs[LHS]); }

  // getFalseExpr - Return the subexpression representing the value of
  // the expression if the condition evaluates to false. This is
  // the same as getRHS.
  Expr *getFalseExpr() const { return cast<Expr>(SubExprs[RHS]); }

  Expr *getLHS() const { return cast<Expr>(SubExprs[LHS]); }
  Expr *getRHS() const { return cast<Expr>(SubExprs[RHS]); }

  SourceLocation getBeginLoc() const LLVM_READONLY {
    return getCond()->getBeginLoc();
  }
  SourceLocation getEndLoc() const LLVM_READONLY {
    return getRHS()->getEndLoc();
  }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == ConditionalOperatorClass;
  }

  // Iterators
  child_range children() {
    return child_range(&SubExprs[0], &SubExprs[0]+END_EXPR);
  }
  const_child_range children() const {
    return const_child_range(&SubExprs[0], &SubExprs[0] + END_EXPR);
  }
};

/// BinaryConditionalOperator - The GNU extension to the conditional
/// operator which allows the middle operand to be omitted.
///
/// This is a different expression kind on the assumption that almost
/// every client ends up needing to know that these are different.
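/// For illustration (example not in the original header):
/// @code
///   x = f() ?: g();  // f() is evaluated once; if nonzero it is the result
/// @endcode
/// The shared "f()" is the common expression; it is rebound through an
/// OpaqueValueExpr so that it is not evaluated twice.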
class BinaryConditionalOperator : public AbstractConditionalOperator {
  enum { COMMON, COND, LHS, RHS, NUM_SUBEXPRS };

  /// - the common condition/left-hand-side expression, which will be
  ///   evaluated as the opaque value
  /// - the condition, expressed in terms of the opaque value
  /// - the left-hand-side, expressed in terms of the opaque value
  /// - the right-hand-side
  Stmt *SubExprs[NUM_SUBEXPRS];
  OpaqueValueExpr *OpaqueValue;

  friend class ASTStmtReader;
public:
  BinaryConditionalOperator(Expr *common, OpaqueValueExpr *opaqueValue,
                            Expr *cond, Expr *lhs, Expr *rhs,
                            SourceLocation qloc, SourceLocation cloc,
                            QualType t, ExprValueKind VK, ExprObjectKind OK)
    : AbstractConditionalOperator(BinaryConditionalOperatorClass, t, VK, OK,
           (common->isTypeDependent() || rhs->isTypeDependent()),
           (common->isValueDependent() || rhs->isValueDependent()),
           (common->isInstantiationDependent() ||
            rhs->isInstantiationDependent()),
           (common->containsUnexpandedParameterPack() ||
            rhs->containsUnexpandedParameterPack()),
                                  qloc, cloc),
      OpaqueValue(opaqueValue) {
    SubExprs[COMMON] = common;
    SubExprs[COND] = cond;
    SubExprs[LHS] = lhs;
    SubExprs[RHS] = rhs;
    assert(OpaqueValue->getSourceExpr() == common && "Wrong opaque value");
  }

  /// Build an empty conditional operator.
  explicit BinaryConditionalOperator(EmptyShell Empty)
    : AbstractConditionalOperator(BinaryConditionalOperatorClass, Empty) { }

  /// getCommon - Return the common expression, written to the
  /// left of the condition. The opaque value will be bound to the
  /// result of this expression.
  Expr *getCommon() const { return cast<Expr>(SubExprs[COMMON]); }

  /// getOpaqueValue - Return the opaque value placeholder.
  OpaqueValueExpr *getOpaqueValue() const { return OpaqueValue; }

  /// getCond - Return the condition expression; this is defined
  /// in terms of the opaque value.
  Expr *getCond() const { return cast<Expr>(SubExprs[COND]); }

  /// getTrueExpr - Return the subexpression which will be
  /// evaluated if the condition evaluates to true; this is defined
  /// in terms of the opaque value.
  Expr *getTrueExpr() const { return cast<Expr>(SubExprs[LHS]); }

  /// getFalseExpr - Return the subexpression which will be
  /// evaluated if the condition evaluates to false; this is
  /// defined in terms of the opaque value.
  Expr *getFalseExpr() const { return cast<Expr>(SubExprs[RHS]); }

  SourceLocation getBeginLoc() const LLVM_READONLY {
    return getCommon()->getBeginLoc();
  }
  SourceLocation getEndLoc() const LLVM_READONLY {
    return getFalseExpr()->getEndLoc();
  }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == BinaryConditionalOperatorClass;
  }

  // Iterators
  child_range children() {
    return child_range(SubExprs, SubExprs + NUM_SUBEXPRS);
  }
  const_child_range children() const {
    return const_child_range(SubExprs, SubExprs + NUM_SUBEXPRS);
  }
};

inline Expr *AbstractConditionalOperator::getCond() const {
  if (const ConditionalOperator *co = dyn_cast<ConditionalOperator>(this))
    return co->getCond();
  return cast<BinaryConditionalOperator>(this)->getCond();
}

inline Expr *AbstractConditionalOperator::getTrueExpr() const {
  if (const ConditionalOperator *co = dyn_cast<ConditionalOperator>(this))
    return co->getTrueExpr();
  return cast<BinaryConditionalOperator>(this)->getTrueExpr();
}

inline Expr *AbstractConditionalOperator::getFalseExpr() const {
  if (const ConditionalOperator *co = dyn_cast<ConditionalOperator>(this))
    return co->getFalseExpr();
  return cast<BinaryConditionalOperator>(this)->getFalseExpr();
}

/// AddrLabelExpr - The GNU address of label extension, representing &&label.
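/// For example (illustrative, GNU C):
/// @code
///   void *p = &&done;  // AddrLabelExpr referencing the label 'done'
///   goto *p;           // indirect goto through the taken address
///  done: ;
/// @endcode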
class AddrLabelExpr : public Expr {
  SourceLocation AmpAmpLoc, LabelLoc;
  LabelDecl *Label;

public:
  AddrLabelExpr(SourceLocation AALoc, SourceLocation LLoc, LabelDecl *L,
                QualType t)
    : Expr(AddrLabelExprClass, t, VK_RValue, OK_Ordinary, false, false, false,
           false),
      AmpAmpLoc(AALoc), LabelLoc(LLoc), Label(L) {}

  /// Build an empty address of a label expression.
  explicit AddrLabelExpr(EmptyShell Empty)
    : Expr(AddrLabelExprClass, Empty) { }

  SourceLocation getAmpAmpLoc() const { return AmpAmpLoc; }
  void setAmpAmpLoc(SourceLocation L) { AmpAmpLoc = L; }
  SourceLocation getLabelLoc() const { return LabelLoc; }
  void setLabelLoc(SourceLocation L) { LabelLoc = L; }

  SourceLocation getBeginLoc() const LLVM_READONLY { return AmpAmpLoc; }
  SourceLocation getEndLoc() const LLVM_READONLY { return LabelLoc; }

  LabelDecl *getLabel() const { return Label; }
  void setLabel(LabelDecl *L) { Label = L; }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == AddrLabelExprClass;
  }

  // Iterators
  child_range children() {
    return child_range(child_iterator(), child_iterator());
  }
  const_child_range children() const {
    return const_child_range(const_child_iterator(), const_child_iterator());
  }
};

/// StmtExpr - This is the GNU Statement Expression extension: ({int X=4; X;}).
/// The StmtExpr contains a single CompoundStmt node, which it evaluates and
/// takes the value of the last subexpression.
///
/// A StmtExpr is always an r-value; values "returned" out of a
/// StmtExpr will be copied.
class StmtExpr : public Expr {
  Stmt *SubStmt;
  SourceLocation LParenLoc, RParenLoc;
public:
  // FIXME: Does type-dependence need to be computed differently?
  // FIXME: Do we need to compute instantiation-dependence for
  // statements? (ugh!)
  StmtExpr(CompoundStmt *substmt, QualType T,
           SourceLocation lp, SourceLocation rp) :
    Expr(StmtExprClass, T, VK_RValue, OK_Ordinary,
         T->isDependentType(), false, false, false),
    SubStmt(substmt), LParenLoc(lp), RParenLoc(rp) { }

  /// Build an empty statement expression.
  explicit StmtExpr(EmptyShell Empty) : Expr(StmtExprClass, Empty) { }

  CompoundStmt *getSubStmt() { return cast<CompoundStmt>(SubStmt); }
  const CompoundStmt *getSubStmt() const { return cast<CompoundStmt>(SubStmt); }
  void setSubStmt(CompoundStmt *S) { SubStmt = S; }

  SourceLocation getBeginLoc() const LLVM_READONLY { return LParenLoc; }
  SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; }

  SourceLocation getLParenLoc() const { return LParenLoc; }
  void setLParenLoc(SourceLocation L) { LParenLoc = L; }
  SourceLocation getRParenLoc() const { return RParenLoc; }
  void setRParenLoc(SourceLocation L) { RParenLoc = L; }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == StmtExprClass;
  }

  // Iterators
  child_range children() { return child_range(&SubStmt, &SubStmt+1); }
  const_child_range children() const {
    return const_child_range(&SubStmt, &SubStmt + 1);
  }
};

/// ShuffleVectorExpr - clang-specific built-in function
/// __builtin_shufflevector.
/// This AST node represents an operator that does a constant
/// shuffle, similar to LLVM's shufflevector instruction. It takes
/// two vectors and a variable number of constant indices,
/// and returns the appropriately shuffled vector.
class ShuffleVectorExpr : public Expr {
  SourceLocation BuiltinLoc, RParenLoc;

  // SubExprs - the list of values passed to the __builtin_shufflevector
  // function. The first two are vectors, and the rest are constant
  // indices. The number of values in this list is always
  // 2+the number of indices in the vector type.
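  // For instance (illustrative), with two 4-element vectors a and b,
  //   __builtin_shufflevector(a, b, 0, 1, 4, 5)
  // stores a, b and the four index expressions, so the list holds six
  // values; indices 4 and 5 select the first two elements of b.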
  Stmt **SubExprs;
  unsigned NumExprs;

public:
  ShuffleVectorExpr(const ASTContext &C, ArrayRef<Expr*> args, QualType Type,
                    SourceLocation BLoc, SourceLocation RP);

  /// Build an empty vector-shuffle expression.
  explicit ShuffleVectorExpr(EmptyShell Empty)
    : Expr(ShuffleVectorExprClass, Empty), SubExprs(nullptr) { }

  SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
  void setBuiltinLoc(SourceLocation L) { BuiltinLoc = L; }

  SourceLocation getRParenLoc() const { return RParenLoc; }
  void setRParenLoc(SourceLocation L) { RParenLoc = L; }

  SourceLocation getBeginLoc() const LLVM_READONLY { return BuiltinLoc; }
  SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == ShuffleVectorExprClass;
  }

  /// getNumSubExprs - Return the size of the SubExprs array. This includes the
  /// constant expression, the actual arguments passed in, and the function
  /// pointers.
  unsigned getNumSubExprs() const { return NumExprs; }

  /// Retrieve the array of expressions.
  Expr **getSubExprs() { return reinterpret_cast<Expr **>(SubExprs); }

  /// getExpr - Return the Expr at the specified index.
  Expr *getExpr(unsigned Index) {
    assert((Index < NumExprs) && "Arg access out of range!");
    return cast<Expr>(SubExprs[Index]);
  }
  const Expr *getExpr(unsigned Index) const {
    assert((Index < NumExprs) && "Arg access out of range!");
    return cast<Expr>(SubExprs[Index]);
  }

  void setExprs(const ASTContext &C, ArrayRef<Expr *> Exprs);

  llvm::APSInt getShuffleMaskIdx(const ASTContext &Ctx, unsigned N) const {
    assert((N < NumExprs - 2) && "Shuffle idx out of range!");
    return getExpr(N+2)->EvaluateKnownConstInt(Ctx);
  }

  // Iterators
  child_range children() {
    return child_range(&SubExprs[0], &SubExprs[0]+NumExprs);
  }
  const_child_range children() const {
    return const_child_range(&SubExprs[0], &SubExprs[0] + NumExprs);
  }
};

/// ConvertVectorExpr - Clang builtin function __builtin_convertvector
/// This AST node provides support for converting a vector type to another
/// vector type of the same arity.
class ConvertVectorExpr : public Expr {
private:
  Stmt *SrcExpr;
  TypeSourceInfo *TInfo;
  SourceLocation BuiltinLoc, RParenLoc;

  friend class ASTReader;
  friend class ASTStmtReader;
  explicit ConvertVectorExpr(EmptyShell Empty)
    : Expr(ConvertVectorExprClass, Empty) {}

public:
  ConvertVectorExpr(Expr* SrcExpr, TypeSourceInfo *TI, QualType DstType,
                    ExprValueKind VK, ExprObjectKind OK,
                    SourceLocation BuiltinLoc, SourceLocation RParenLoc)
    : Expr(ConvertVectorExprClass, DstType, VK, OK,
           DstType->isDependentType(),
           DstType->isDependentType() || SrcExpr->isValueDependent(),
           (DstType->isInstantiationDependentType() ||
            SrcExpr->isInstantiationDependent()),
           (DstType->containsUnexpandedParameterPack() ||
            SrcExpr->containsUnexpandedParameterPack())),
      SrcExpr(SrcExpr), TInfo(TI), BuiltinLoc(BuiltinLoc),
      RParenLoc(RParenLoc) {}

  /// getSrcExpr - Return the Expr to be converted.
  Expr *getSrcExpr() const { return cast<Expr>(SrcExpr); }

  /// getTypeSourceInfo - Return the destination type.
  TypeSourceInfo *getTypeSourceInfo() const { return TInfo; }
  void setTypeSourceInfo(TypeSourceInfo *ti) { TInfo = ti; }

  /// getBuiltinLoc - Return the location of the __builtin_convertvector token.
  SourceLocation getBuiltinLoc() const { return BuiltinLoc; }

  /// getRParenLoc - Return the location of final right parenthesis.
  SourceLocation getRParenLoc() const { return RParenLoc; }

  SourceLocation getBeginLoc() const LLVM_READONLY { return BuiltinLoc; }
  SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == ConvertVectorExprClass;
  }

  // Iterators
  child_range children() { return child_range(&SrcExpr, &SrcExpr+1); }
  const_child_range children() const {
    return const_child_range(&SrcExpr, &SrcExpr + 1);
  }
};

/// ChooseExpr - GNU built-in function __builtin_choose_expr.
/// This AST node is similar to the conditional operator (?:) in C, with
/// the following exceptions:
/// - the test expression must be an integer constant expression.
/// - the expression returned acts like the chosen subexpression in every
///   visible way: the type is the same as that of the chosen subexpression,
///   and all predicates (whether it's an l-value, whether it's an integer
///   constant expression, etc.) return the same result as for the chosen
///   sub-expression.
class ChooseExpr : public Expr {
  enum { COND, LHS, RHS, END_EXPR };
  Stmt* SubExprs[END_EXPR]; // Left/Middle/Right hand sides.
  SourceLocation BuiltinLoc, RParenLoc;
  bool CondIsTrue;
public:
  ChooseExpr(SourceLocation BLoc, Expr *cond, Expr *lhs, Expr *rhs,
             QualType t, ExprValueKind VK, ExprObjectKind OK,
             SourceLocation RP, bool condIsTrue,
             bool TypeDependent, bool ValueDependent)
    : Expr(ChooseExprClass, t, VK, OK, TypeDependent, ValueDependent,
           (cond->isInstantiationDependent() ||
            lhs->isInstantiationDependent() ||
            rhs->isInstantiationDependent()),
           (cond->containsUnexpandedParameterPack() ||
            lhs->containsUnexpandedParameterPack() ||
            rhs->containsUnexpandedParameterPack())),
      BuiltinLoc(BLoc), RParenLoc(RP), CondIsTrue(condIsTrue) {
    SubExprs[COND] = cond;
    SubExprs[LHS] = lhs;
    SubExprs[RHS] = rhs;
  }

  /// Build an empty __builtin_choose_expr.
  explicit ChooseExpr(EmptyShell Empty) : Expr(ChooseExprClass, Empty) { }

  /// isConditionTrue - Return whether the condition is true (i.e. not
  /// equal to zero).
  bool isConditionTrue() const {
    assert(!isConditionDependent() &&
           "Dependent condition isn't true or false");
    return CondIsTrue;
  }
  void setIsConditionTrue(bool isTrue) { CondIsTrue = isTrue; }

  bool isConditionDependent() const {
    return getCond()->isTypeDependent() || getCond()->isValueDependent();
  }

  /// getChosenSubExpr - Return the subexpression chosen according to the
  /// condition.
  Expr *getChosenSubExpr() const {
    return isConditionTrue() ?
getLHS() : getRHS();
  }

  Expr *getCond() const { return cast<Expr>(SubExprs[COND]); }
  void setCond(Expr *E) { SubExprs[COND] = E; }
  Expr *getLHS() const { return cast<Expr>(SubExprs[LHS]); }
  void setLHS(Expr *E) { SubExprs[LHS] = E; }
  Expr *getRHS() const { return cast<Expr>(SubExprs[RHS]); }
  void setRHS(Expr *E) { SubExprs[RHS] = E; }

  SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
  void setBuiltinLoc(SourceLocation L) { BuiltinLoc = L; }

  SourceLocation getRParenLoc() const { return RParenLoc; }
  void setRParenLoc(SourceLocation L) { RParenLoc = L; }

  SourceLocation getBeginLoc() const LLVM_READONLY { return BuiltinLoc; }
  SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == ChooseExprClass;
  }

  // Iterators
  child_range children() {
    return child_range(&SubExprs[0], &SubExprs[0]+END_EXPR);
  }
  const_child_range children() const {
    return const_child_range(&SubExprs[0], &SubExprs[0] + END_EXPR);
  }
};

/// GNUNullExpr - Implements the GNU __null extension, which is a name
/// for a null pointer constant that has integral type (e.g., int or
/// long) and is the same size and alignment as a pointer. The __null
/// extension is typically only used by system headers, which define
/// NULL as __null in C++ rather than using 0 (which is an integer
/// that may not match the size of a pointer).
class GNUNullExpr : public Expr {
  /// TokenLoc - The location of the __null keyword.
  SourceLocation TokenLoc;

public:
  GNUNullExpr(QualType Ty, SourceLocation Loc)
    : Expr(GNUNullExprClass, Ty, VK_RValue, OK_Ordinary, false, false, false,
           false),
      TokenLoc(Loc) { }

  /// Build an empty GNU __null expression.
  explicit GNUNullExpr(EmptyShell Empty) : Expr(GNUNullExprClass, Empty) { }

  /// getTokenLocation - The location of the __null token.
  SourceLocation getTokenLocation() const { return TokenLoc; }
  void setTokenLocation(SourceLocation L) { TokenLoc = L; }

  SourceLocation getBeginLoc() const LLVM_READONLY { return TokenLoc; }
  SourceLocation getEndLoc() const LLVM_READONLY { return TokenLoc; }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == GNUNullExprClass;
  }

  // Iterators
  child_range children() {
    return child_range(child_iterator(), child_iterator());
  }
  const_child_range children() const {
    return const_child_range(const_child_iterator(), const_child_iterator());
  }
};

/// Represents a call to the builtin function \c __builtin_va_arg.
class VAArgExpr : public Expr {
  Stmt *Val;
  llvm::PointerIntPair<TypeSourceInfo *, 1, bool> TInfo;
  SourceLocation BuiltinLoc, RParenLoc;

public:
  VAArgExpr(SourceLocation BLoc, Expr *e, TypeSourceInfo *TInfo,
            SourceLocation RPLoc, QualType t, bool IsMS)
      : Expr(VAArgExprClass, t, VK_RValue, OK_Ordinary, t->isDependentType(),
             false, (TInfo->getType()->isInstantiationDependentType() ||
                     e->isInstantiationDependent()),
             (TInfo->getType()->containsUnexpandedParameterPack() ||
              e->containsUnexpandedParameterPack())),
        Val(e), TInfo(TInfo, IsMS), BuiltinLoc(BLoc), RParenLoc(RPLoc) {}

  /// Create an empty __builtin_va_arg expression.
  explicit VAArgExpr(EmptyShell Empty)
      : Expr(VAArgExprClass, Empty), Val(nullptr), TInfo(nullptr, false) {}

  const Expr *getSubExpr() const { return cast<Expr>(Val); }
  Expr *getSubExpr() { return cast<Expr>(Val); }
  void setSubExpr(Expr *E) { Val = E; }

  /// Returns whether this is really a Win64 ABI va_arg expression.
  bool isMicrosoftABI() const { return TInfo.getInt(); }
  void setIsMicrosoftABI(bool IsMS) { TInfo.setInt(IsMS); }

  TypeSourceInfo *getWrittenTypeInfo() const { return TInfo.getPointer(); }
  void setWrittenTypeInfo(TypeSourceInfo *TI) { TInfo.setPointer(TI); }

  SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
  void setBuiltinLoc(SourceLocation L) { BuiltinLoc = L; }

  SourceLocation getRParenLoc() const { return RParenLoc; }
  void setRParenLoc(SourceLocation L) { RParenLoc = L; }

  SourceLocation getBeginLoc() const LLVM_READONLY { return BuiltinLoc; }
  SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == VAArgExprClass;
  }

  // Iterators
  child_range children() { return child_range(&Val, &Val+1); }
  const_child_range children() const {
    return const_child_range(&Val, &Val + 1);
  }
};

/// Describes a C or C++ initializer list.
///
/// InitListExpr describes an initializer list, which can be used to
/// initialize objects of different types, including
/// struct/class/union types, arrays, and vectors. For example:
///
/// @code
/// struct foo x = { 1, { 2, 3 } };
/// @endcode
///
/// Prior to semantic analysis, an initializer list will represent the
/// initializer list as written by the user, but will have the
/// placeholder type "void". This initializer list is called the
/// syntactic form of the initializer, and may contain C99 designated
/// initializers (represented as DesignatedInitExprs), initializations
/// of subobject members without explicit braces, and so on. Clients
/// interested in the original syntax of the initializer list should
/// use the syntactic form of the initializer list.
///
/// After semantic analysis, the initializer list will represent the
/// semantic form of the initializer, where the initializations of all
/// subobjects are made explicit with nested InitListExpr nodes and
/// C99 designators have been eliminated by placing the designated
/// initializations into the subobject they initialize. Additionally,
/// any "holes" in the initialization, where no initializer has been
/// specified for a particular subobject, will be replaced with
/// implicitly-generated ImplicitValueInitExpr expressions that
/// value-initialize the subobjects. Note, however, that the
/// initializer lists may still have fewer initializers than there are
/// elements to initialize within the object.
///
/// After semantic analysis has completed, given an initializer list,
/// method isSemanticForm() returns true if and only if this is the
/// semantic form of the initializer list (note: the same AST node
/// may at the same time be the syntactic form).
/// Given the semantic form of the initializer list, one can retrieve
/// the syntactic form of that initializer list (when different)
/// using method getSyntacticForm(); the method returns null if applied
/// to an initializer list which is already in syntactic form.
/// Similarly, given the syntactic form (i.e., an initializer list such
/// that isSemanticForm() returns false), one can retrieve the semantic
/// form using method getSemanticForm().
/// Since many initializer lists have the same syntactic and semantic forms,
/// getSyntacticForm() may return NULL, indicating that the current
/// semantic initializer list also serves as its syntactic form.
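/// As a concrete illustration (not from the original header), for
/// @code
/// int a[4] = { 1, 2 };
/// @endcode
/// the syntactic form stores the two written initializers, while the
/// semantic form additionally records the value initialization of a[2]
/// and a[3] (e.g., via an array filler).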
class InitListExpr : public Expr {
  // FIXME: Eliminate this vector in favor of ASTContext allocation
  typedef ASTVector<Stmt *> InitExprsTy;
  InitExprsTy InitExprs;
  SourceLocation LBraceLoc, RBraceLoc;

  /// The alternative form of the initializer list (if it exists).
  /// The int part of the pair stores whether this initializer list is
  /// in semantic form. If not null, the pointer points to:
  ///   - the syntactic form, if this is in semantic form;
  ///   - the semantic form, if this is in syntactic form.
  llvm::PointerIntPair<InitListExpr *, 1, bool> AltForm;

  /// Either:
  ///  If this initializer list initializes an array with more elements than
  ///  there are initializers in the list, specifies an expression to be used
  ///  for value initialization of the rest of the elements.
  /// Or
  ///  If this initializer list initializes a union, specifies which
  ///  field within the union will be initialized.
  llvm::PointerUnion<Expr *, FieldDecl *> ArrayFillerOrUnionFieldInit;

public:
  InitListExpr(const ASTContext &C, SourceLocation lbraceloc,
               ArrayRef<Expr*> initExprs, SourceLocation rbraceloc);

  /// Build an empty initializer list.
  explicit InitListExpr(EmptyShell Empty)
    : Expr(InitListExprClass, Empty), AltForm(nullptr, true) { }

  unsigned getNumInits() const { return InitExprs.size(); }

  /// Retrieve the set of initializers.
  Expr **getInits() { return reinterpret_cast<Expr **>(InitExprs.data()); }

  /// Retrieve the set of initializers.
  Expr * const *getInits() const {
    return reinterpret_cast<Expr * const *>(InitExprs.data());
  }

  ArrayRef<Expr *> inits() {
    return llvm::makeArrayRef(getInits(), getNumInits());
  }

  ArrayRef<const Expr *> inits() const {
    return llvm::makeArrayRef(getInits(), getNumInits());
  }

  const Expr *getInit(unsigned Init) const {
    assert(Init < getNumInits() && "Initializer access out of range!");
    return cast_or_null<Expr>(InitExprs[Init]);
  }

  Expr *getInit(unsigned Init) {
    assert(Init < getNumInits() && "Initializer access out of range!");
    return cast_or_null<Expr>(InitExprs[Init]);
  }

  void setInit(unsigned Init, Expr *expr) {
    assert(Init < getNumInits() && "Initializer access out of range!");
    InitExprs[Init] = expr;

    if (expr) {
      ExprBits.TypeDependent |= expr->isTypeDependent();
      ExprBits.ValueDependent |= expr->isValueDependent();
      ExprBits.InstantiationDependent |= expr->isInstantiationDependent();
      ExprBits.ContainsUnexpandedParameterPack |=
          expr->containsUnexpandedParameterPack();
    }
  }

  /// Reserve space for some number of initializers.
  void reserveInits(const ASTContext &C, unsigned NumInits);

  /// Specify the number of initializers
  ///
  /// If there are more than @p NumInits initializers, the remaining
  /// initializers will be destroyed. If there are fewer than @p
  /// NumInits initializers, NULL expressions will be added for the
  /// unknown initializers.
  void resizeInits(const ASTContext &Context, unsigned NumInits);

  /// Updates the initializer at index @p Init with the new
  /// expression @p expr, and returns the old expression at that
  /// location.
  ///
  /// When @p Init is out of range for this initializer list, the
  /// initializer list will be extended with NULL expressions to
  /// accommodate the new entry.
  Expr *updateInit(const ASTContext &C, unsigned Init, Expr *expr);

  /// If this initializer list initializes an array with more elements
  /// than there are initializers in the list, specifies an expression to be
  /// used for value initialization of the rest of the elements.
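  /// For example (illustrative), in "int a[1024] = { 1 };" a single filler
  /// expression stands in for the value initialization of a[1]..a[1023]
  /// rather than storing 1023 separate initializers.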
  Expr *getArrayFiller() {
    return ArrayFillerOrUnionFieldInit.dyn_cast<Expr *>();
  }
  const Expr *getArrayFiller() const {
    return const_cast<InitListExpr *>(this)->getArrayFiller();
  }
  void setArrayFiller(Expr *filler);

  /// Return true if this is an array initializer and its array "filler"
  /// has been set.
  bool hasArrayFiller() const { return getArrayFiller(); }

  /// If this initializes a union, specifies which field in the
  /// union to initialize.
  ///
  /// Typically, this field is the first named field within the
  /// union. However, a designated initializer can specify the
  /// initialization of a different field within the union.
  FieldDecl *getInitializedFieldInUnion() {
    return ArrayFillerOrUnionFieldInit.dyn_cast<FieldDecl *>();
  }
  const FieldDecl *getInitializedFieldInUnion() const {
    return const_cast<InitListExpr *>(this)->getInitializedFieldInUnion();
  }
  void setInitializedFieldInUnion(FieldDecl *FD) {
    assert((FD == nullptr
            || getInitializedFieldInUnion() == nullptr
            || getInitializedFieldInUnion() == FD)
           && "Only one field of a union may be initialized at a time!");
    ArrayFillerOrUnionFieldInit = FD;
  }

  // Explicit InitListExpr's originate from source code (and have valid source
  // locations). Implicit InitListExpr's are created by the semantic analyzer.
  bool isExplicit() const {
    return LBraceLoc.isValid() && RBraceLoc.isValid();
  }

  // Is this an initializer for an array of characters, initialized by a string
  // literal or an @encode?
  bool isStringLiteralInit() const;

  /// Is this a transparent initializer list (that is, an InitListExpr that is
  /// purely syntactic, and whose semantics are that of the sole contained
  /// initializer)?
  bool isTransparent() const;

  /// Is this the zero initializer {0} in a language which considers it
  /// idiomatic?
  bool isIdiomaticZeroInitializer(const LangOptions &LangOpts) const;

  SourceLocation getLBraceLoc() const { return LBraceLoc; }
  void setLBraceLoc(SourceLocation Loc) { LBraceLoc = Loc; }
  SourceLocation getRBraceLoc() const { return RBraceLoc; }
  void setRBraceLoc(SourceLocation Loc) { RBraceLoc = Loc; }

  bool isSemanticForm() const { return AltForm.getInt(); }
  InitListExpr *getSemanticForm() const {
    return isSemanticForm() ? nullptr : AltForm.getPointer();
  }
  bool isSyntacticForm() const {
    return !AltForm.getInt() || !AltForm.getPointer();
  }
  InitListExpr *getSyntacticForm() const {
    return isSemanticForm() ? AltForm.getPointer() : nullptr;
  }

  void setSyntacticForm(InitListExpr *Init) {
    AltForm.setPointer(Init);
    AltForm.setInt(true);
    Init->AltForm.setPointer(this);
    Init->AltForm.setInt(false);
  }

  bool hadArrayRangeDesignator() const {
    return InitListExprBits.HadArrayRangeDesignator != 0;
  }
  void sawArrayRangeDesignator(bool ARD = true) {
    InitListExprBits.HadArrayRangeDesignator = ARD;
  }

  SourceLocation getBeginLoc() const LLVM_READONLY;
  SourceLocation getEndLoc() const LLVM_READONLY;

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == InitListExprClass;
  }

  // Iterators
  child_range children() {
    const_child_range CCR = const_cast<const InitListExpr *>(this)->children();
    return child_range(cast_away_const(CCR.begin()),
                       cast_away_const(CCR.end()));
  }

  const_child_range children() const {
    // FIXME: This does not include the array filler expression.
if (InitExprs.empty()) return const_child_range(const_child_iterator(), const_child_iterator()); return const_child_range(&InitExprs[0], &InitExprs[0] + InitExprs.size()); } typedef InitExprsTy::iterator iterator; typedef InitExprsTy::const_iterator const_iterator; typedef InitExprsTy::reverse_iterator reverse_iterator; typedef InitExprsTy::const_reverse_iterator const_reverse_iterator; iterator begin() { return InitExprs.begin(); } const_iterator begin() const { return InitExprs.begin(); } iterator end() { return InitExprs.end(); } const_iterator end() const { return InitExprs.end(); } reverse_iterator rbegin() { return InitExprs.rbegin(); } const_reverse_iterator rbegin() const { return InitExprs.rbegin(); } reverse_iterator rend() { return InitExprs.rend(); } const_reverse_iterator rend() const { return InitExprs.rend(); } friend class ASTStmtReader; friend class ASTStmtWriter; }; /// Represents a C99 designated initializer expression. /// /// A designated initializer expression (C99 6.7.8) contains one or /// more designators (which can be field designators, array /// designators, or GNU array-range designators) followed by an /// expression that initializes the field or element(s) that the /// designators refer to. For example, given: /// /// @code /// struct point { /// double x; /// double y; /// }; /// struct point ptarray[10] = { [2].y = 1.0, [2].x = 2.0, [0].x = 1.0 }; /// @endcode /// /// The InitListExpr contains three DesignatedInitExprs, the first of /// which covers @c [2].y=1.0. This DesignatedInitExpr will have two /// designators, one array designator for @c [2] followed by one field /// designator for @c .y. The initialization expression will be 1.0. class DesignatedInitExpr final : public Expr, private llvm::TrailingObjects { public: /// Forward declaration of the Designator class. class Designator; private: /// The location of the '=' or ':' prior to the actual initializer /// expression. SourceLocation EqualOrColonLoc; /// Whether this designated initializer used the GNU deprecated /// syntax rather than the C99 '=' syntax. unsigned GNUSyntax : 1; /// The number of designators in this initializer expression. unsigned NumDesignators : 15; /// The number of subexpressions of this initializer expression, /// which contains both the initializer and any additional /// expressions used by array and array-range designators. unsigned NumSubExprs : 16; /// The designators in this designated initialization /// expression. Designator *Designators; DesignatedInitExpr(const ASTContext &C, QualType Ty, llvm::ArrayRef Designators, SourceLocation EqualOrColonLoc, bool GNUSyntax, ArrayRef IndexExprs, Expr *Init); explicit DesignatedInitExpr(unsigned NumSubExprs) : Expr(DesignatedInitExprClass, EmptyShell()), NumDesignators(0), NumSubExprs(NumSubExprs), Designators(nullptr) { } public: /// A field designator, e.g., ".x". struct FieldDesignator { /// Refers to the field that is being initialized. The low bit /// of this field determines whether this is actually a pointer /// to an IdentifierInfo (if 1) or a FieldDecl (if 0). When /// initially constructed, a field designator will store an /// IdentifierInfo*. After semantic analysis has resolved that /// name, the field designator will instead store a FieldDecl*. uintptr_t NameOrField; /// The location of the '.' in the designated initializer. unsigned DotLoc; /// The location of the field name in the designated initializer. unsigned FieldLoc; }; /// An array or GNU array-range designator, e.g., "[9]" or "[10..15]". 
struct ArrayOrRangeDesignator { /// Location of the first index expression within the designated /// initializer expression's list of subexpressions. unsigned Index; /// The location of the '[' starting the array range designator. unsigned LBracketLoc; /// The location of the ellipsis separating the start and end /// indices. Only valid for GNU array-range designators. unsigned EllipsisLoc; /// The location of the ']' terminating the array range designator. unsigned RBracketLoc; }; /// Represents a single C99 designator. /// /// @todo This class is infuriatingly similar to clang::Designator, /// but minor differences (storing indices vs. storing pointers) /// keep us from reusing it. Try harder, later, to rectify these /// differences. class Designator { /// The kind of designator this describes. enum { FieldDesignator, ArrayDesignator, ArrayRangeDesignator } Kind; union { /// A field designator, e.g., ".x". struct FieldDesignator Field; /// An array or GNU array-range designator, e.g., "[9]" or "[10..15]". struct ArrayOrRangeDesignator ArrayOrRange; }; friend class DesignatedInitExpr; public: Designator() {} /// Initializes a field designator. Designator(const IdentifierInfo *FieldName, SourceLocation DotLoc, SourceLocation FieldLoc) : Kind(FieldDesignator) { Field.NameOrField = reinterpret_cast(FieldName) | 0x01; Field.DotLoc = DotLoc.getRawEncoding(); Field.FieldLoc = FieldLoc.getRawEncoding(); } /// Initializes an array designator. Designator(unsigned Index, SourceLocation LBracketLoc, SourceLocation RBracketLoc) : Kind(ArrayDesignator) { ArrayOrRange.Index = Index; ArrayOrRange.LBracketLoc = LBracketLoc.getRawEncoding(); ArrayOrRange.EllipsisLoc = SourceLocation().getRawEncoding(); ArrayOrRange.RBracketLoc = RBracketLoc.getRawEncoding(); } /// Initializes a GNU array-range designator. 
Designator(unsigned Index, SourceLocation LBracketLoc, SourceLocation EllipsisLoc, SourceLocation RBracketLoc) : Kind(ArrayRangeDesignator) { ArrayOrRange.Index = Index; ArrayOrRange.LBracketLoc = LBracketLoc.getRawEncoding(); ArrayOrRange.EllipsisLoc = EllipsisLoc.getRawEncoding(); ArrayOrRange.RBracketLoc = RBracketLoc.getRawEncoding(); } bool isFieldDesignator() const { return Kind == FieldDesignator; } bool isArrayDesignator() const { return Kind == ArrayDesignator; } bool isArrayRangeDesignator() const { return Kind == ArrayRangeDesignator; } IdentifierInfo *getFieldName() const; FieldDecl *getField() const { assert(Kind == FieldDesignator && "Only valid on a field designator"); if (Field.NameOrField & 0x01) return nullptr; else return reinterpret_cast(Field.NameOrField); } void setField(FieldDecl *FD) { assert(Kind == FieldDesignator && "Only valid on a field designator"); Field.NameOrField = reinterpret_cast(FD); } SourceLocation getDotLoc() const { assert(Kind == FieldDesignator && "Only valid on a field designator"); return SourceLocation::getFromRawEncoding(Field.DotLoc); } SourceLocation getFieldLoc() const { assert(Kind == FieldDesignator && "Only valid on a field designator"); return SourceLocation::getFromRawEncoding(Field.FieldLoc); } SourceLocation getLBracketLoc() const { assert((Kind == ArrayDesignator || Kind == ArrayRangeDesignator) && "Only valid on an array or array-range designator"); return SourceLocation::getFromRawEncoding(ArrayOrRange.LBracketLoc); } SourceLocation getRBracketLoc() const { assert((Kind == ArrayDesignator || Kind == ArrayRangeDesignator) && "Only valid on an array or array-range designator"); return SourceLocation::getFromRawEncoding(ArrayOrRange.RBracketLoc); } SourceLocation getEllipsisLoc() const { assert(Kind == ArrayRangeDesignator && "Only valid on an array-range designator"); return SourceLocation::getFromRawEncoding(ArrayOrRange.EllipsisLoc); } unsigned getFirstExprIndex() const { assert((Kind == ArrayDesignator || Kind == ArrayRangeDesignator) && "Only valid on an array or array-range designator"); return ArrayOrRange.Index; } SourceLocation getBeginLoc() const LLVM_READONLY { if (Kind == FieldDesignator) return getDotLoc().isInvalid()? getFieldLoc() : getDotLoc(); else return getLBracketLoc(); } SourceLocation getEndLoc() const LLVM_READONLY { return Kind == FieldDesignator ? getFieldLoc() : getRBracketLoc(); } SourceRange getSourceRange() const LLVM_READONLY { return SourceRange(getBeginLoc(), getEndLoc()); } }; static DesignatedInitExpr *Create(const ASTContext &C, llvm::ArrayRef Designators, ArrayRef IndexExprs, SourceLocation EqualOrColonLoc, bool GNUSyntax, Expr *Init); static DesignatedInitExpr *CreateEmpty(const ASTContext &C, unsigned NumIndexExprs); /// Returns the number of designators in this initializer. unsigned size() const { return NumDesignators; } // Iterator access to the designators. llvm::MutableArrayRef designators() { return {Designators, NumDesignators}; } llvm::ArrayRef designators() const { return {Designators, NumDesignators}; } Designator *getDesignator(unsigned Idx) { return &designators()[Idx]; } const Designator *getDesignator(unsigned Idx) const { return &designators()[Idx]; } void setDesignators(const ASTContext &C, const Designator *Desigs, unsigned NumDesigs); Expr *getArrayIndex(const Designator &D) const; Expr *getArrayRangeStart(const Designator &D) const; Expr *getArrayRangeEnd(const Designator &D) const; /// Retrieve the location of the '=' that precedes the /// initializer value itself, if present. 
SourceLocation getEqualOrColonLoc() const { return EqualOrColonLoc; } void setEqualOrColonLoc(SourceLocation L) { EqualOrColonLoc = L; } /// Determines whether this designated initializer used the /// deprecated GNU syntax for designated initializers. bool usesGNUSyntax() const { return GNUSyntax; } void setGNUSyntax(bool GNU) { GNUSyntax = GNU; } /// Retrieve the initializer value. Expr *getInit() const { return cast(*const_cast(this)->child_begin()); } void setInit(Expr *init) { *child_begin() = init; } /// Retrieve the total number of subexpressions in this /// designated initializer expression, including the actual /// initialized value and any expressions that occur within array /// and array-range designators. unsigned getNumSubExprs() const { return NumSubExprs; } Expr *getSubExpr(unsigned Idx) const { assert(Idx < NumSubExprs && "Subscript out of range"); return cast(getTrailingObjects()[Idx]); } void setSubExpr(unsigned Idx, Expr *E) { assert(Idx < NumSubExprs && "Subscript out of range"); getTrailingObjects()[Idx] = E; } /// Replaces the designator at index @p Idx with the series /// of designators in [First, Last). void ExpandDesignator(const ASTContext &C, unsigned Idx, const Designator *First, const Designator *Last); SourceRange getDesignatorsSourceRange() const; SourceLocation getBeginLoc() const LLVM_READONLY; SourceLocation getEndLoc() const LLVM_READONLY; static bool classof(const Stmt *T) { return T->getStmtClass() == DesignatedInitExprClass; } // Iterators child_range children() { Stmt **begin = getTrailingObjects(); return child_range(begin, begin + NumSubExprs); } const_child_range children() const { Stmt * const *begin = getTrailingObjects(); return const_child_range(begin, begin + NumSubExprs); } friend TrailingObjects; }; /// Represents a place-holder for an object not to be initialized by /// anything. /// /// This only makes sense when it appears as part of an updater of a /// DesignatedInitUpdateExpr (see below). The base expression of a DIUE /// initializes a big object, and the NoInitExpr's mark the spots within the /// big object not to be overwritten by the updater. /// /// \see DesignatedInitUpdateExpr class NoInitExpr : public Expr { public: explicit NoInitExpr(QualType ty) : Expr(NoInitExprClass, ty, VK_RValue, OK_Ordinary, false, false, ty->isInstantiationDependentType(), false) { } explicit NoInitExpr(EmptyShell Empty) : Expr(NoInitExprClass, Empty) { } static bool classof(const Stmt *T) { return T->getStmtClass() == NoInitExprClass; } SourceLocation getBeginLoc() const LLVM_READONLY { return SourceLocation(); } SourceLocation getEndLoc() const LLVM_READONLY { return SourceLocation(); } // Iterators child_range children() { return child_range(child_iterator(), child_iterator()); } const_child_range children() const { return const_child_range(const_child_iterator(), const_child_iterator()); } }; // In cases like: // struct Q { int a, b, c; }; // Q *getQ(); // void foo() { // struct A { Q q; } a = { *getQ(), .q.b = 3 }; // } // // We will have an InitListExpr for a, with type A, and then a // DesignatedInitUpdateExpr for "a.q" with type Q. The "base" for this DIUE // is the call expression *getQ(); the "updater" for the DIUE is ".q.b = 3" // class DesignatedInitUpdateExpr : public Expr { // BaseAndUpdaterExprs[0] is the base expression; // BaseAndUpdaterExprs[1] is an InitListExpr overwriting part of the base. 
  Stmt *BaseAndUpdaterExprs[2];

public:
  DesignatedInitUpdateExpr(const ASTContext &C, SourceLocation lBraceLoc,
                           Expr *baseExprs, SourceLocation rBraceLoc);

  explicit DesignatedInitUpdateExpr(EmptyShell Empty)
    : Expr(DesignatedInitUpdateExprClass, Empty) { }

  SourceLocation getBeginLoc() const LLVM_READONLY;
  SourceLocation getEndLoc() const LLVM_READONLY;

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == DesignatedInitUpdateExprClass;
  }

  Expr *getBase() const { return cast<Expr>(BaseAndUpdaterExprs[0]); }
  void setBase(Expr *Base) { BaseAndUpdaterExprs[0] = Base; }

  InitListExpr *getUpdater() const {
    return cast<InitListExpr>(BaseAndUpdaterExprs[1]);
  }
  void setUpdater(Expr *Updater) { BaseAndUpdaterExprs[1] = Updater; }

  // Iterators
  // children = the base and the updater
  child_range children() {
    return child_range(&BaseAndUpdaterExprs[0], &BaseAndUpdaterExprs[0] + 2);
  }
  const_child_range children() const {
    return const_child_range(&BaseAndUpdaterExprs[0],
                             &BaseAndUpdaterExprs[0] + 2);
  }
};

/// Represents a loop initializing the elements of an array.
///
/// The need to initialize the elements of an array occurs in a number of
/// contexts:
///
///  * in the implicit copy/move constructor for a class with an array member
///  * when a lambda-expression captures an array by value
///  * when a decomposition declaration decomposes an array
///
/// There are two subexpressions: a common expression (the source array)
/// that is evaluated once up-front, and a per-element initializer that
/// runs once for each array element.
///
/// Within the per-element initializer, the common expression may be referenced
/// via an OpaqueValueExpr, and the current index may be obtained via an
/// ArrayInitIndexExpr.
class ArrayInitLoopExpr : public Expr {
  Stmt *SubExprs[2];

  explicit ArrayInitLoopExpr(EmptyShell Empty)
      : Expr(ArrayInitLoopExprClass, Empty), SubExprs{} {}

public:
  explicit ArrayInitLoopExpr(QualType T, Expr *CommonInit, Expr *ElementInit)
      : Expr(ArrayInitLoopExprClass, T, VK_RValue, OK_Ordinary, false,
             CommonInit->isValueDependent() || ElementInit->isValueDependent(),
             T->isInstantiationDependentType(),
             CommonInit->containsUnexpandedParameterPack() ||
                 ElementInit->containsUnexpandedParameterPack()),
        SubExprs{CommonInit, ElementInit} {}

  /// Get the common subexpression shared by all initializations (the source
  /// array).
  OpaqueValueExpr *getCommonExpr() const {
    return cast<OpaqueValueExpr>(SubExprs[0]);
  }

  /// Get the initializer to use for each array element.
  Expr *getSubExpr() const { return cast<Expr>(SubExprs[1]); }

  llvm::APInt getArraySize() const {
    return cast<ConstantArrayType>(getType()->castAsArrayTypeUnsafe())
        ->getSize();
  }

  static bool classof(const Stmt *S) {
    return S->getStmtClass() == ArrayInitLoopExprClass;
  }

  SourceLocation getBeginLoc() const LLVM_READONLY {
    return getCommonExpr()->getBeginLoc();
  }
  SourceLocation getEndLoc() const LLVM_READONLY {
    return getCommonExpr()->getEndLoc();
  }

  child_range children() { return child_range(SubExprs, SubExprs + 2); }
  const_child_range children() const {
    return const_child_range(SubExprs, SubExprs + 2);
  }

  friend class ASTReader;
  friend class ASTStmtReader;
  friend class ASTStmtWriter;
};

/// Represents the index of the current element of an array being
/// initialized by an ArrayInitLoopExpr. This can only appear within the
/// subexpression of an ArrayInitLoopExpr.
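/// For example (illustrative), the implicit copy of a member "int arr[8]"
/// is modeled as an ArrayInitLoopExpr whose per-element initializer is
/// roughly "OpaqueValueExpr(src.arr)[ArrayInitIndexExpr]", evaluated once
/// per element with the index running from 0 to 7.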
class ArrayInitIndexExpr : public Expr { explicit ArrayInitIndexExpr(EmptyShell Empty) : Expr(ArrayInitIndexExprClass, Empty) {} public: explicit ArrayInitIndexExpr(QualType T) : Expr(ArrayInitIndexExprClass, T, VK_RValue, OK_Ordinary, false, false, false, false) {} static bool classof(const Stmt *S) { return S->getStmtClass() == ArrayInitIndexExprClass; } SourceLocation getBeginLoc() const LLVM_READONLY { return SourceLocation(); } SourceLocation getEndLoc() const LLVM_READONLY { return SourceLocation(); } child_range children() { return child_range(child_iterator(), child_iterator()); } const_child_range children() const { return const_child_range(const_child_iterator(), const_child_iterator()); } friend class ASTReader; friend class ASTStmtReader; }; /// Represents an implicitly-generated value initialization of /// an object of a given type. /// /// Implicit value initializations occur within semantic initializer /// list expressions (InitListExpr) as placeholders for subobject /// initializations not explicitly specified by the user. /// /// \see InitListExpr class ImplicitValueInitExpr : public Expr { public: explicit ImplicitValueInitExpr(QualType ty) : Expr(ImplicitValueInitExprClass, ty, VK_RValue, OK_Ordinary, false, false, ty->isInstantiationDependentType(), false) { } /// Construct an empty implicit value initialization. explicit ImplicitValueInitExpr(EmptyShell Empty) : Expr(ImplicitValueInitExprClass, Empty) { } static bool classof(const Stmt *T) { return T->getStmtClass() == ImplicitValueInitExprClass; } SourceLocation getBeginLoc() const LLVM_READONLY { return SourceLocation(); } SourceLocation getEndLoc() const LLVM_READONLY { return SourceLocation(); } // Iterators child_range children() { return child_range(child_iterator(), child_iterator()); } const_child_range children() const { return const_child_range(const_child_iterator(), const_child_iterator()); } }; class ParenListExpr final : public Expr, private llvm::TrailingObjects { friend class ASTStmtReader; friend TrailingObjects; /// The location of the left and right parentheses. SourceLocation LParenLoc, RParenLoc; /// Build a paren list. ParenListExpr(SourceLocation LParenLoc, ArrayRef Exprs, SourceLocation RParenLoc); /// Build an empty paren list. ParenListExpr(EmptyShell Empty, unsigned NumExprs); public: /// Create a paren list. static ParenListExpr *Create(const ASTContext &Ctx, SourceLocation LParenLoc, ArrayRef Exprs, SourceLocation RParenLoc); /// Create an empty paren list. static ParenListExpr *CreateEmpty(const ASTContext &Ctx, unsigned NumExprs); /// Return the number of expressions in this paren list. 
unsigned getNumExprs() const { return ParenListExprBits.NumExprs; } Expr *getExpr(unsigned Init) { assert(Init < getNumExprs() && "Initializer access out of range!"); return getExprs()[Init]; } const Expr *getExpr(unsigned Init) const { return const_cast(this)->getExpr(Init); } Expr **getExprs() { return reinterpret_cast(getTrailingObjects()); } ArrayRef exprs() { return llvm::makeArrayRef(getExprs(), getNumExprs()); } SourceLocation getLParenLoc() const { return LParenLoc; } SourceLocation getRParenLoc() const { return RParenLoc; } SourceLocation getBeginLoc() const { return getLParenLoc(); } SourceLocation getEndLoc() const { return getRParenLoc(); } static bool classof(const Stmt *T) { return T->getStmtClass() == ParenListExprClass; } // Iterators child_range children() { return child_range(getTrailingObjects(), getTrailingObjects() + getNumExprs()); } const_child_range children() const { return const_child_range(getTrailingObjects(), getTrailingObjects() + getNumExprs()); } }; /// Represents a C11 generic selection. /// /// A generic selection (C11 6.5.1.1) contains an unevaluated controlling /// expression, followed by one or more generic associations. Each generic /// association specifies a type name and an expression, or "default" and an /// expression (in which case it is known as a default generic association). /// The type and value of the generic selection are identical to those of its /// result expression, which is defined as the expression in the generic /// association with a type name that is compatible with the type of the /// controlling expression, or the expression in the default generic association /// if no types are compatible. For example: /// /// @code /// _Generic(X, double: 1, float: 2, default: 3) /// @endcode /// /// The above expression evaluates to 1 if 1.0 is substituted for X, 2 if 1.0f /// or 3 if "hello". /// /// As an extension, generic selections are allowed in C++, where the following /// additional semantics apply: /// /// Any generic selection whose controlling expression is type-dependent or /// which names a dependent type in its association list is result-dependent, /// which means that the choice of result expression is dependent. /// Result-dependent generic associations are both type- and value-dependent. class GenericSelectionExpr : public Expr { enum { CONTROLLING, END_EXPR }; TypeSourceInfo **AssocTypes; Stmt **SubExprs; unsigned NumAssocs, ResultIndex; SourceLocation GenericLoc, DefaultLoc, RParenLoc; public: GenericSelectionExpr(const ASTContext &Context, SourceLocation GenericLoc, Expr *ControllingExpr, ArrayRef AssocTypes, ArrayRef AssocExprs, SourceLocation DefaultLoc, SourceLocation RParenLoc, bool ContainsUnexpandedParameterPack, unsigned ResultIndex); /// This constructor is used in the result-dependent case. 
GenericSelectionExpr(const ASTContext &Context, SourceLocation GenericLoc, Expr *ControllingExpr, ArrayRef AssocTypes, ArrayRef AssocExprs, SourceLocation DefaultLoc, SourceLocation RParenLoc, bool ContainsUnexpandedParameterPack); explicit GenericSelectionExpr(EmptyShell Empty) : Expr(GenericSelectionExprClass, Empty) { } unsigned getNumAssocs() const { return NumAssocs; } SourceLocation getGenericLoc() const { return GenericLoc; } SourceLocation getDefaultLoc() const { return DefaultLoc; } SourceLocation getRParenLoc() const { return RParenLoc; } const Expr *getAssocExpr(unsigned i) const { return cast(SubExprs[END_EXPR+i]); } Expr *getAssocExpr(unsigned i) { return cast(SubExprs[END_EXPR+i]); } ArrayRef getAssocExprs() const { return NumAssocs ? llvm::makeArrayRef( &reinterpret_cast(SubExprs)[END_EXPR], NumAssocs) : None; } const TypeSourceInfo *getAssocTypeSourceInfo(unsigned i) const { return AssocTypes[i]; } TypeSourceInfo *getAssocTypeSourceInfo(unsigned i) { return AssocTypes[i]; } ArrayRef getAssocTypeSourceInfos() const { return NumAssocs ? llvm::makeArrayRef(&AssocTypes[0], NumAssocs) : None; } QualType getAssocType(unsigned i) const { if (const TypeSourceInfo *TS = getAssocTypeSourceInfo(i)) return TS->getType(); else return QualType(); } const Expr *getControllingExpr() const { return cast(SubExprs[CONTROLLING]); } Expr *getControllingExpr() { return cast(SubExprs[CONTROLLING]); } /// Whether this generic selection is result-dependent. bool isResultDependent() const { return ResultIndex == -1U; } /// The zero-based index of the result expression's generic association in /// the generic selection's association list. Defined only if the /// generic selection is not result-dependent. unsigned getResultIndex() const { assert(!isResultDependent() && "Generic selection is result-dependent"); return ResultIndex; } /// The generic selection's result expression. Defined only if the /// generic selection is not result-dependent. const Expr *getResultExpr() const { return getAssocExpr(getResultIndex()); } Expr *getResultExpr() { return getAssocExpr(getResultIndex()); } SourceLocation getBeginLoc() const LLVM_READONLY { return GenericLoc; } SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; } static bool classof(const Stmt *T) { return T->getStmtClass() == GenericSelectionExprClass; } child_range children() { return child_range(SubExprs, SubExprs+END_EXPR+NumAssocs); } const_child_range children() const { return const_child_range(SubExprs, SubExprs + END_EXPR + NumAssocs); } friend class ASTStmtReader; }; //===----------------------------------------------------------------------===// // Clang Extensions //===----------------------------------------------------------------------===// /// ExtVectorElementExpr - This represents access to specific elements of a /// vector, and may occur on the left hand side or right hand side. For example /// the following is legal: "V.xy = V.zw" if V is a 4 element extended vector. /// /// Note that the base may have either vector or pointer to vector type, just /// like a struct field reference. /// class ExtVectorElementExpr : public Expr { Stmt *Base; IdentifierInfo *Accessor; SourceLocation AccessorLoc; public: ExtVectorElementExpr(QualType ty, ExprValueKind VK, Expr *base, IdentifierInfo &accessor, SourceLocation loc) : Expr(ExtVectorElementExprClass, ty, VK, (VK == VK_RValue ? 
            OK_Ordinary : OK_VectorComponent),
           base->isTypeDependent(), base->isValueDependent(),
           base->isInstantiationDependent(),
           base->containsUnexpandedParameterPack()),
      Base(base), Accessor(&accessor), AccessorLoc(loc) {}

  /// Build an empty vector element expression.
  explicit ExtVectorElementExpr(EmptyShell Empty)
    : Expr(ExtVectorElementExprClass, Empty) { }

  const Expr *getBase() const { return cast<Expr>(Base); }
  Expr *getBase() { return cast<Expr>(Base); }
  void setBase(Expr *E) { Base = E; }

  IdentifierInfo &getAccessor() const { return *Accessor; }
  void setAccessor(IdentifierInfo *II) { Accessor = II; }

  SourceLocation getAccessorLoc() const { return AccessorLoc; }
  void setAccessorLoc(SourceLocation L) { AccessorLoc = L; }

  /// getNumElements - Get the number of components being selected.
  unsigned getNumElements() const;

  /// containsDuplicateElements - Return true if any element access is
  /// repeated.
  bool containsDuplicateElements() const;

  /// getEncodedElementAccess - Encode the elements accessed into an llvm
  /// aggregate Constant of ConstantInt(s).
  void getEncodedElementAccess(SmallVectorImpl<uint32_t> &Elts) const;

  SourceLocation getBeginLoc() const LLVM_READONLY {
    return getBase()->getBeginLoc();
  }
  SourceLocation getEndLoc() const LLVM_READONLY { return AccessorLoc; }

  /// isArrow - Return true if the base expression is a pointer to vector,
  /// return false if the base expression is a vector.
  bool isArrow() const;

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == ExtVectorElementExprClass;
  }

  // Iterators
  child_range children() { return child_range(&Base, &Base+1); }
  const_child_range children() const {
    return const_child_range(&Base, &Base + 1);
  }
};

/// BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
/// ^{ statement-body }   or   ^(int arg1, float arg2){ statement-body }
class BlockExpr : public Expr {
protected:
  BlockDecl *TheBlock;
public:
  BlockExpr(BlockDecl *BD, QualType ty)
    : Expr(BlockExprClass, ty, VK_RValue, OK_Ordinary,
           ty->isDependentType(), ty->isDependentType(),
           ty->isInstantiationDependentType() || BD->isDependentContext(),
           false),
      TheBlock(BD) {}

  /// Build an empty block expression.
  explicit BlockExpr(EmptyShell Empty) : Expr(BlockExprClass, Empty) { }

  const BlockDecl *getBlockDecl() const { return TheBlock; }
  BlockDecl *getBlockDecl() { return TheBlock; }
  void setBlockDecl(BlockDecl *BD) { TheBlock = BD; }

  // Convenience functions for probing the underlying BlockDecl.
  SourceLocation getCaretLocation() const;
  const Stmt *getBody() const;
  Stmt *getBody();

  SourceLocation getBeginLoc() const LLVM_READONLY {
    return getCaretLocation();
  }
  SourceLocation getEndLoc() const LLVM_READONLY {
    return getBody()->getEndLoc();
  }

  /// getFunctionType - Return the underlying function type for this block.
  const FunctionProtoType *getFunctionType() const;

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == BlockExprClass;
  }

  // Iterators
  child_range children() {
    return child_range(child_iterator(), child_iterator());
  }
  const_child_range children() const {
    return const_child_range(const_child_iterator(), const_child_iterator());
  }
};

/// AsTypeExpr - Clang builtin function __builtin_astype [OpenCL 6.2.4.2]
/// This AST node provides support for reinterpreting a type to another
/// type of the same size.
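/// For example (an illustrative sketch, not part of the original header), in
/// OpenCL C:
/// @code
/// float4 F = (float4)(1.0f, 2.0f, 3.0f, 4.0f);
/// int4 I = __builtin_astype(F, int4); // reinterprets the 128 bits of F;
///                                     // no value conversion is performed
/// @endcode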
class AsTypeExpr : public Expr {
private:
  Stmt *SrcExpr;
  SourceLocation BuiltinLoc, RParenLoc;

  friend class ASTReader;
  friend class ASTStmtReader;
  explicit AsTypeExpr(EmptyShell Empty) : Expr(AsTypeExprClass, Empty) {}

public:
  AsTypeExpr(Expr* SrcExpr, QualType DstType,
             ExprValueKind VK, ExprObjectKind OK,
             SourceLocation BuiltinLoc, SourceLocation RParenLoc)
    : Expr(AsTypeExprClass, DstType, VK, OK,
           DstType->isDependentType(),
           DstType->isDependentType() || SrcExpr->isValueDependent(),
           (DstType->isInstantiationDependentType() ||
            SrcExpr->isInstantiationDependent()),
           (DstType->containsUnexpandedParameterPack() ||
            SrcExpr->containsUnexpandedParameterPack())),
      SrcExpr(SrcExpr), BuiltinLoc(BuiltinLoc), RParenLoc(RParenLoc) {}

  /// getSrcExpr - Return the Expr to be converted.
  Expr *getSrcExpr() const { return cast<Expr>(SrcExpr); }

  /// getBuiltinLoc - Return the location of the __builtin_astype token.
  SourceLocation getBuiltinLoc() const { return BuiltinLoc; }

  /// getRParenLoc - Return the location of final right parenthesis.
  SourceLocation getRParenLoc() const { return RParenLoc; }

  SourceLocation getBeginLoc() const LLVM_READONLY { return BuiltinLoc; }
  SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == AsTypeExprClass;
  }

  // Iterators
  child_range children() { return child_range(&SrcExpr, &SrcExpr+1); }
  const_child_range children() const {
    return const_child_range(&SrcExpr, &SrcExpr + 1);
  }
};

/// PseudoObjectExpr - An expression which accesses a pseudo-object
/// l-value.  A pseudo-object is an abstract object, accesses to which
/// are translated to calls.  The pseudo-object expression has a
/// syntactic form, which shows how the expression was actually
/// written in the source code, and a semantic form, which is a series
/// of expressions to be executed in order which detail how the
/// operation is actually evaluated.  Optionally, one of the semantic
/// forms may also provide a result value for the expression.
///
/// If any of the semantic-form expressions is an OpaqueValueExpr,
/// that OVE is required to have a source expression, and it is bound
/// to the result of that source expression.  Such OVEs may appear
/// only in subsequent semantic-form expressions and as
/// sub-expressions of the syntactic form.
///
/// PseudoObjectExpr should be used only when an operation can be
/// usefully described in terms of fairly simple rewrite rules on
/// objects and functions that are meant to be used by end-developers.
/// For example, under the Itanium ABI, dynamic casts are implemented
/// as a call to a runtime function called __dynamic_cast; using this
/// class to describe that would be inappropriate because that call is
/// not really part of the user-visible semantics, and instead the
/// cast is properly reflected in the AST and IR-generation has been
/// taught to generate the call as necessary.  In contrast, an
/// Objective-C property access is semantically defined to be
/// equivalent to a particular message send, and this is very much
/// part of the user model.  The name of this class encourages this
/// modelling design.
class PseudoObjectExpr final
    : public Expr,
      private llvm::TrailingObjects<PseudoObjectExpr, Expr *> {
  // PseudoObjectExprBits.NumSubExprs - The number of sub-expressions.
  // Always at least two, because the first sub-expression is the
  // syntactic form.

  // PseudoObjectExprBits.ResultIndex - The index of the
  // sub-expression holding the result.
  // 0 means the result is void,
  // which is unambiguous because it's the index of the syntactic
  // form.  Note that this is therefore 1 higher than the value passed
  // in to Create, which is an index within the semantic forms.
  // Note also that ASTStmtWriter assumes this encoding.

  Expr **getSubExprsBuffer() { return getTrailingObjects<Expr *>(); }
  const Expr * const *getSubExprsBuffer() const {
    return getTrailingObjects<Expr *>();
  }

  PseudoObjectExpr(QualType type, ExprValueKind VK,
                   Expr *syntactic, ArrayRef<Expr *> semantic,
                   unsigned resultIndex);

  PseudoObjectExpr(EmptyShell shell, unsigned numSemanticExprs);

  unsigned getNumSubExprs() const {
    return PseudoObjectExprBits.NumSubExprs;
  }

public:
  /// NoResult - A value for the result index indicating that there is
  /// no semantic result.
  enum : unsigned { NoResult = ~0U };

  static PseudoObjectExpr *Create(const ASTContext &Context, Expr *syntactic,
                                  ArrayRef<Expr *> semantic,
                                  unsigned resultIndex);

  static PseudoObjectExpr *Create(const ASTContext &Context, EmptyShell shell,
                                  unsigned numSemanticExprs);

  /// Return the syntactic form of this expression, i.e. the
  /// expression it actually looks like.  Likely to be expressed in
  /// terms of OpaqueValueExprs bound in the semantic form.
  Expr *getSyntacticForm() { return getSubExprsBuffer()[0]; }
  const Expr *getSyntacticForm() const { return getSubExprsBuffer()[0]; }

  /// Return the index of the result-bearing expression into the semantics
  /// expressions, or PseudoObjectExpr::NoResult if there is none.
  unsigned getResultExprIndex() const {
    if (PseudoObjectExprBits.ResultIndex == 0) return NoResult;
    return PseudoObjectExprBits.ResultIndex - 1;
  }

  /// Return the result-bearing expression, or null if there is none.
  Expr *getResultExpr() {
    if (PseudoObjectExprBits.ResultIndex == 0)
      return nullptr;
    return getSubExprsBuffer()[PseudoObjectExprBits.ResultIndex];
  }
  const Expr *getResultExpr() const {
    return const_cast<PseudoObjectExpr*>(this)->getResultExpr();
  }

  unsigned getNumSemanticExprs() const { return getNumSubExprs() - 1; }

  typedef Expr * const *semantics_iterator;
  typedef const Expr * const *const_semantics_iterator;
  semantics_iterator semantics_begin() {
    return getSubExprsBuffer() + 1;
  }
  const_semantics_iterator semantics_begin() const {
    return getSubExprsBuffer() + 1;
  }
  semantics_iterator semantics_end() {
    return getSubExprsBuffer() + getNumSubExprs();
  }
  const_semantics_iterator semantics_end() const {
    return getSubExprsBuffer() + getNumSubExprs();
  }

  llvm::iterator_range<semantics_iterator> semantics() {
    return llvm::make_range(semantics_begin(), semantics_end());
  }
  llvm::iterator_range<const_semantics_iterator> semantics() const {
    return llvm::make_range(semantics_begin(), semantics_end());
  }

  Expr *getSemanticExpr(unsigned index) {
    assert(index + 1 < getNumSubExprs());
    return getSubExprsBuffer()[index + 1];
  }
  const Expr *getSemanticExpr(unsigned index) const {
    return const_cast<PseudoObjectExpr*>(this)->getSemanticExpr(index);
  }

  SourceLocation getExprLoc() const LLVM_READONLY {
    return getSyntacticForm()->getExprLoc();
  }

  SourceLocation getBeginLoc() const LLVM_READONLY {
    return getSyntacticForm()->getBeginLoc();
  }
  SourceLocation getEndLoc() const LLVM_READONLY {
    return getSyntacticForm()->getEndLoc();
  }

  child_range children() {
    const_child_range CCR =
        const_cast<const PseudoObjectExpr *>(this)->children();
    return child_range(cast_away_const(CCR.begin()),
                       cast_away_const(CCR.end()));
  }
  const_child_range children() const {
    Stmt *const *cs = const_cast<Stmt *const *>(
        reinterpret_cast<const Stmt *const *>(getSubExprsBuffer()));
    return const_child_range(cs, cs + getNumSubExprs());
  }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == PseudoObjectExprClass;
  }

  friend TrailingObjects;
  friend class ASTStmtReader;
};

/// AtomicExpr - Variadic atomic builtins: __atomic_exchange, __atomic_fetch_*,
/// __atomic_load, __atomic_store, and __atomic_compare_exchange_*, for the
/// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>,
/// and corresponding __opencl_atomic_* for OpenCL 2.0.
/// All of these instructions take one primary pointer and at least one memory
/// order.  The instructions for which getScopeModel returns a non-null value
/// take one synch scope.
class AtomicExpr : public Expr {
public:
  enum AtomicOp {
#define BUILTIN(ID, TYPE, ATTRS)
#define ATOMIC_BUILTIN(ID, TYPE, ATTRS) AO ## ID,
#include "clang/Basic/Builtins.def"
    // Avoid trailing comma
    BI_First = 0
  };

private:
  /// Location of sub-expressions.
  /// The location of Scope sub-expression is NumSubExprs - 1, which is
  /// not fixed, therefore is not defined in enum.
  enum { PTR, ORDER, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
  Stmt *SubExprs[END_EXPR + 1];
  unsigned NumSubExprs;
  SourceLocation BuiltinLoc, RParenLoc;
  AtomicOp Op;

  friend class ASTStmtReader;

public:
  AtomicExpr(SourceLocation BLoc, ArrayRef<Expr *> args, QualType t,
             AtomicOp op, SourceLocation RP);

  /// Determine the number of arguments the specified atomic builtin
  /// should have.
  static unsigned getNumSubExprs(AtomicOp Op);

  /// Build an empty AtomicExpr.
  explicit AtomicExpr(EmptyShell Empty) : Expr(AtomicExprClass, Empty) { }

  Expr *getPtr() const {
    return cast<Expr>(SubExprs[PTR]);
  }
  Expr *getOrder() const {
    return cast<Expr>(SubExprs[ORDER]);
  }
  Expr *getScope() const {
    assert(getScopeModel() && "No scope");
    return cast<Expr>(SubExprs[NumSubExprs - 1]);
  }
  Expr *getVal1() const {
    if (Op == AO__c11_atomic_init || Op == AO__opencl_atomic_init)
      return cast<Expr>(SubExprs[ORDER]);
    assert(NumSubExprs > VAL1);
    return cast<Expr>(SubExprs[VAL1]);
  }
  Expr *getOrderFail() const {
    assert(NumSubExprs > ORDER_FAIL);
    return cast<Expr>(SubExprs[ORDER_FAIL]);
  }
  Expr *getVal2() const {
    if (Op == AO__atomic_exchange)
      return cast<Expr>(SubExprs[ORDER_FAIL]);
    assert(NumSubExprs > VAL2);
    return cast<Expr>(SubExprs[VAL2]);
  }
  Expr *getWeak() const {
    assert(NumSubExprs > WEAK);
    return cast<Expr>(SubExprs[WEAK]);
  }
  QualType getValueType() const;

  AtomicOp getOp() const { return Op; }
  unsigned getNumSubExprs() const { return NumSubExprs; }

  Expr **getSubExprs() { return reinterpret_cast<Expr **>(SubExprs); }
  const Expr * const *getSubExprs() const {
    return reinterpret_cast<Expr * const *>(SubExprs);
  }

  bool isVolatile() const {
    return getPtr()->getType()->getPointeeType().isVolatileQualified();
  }

  bool isCmpXChg() const {
    return getOp() == AO__c11_atomic_compare_exchange_strong ||
           getOp() == AO__c11_atomic_compare_exchange_weak ||
           getOp() == AO__opencl_atomic_compare_exchange_strong ||
           getOp() == AO__opencl_atomic_compare_exchange_weak ||
           getOp() == AO__atomic_compare_exchange ||
           getOp() == AO__atomic_compare_exchange_n;
  }

  bool isOpenCL() const {
    return getOp() >= AO__opencl_atomic_init &&
           getOp() <= AO__opencl_atomic_fetch_max;
  }

  SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
  SourceLocation getRParenLoc() const { return RParenLoc; }

  SourceLocation getBeginLoc() const LLVM_READONLY { return BuiltinLoc; }
  SourceLocation getEndLoc() const LLVM_READONLY { return RParenLoc; }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == AtomicExprClass;
  }

  // Iterators
  child_range children() {
    return child_range(SubExprs, SubExprs+NumSubExprs);
  }
  const_child_range children() const {
    return const_child_range(SubExprs, SubExprs + NumSubExprs);
  }

  /// Get atomic scope model for the atomic op code.
  /// \return empty atomic scope model if the atomic op code does not have
  ///   a scope operand.
  static std::unique_ptr<AtomicScopeModel> getScopeModel(AtomicOp Op) {
    auto Kind =
        (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
            ? AtomicScopeModelKind::OpenCL
            : AtomicScopeModelKind::None;
    return AtomicScopeModel::create(Kind);
  }

  /// Get atomic scope model.
  /// \return empty atomic scope model if this atomic expression does not have
  ///   a scope operand.
  std::unique_ptr<AtomicScopeModel> getScopeModel() const {
    return getScopeModel(getOp());
  }
};

/// TypoExpr - Internal placeholder for expressions where typo correction
/// still needs to be performed and/or an error diagnostic emitted.
class TypoExpr : public Expr {
public:
  TypoExpr(QualType T)
      : Expr(TypoExprClass, T, VK_LValue, OK_Ordinary,
             /*isTypeDependent*/ true,
             /*isValueDependent*/ true,
             /*isInstantiationDependent*/ true,
             /*containsUnexpandedParameterPack*/ false) {
    assert(T->isDependentType() && "TypoExpr given a non-dependent type");
  }

  child_range children() {
    return child_range(child_iterator(), child_iterator());
  }
  const_child_range children() const {
    return const_child_range(const_child_iterator(), const_child_iterator());
  }

  SourceLocation getBeginLoc() const LLVM_READONLY { return SourceLocation(); }
  SourceLocation getEndLoc() const LLVM_READONLY { return SourceLocation(); }

  static bool classof(const Stmt *T) {
    return T->getStmtClass() == TypoExprClass;
  }
};

} // end namespace clang

#endif // LLVM_CLANG_AST_EXPR_H
Index: projects/clang800-import/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
===================================================================
--- projects/clang800-import/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp (revision 344547)
+++ projects/clang800-import/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp (revision 344548)
@@ -1,2426 +1,2426 @@
//===--- CGDecl.cpp - Emit LLVM Code for declarations ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Decl nodes as LLVM code.
// //===----------------------------------------------------------------------===// #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" using namespace clang; using namespace CodeGen; void CodeGenFunction::EmitDecl(const Decl &D) { switch (D.getKind()) { case Decl::BuiltinTemplate: case Decl::TranslationUnit: case Decl::ExternCContext: case Decl::Namespace: case Decl::UnresolvedUsingTypename: case Decl::ClassTemplateSpecialization: case Decl::ClassTemplatePartialSpecialization: case Decl::VarTemplateSpecialization: case Decl::VarTemplatePartialSpecialization: case Decl::TemplateTypeParm: case Decl::UnresolvedUsingValue: case Decl::NonTypeTemplateParm: case Decl::CXXDeductionGuide: case Decl::CXXMethod: case Decl::CXXConstructor: case Decl::CXXDestructor: case Decl::CXXConversion: case Decl::Field: case Decl::MSProperty: case Decl::IndirectField: case Decl::ObjCIvar: case Decl::ObjCAtDefsField: case Decl::ParmVar: case Decl::ImplicitParam: case Decl::ClassTemplate: case Decl::VarTemplate: case Decl::FunctionTemplate: case Decl::TypeAliasTemplate: case Decl::TemplateTemplateParm: case Decl::ObjCMethod: case Decl::ObjCCategory: case Decl::ObjCProtocol: case Decl::ObjCInterface: case Decl::ObjCCategoryImpl: case Decl::ObjCImplementation: case Decl::ObjCProperty: case Decl::ObjCCompatibleAlias: case Decl::PragmaComment: case Decl::PragmaDetectMismatch: case Decl::AccessSpec: case Decl::LinkageSpec: case Decl::Export: case Decl::ObjCPropertyImpl: case Decl::FileScopeAsm: case Decl::Friend: case Decl::FriendTemplate: case Decl::Block: case Decl::Captured: case Decl::ClassScopeFunctionSpecialization: case Decl::UsingShadow: case Decl::ConstructorUsingShadow: case Decl::ObjCTypeParam: case Decl::Binding: llvm_unreachable("Declaration should not be in declstmts!"); case Decl::Function: // void X(); case Decl::Record: // struct/union/class X; case Decl::Enum: // enum X; case Decl::EnumConstant: // enum ? { X = ? } case Decl::CXXRecord: // struct/union/class X; [C++] case Decl::StaticAssert: // static_assert(X, ""); [C++0x] case Decl::Label: // __label__ x; case Decl::Import: case Decl::OMPThreadPrivate: case Decl::OMPCapturedExpr: case Decl::OMPRequires: case Decl::Empty: // None of these decls require codegen support. 
    return;

  case Decl::NamespaceAlias:
    if (CGDebugInfo *DI = getDebugInfo())
      DI->EmitNamespaceAlias(cast<NamespaceAliasDecl>(D));
    return;
  case Decl::Using:          // using X; [C++]
    if (CGDebugInfo *DI = getDebugInfo())
      DI->EmitUsingDecl(cast<UsingDecl>(D));
    return;
  case Decl::UsingPack:
    for (auto *Using : cast<UsingPackDecl>(D).expansions())
      EmitDecl(*Using);
    return;
  case Decl::UsingDirective: // using namespace X; [C++]
    if (CGDebugInfo *DI = getDebugInfo())
      DI->EmitUsingDirective(cast<UsingDirectiveDecl>(D));
    return;
  case Decl::Var:
  case Decl::Decomposition: {
    const VarDecl &VD = cast<VarDecl>(D);
    assert(VD.isLocalVarDecl() &&
           "Should not see file-scope variables inside a function!");
    EmitVarDecl(VD);
    if (auto *DD = dyn_cast<DecompositionDecl>(&VD))
      for (auto *B : DD->bindings())
        if (auto *HD = B->getHoldingVar())
          EmitVarDecl(*HD);
    return;
  }

  case Decl::OMPDeclareReduction:
    return CGM.EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(&D), this);

  case Decl::Typedef:      // typedef int X;
  case Decl::TypeAlias: {  // using X = int; [C++0x]
    const TypedefNameDecl &TD = cast<TypedefNameDecl>(D);
    QualType Ty = TD.getUnderlyingType();

    if (Ty->isVariablyModifiedType())
      EmitVariablyModifiedType(Ty);
  }
  }
}

/// EmitVarDecl - This method handles emission of any variable declaration
/// inside a function, including static vars etc.
void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
  if (D.hasExternalStorage())
    // Don't emit it now, allow it to be emitted lazily on its first use.
    return;

  // Some function-scope variable does not have static storage but still
  // needs to be emitted like a static variable, e.g. a function-scope
  // variable in constant address space in OpenCL.
  if (D.getStorageDuration() != SD_Automatic) {
    // Static sampler variables translated to function calls.
    if (D.getType()->isSamplerT())
      return;

    llvm::GlobalValue::LinkageTypes Linkage =
        CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false);

    // FIXME: We need to force the emission/use of a guard variable for
    // some variables even if we can constant-evaluate them because
    // we can't guarantee every translation unit will constant-evaluate them.
    return EmitStaticVarDecl(D, Linkage);
  }

  if (D.getType().getAddressSpace() == LangAS::opencl_local)
    return CGM.getOpenCLRuntime().EmitWorkGroupLocalVarDecl(*this, D);

  assert(D.hasLocalStorage());
  return EmitAutoVarDecl(D);
}

static std::string getStaticDeclName(CodeGenModule &CGM, const VarDecl &D) {
  if (CGM.getLangOpts().CPlusPlus)
    return CGM.getMangledName(&D).str();

  // If this isn't C++, we don't need a mangled name, just a pretty one.
  assert(!D.isExternallyVisible() && "name shouldn't matter");
  std::string ContextName;
  const DeclContext *DC = D.getDeclContext();
  if (auto *CD = dyn_cast<CapturedDecl>(DC))
    DC = cast<DeclContext>(CD->getNonClosureContext());
  if (const auto *FD = dyn_cast<FunctionDecl>(DC))
    ContextName = CGM.getMangledName(FD);
  else if (const auto *BD = dyn_cast<BlockDecl>(DC))
    ContextName = CGM.getBlockMangledName(GlobalDecl(), BD);
  else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(DC))
    ContextName = OMD->getSelector().getAsString();
  else
    llvm_unreachable("Unknown context for static var decl");

  ContextName += "." + D.getNameAsString();
  return ContextName;
}

llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
    const VarDecl &D, llvm::GlobalValue::LinkageTypes Linkage) {
  // In general, we don't always emit static var decls once before we reference
  // them. It is possible to reference them before emitting the function that
  // contains them, and it is possible to emit the containing function multiple
  // times.
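// Illustrative example (assumed, not taken from this change): for C code such
// as
//   void f(void) { static int x; }
// the name computed by getStaticDeclName above is "f.x", so the emitted
// global looks roughly like
//   @f.x = internal global i32 0, align 4
// whereas in C++ the variable's proper mangled name would be used instead.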
  if (llvm::Constant *ExistingGV = StaticLocalDeclMap[&D])
    return ExistingGV;

  QualType Ty = D.getType();
  assert(Ty->isConstantSizeType() && "VLAs can't be static");

  // Use the label if the variable is renamed with the asm-label extension.
  std::string Name;
  if (D.hasAttr<AsmLabelAttr>())
    Name = getMangledName(&D);
  else
    Name = getStaticDeclName(*this, D);

  llvm::Type *LTy = getTypes().ConvertTypeForMem(Ty);
  LangAS AS = GetGlobalVarAddressSpace(&D);
  unsigned TargetAS = getContext().getTargetAddressSpace(AS);

  // OpenCL variables in local address space and CUDA shared
  // variables cannot have an initializer.
  llvm::Constant *Init = nullptr;
  if (Ty.getAddressSpace() == LangAS::opencl_local ||
      D.hasAttr<CUDASharedAttr>())
    Init = llvm::UndefValue::get(LTy);
  else
    Init = EmitNullConstant(Ty);

  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
      getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name,
      nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
  GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());

  if (supportsCOMDAT() && GV->isWeakForLinker())
    GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));

  if (D.getTLSKind())
    setTLSMode(GV, D);

  setGVProperties(GV, &D);

  // Make sure the result is of the correct type.
  LangAS ExpectedAS = Ty.getAddressSpace();
  llvm::Constant *Addr = GV;
  if (AS != ExpectedAS) {
    Addr = getTargetCodeGenInfo().performAddrSpaceCast(
        *this, GV, AS, ExpectedAS,
        LTy->getPointerTo(getContext().getTargetAddressSpace(ExpectedAS)));
  }

  setStaticLocalDeclAddress(&D, Addr);

  // Ensure that the static local gets initialized by making sure the parent
  // function gets emitted eventually.
  const Decl *DC = cast<Decl>(D.getDeclContext());

  // We can't name blocks or captured statements directly, so try to emit their
  // parents.
  if (isa<BlockDecl>(DC) || isa<CapturedDecl>(DC)) {
    DC = DC->getNonClosureContext();
    // FIXME: Ensure that global blocks get emitted.
    if (!DC)
      return Addr;
  }

  GlobalDecl GD;
  if (const auto *CD = dyn_cast<CXXConstructorDecl>(DC))
    GD = GlobalDecl(CD, Ctor_Base);
  else if (const auto *DD = dyn_cast<CXXDestructorDecl>(DC))
    GD = GlobalDecl(DD, Dtor_Base);
  else if (const auto *FD = dyn_cast<FunctionDecl>(DC))
    GD = GlobalDecl(FD);
  else {
    // Don't do anything for Obj-C method decls or global closures. We should
    // never defer them.
    assert(isa<ObjCMethodDecl>(DC) && "unexpected parent code decl");
  }
  if (GD.getDecl()) {
    // Disable emission of the parent function for the OpenMP device codegen.
    CGOpenMPRuntime::DisableAutoDeclareTargetRAII NoDeclTarget(*this);
    (void)GetAddrOfGlobal(GD);
  }

  return Addr;
}

/// hasNontrivialDestruction - Determine whether a type's destruction is
/// non-trivial. If so, and the variable uses static initialization, we must
/// register its destructor to run on exit.
static bool hasNontrivialDestruction(QualType T) {
  CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
  return RD && !RD->hasTrivialDestructor();
}

/// AddInitializerToStaticVarDecl - Add the initializer for 'D' to the
/// global variable that has already been created for it.  If the initializer
/// has a different type than GV does, this may free GV and return a different
/// one.  Otherwise it just returns GV.
llvm::GlobalVariable *
CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
                                               llvm::GlobalVariable *GV) {
  ConstantEmitter emitter(*this);
  llvm::Constant *Init = emitter.tryEmitForInitializer(D);

  // If constant emission failed, then this should be a C++ static
  // initializer.
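  // For instance (a hedged sketch, not from the original source), a local
  //   static std::string Name = computeName();
  // cannot be emitted as a constant, so it takes the guarded
  // dynamic-initialization path below (e.g. __cxa_guard_acquire/release
  // under the Itanium C++ ABI).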
  if (!Init) {
    if (!getLangOpts().CPlusPlus)
      CGM.ErrorUnsupported(D.getInit(), "constant l-value expression");
    else if (HaveInsertPoint()) {
      // Since we have a static initializer, this global variable can't
      // be constant.
      GV->setConstant(false);

      EmitCXXGuardedInit(D, GV, /*PerformInit*/true);
    }
    return GV;
  }

  // The initializer may differ in type from the global. Rewrite
  // the global to match the initializer.  (We have to do this
  // because some types, like unions, can't be completely represented
  // in the LLVM type system.)
  if (GV->getType()->getElementType() != Init->getType()) {
    llvm::GlobalVariable *OldGV = GV;

    GV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(),
                                  OldGV->isConstant(),
                                  OldGV->getLinkage(), Init, "",
                                  /*InsertBefore*/ OldGV,
                                  OldGV->getThreadLocalMode(),
                           CGM.getContext().getTargetAddressSpace(D.getType()));
    GV->setVisibility(OldGV->getVisibility());
    GV->setDSOLocal(OldGV->isDSOLocal());
    GV->setComdat(OldGV->getComdat());

    // Steal the name of the old global
    GV->takeName(OldGV);

    // Replace all uses of the old global with the new global
    llvm::Constant *NewPtrForOldDecl =
        llvm::ConstantExpr::getBitCast(GV, OldGV->getType());
    OldGV->replaceAllUsesWith(NewPtrForOldDecl);

    // Erase the old global, since it is no longer used.
    OldGV->eraseFromParent();
  }

  GV->setConstant(CGM.isTypeConstant(D.getType(), true));
  GV->setInitializer(Init);

  emitter.finalize(GV);

  if (hasNontrivialDestruction(D.getType()) && HaveInsertPoint()) {
    // We have a constant initializer, but a nontrivial destructor. We still
    // need to perform a guarded "initialization" in order to register the
    // destructor.
    EmitCXXGuardedInit(D, GV, /*PerformInit*/false);
  }

  return GV;
}

void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
                                      llvm::GlobalValue::LinkageTypes Linkage) {
  // Check to see if we already have a global variable for this
  // declaration.  This can happen when double-emitting function
  // bodies, e.g. with complete and base constructors.
  llvm::Constant *addr = CGM.getOrCreateStaticVarDecl(D, Linkage);
  CharUnits alignment = getContext().getDeclAlign(&D);

  // Store into LocalDeclMap before generating initializer to handle
  // circular references.
  setAddrOfLocalVar(&D, Address(addr, alignment));

  // We can't have a VLA here, but we can have a pointer to a VLA,
  // even though that doesn't really make any sense.
  // Make sure to evaluate VLA bounds now so that we have them for later.
  if (D.getType()->isVariablyModifiedType())
    EmitVariablyModifiedType(D.getType());

  // Save the type in case adding the initializer forces a type change.
  llvm::Type *expectedType = addr->getType();

  llvm::GlobalVariable *var =
      cast<llvm::GlobalVariable>(addr->stripPointerCasts());

  // CUDA's local and local static __shared__ variables should not
  // have any non-empty initializers. This is ensured by Sema.
  // Whatever initializer such variable may have when it gets here is
  // a no-op and should not be emitted.
  bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
                         D.hasAttr<CUDASharedAttr>();
  // If this value has an initializer, emit it.
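  // Example of the type rewrite that AddInitializerToStaticVarDecl (called
  // just below) may perform (an assumed sketch, not from the original source):
  //   union U { int i; float f; };
  //   static union U u = { .f = 1.0f };
  // The global may first be created with the union's converted LLVM type
  // (say { i32 }), while the emitted initializer has type { float }; a new
  // global is then created, all uses are RAUW'd, and the old one is erased.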
  if (D.getInit() && !isCudaSharedVar)
    var = AddInitializerToStaticVarDecl(D, var);

  var->setAlignment(alignment.getQuantity());

  if (D.hasAttr<AnnotateAttr>())
    CGM.AddGlobalAnnotations(&D, var);

  if (auto *SA = D.getAttr<PragmaClangBSSSectionAttr>())
    var->addAttribute("bss-section", SA->getName());
  if (auto *SA = D.getAttr<PragmaClangDataSectionAttr>())
    var->addAttribute("data-section", SA->getName());
  if (auto *SA = D.getAttr<PragmaClangRodataSectionAttr>())
    var->addAttribute("rodata-section", SA->getName());

  if (const SectionAttr *SA = D.getAttr<SectionAttr>())
    var->setSection(SA->getName());

  if (D.hasAttr<UsedAttr>())
    CGM.addUsedGlobal(var);

  // We may have to cast the constant because of the initializer
  // mismatch above.
  //
  // FIXME: It is really dangerous to store this in the map; if anyone
  // RAUW's the GV uses of this constant will be invalid.
  llvm::Constant *castedAddr =
    llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(var, expectedType);
  if (var != castedAddr)
    LocalDeclMap.find(&D)->second = Address(castedAddr, alignment);
  CGM.setStaticLocalDeclAddress(&D, castedAddr);

  CGM.getSanitizerMetadata()->reportGlobalToASan(var, D);

  // Emit global variable debug descriptor for static vars.
  CGDebugInfo *DI = getDebugInfo();
  if (DI &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) {
    DI->setLocation(D.getLocation());
    DI->EmitGlobalVariable(var, &D);
  }
}

namespace {
struct DestroyObject final : EHScopeStack::Cleanup {
  DestroyObject(Address addr, QualType type,
                CodeGenFunction::Destroyer *destroyer,
                bool useEHCleanupForArray)
      : addr(addr), type(type), destroyer(destroyer),
        useEHCleanupForArray(useEHCleanupForArray) {}

  Address addr;
  QualType type;
  CodeGenFunction::Destroyer *destroyer;
  bool useEHCleanupForArray;

  void Emit(CodeGenFunction &CGF, Flags flags) override {
    // Don't use an EH cleanup recursively from an EH cleanup.
    bool useEHCleanupForArray =
        flags.isForNormalCleanup() && this->useEHCleanupForArray;

    CGF.emitDestroy(addr, type, destroyer, useEHCleanupForArray);
  }
};

template <class Derived>
struct DestroyNRVOVariable : EHScopeStack::Cleanup {
  DestroyNRVOVariable(Address addr, llvm::Value *NRVOFlag)
      : NRVOFlag(NRVOFlag), Loc(addr) {}

  llvm::Value *NRVOFlag;
  Address Loc;

  void Emit(CodeGenFunction &CGF, Flags flags) override {
    // Along the exceptions path we always execute the dtor.
    bool NRVO = flags.isForNormalCleanup() && NRVOFlag;

    llvm::BasicBlock *SkipDtorBB = nullptr;
    if (NRVO) {
      // If we exited via NRVO, we skip the destructor call.
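      // Roughly, the control flow emitted below looks like this (illustrative
      // IR only, not from the original source):
      //   %nrvo.val = load i1, i1* %nrvo
      //   br i1 %nrvo.val, label %nrvo.skipdtor, label %nrvo.unused
      // nrvo.unused:                ; NRVO did not fire: run the destructor
      //   ...
      // nrvo.skipdtor:              ; NRVO fired: the destructor is elided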
      llvm::BasicBlock *RunDtorBB = CGF.createBasicBlock("nrvo.unused");
      SkipDtorBB = CGF.createBasicBlock("nrvo.skipdtor");
      llvm::Value *DidNRVO = CGF.Builder.CreateFlagLoad(NRVOFlag, "nrvo.val");
      CGF.Builder.CreateCondBr(DidNRVO, SkipDtorBB, RunDtorBB);
      CGF.EmitBlock(RunDtorBB);
    }

    static_cast<Derived *>(this)->emitDestructorCall(CGF);

    if (NRVO) CGF.EmitBlock(SkipDtorBB);
  }

  virtual ~DestroyNRVOVariable() = default;
};

struct DestroyNRVOVariableCXX final
    : DestroyNRVOVariable<DestroyNRVOVariableCXX> {
  DestroyNRVOVariableCXX(Address addr, const CXXDestructorDecl *Dtor,
                         llvm::Value *NRVOFlag)
      : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, NRVOFlag),
        Dtor(Dtor) {}

  const CXXDestructorDecl *Dtor;

  void emitDestructorCall(CodeGenFunction &CGF) {
    CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
                              /*ForVirtualBase=*/false,
                              /*Delegating=*/false, Loc);
  }
};

struct DestroyNRVOVariableC final
    : DestroyNRVOVariable<DestroyNRVOVariableC> {
  DestroyNRVOVariableC(Address addr, llvm::Value *NRVOFlag, QualType Ty)
      : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, NRVOFlag), Ty(Ty) {}

  QualType Ty;

  void emitDestructorCall(CodeGenFunction &CGF) {
    CGF.destroyNonTrivialCStruct(CGF, Loc, Ty);
  }
};

struct CallStackRestore final : EHScopeStack::Cleanup {
  Address Stack;
  CallStackRestore(Address Stack) : Stack(Stack) {}
  void Emit(CodeGenFunction &CGF, Flags flags) override {
    llvm::Value *V = CGF.Builder.CreateLoad(Stack);
    llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore);
    CGF.Builder.CreateCall(F, V);
  }
};

struct ExtendGCLifetime final : EHScopeStack::Cleanup {
  const VarDecl &Var;
  ExtendGCLifetime(const VarDecl *var) : Var(*var) {}

  void Emit(CodeGenFunction &CGF, Flags flags) override {
    // Compute the address of the local variable, in case it's a
    // byref or something.
    DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(&Var), false,
                    Var.getType(), VK_LValue, SourceLocation());
    llvm::Value *value = CGF.EmitLoadOfScalar(CGF.EmitDeclRefLValue(&DRE),
                                              SourceLocation());
    CGF.EmitExtendGCLifetime(value);
  }
};

struct CallCleanupFunction final : EHScopeStack::Cleanup {
  llvm::Constant *CleanupFn;
  const CGFunctionInfo &FnInfo;
  const VarDecl &Var;

  CallCleanupFunction(llvm::Constant *CleanupFn, const CGFunctionInfo *Info,
                      const VarDecl *Var)
    : CleanupFn(CleanupFn), FnInfo(*Info), Var(*Var) {}

  void Emit(CodeGenFunction &CGF, Flags flags) override {
    DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(&Var), false,
                    Var.getType(), VK_LValue, SourceLocation());
    // Compute the address of the local variable, in case it's a byref
    // or something.
    llvm::Value *Addr = CGF.EmitDeclRefLValue(&DRE).getPointer();

    // In some cases, the type of the function argument will be different from
    // the type of the pointer. An example of this is
    //    void f(void* arg);
    //    __attribute__((cleanup(f))) void *g;
    //
    // To fix this we insert a bitcast here.
    QualType ArgTy = FnInfo.arg_begin()->type;
    llvm::Value *Arg =
      CGF.Builder.CreateBitCast(Addr, CGF.ConvertType(ArgTy));

    CallArgList Args;
    Args.add(RValue::get(Arg),
             CGF.getContext().getPointerType(Var.getType()));
    auto Callee = CGCallee::forDirect(CleanupFn);
    CGF.EmitCall(FnInfo, Callee, ReturnValueSlot(), Args);
  }
};
} // end anonymous namespace

/// EmitAutoVarWithLifetime - Does the setup required for an automatic
/// variable with lifetime.
static void EmitAutoVarWithLifetime(CodeGenFunction &CGF, const VarDecl &var,
                                    Address addr,
                                    Qualifiers::ObjCLifetime lifetime) {
  switch (lifetime) {
  case Qualifiers::OCL_None:
    llvm_unreachable("present but none");

  case Qualifiers::OCL_ExplicitNone:
    // nothing to do
    break;

  case Qualifiers::OCL_Strong: {
    CodeGenFunction::Destroyer *destroyer =
      (var.hasAttr<ObjCPreciseLifetimeAttr>() ?
       CodeGenFunction::destroyARCStrongPrecise :
       CodeGenFunction::destroyARCStrongImprecise);

    CleanupKind cleanupKind = CGF.getARCCleanupKind();
    CGF.pushDestroy(cleanupKind, addr, var.getType(), destroyer,
                    cleanupKind & EHCleanup);
    break;
  }
  case Qualifiers::OCL_Autoreleasing:
    // nothing to do
    break;

  case Qualifiers::OCL_Weak:
    // __weak objects always get EH cleanups; otherwise, exceptions
    // could cause really nasty crashes instead of mere leaks.
    CGF.pushDestroy(NormalAndEHCleanup, addr, var.getType(),
                    CodeGenFunction::destroyARCWeak,
                    /*useEHCleanup*/ true);
    break;
  }
}

static bool isAccessedBy(const VarDecl &var, const Stmt *s) {
  if (const Expr *e = dyn_cast<Expr>(s)) {
    // Skip the most common kinds of expressions that make
    // hierarchy-walking expensive.
    s = e = e->IgnoreParenCasts();

    if (const DeclRefExpr *ref = dyn_cast<DeclRefExpr>(e))
      return (ref->getDecl() == &var);
    if (const BlockExpr *be = dyn_cast<BlockExpr>(e)) {
      const BlockDecl *block = be->getBlockDecl();
      for (const auto &I : block->captures()) {
        if (I.getVariable() == &var)
          return true;
      }
    }
  }

  for (const Stmt *SubStmt : s->children())
    // SubStmt might be null; as in missing decl or conditional of an if-stmt.
    if (SubStmt && isAccessedBy(var, SubStmt))
      return true;

  return false;
}

static bool isAccessedBy(const ValueDecl *decl, const Expr *e) {
  if (!decl) return false;
  if (!isa<VarDecl>(decl)) return false;
  const VarDecl *var = cast<VarDecl>(decl);
  return isAccessedBy(*var, e);
}

static bool tryEmitARCCopyWeakInit(CodeGenFunction &CGF,
                                   const LValue &destLV, const Expr *init) {
  bool needsCast = false;

  while (auto castExpr = dyn_cast<CastExpr>(init->IgnoreParens())) {
    switch (castExpr->getCastKind()) {
    // Look through casts that don't require representation changes.
    case CK_NoOp:
    case CK_BitCast:
    case CK_BlockPointerToObjCPointerCast:
      needsCast = true;
      break;

    // If we find an l-value to r-value cast from a __weak variable,
    // emit this operation as a copy or move.
    case CK_LValueToRValue: {
      const Expr *srcExpr = castExpr->getSubExpr();
      if (srcExpr->getType().getObjCLifetime() != Qualifiers::OCL_Weak)
        return false;

      // Emit the source l-value.
      LValue srcLV = CGF.EmitLValue(srcExpr);

      // Handle a formal type change to avoid asserting.
      auto srcAddr = srcLV.getAddress();
      if (needsCast) {
        srcAddr = CGF.Builder.CreateElementBitCast(srcAddr,
                                         destLV.getAddress().getElementType());
      }

      // If it was an l-value, use objc_copyWeak.
      if (srcExpr->getValueKind() == VK_LValue) {
        CGF.EmitARCCopyWeak(destLV.getAddress(), srcAddr);
      } else {
        assert(srcExpr->getValueKind() == VK_XValue);
        CGF.EmitARCMoveWeak(destLV.getAddress(), srcAddr);
      }
      return true;
    }

    // Stop at anything else.
    default:
      return false;
    }

    init = castExpr->getSubExpr();
  }
  return false;
}

static void drillIntoBlockVariable(CodeGenFunction &CGF,
                                   LValue &lvalue,
                                   const VarDecl *var) {
  lvalue.setAddress(CGF.emitBlockByrefAddress(lvalue.getAddress(), var));
}

void CodeGenFunction::EmitNullabilityCheck(LValue LHS, llvm::Value *RHS,
                                           SourceLocation Loc) {
  if (!SanOpts.has(SanitizerKind::NullabilityAssign))
    return;

  auto Nullability = LHS.getType()->getNullability(getContext());
  if (!Nullability || *Nullability != NullabilityKind::NonNull)
    return;

  // Check if the right hand side of the assignment is nonnull, if the left
  // hand side must be nonnull.
  SanitizerScope SanScope(this);
  llvm::Value *IsNotNull = Builder.CreateIsNotNull(RHS);
  llvm::Constant *StaticData[] = {
      EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(LHS.getType()),
      llvm::ConstantInt::get(Int8Ty, 0), // The LogAlignment info is unused.
      llvm::ConstantInt::get(Int8Ty, TCK_NonnullAssign)};
  EmitCheck({{IsNotNull, SanitizerKind::NullabilityAssign}},
            SanitizerHandler::TypeMismatch, StaticData, RHS);
}

void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
                                     LValue lvalue, bool capturedByInit) {
  Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime();
  if (!lifetime) {
    llvm::Value *value = EmitScalarExpr(init);
    if (capturedByInit)
      drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
    EmitNullabilityCheck(lvalue, value, init->getExprLoc());
    EmitStoreThroughLValue(RValue::get(value), lvalue, true);
    return;
  }

  if (const CXXDefaultInitExpr *DIE = dyn_cast<CXXDefaultInitExpr>(init))
    init = DIE->getExpr();

  // If we're emitting a value with lifetime, we have to do the
  // initialization *before* we leave the cleanup scopes.
  if (const FullExpr *fe = dyn_cast<FullExpr>(init)) {
    enterFullExpression(fe);
    init = fe->getSubExpr();
  }
  CodeGenFunction::RunCleanupsScope Scope(*this);

  // We have to maintain the illusion that the variable is
  // zero-initialized.  If the variable might be accessed in its
  // initializer, zero-initialize before running the initializer, then
  // actually perform the initialization with an assign.
  bool accessedByInit = false;
  if (lifetime != Qualifiers::OCL_ExplicitNone)
    accessedByInit = (capturedByInit || isAccessedBy(D, init));
  if (accessedByInit) {
    LValue tempLV = lvalue;
    // Drill down to the __block object if necessary.
    if (capturedByInit) {
      // We can use a simple GEP for this because it can't have been
      // moved yet.
      tempLV.setAddress(emitBlockByrefAddress(tempLV.getAddress(),
                                              cast<VarDecl>(D),
                                              /*follow*/ false));
    }

    auto ty = cast<llvm::PointerType>(tempLV.getAddress().getElementType());
    llvm::Value *zero = CGM.getNullPointer(ty, tempLV.getType());

    // If __weak, we want to use a barrier under certain conditions.
    if (lifetime == Qualifiers::OCL_Weak)
      EmitARCInitWeak(tempLV.getAddress(), zero);

    // Otherwise just do a simple store.
    else
      EmitStoreOfScalar(zero, tempLV, /* isInitialization */ true);
  }

  // Emit the initializer.
  llvm::Value *value = nullptr;

  switch (lifetime) {
  case Qualifiers::OCL_None:
    llvm_unreachable("present but none");

  case Qualifiers::OCL_Strong: {
    if (!D || !isa<VarDecl>(D) || !cast<VarDecl>(D)->isARCPseudoStrong()) {
      value = EmitARCRetainScalarExpr(init);
      break;
    }
    // If D is pseudo-strong, treat it like __unsafe_unretained here. This means
    // that we omit the retain, and causes non-autoreleased return values to be
    // immediately released.
    LLVM_FALLTHROUGH;
  }

  case Qualifiers::OCL_ExplicitNone:
    value = EmitARCUnsafeUnretainedScalarExpr(init);
    break;

  case Qualifiers::OCL_Weak: {
    // If it's not accessed by the initializer, try to emit the
    // initialization with a copy or move.
    if (!accessedByInit && tryEmitARCCopyWeakInit(*this, lvalue, init)) {
      return;
    }

    // No way to optimize a producing initializer into this.  It's not
    // worth optimizing for, because the value will immediately
    // disappear in the common case.
    value = EmitScalarExpr(init);

    if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
    if (accessedByInit)
      EmitARCStoreWeak(lvalue.getAddress(), value, /*ignored*/ true);
    else
      EmitARCInitWeak(lvalue.getAddress(), value);
    return;
  }

  case Qualifiers::OCL_Autoreleasing:
    value = EmitARCRetainAutoreleaseScalarExpr(init);
    break;
  }

  if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));

  EmitNullabilityCheck(lvalue, value, init->getExprLoc());

  // If the variable might have been accessed by its initializer, we
  // might have to initialize with a barrier.  We have to do this for
  // both __weak and __strong, but __weak got filtered out above.
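  // Example of the barrier case (a hedged sketch, not from the original
  // source):
  //   __strong id x = [self wrap:x]; // 'x' is read by its own initializer
  // The code below loads the old (zero-initialized) value, stores the new
  // one, and only then releases the old value.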
  if (accessedByInit && lifetime == Qualifiers::OCL_Strong) {
    llvm::Value *oldValue = EmitLoadOfScalar(lvalue, init->getExprLoc());
    EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
    EmitARCRelease(oldValue, ARCImpreciseLifetime);
    return;
  }

  EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
}

/// Decide whether we can emit the non-zero parts of the specified initializer
/// with equal or fewer than NumStores scalar stores.
static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
                                               unsigned &NumStores) {
  // Zero and Undef never require any extra stores.
  if (isa<llvm::ConstantAggregateZero>(Init) ||
      isa<llvm::ConstantPointerNull>(Init) ||
      isa<llvm::UndefValue>(Init))
    return true;
  if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) ||
      isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) ||
      isa<llvm::ConstantExpr>(Init))
    return Init->isNullValue() || NumStores--;

  // See if we can emit each element.
  if (isa<llvm::ConstantArray>(Init) || isa<llvm::ConstantStruct>(Init)) {
    for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
      llvm::Constant *Elt = cast<llvm::Constant>(Init->getOperand(i));
      if (!canEmitInitWithFewStoresAfterBZero(Elt, NumStores))
        return false;
    }
    return true;
  }

  if (llvm::ConstantDataSequential *CDS =
        dyn_cast<llvm::ConstantDataSequential>(Init)) {
    for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
      llvm::Constant *Elt = CDS->getElementAsConstant(i);
      if (!canEmitInitWithFewStoresAfterBZero(Elt, NumStores))
        return false;
    }
    return true;
  }

  // Anything else is hard and scary.
  return false;
}

/// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
/// the scalar stores that would be required.
static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
                                        llvm::Constant *Init, Address Loc,
                                        bool isVolatile, CGBuilderTy &Builder) {
  assert(!Init->isNullValue() && !isa<llvm::UndefValue>(Init) &&
         "called emitStoresForInitAfterBZero for zero or undef value.");

  if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) ||
      isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) ||
      isa<llvm::ConstantExpr>(Init)) {
    Builder.CreateStore(Init, Loc, isVolatile);
    return;
  }

  if (llvm::ConstantDataSequential *CDS =
          dyn_cast<llvm::ConstantDataSequential>(Init)) {
    for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
      llvm::Constant *Elt = CDS->getElementAsConstant(i);

      // If necessary, get a pointer to the element and emit it.
      if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
        emitStoresForInitAfterBZero(
            CGM, Elt,
            Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()),
            isVolatile, Builder);
    }
    return;
  }

  assert((isa<llvm::ConstantStruct>(Init) || isa<llvm::ConstantArray>(Init)) &&
         "Unknown value type!");

  for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
    llvm::Constant *Elt = cast<llvm::Constant>(Init->getOperand(i));

    // If necessary, get a pointer to the element and emit it.
    if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
      emitStoresForInitAfterBZero(
          CGM, Elt,
          Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()),
          isVolatile, Builder);
  }
}

/// Decide whether we should use bzero plus some stores to initialize a local
/// variable instead of using a memcpy from a constant global.  It is beneficial
/// to use bzero if the global is all zeros, or mostly zeros and large.
static bool shouldUseBZeroPlusStoresToInitialize(llvm::Constant *Init,
                                                 uint64_t GlobalSize) {
  // If a global is all zeros, always use a bzero.
  if (isa<llvm::ConstantAggregateZero>(Init)) return true;

  // If a non-zero global is <= 32 bytes, always use a memcpy.  If it is large,
  // do it if it will require 6 or fewer scalar stores.
  // TODO: Should the budget depend on the size?  Avoiding a large global
  // warrants plopping in more stores.
  unsigned StoreBudget = 6;
  uint64_t SizeLimit = 32;

  return GlobalSize > SizeLimit &&
         canEmitInitWithFewStoresAfterBZero(Init, StoreBudget);
}

/// Decide whether we should use memset to initialize a local variable instead
/// of using a memcpy from a constant global.  Assumes we've already decided to
Assumes we've already decided to /// not user bzero. /// FIXME We could be more clever, as we are for bzero above, and generate /// memset followed by stores. It's unclear that's worth the effort. static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init, uint64_t GlobalSize) { uint64_t SizeLimit = 32; if (GlobalSize <= SizeLimit) return nullptr; return llvm::isBytewiseValue(Init); } static llvm::Constant *patternFor(CodeGenModule &CGM, llvm::Type *Ty) { // The following value is a guaranteed unmappable pointer value and has a // repeated byte-pattern which makes it easier to synthesize. We use it for // pointers as well as integers so that aggregates are likely to be // initialized with this repeated value. constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull; // For 32-bit platforms it's a bit trickier because, across systems, only the // zero page can reasonably be expected to be unmapped, and even then we need // a very low address. We use a smaller value, and that value sadly doesn't // have a repeated byte-pattern. We don't use it for integers. constexpr uint32_t SmallValue = 0x000000AA; // Floating-point values are initialized as NaNs because they propagate. Using // a repeated byte pattern means that it will be easier to initialize // all-floating-point aggregates and arrays with memset. Further, aggregates // which mix integral and a few floats might also initialize with memset // followed by a handful of stores for the floats. Using fairly unique NaNs // also means they'll be easier to distinguish in a crash. constexpr bool NegativeNaN = true; constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull; if (Ty->isIntOrIntVectorTy()) { unsigned BitWidth = cast( Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) ->getBitWidth(); if (BitWidth <= 64) return llvm::ConstantInt::get(Ty, LargeValue); return llvm::ConstantInt::get( Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue))); } if (Ty->isPtrOrPtrVectorTy()) { auto *PtrTy = cast( Ty->isVectorTy() ? Ty->getVectorElementType() : Ty); unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth( PtrTy->getAddressSpace()); llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth); uint64_t IntValue; switch (PtrWidth) { default: llvm_unreachable("pattern initialization of unsupported pointer width"); case 64: IntValue = LargeValue; break; case 32: IntValue = SmallValue; break; } auto *Int = llvm::ConstantInt::get(IntTy, IntValue); return llvm::ConstantExpr::getIntToPtr(Int, PtrTy); } if (Ty->isFPOrFPVectorTy()) { unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) ->getFltSemantics()); llvm::APInt Payload(64, NaNPayload); if (BitWidth >= 64) Payload = llvm::APInt::getSplat(BitWidth, Payload); return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload); } if (Ty->isArrayTy()) { // Note: this doesn't touch tail padding (at the end of an object, before // the next array object). It is instead handled by replaceUndef. auto *ArrTy = cast(Ty); llvm::SmallVector Element( ArrTy->getNumElements(), patternFor(CGM, ArrTy->getElementType())); return llvm::ConstantArray::get(ArrTy, Element); } // Note: this doesn't touch struct padding. It will initialize as much union // padding as is required for the largest type in the union. Padding is // instead handled by replaceUndef. Stores to structs with volatile members // don't have a volatile qualifier when initialized according to C++. 
  // This is fine because stack-based volatiles don't really have volatile
  // semantics anyways, and the initialization shouldn't be observable.
  auto *StructTy = cast<llvm::StructType>(Ty);
  llvm::SmallVector<llvm::Constant *, 8> Struct(StructTy->getNumElements());
  for (unsigned El = 0; El != Struct.size(); ++El)
    Struct[El] = patternFor(CGM, StructTy->getElementType(El));
  return llvm::ConstantStruct::get(StructTy, Struct);
}

static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D,
                                       CGBuilderTy &Builder,
                                       llvm::Constant *Constant,
                                       CharUnits Align) {
  auto FunctionName = [&](const DeclContext *DC) -> std::string {
    if (const auto *FD = dyn_cast<FunctionDecl>(DC)) {
      if (const auto *CC = dyn_cast<CXXConstructorDecl>(FD))
        return CC->getNameAsString();
      if (const auto *CD = dyn_cast<CXXDestructorDecl>(FD))
        return CD->getNameAsString();
      return CGM.getMangledName(FD);
    } else if (const auto *OM = dyn_cast<ObjCMethodDecl>(DC)) {
      return OM->getNameAsString();
    } else if (isa<BlockDecl>(DC)) {
      return "<block>";
    } else if (isa<CapturedDecl>(DC)) {
      return "<captured>";
    } else {
      llvm::llvm_unreachable_internal("expected a function or method");
    }
  };

  auto *Ty = Constant->getType();
  bool isConstant = true;
  llvm::GlobalVariable *InsertBefore = nullptr;
  unsigned AS = CGM.getContext().getTargetAddressSpace(
      CGM.getStringLiteralAddressSpace());
  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
      CGM.getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage,
      Constant,
      "__const." + FunctionName(D.getParentFunctionOrMethod()) + "." +
          D.getName(),
      InsertBefore, llvm::GlobalValue::NotThreadLocal, AS);
  GV->setAlignment(Align.getQuantity());
  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  Address SrcPtr = Address(GV, Align);
  llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), AS);
  if (SrcPtr.getType() != BP)
    SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
  return SrcPtr;
}

static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
                                  Address Loc, bool isVolatile,
                                  CGBuilderTy &Builder,
                                  llvm::Constant *constant) {
  auto *Ty = constant->getType();
  bool isScalar = Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy() ||
                  Ty->isFPOrFPVectorTy();
  if (isScalar) {
    Builder.CreateStore(constant, Loc, isVolatile);
    return;
  }

  auto *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext());
  auto *IntPtrTy = CGM.getDataLayout().getIntPtrType(CGM.getLLVMContext());

  // If the initializer is all or mostly the same, codegen with bzero / memset
  // then do a few stores afterward.
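  // Illustrative example (assumed, not from the original source): for
  //   int big[4096] = {0, 0, 1};
  // the constant is large and mostly zero, so the path below emits a memset
  // of 0 over the whole object followed by a single scalar store of 1,
  // rather than copying 16 KiB from a constant global.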
  uint64_t ConstantSize = CGM.getDataLayout().getTypeAllocSize(Ty);
  auto *SizeVal = llvm::ConstantInt::get(IntPtrTy, ConstantSize);

  if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
    Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
                         isVolatile);

    bool valueAlreadyCorrect =
        constant->isNullValue() || isa<llvm::UndefValue>(constant);
    if (!valueAlreadyCorrect) {
      Loc = Builder.CreateBitCast(Loc, Ty->getPointerTo(Loc.getAddressSpace()));
      emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder);
    }
    return;
  }

  llvm::Value *Pattern = shouldUseMemSetToInitialize(constant, ConstantSize);
  if (Pattern) {
    uint64_t Value = 0x00;
    if (!isa<llvm::UndefValue>(Pattern)) {
      const llvm::APInt &AP = cast<llvm::ConstantInt>(Pattern)->getValue();
      assert(AP.getBitWidth() <= 8);
      Value = AP.getLimitedValue();
    }
    Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal,
                         isVolatile);
    return;
  }

  Builder.CreateMemCpy(
      Loc,
      createUnnamedGlobalFrom(CGM, D, Builder, constant, Loc.getAlignment()),
      SizeVal, isVolatile);
}

static void emitStoresForZeroInit(CodeGenModule &CGM, const VarDecl &D,
                                  Address Loc, bool isVolatile,
                                  CGBuilderTy &Builder) {
  llvm::Type *ElTy = Loc.getElementType();
  llvm::Constant *constant = llvm::Constant::getNullValue(ElTy);
  emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant);
}

static void emitStoresForPatternInit(CodeGenModule &CGM, const VarDecl &D,
                                     Address Loc, bool isVolatile,
                                     CGBuilderTy &Builder) {
  llvm::Type *ElTy = Loc.getElementType();
  llvm::Constant *constant = patternFor(CGM, ElTy);
  assert(!isa<llvm::UndefValue>(constant));
  emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant);
}

static bool containsUndef(llvm::Constant *constant) {
  auto *Ty = constant->getType();
  if (isa<llvm::UndefValue>(constant))
    return true;
  if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy())
    for (llvm::Use &Op : constant->operands())
      if (containsUndef(cast<llvm::Constant>(Op)))
        return true;
  return false;
}

static llvm::Constant *replaceUndef(llvm::Constant *constant) {
  // FIXME: when doing pattern initialization, replace undef with 0xAA instead.
  // FIXME: also replace padding between values by creating a new struct type
  //        which has no padding.
  auto *Ty = constant->getType();
  if (isa<llvm::UndefValue>(constant))
    return llvm::Constant::getNullValue(Ty);
  if (!(Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()))
    return constant;
  if (!containsUndef(constant))
    return constant;
  llvm::SmallVector<llvm::Constant *, 8> Values(constant->getNumOperands());
  for (unsigned Op = 0, NumOp = constant->getNumOperands(); Op != NumOp; ++Op) {
    auto *OpValue = cast<llvm::Constant>(constant->getOperand(Op));
    Values[Op] = replaceUndef(OpValue);
  }
  if (Ty->isStructTy())
    return llvm::ConstantStruct::get(cast<llvm::StructType>(Ty), Values);
  if (Ty->isArrayTy())
    return llvm::ConstantArray::get(cast<llvm::ArrayType>(Ty), Values);
  assert(Ty->isVectorTy());
  return llvm::ConstantVector::get(Values);
}

/// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a
/// variable declaration with auto, register, or no storage class specifier.
/// These turn into simple stack objects, or GlobalValues depending on target.
void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) {
  AutoVarEmission emission = EmitAutoVarAlloca(D);
  EmitAutoVarInit(emission);
  EmitAutoVarCleanups(emission);
}

/// Emit a lifetime.begin marker if some criteria are satisfied.
/// \return a pointer to the temporary size Value if a marker was emitted, null
/// otherwise
llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size,
                                                llvm::Value *Addr) {
  if (!ShouldEmitLifetimeMarkers)
    return nullptr;

  assert(Addr->getType()->getPointerAddressSpace() ==
             CGM.getDataLayout().getAllocaAddrSpace() &&
         "Pointer should be in alloca address space");
  llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size);
  Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy);
  llvm::CallInst *C =
      Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {SizeV, Addr});
  C->setDoesNotThrow();
  return SizeV;
}

void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) {
  assert(Addr->getType()->getPointerAddressSpace() ==
             CGM.getDataLayout().getAllocaAddrSpace() &&
         "Pointer should be in alloca address space");
  Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy);
  llvm::CallInst *C =
      Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr});
  C->setDoesNotThrow();
}

void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
    CGDebugInfo *DI, const VarDecl &D, bool EmitDebugInfo) {
  // For each dimension stores its QualType and corresponding
  // size-expression Value.
  SmallVector<VlaSizePair, 4> Dimensions;
  SmallVector<IdentifierInfo *, 4> VLAExprNames;

  // Break down the array into individual dimensions.
  QualType Type1D = D.getType();
  while (getContext().getAsVariableArrayType(Type1D)) {
    auto VlaSize = getVLAElements1D(Type1D);
    if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
      Dimensions.emplace_back(C, Type1D.getUnqualifiedType());
    else {
      // Generate a locally unique name for the size expression.
      Twine Name = Twine("__vla_expr") + Twine(VLAExprCounter++);
      SmallString<12> Buffer;
      StringRef NameRef = Name.toStringRef(Buffer);
      auto &Ident = getContext().Idents.getOwn(NameRef);
      VLAExprNames.push_back(&Ident);
      auto SizeExprAddr =
          CreateDefaultAlignTempAlloca(VlaSize.NumElts->getType(), NameRef);
      Builder.CreateStore(VlaSize.NumElts, SizeExprAddr);
      Dimensions.emplace_back(SizeExprAddr.getPointer(),
                              Type1D.getUnqualifiedType());
    }
    Type1D = VlaSize.Type;
  }

  if (!EmitDebugInfo)
    return;

  // Register each dimension's size-expression with a DILocalVariable,
  // so that it can be used by CGDebugInfo when instantiating a DISubrange
  // to describe this array.
  unsigned NameIdx = 0;
  for (auto &VlaSize : Dimensions) {
    llvm::Metadata *MD;
    if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
      MD = llvm::ConstantAsMetadata::get(C);
    else {
      // Create an artificial VarDecl to generate debug info for.
      IdentifierInfo *NameIdent = VLAExprNames[NameIdx++];
      auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType();
      auto QT = getContext().getIntTypeForBitwidth(
          VlaExprTy->getScalarSizeInBits(), false);
      auto *ArtificialDecl = VarDecl::Create(
          getContext(), const_cast<DeclContext *>(D.getDeclContext()),
          D.getLocation(), D.getLocation(), NameIdent, QT,
          getContext().CreateTypeSourceInfo(QT), SC_Auto);
      ArtificialDecl->setImplicit();

      MD = DI->EmitDeclareOfAutoVariable(ArtificialDecl, VlaSize.NumElts,
                                         Builder);
    }
    assert(MD && "No Size expression debug node created");
    DI->registerVLASizeExpression(VlaSize.Type, MD);
  }
}

/// EmitAutoVarAlloca - Emit the alloca and debug information for a
/// local variable.  Does not emit initialization or destruction.
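// For a declaration such as (an illustrative sketch, not from the original
// source)
//   int vla[n][m];
// EmitAndRegisterVariableArrayDimensions above stores the two bound values
// into allocas named __vla_expr0 and __vla_expr1 and registers them with the
// debug info, so that DWARF can describe both dimensions of the array.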
/// EmitAutoVarAlloca - Emit the alloca and debug information for a /// local variable. Does not emit initialization or destruction. CodeGenFunction::AutoVarEmission CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { QualType Ty = D.getType(); assert( Ty.getAddressSpace() == LangAS::Default || (Ty.getAddressSpace() == LangAS::opencl_private && getLangOpts().OpenCL)); AutoVarEmission emission(D); bool isEscapingByRef = D.isEscapingByref(); emission.IsEscapingByRef = isEscapingByRef; CharUnits alignment = getContext().getDeclAlign(&D); // If the type is variably-modified, emit all the VLA sizes for it. if (Ty->isVariablyModifiedType()) EmitVariablyModifiedType(Ty); auto *DI = getDebugInfo(); bool EmitDebugInfo = DI && CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; Address address = Address::invalid(); Address AllocaAddr = Address::invalid(); if (Ty->isConstantSizeType()) { bool NRVO = getLangOpts().ElideConstructors && D.isNRVOVariable(); // If this value is an array or struct with a statically determinable // constant initializer, there are optimizations we can do. // // TODO: We should constant-evaluate the initializer of any variable, // as long as it is initialized by a constant expression. Currently, // isConstantInitializer produces wrong answers for structs with // reference or bitfield members, and a few other cases, and checking // for POD-ness protects us from some of these. if (D.getInit() && (Ty->isArrayType() || Ty->isRecordType()) && (D.isConstexpr() || ((Ty.isPODType(getContext()) || getContext().getBaseElementType(Ty)->isObjCObjectPointerType()) && D.getInit()->isConstantInitializer(getContext(), false)))) { // If the variable's a const type, and it's neither an NRVO // candidate nor a __block variable and has no mutable members, // emit it as a global instead. // Exception is if a variable is located in non-constant address space // in OpenCL. if ((!getLangOpts().OpenCL || Ty.getAddressSpace() == LangAS::opencl_constant) && (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isEscapingByRef && CGM.isTypeConstant(Ty, true))) { EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); // Signal this condition to later callbacks. emission.Addr = Address::invalid(); assert(emission.wasEmittedAsGlobal()); return emission; } // Otherwise, tell the initialization code that we're in this case. emission.IsConstantAggregate = true; } // A normal fixed sized variable becomes an alloca in the entry block, // unless: // - it's an NRVO variable. // - we are compiling OpenMP and it's an OpenMP local variable. Address OpenMPLocalAddr = getLangOpts().OpenMP ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) : Address::invalid(); if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { address = OpenMPLocalAddr; } else if (NRVO) { // The named return value optimization: allocate this variable in the // return slot, so that we can elide the copy when returning this // variable (C++0x [class.copy]p34). address = ReturnValue; if (const RecordType *RecordTy = Ty->getAs<RecordType>()) { const auto *RD = RecordTy->getDecl(); const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD); if ((CXXRD && !CXXRD->hasTrivialDestructor()) || RD->isNonTrivialToPrimitiveDestroy()) { // Create a flag that is used to indicate when the NRVO was applied // to this variable. Set it to zero to indicate that NRVO was not // applied. llvm::Value *Zero = Builder.getFalse(); Address NRVOFlag = CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo"); EnsureInsertPoint(); Builder.CreateStore(Zero, NRVOFlag); // Record the NRVO flag for this variable.
NRVOFlags[&D] = NRVOFlag.getPointer(); emission.NRVOFlag = NRVOFlag.getPointer(); } } } else { CharUnits allocaAlignment; llvm::Type *allocaTy; if (isEscapingByRef) { auto &byrefInfo = getBlockByrefInfo(&D); allocaTy = byrefInfo.Type; allocaAlignment = byrefInfo.ByrefAlignment; } else { allocaTy = ConvertTypeForMem(Ty); allocaAlignment = alignment; } // Create the alloca. Note that we set the name separately from // building the instruction so that it's there even in no-asserts // builds. address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName(), /*ArraySize=*/nullptr, &AllocaAddr); // Don't emit lifetime markers for MSVC catch parameters. The lifetime of // the catch parameter starts in the catchpad instruction, and we can't // insert code in those basic blocks. bool IsMSCatchParam = D.isExceptionVariable() && getTarget().getCXXABI().isMicrosoft(); // Emit a lifetime intrinsic if meaningful. There's no point in doing this // if we don't have a valid insertion point (?). if (HaveInsertPoint() && !IsMSCatchParam) { // If there's a jump into the lifetime of this variable, its lifetime // gets broken up into several regions in IR, which requires more work // to handle correctly. For now, just omit the intrinsics; this is a // rare case, and it's better to just be conservatively correct. // PR28267. // // We have to do this in all language modes if there's a jump past the // declaration. We also have to do it in C if there's a jump to an // earlier point in the current block because non-VLA lifetimes begin as // soon as the containing block is entered, not when its variables // actually come into scope; suppressing the lifetime annotations // completely in this case is unnecessarily pessimistic, but again, this // is rare. if (!Bypasses.IsBypassed(&D) && !(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) { uint64_t size = CGM.getDataLayout().getTypeAllocSize(allocaTy); emission.SizeForLifetimeMarkers = EmitLifetimeStart(size, AllocaAddr.getPointer()); } } else { assert(!emission.useLifetimeMarkers()); } } } else { EnsureInsertPoint(); if (!DidCallStackSave) { // Save the stack. Address Stack = CreateTempAlloca(Int8PtrTy, getPointerAlign(), "saved_stack"); llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave); llvm::Value *V = Builder.CreateCall(F); Builder.CreateStore(V, Stack); DidCallStackSave = true; // Push a cleanup block and restore the stack there. // FIXME: in general circumstances, this should be an EH cleanup. pushStackRestore(NormalCleanup, Stack); } auto VlaSize = getVLASize(Ty); llvm::Type *llvmTy = ConvertTypeForMem(VlaSize.Type); // Allocate memory for the array. address = CreateTempAlloca(llvmTy, alignment, "vla", VlaSize.NumElts, &AllocaAddr); // If we have debug info enabled, properly describe the VLA dimensions for // this type by registering the vla size expression for each of the // dimensions. EmitAndRegisterVariableArrayDimensions(DI, D, EmitDebugInfo); } setAddrOfLocalVar(&D, address); emission.Addr = address; emission.AllocaAddr = AllocaAddr; // Emit debug info for local var declaration. if (EmitDebugInfo && HaveInsertPoint()) { DI->setLocation(D.getLocation()); (void)DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder); } if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, address.getPointer()); // Make sure we call @llvm.lifetime.end.
if (emission.useLifetimeMarkers()) EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, emission.getOriginalAllocatedAddress(), emission.getSizeForLifetimeMarkers()); return emission; } static bool isCapturedBy(const VarDecl &, const Expr *); /// Determines whether the given __block variable is potentially /// captured by the given statement. static bool isCapturedBy(const VarDecl &Var, const Stmt *S) { if (const Expr *E = dyn_cast<Expr>(S)) return isCapturedBy(Var, E); for (const Stmt *SubStmt : S->children()) if (isCapturedBy(Var, SubStmt)) return true; return false; } /// Determines whether the given __block variable is potentially /// captured by the given expression. static bool isCapturedBy(const VarDecl &Var, const Expr *E) { // Skip the most common kinds of expressions that make // hierarchy-walking expensive. E = E->IgnoreParenCasts(); if (const BlockExpr *BE = dyn_cast<BlockExpr>(E)) { const BlockDecl *Block = BE->getBlockDecl(); for (const auto &I : Block->captures()) { if (I.getVariable() == &Var) return true; } // No need to walk into the subexpressions. return false; } if (const StmtExpr *SE = dyn_cast<StmtExpr>(E)) { const CompoundStmt *CS = SE->getSubStmt(); for (const auto *BI : CS->body()) if (const auto *BIE = dyn_cast<Expr>(BI)) { if (isCapturedBy(Var, BIE)) return true; } else if (const auto *DS = dyn_cast<DeclStmt>(BI)) { // special case declarations for (const auto *I : DS->decls()) { if (const auto *VD = dyn_cast<VarDecl>(I)) { const Expr *Init = VD->getInit(); if (Init && isCapturedBy(Var, Init)) return true; } } } else // FIXME. Make safe assumption assuming arbitrary statements cause capturing. // Later, provide code to poke into statements for capture analysis. return true; return false; } for (const Stmt *SubStmt : E->children()) if (isCapturedBy(Var, SubStmt)) return true; return false; } /// Determine whether the given initializer is trivial in the sense /// that it requires no code to be generated. bool CodeGenFunction::isTrivialInitializer(const Expr *Init) { if (!Init) return true; if (const CXXConstructExpr *Construct = dyn_cast<CXXConstructExpr>(Init)) if (CXXConstructorDecl *Constructor = Construct->getConstructor()) if (Constructor->isTrivial() && Constructor->isDefaultConstructor() && !Construct->requiresZeroInitialization()) return true; return false; } void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { assert(emission.Variable && "emission was not valid!"); // If this was emitted as a global constant, we're done. if (emission.wasEmittedAsGlobal()) return; const VarDecl &D = *emission.Variable; auto DL = ApplyDebugLocation::CreateDefaultArtificial(*this, D.getLocation()); QualType type = D.getType(); bool isVolatile = type.isVolatileQualified(); // If this local has an initializer, emit it now. const Expr *Init = D.getInit(); // If we are at an unreachable point, we don't need to emit the initializer // unless it contains a label. if (!HaveInsertPoint()) { if (!Init || !ContainsLabel(Init)) return; EnsureInsertPoint(); } // Initialize the structure of a __block variable. if (emission.IsEscapingByRef) emitByrefStructureInit(emission); // Initialize the variable here if it doesn't have an initializer and it is a // C struct that is non-trivial to initialize or an array containing such a // struct.
if (!Init && type.isNonTrivialToPrimitiveDefaultInitialize() == QualType::PDIK_Struct) { LValue Dst = MakeAddrLValue(emission.getAllocatedAddress(), type); if (emission.IsEscapingByRef) drillIntoBlockVariable(*this, Dst, &D); defaultInitNonTrivialCStructVar(Dst); return; } // Check whether this is a byref variable that's potentially // captured and moved by its own initializer. If so, we'll need to // emit the initializer first, then copy into the variable. bool capturedByInit = Init && emission.IsEscapingByRef && isCapturedBy(D, Init); - Address Loc = - capturedByInit ? emission.Addr : emission.getObjectAddress(*this); + bool locIsByrefHeader = !capturedByInit; + const Address Loc = + locIsByrefHeader ? emission.getObjectAddress(*this) : emission.Addr; // Note: constexpr already initializes everything correctly. LangOptions::TrivialAutoVarInitKind trivialAutoVarInit = (D.isConstexpr() ? LangOptions::TrivialAutoVarInitKind::Uninitialized : (D.getAttr<UninitializedAttr>() ? LangOptions::TrivialAutoVarInitKind::Uninitialized : getContext().getLangOpts().getTrivialAutoVarInit())); auto initializeWhatIsTechnicallyUninitialized = [&](Address Loc) { if (trivialAutoVarInit == LangOptions::TrivialAutoVarInitKind::Uninitialized) return; // Only initialize a __block's storage: we always initialize the header. - if (emission.IsEscapingByRef) + if (emission.IsEscapingByRef && !locIsByrefHeader) Loc = emitBlockByrefAddress(Loc, &D, /*follow=*/false); CharUnits Size = getContext().getTypeSizeInChars(type); if (!Size.isZero()) { switch (trivialAutoVarInit) { case LangOptions::TrivialAutoVarInitKind::Uninitialized: llvm_unreachable("Uninitialized handled above"); case LangOptions::TrivialAutoVarInitKind::Zero: emitStoresForZeroInit(CGM, D, Loc, isVolatile, Builder); break; case LangOptions::TrivialAutoVarInitKind::Pattern: emitStoresForPatternInit(CGM, D, Loc, isVolatile, Builder); break; } return; } // VLAs look zero-sized to getTypeInfo. We can't emit constant stores to // them, so emit a memcpy with the VLA size to initialize each element. // Technically zero-sized or negative-sized VLAs are undefined, and UBSan // will catch that code, but there exists code which generates zero-sized // VLAs. Be nice and initialize whatever they requested.
const VariableArrayType *VlaType = dyn_cast_or_null<VariableArrayType>(getContext().getAsArrayType(type)); if (!VlaType) return; auto VlaSize = getVLASize(VlaType); auto SizeVal = VlaSize.NumElts; CharUnits EltSize = getContext().getTypeSizeInChars(VlaSize.Type); switch (trivialAutoVarInit) { case LangOptions::TrivialAutoVarInitKind::Uninitialized: llvm_unreachable("Uninitialized handled above"); case LangOptions::TrivialAutoVarInitKind::Zero: if (!EltSize.isOne()) SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, isVolatile); break; case LangOptions::TrivialAutoVarInitKind::Pattern: { llvm::Type *ElTy = Loc.getElementType(); llvm::Constant *Constant = patternFor(CGM, ElTy); CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type); llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop"); llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop"); llvm::BasicBlock *ContBB = createBasicBlock("vla-init.cont"); llvm::Value *IsZeroSizedVLA = Builder.CreateICmpEQ( SizeVal, llvm::ConstantInt::get(SizeVal->getType(), 0), "vla.iszerosized"); Builder.CreateCondBr(IsZeroSizedVLA, ContBB, SetupBB); EmitBlock(SetupBB); if (!EltSize.isOne()) SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); llvm::Value *BaseSizeInChars = llvm::ConstantInt::get(IntPtrTy, EltSize.getQuantity()); Address Begin = Builder.CreateElementBitCast(Loc, Int8Ty, "vla.begin"); llvm::Value *End = Builder.CreateInBoundsGEP(Begin.getPointer(), SizeVal, "vla.end"); llvm::BasicBlock *OriginBB = Builder.GetInsertBlock(); EmitBlock(LoopBB); llvm::PHINode *Cur = Builder.CreatePHI(Begin.getType(), 2, "vla.cur"); Cur->addIncoming(Begin.getPointer(), OriginBB); CharUnits CurAlign = Loc.getAlignment().alignmentOfArrayElement(EltSize); Builder.CreateMemCpy( Address(Cur, CurAlign), createUnnamedGlobalFrom(CGM, D, Builder, Constant, ConstantAlign), BaseSizeInChars, isVolatile); llvm::Value *Next = Builder.CreateInBoundsGEP(Int8Ty, Cur, BaseSizeInChars, "vla.next"); llvm::Value *Done = Builder.CreateICmpEQ(Next, End, "vla-init.isdone"); Builder.CreateCondBr(Done, ContBB, LoopBB); Cur->addIncoming(Next, LoopBB); EmitBlock(ContBB); } break; } }; if (isTrivialInitializer(Init)) { initializeWhatIsTechnicallyUninitialized(Loc); return; } llvm::Constant *constant = nullptr; if (emission.IsConstantAggregate || D.isConstexpr()) { assert(!capturedByInit && "constant init contains a capturing block?"); constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D); if (constant && trivialAutoVarInit != LangOptions::TrivialAutoVarInitKind::Uninitialized) constant = replaceUndef(constant); } if (!constant) { initializeWhatIsTechnicallyUninitialized(Loc); LValue lv = MakeAddrLValue(Loc, type); lv.setNonGC(true); return EmitExprAsInit(Init, &D, lv, capturedByInit); } if (!emission.IsConstantAggregate) { // For simple scalar/complex initialization, store the value directly. LValue lv = MakeAddrLValue(Loc, type); lv.setNonGC(true); return EmitStoreThroughLValue(RValue::get(constant), lv, true); } llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace()); - if (Loc.getType() != BP) - Loc = Builder.CreateBitCast(Loc, BP); - - emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); + emitStoresForConstant( + CGM, D, (Loc.getType() == BP) ? Loc : Builder.CreateBitCast(Loc, BP), + isVolatile, Builder, constant); }
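// Illustrative sketch (not part of the imported sources): the observable
// effect of the trivial-auto-var-init handling in EmitAutoVarInit() above.
// For a local `int x;` with no initializer whose address escapes:
//
//   %x = alloca i32, align 4
//   store i32 0, i32* %x, align 4      ; -ftrivial-auto-var-init=zero
//
// 'pattern' instead fills the object with a repeated filler byte pattern via
// emitStoresForPatternInit(), and 'uninitialized' (the default, and what
// __attribute__((uninitialized)) or constexpr force) leaves the stack slot
// untouched, as the TrivialAutoVarInitKind selection above shows.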
/// Emit an expression as an initializer for an object (variable, field, etc.) /// at the given location. The expression is not necessarily the normal /// initializer for the object, and the address is not necessarily /// its normal location. /// /// \param init the initializing expression /// \param D the object to act as if we're initializing /// \param loc the address to initialize; its type is a pointer /// to the LLVM mapping of the object's type /// \param alignment the alignment of the address /// \param capturedByInit true if \p D is a __block variable /// whose address is potentially changed by the initializer void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit) { QualType type = D->getType(); if (type->isReferenceType()) { RValue rvalue = EmitReferenceBindingToExpr(init); if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D)); EmitStoreThroughLValue(rvalue, lvalue, true); return; } switch (getEvaluationKind(type)) { case TEK_Scalar: EmitScalarInit(init, D, lvalue, capturedByInit); return; case TEK_Complex: { ComplexPairTy complex = EmitComplexExpr(init); if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D)); EmitStoreOfComplex(complex, lvalue, /*init*/ true); return; } case TEK_Aggregate: if (type->isAtomicType()) { EmitAtomicInit(const_cast<Expr *>(init), lvalue); } else { AggValueSlot::Overlap_t Overlap = AggValueSlot::MayOverlap; if (isa<VarDecl>(D)) Overlap = AggValueSlot::DoesNotOverlap; else if (auto *FD = dyn_cast<FieldDecl>(D)) Overlap = overlapForFieldInit(FD); // TODO: how can we delay here if D is captured by its initializer? EmitAggExpr(init, AggValueSlot::forLValue(lvalue, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, Overlap)); } return; } llvm_unreachable("bad evaluation kind"); } /// Enter a destroy cleanup for the given local variable. void CodeGenFunction::emitAutoVarTypeCleanup( const CodeGenFunction::AutoVarEmission &emission, QualType::DestructionKind dtorKind) { assert(dtorKind != QualType::DK_none); // Note that for __block variables, we want to destroy the // original stack object, not the possibly forwarded object. Address addr = emission.getObjectAddress(*this); const VarDecl *var = emission.Variable; QualType type = var->getType(); CleanupKind cleanupKind = NormalAndEHCleanup; CodeGenFunction::Destroyer *destroyer = nullptr; switch (dtorKind) { case QualType::DK_none: llvm_unreachable("no cleanup for trivially-destructible variable"); case QualType::DK_cxx_destructor: // If there's an NRVO flag on the emission, we need a different // cleanup. if (emission.NRVOFlag) { assert(!type->isArrayType()); CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor(); EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, dtor, emission.NRVOFlag); return; } break; case QualType::DK_objc_strong_lifetime: // Suppress cleanups for pseudo-strong variables. if (var->isARCPseudoStrong()) return; // Otherwise, consider whether to use an EH cleanup or not. cleanupKind = getARCCleanupKind(); // Use the imprecise destroyer by default. if (!var->hasAttr<ObjCPreciseLifetimeAttr>()) destroyer = CodeGenFunction::destroyARCStrongImprecise; break; case QualType::DK_objc_weak_lifetime: break; case QualType::DK_nontrivial_c_struct: destroyer = CodeGenFunction::destroyNonTrivialCStruct; if (emission.NRVOFlag) { assert(!type->isArrayType()); EHStack.pushCleanup<DestroyNRVOVariableC>(cleanupKind, addr, emission.NRVOFlag, type); return; } break; } // If we haven't chosen a more specific destroyer, use the default.
if (!destroyer) destroyer = getDestroyer(dtorKind); // Use an EH cleanup in array destructors iff the destructor itself // is being pushed as an EH cleanup. bool useEHCleanup = (cleanupKind & EHCleanup); EHStack.pushCleanup<DestroyObject>(cleanupKind, addr, type, destroyer, useEHCleanup); } void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { assert(emission.Variable && "emission was not valid!"); // If this was emitted as a global constant, we're done. if (emission.wasEmittedAsGlobal()) return; // If we don't have an insertion point, we're done. Sema prevents // us from jumping into any of these scopes anyway. if (!HaveInsertPoint()) return; const VarDecl &D = *emission.Variable; // Check the type for a cleanup. if (QualType::DestructionKind dtorKind = D.getType().isDestructedType()) emitAutoVarTypeCleanup(emission, dtorKind); // In GC mode, honor objc_precise_lifetime. if (getLangOpts().getGC() != LangOptions::NonGC && D.hasAttr<ObjCPreciseLifetimeAttr>()) { EHStack.pushCleanup<ExtendGCLifetime>(NormalCleanup, &D); } // Handle the cleanup attribute. if (const CleanupAttr *CA = D.getAttr<CleanupAttr>()) { const FunctionDecl *FD = CA->getFunctionDecl(); llvm::Constant *F = CGM.GetAddrOfFunction(FD); assert(F && "Could not find function!"); const CGFunctionInfo &Info = CGM.getTypes().arrangeFunctionDeclaration(FD); EHStack.pushCleanup<CallCleanupFunction>(NormalAndEHCleanup, F, &Info, &D); } // If this is a block variable, call _Block_object_destroy // (on the unforwarded address). Don't enter this cleanup if we're in pure-GC // mode. if (emission.IsEscapingByRef && CGM.getLangOpts().getGC() != LangOptions::GCOnly) { BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF; if (emission.Variable->getType().isObjCGCWeak()) Flags |= BLOCK_FIELD_IS_WEAK; enterByrefCleanup(NormalAndEHCleanup, emission.Addr, Flags, /*LoadBlockVarAddr*/ false, cxxDestructorCanThrow(emission.Variable->getType())); } } CodeGenFunction::Destroyer * CodeGenFunction::getDestroyer(QualType::DestructionKind kind) { switch (kind) { case QualType::DK_none: llvm_unreachable("no destroyer for trivial dtor"); case QualType::DK_cxx_destructor: return destroyCXXObject; case QualType::DK_objc_strong_lifetime: return destroyARCStrongPrecise; case QualType::DK_objc_weak_lifetime: return destroyARCWeak; case QualType::DK_nontrivial_c_struct: return destroyNonTrivialCStruct; } llvm_unreachable("Unknown DestructionKind"); } /// pushEHDestroy - Push the standard destructor for the given type as /// an EH-only cleanup. void CodeGenFunction::pushEHDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type) { assert(dtorKind && "cannot push destructor for trivial type"); assert(needsEHCleanup(dtorKind)); pushDestroy(EHCleanup, addr, type, getDestroyer(dtorKind), true); }
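// Illustrative sketch (not part of the imported sources): a hypothetical
// caller of the cleanup API above. getDestroyer() maps a DestructionKind to
// a Destroyer callback, and pushDestroy() (next) registers it so the object
// is destroyed on both normal and exceptional exits when the kind requires:
//
//   static void pushDestroyIfNeeded(CodeGenFunction &CGF, Address Tmp,
//                                   QualType Ty) {
//     if (QualType::DestructionKind DK = Ty.isDestructedType())
//       CGF.pushDestroy(DK, Tmp, Ty);
//   }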
/// pushDestroy - Push the standard destructor for the given type as /// at least a normal cleanup. void CodeGenFunction::pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type) { assert(dtorKind && "cannot push destructor for trivial type"); CleanupKind cleanupKind = getCleanupKind(dtorKind); pushDestroy(cleanupKind, addr, type, getDestroyer(dtorKind), cleanupKind & EHCleanup); } void CodeGenFunction::pushDestroy(CleanupKind cleanupKind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray) { pushFullExprCleanup<DestroyObject>(cleanupKind, addr, type, destroyer, useEHCleanupForArray); } void CodeGenFunction::pushStackRestore(CleanupKind Kind, Address SPMem) { EHStack.pushCleanup<CallStackRestore>(Kind, SPMem); } void CodeGenFunction::pushLifetimeExtendedDestroy( CleanupKind cleanupKind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray) { // Push an EH-only cleanup for the object now. // FIXME: When popping normal cleanups, we need to keep this EH cleanup // around in case a temporary's destructor throws an exception. if (cleanupKind & EHCleanup) EHStack.pushCleanup<DestroyObject>( static_cast<CleanupKind>(cleanupKind & ~NormalCleanup), addr, type, destroyer, useEHCleanupForArray); // Remember that we need to push a full cleanup for the object at the // end of the full-expression. pushCleanupAfterFullExpr<DestroyObject>( cleanupKind, addr, type, destroyer, useEHCleanupForArray); } /// emitDestroy - Immediately perform the destruction of the given /// object. /// /// \param addr - the address of the object; a type* /// \param type - the type of the object; if an array type, all /// objects are destroyed in reverse order /// \param destroyer - the function to call to destroy individual /// elements /// \param useEHCleanupForArray - whether an EH cleanup should be /// used when destroying array elements, in case one of the /// destructions throws an exception void CodeGenFunction::emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray) { const ArrayType *arrayType = getContext().getAsArrayType(type); if (!arrayType) return destroyer(*this, addr, type); llvm::Value *length = emitArrayLength(arrayType, type, addr); CharUnits elementAlign = addr.getAlignment() .alignmentOfArrayElement(getContext().getTypeSizeInChars(type)); // Normally we have to check whether the array is zero-length. bool checkZeroLength = true; // But if the array length is constant, we can suppress that. if (llvm::ConstantInt *constLength = dyn_cast<llvm::ConstantInt>(length)) { // ...and if it's constant zero, we can just skip the entire thing. if (constLength->isZero()) return; checkZeroLength = false; } llvm::Value *begin = addr.getPointer(); llvm::Value *end = Builder.CreateInBoundsGEP(begin, length); emitArrayDestroy(begin, end, type, elementAlign, destroyer, checkZeroLength, useEHCleanupForArray); }
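// Illustrative sketch (not part of the imported sources): the control flow
// emitArrayDestroy (next) builds. Elements are destroyed from last to first
// by walking a PHI backwards from 'end' until it reaches 'begin':
//
//   arraydestroy.body:
//     %elementPast = phi [ %end, %entry ], [ %element, %arraydestroy.body ]
//     %element = getelementptr inbounds %elementPast, i64 -1
//     ; ...run the destroyer on %element...
//     %done = icmp eq %element, %begin
//     br i1 %done, label %arraydestroy.done, label %arraydestroy.body
//
// The zero-length pre-check is emitted only when the length is not a known
// non-zero constant, as emitDestroy() above decides.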
/// emitArrayDestroy - Destroys all the elements of the given array, /// beginning from last to first. The array cannot be zero-length. /// /// \param begin - a type* denoting the first element of the array /// \param end - a type* denoting one past the end of the array /// \param elementType - the element type of the array /// \param destroyer - the function to call to destroy elements /// \param useEHCleanup - whether to push an EH cleanup to destroy /// the remaining elements in case the destruction of a single /// element throws void CodeGenFunction::emitArrayDestroy(llvm::Value *begin, llvm::Value *end, QualType elementType, CharUnits elementAlign, Destroyer *destroyer, bool checkZeroLength, bool useEHCleanup) { assert(!elementType->isArrayType()); // The basic structure here is a do-while loop, because we don't // need to check for the zero-element case. llvm::BasicBlock *bodyBB = createBasicBlock("arraydestroy.body"); llvm::BasicBlock *doneBB = createBasicBlock("arraydestroy.done"); if (checkZeroLength) { llvm::Value *isEmpty = Builder.CreateICmpEQ(begin, end, "arraydestroy.isempty"); Builder.CreateCondBr(isEmpty, doneBB, bodyBB); } // Enter the loop body, making that address the current address. llvm::BasicBlock *entryBB = Builder.GetInsertBlock(); EmitBlock(bodyBB); llvm::PHINode *elementPast = Builder.CreatePHI(begin->getType(), 2, "arraydestroy.elementPast"); elementPast->addIncoming(end, entryBB); // Shift the address back by one element. llvm::Value *negativeOne = llvm::ConstantInt::get(SizeTy, -1, true); llvm::Value *element = Builder.CreateInBoundsGEP(elementPast, negativeOne, "arraydestroy.element"); if (useEHCleanup) pushRegularPartialArrayCleanup(begin, element, elementType, elementAlign, destroyer); // Perform the actual destruction there. destroyer(*this, Address(element, elementAlign), elementType); if (useEHCleanup) PopCleanupBlock(); // Check whether we've reached the end. llvm::Value *done = Builder.CreateICmpEQ(element, begin, "arraydestroy.done"); Builder.CreateCondBr(done, doneBB, bodyBB); elementPast->addIncoming(element, Builder.GetInsertBlock()); // Done. EmitBlock(doneBB); } /// Perform partial array destruction as if in an EH cleanup. Unlike /// emitArrayDestroy, the element type here may still be an array type. static void emitPartialArrayDestroy(CodeGenFunction &CGF, llvm::Value *begin, llvm::Value *end, QualType type, CharUnits elementAlign, CodeGenFunction::Destroyer *destroyer) { // If the element type is itself an array, drill down. unsigned arrayDepth = 0; while (const ArrayType *arrayType = CGF.getContext().getAsArrayType(type)) { // VLAs don't require a GEP index to walk into. if (!isa<VariableArrayType>(arrayType)) arrayDepth++; type = arrayType->getElementType(); } if (arrayDepth) { llvm::Value *zero = llvm::ConstantInt::get(CGF.SizeTy, 0); SmallVector<llvm::Value*,4> gepIndices(arrayDepth+1, zero); begin = CGF.Builder.CreateInBoundsGEP(begin, gepIndices, "pad.arraybegin"); end = CGF.Builder.CreateInBoundsGEP(end, gepIndices, "pad.arrayend"); } // Destroy the array. We don't ever need an EH cleanup because we // assume that we're in an EH cleanup ourselves, so a throwing // destructor causes an immediate terminate. CGF.emitArrayDestroy(begin, end, type, elementAlign, destroyer, /*checkZeroLength*/ true, /*useEHCleanup*/ false); } namespace { /// RegularPartialArrayDestroy - a cleanup which performs a partial /// array destroy where the end pointer is regularly determined and /// does not need to be loaded from a local.
class RegularPartialArrayDestroy final : public EHScopeStack::Cleanup { llvm::Value *ArrayBegin; llvm::Value *ArrayEnd; QualType ElementType; CodeGenFunction::Destroyer *Destroyer; CharUnits ElementAlign; public: RegularPartialArrayDestroy(llvm::Value *arrayBegin, llvm::Value *arrayEnd, QualType elementType, CharUnits elementAlign, CodeGenFunction::Destroyer *destroyer) : ArrayBegin(arrayBegin), ArrayEnd(arrayEnd), ElementType(elementType), Destroyer(destroyer), ElementAlign(elementAlign) {} void Emit(CodeGenFunction &CGF, Flags flags) override { emitPartialArrayDestroy(CGF, ArrayBegin, ArrayEnd, ElementType, ElementAlign, Destroyer); } }; /// IrregularPartialArrayDestroy - a cleanup which performs a /// partial array destroy where the end pointer is irregularly /// determined and must be loaded from a local. class IrregularPartialArrayDestroy final : public EHScopeStack::Cleanup { llvm::Value *ArrayBegin; Address ArrayEndPointer; QualType ElementType; CodeGenFunction::Destroyer *Destroyer; CharUnits ElementAlign; public: IrregularPartialArrayDestroy(llvm::Value *arrayBegin, Address arrayEndPointer, QualType elementType, CharUnits elementAlign, CodeGenFunction::Destroyer *destroyer) : ArrayBegin(arrayBegin), ArrayEndPointer(arrayEndPointer), ElementType(elementType), Destroyer(destroyer), ElementAlign(elementAlign) {} void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::Value *arrayEnd = CGF.Builder.CreateLoad(ArrayEndPointer); emitPartialArrayDestroy(CGF, ArrayBegin, arrayEnd, ElementType, ElementAlign, Destroyer); } }; } // end anonymous namespace /// pushIrregularPartialArrayCleanup - Push an EH cleanup to destroy /// already-constructed elements of the given array. The cleanup /// may be popped with DeactivateCleanupBlock or PopCleanupBlock. /// /// \param elementType - the immediate element type of the array; /// possibly still an array type void CodeGenFunction::pushIrregularPartialArrayCleanup(llvm::Value *arrayBegin, Address arrayEndPointer, QualType elementType, CharUnits elementAlign, Destroyer *destroyer) { pushFullExprCleanup<IrregularPartialArrayDestroy>(EHCleanup, arrayBegin, arrayEndPointer, elementType, elementAlign, destroyer); } /// pushRegularPartialArrayCleanup - Push an EH cleanup to destroy /// already-constructed elements of the given array. The cleanup /// may be popped with DeactivateCleanupBlock or PopCleanupBlock. /// /// \param elementType - the immediate element type of the array; /// possibly still an array type void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin, llvm::Value *arrayEnd, QualType elementType, CharUnits elementAlign, Destroyer *destroyer) { pushFullExprCleanup<RegularPartialArrayDestroy>(EHCleanup, arrayBegin, arrayEnd, elementType, elementAlign, destroyer); } /// Lazily declare the @llvm.lifetime.start intrinsic. llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { if (LifetimeStartFn) return LifetimeStartFn; LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(), llvm::Intrinsic::lifetime_start, AllocaInt8PtrTy); return LifetimeStartFn; } /// Lazily declare the @llvm.lifetime.end intrinsic. llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() { if (LifetimeEndFn) return LifetimeEndFn; LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(), llvm::Intrinsic::lifetime_end, AllocaInt8PtrTy); return LifetimeEndFn; }
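// Illustrative sketch (not part of the imported sources): choosing between
// the two partial-array cleanups above. If the construction loop's end
// pointer is a plain SSA value, the "regular" form suffices; if it must be
// reloaded from a local on unwind, the "irregular" form is used:
//
//   CGF.pushRegularPartialArrayCleanup(begin, end, eltType, eltAlign,
//                                      destroyer);
//   CGF.pushIrregularPartialArrayCleanup(begin, endPointerAddr, eltType,
//                                        eltAlign, destroyer);
//
// Here 'begin', 'end', 'endPointerAddr', 'eltType', 'eltAlign', and
// 'destroyer' are placeholders for the caller's values.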
namespace { /// A cleanup to perform a release of an object at the end of a /// function. This is used to balance out the incoming +1 of a /// ns_consumed argument when we can't reasonably do that just by /// not doing the initial retain for a __block argument. struct ConsumeARCParameter final : EHScopeStack::Cleanup { ConsumeARCParameter(llvm::Value *param, ARCPreciseLifetime_t precise) : Param(param), Precise(precise) {} llvm::Value *Param; ARCPreciseLifetime_t Precise; void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitARCRelease(Param, Precise); } }; } // end anonymous namespace /// Emit an alloca (or GlobalValue depending on target) /// for the specified parameter and set up LocalDeclMap. void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, unsigned ArgNo) { // FIXME: Why isn't ImplicitParamDecl a ParmVarDecl? assert((isa<ParmVarDecl>(D) || isa<ImplicitParamDecl>(D)) && "Invalid argument to EmitParmDecl"); Arg.getAnyValue()->setName(D.getName()); QualType Ty = D.getType(); // Use better IR generation for certain implicit parameters. if (auto IPD = dyn_cast<ImplicitParamDecl>(&D)) { // The only implicit argument a block has is its literal. // This may be passed as an inalloca'ed value on Windows x86. if (BlockInfo) { llvm::Value *V = Arg.isIndirect() ? Builder.CreateLoad(Arg.getIndirectAddress()) : Arg.getDirectValue(); setBlockContextParameter(IPD, ArgNo, V); return; } } Address DeclPtr = Address::invalid(); bool DoStore = false; bool IsScalar = hasScalarEvaluationKind(Ty); // If we already have a pointer to the argument, reuse the input pointer. if (Arg.isIndirect()) { DeclPtr = Arg.getIndirectAddress(); // If we have a prettier pointer type at this point, bitcast to that. unsigned AS = DeclPtr.getType()->getAddressSpace(); llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS); if (DeclPtr.getType() != IRTy) DeclPtr = Builder.CreateBitCast(DeclPtr, IRTy, D.getName()); // Indirect argument is in alloca address space, which may be different // from the default address space. auto AllocaAS = CGM.getASTAllocaAddressSpace(); auto *V = DeclPtr.getPointer(); auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS; auto DestLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default; if (SrcLangAS != DestLangAS) { assert(getContext().getTargetAddressSpace(SrcLangAS) == CGM.getDataLayout().getAllocaAddrSpace()); auto DestAS = getContext().getTargetAddressSpace(DestLangAS); auto *T = V->getType()->getPointerElementType()->getPointerTo(DestAS); DeclPtr = Address(getTargetHooks().performAddrSpaceCast( *this, V, SrcLangAS, DestLangAS, T, true), DeclPtr.getAlignment()); } // Push a destructor cleanup for this parameter if the ABI requires it. // Don't push a cleanup in a thunk for a method that will also emit a // cleanup. if (hasAggregateEvaluationKind(Ty) && !CurFuncIsThunk && Ty->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { if (QualType::DestructionKind DtorKind = Ty.isDestructedType()) { assert((DtorKind == QualType::DK_cxx_destructor || DtorKind == QualType::DK_nontrivial_c_struct) && "unexpected destructor type"); pushDestroy(DtorKind, DeclPtr, Ty); CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] = EHStack.stable_begin(); } } } else { // Check if the parameter address is controlled by OpenMP runtime. Address OpenMPLocalAddr = getLangOpts().OpenMP ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) : Address::invalid(); if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { DeclPtr = OpenMPLocalAddr; } else { // Otherwise, create a temporary to hold the value.
DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D), D.getName() + ".addr"); } DoStore = true; } llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr); LValue lv = MakeAddrLValue(DeclPtr, Ty); if (IsScalar) { Qualifiers qs = Ty.getQualifiers(); if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) { // We honor __attribute__((ns_consumed)) for types with lifetime. // For __strong, it's handled by just skipping the initial retain; // otherwise we have to balance out the initial +1 with an extra // cleanup to do the release at the end of the function. bool isConsumed = D.hasAttr<NSConsumedAttr>(); // If a parameter is pseudo-strong then we can omit the implicit retain. if (D.isARCPseudoStrong()) { assert(lt == Qualifiers::OCL_Strong && "pseudo-strong variable isn't strong?"); assert(qs.hasConst() && "pseudo-strong variable should be const!"); lt = Qualifiers::OCL_ExplicitNone; } // Load objects passed indirectly. if (Arg.isIndirect() && !ArgVal) ArgVal = Builder.CreateLoad(DeclPtr); if (lt == Qualifiers::OCL_Strong) { if (!isConsumed) { if (CGM.getCodeGenOpts().OptimizationLevel == 0) { // use objc_storeStrong(&dest, value) for retaining the // object. But first, store a null into 'dest' because // objc_storeStrong attempts to release its old value. llvm::Value *Null = CGM.EmitNullConstant(D.getType()); EmitStoreOfScalar(Null, lv, /* isInitialization */ true); EmitARCStoreStrongCall(lv.getAddress(), ArgVal, true); DoStore = false; } else // Don't use objc_retainBlock for block pointers, because we // don't want to Block_copy something just because we got it // as a parameter. ArgVal = EmitARCRetainNonBlock(ArgVal); } } else { // Push the cleanup for a consumed parameter. if (isConsumed) { ARCPreciseLifetime_t precise = (D.hasAttr<ObjCPreciseLifetimeAttr>() ? ARCPreciseLifetime : ARCImpreciseLifetime); EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(), ArgVal, precise); } if (lt == Qualifiers::OCL_Weak) { EmitARCInitWeak(DeclPtr, ArgVal); DoStore = false; // The weak init is a store, no need to do two. } } // Enter the cleanup scope. EmitAutoVarWithLifetime(*this, D, DeclPtr, lt); } } // Store the initial value into the alloca. if (DoStore) EmitStoreOfScalar(ArgVal, lv, /* isInitialization */ true); setAddrOfLocalVar(&D, DeclPtr); // Emit debug info for param declaration. if (CGDebugInfo *DI = getDebugInfo()) { if (CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { DI->EmitDeclareOfArgVariable(&D, DeclPtr.getPointer(), ArgNo, Builder); } } if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, DeclPtr.getPointer()); // We can only check return value nullability if all arguments to the // function satisfy their nullability preconditions. This makes it necessary // to emit null checks for args in the function body itself.
if (requiresReturnValueNullabilityCheck()) { auto Nullability = Ty->getNullability(getContext()); if (Nullability && *Nullability == NullabilityKind::NonNull) { SanitizerScope SanScope(this); RetValNullabilityPrecondition = Builder.CreateAnd(RetValNullabilityPrecondition, Builder.CreateIsNotNull(Arg.getAnyValue())); } } } void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, CodeGenFunction *CGF) { if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed())) return; getOpenMPRuntime().emitUserDefinedReduction(CGF, D); } void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D); } Index: projects/clang800-import/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp =================================================================== --- projects/clang800-import/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp (revision 344547) +++ projects/clang800-import/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp (revision 344548) @@ -1,1346 +1,1346 @@ //===--- TextDiagnostic.cpp - Text Diagnostic Pretty-Printing -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "clang/Frontend/TextDiagnostic.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Locale.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace clang; static const enum raw_ostream::Colors noteColor = raw_ostream::BLACK; static const enum raw_ostream::Colors remarkColor = raw_ostream::BLUE; static const enum raw_ostream::Colors fixitColor = raw_ostream::GREEN; static const enum raw_ostream::Colors caretColor = raw_ostream::GREEN; static const enum raw_ostream::Colors warningColor = raw_ostream::MAGENTA; static const enum raw_ostream::Colors templateColor = raw_ostream::CYAN; static const enum raw_ostream::Colors errorColor = raw_ostream::RED; static const enum raw_ostream::Colors fatalColor = raw_ostream::RED; // Used for changing only the bold attribute. static const enum raw_ostream::Colors savedColor = raw_ostream::SAVEDCOLOR; /// Add highlights to differences in template strings. static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str, bool &Normal, bool Bold) { while (1) { size_t Pos = Str.find(ToggleHighlight); OS << Str.slice(0, Pos); if (Pos == StringRef::npos) break; Str = Str.substr(Pos + 1); if (Normal) OS.changeColor(templateColor, true); else { OS.resetColor(); if (Bold) OS.changeColor(savedColor, true); } Normal = !Normal; } }
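// Illustrative sketch (not part of the imported sources): what the toggle
// loop above does. Diagnostic text arrives with ToggleHighlight marker bytes
// bracketing the template parts that differ, conceptually
//
//   "no known conversion from 'vector<[int]>' to 'vector<[float]>'"
//
// where '[' and ']' stand in for the marker byte. Each marker flips
// 'Normal': entering a highlighted region switches the stream to
// templateColor, and leaving it resets the color (re-bolding if 'Bold').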
/// Number of spaces to indent when word-wrapping. const unsigned WordWrapIndentation = 6; static int bytesSincePreviousTabOrLineBegin(StringRef SourceLine, size_t i) { int bytes = 0; while (0 < i) { if (SourceLine[--i] == '\t') break; ++bytes; } return bytes; } /// returns a printable representation of first item from input range /// /// This function returns a printable representation of the next item in a line /// of source. If the next byte begins a valid and printable character, that /// character is returned along with 'true'. /// /// Otherwise, if the next byte begins a valid, but unprintable character, a /// printable, escaped representation of the character is returned, along with /// 'false'. Otherwise a printable, escaped representation of the next byte /// is returned along with 'false'. /// /// \note The index is updated to be used with a subsequent call to /// printableTextForNextCharacter. /// /// \param SourceLine The line of source /// \param i Pointer to byte index, /// \param TabStop used to expand tabs /// \return pair(printable text, 'true' iff original text was printable) static std::pair<SmallString<16>, bool> printableTextForNextCharacter(StringRef SourceLine, size_t *i, unsigned TabStop) { assert(i && "i must not be null"); assert(*i < SourceLine.size() && "must point to a valid index"); if (SourceLine[*i] == '\t') { assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop && "Invalid -ftabstop value"); unsigned Col = bytesSincePreviousTabOrLineBegin(SourceLine, *i); unsigned NumSpaces = TabStop - (Col % TabStop); assert(0 < NumSpaces && NumSpaces <= TabStop && "Invalid computation of space amt"); ++(*i); SmallString<16> expandedTab; expandedTab.assign(NumSpaces, ' '); return std::make_pair(expandedTab, true); } unsigned char const *begin, *end; begin = reinterpret_cast<unsigned char const *>(&*(SourceLine.begin() + *i)); end = begin + (SourceLine.size() - *i); if (llvm::isLegalUTF8Sequence(begin, end)) { llvm::UTF32 c; llvm::UTF32 *cptr = &c; unsigned char const *original_begin = begin; unsigned char const *cp_end = begin + llvm::getNumBytesForUTF8(SourceLine[*i]); llvm::ConversionResult res = llvm::ConvertUTF8toUTF32( &begin, cp_end, &cptr, cptr + 1, llvm::strictConversion); (void)res; assert(llvm::conversionOK == res); assert(0 < begin-original_begin && "we must be further along in the string now"); *i += begin-original_begin; if (!llvm::sys::locale::isPrint(c)) { // If next character is valid UTF-8, but not printable SmallString<16> expandedCP("<U+>"); while (c) { expandedCP.insert(expandedCP.begin()+3, llvm::hexdigit(c%16)); c/=16; } while (expandedCP.size() < 8) expandedCP.insert(expandedCP.begin()+3, llvm::hexdigit(0)); return std::make_pair(expandedCP, false); } // If next character is valid UTF-8, and printable return std::make_pair(SmallString<16>(original_begin, cp_end), true); } // If next byte is not valid UTF-8 (and therefore not printable) SmallString<16> expandedByte("<XX>"); unsigned char byte = SourceLine[*i]; expandedByte[1] = llvm::hexdigit(byte / 16); expandedByte[2] = llvm::hexdigit(byte % 16); ++(*i); return std::make_pair(expandedByte, false); } static void expandTabs(std::string &SourceLine, unsigned TabStop) { size_t i = SourceLine.size(); while (i>0) { i--; if (SourceLine[i]!='\t') continue; size_t tmp_i = i; std::pair<SmallString<16>,bool> res = printableTextForNextCharacter(SourceLine, &tmp_i, TabStop); SourceLine.replace(i, 1, res.first.c_str()); } }
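// Illustrative sketch (not part of the imported sources): behavior of
// printableTextForNextCharacter() and expandTabs() above for TabStop == 8:
//
//   "ab\tc"  ->  "ab      c"   // tab becomes 6 spaces, reaching column 8
//   "\x07"   ->  "<U+0007>"    // valid UTF-8 but unprintable code point
//   "\xff"   ->  "<FF>"        // byte that is not valid UTF-8
//
// The bool in the returned pair reports whether the original text was
// printable as-is; the column maps below only measure the width of the
// returned printable text.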
/// This function takes a raw source line and produces a mapping from the bytes /// of the printable representation of the line to the columns those printable /// characters will appear at (numbering the first column as 0). /// /// If a byte 'i' corresponds to multiple columns (e.g. the byte contains a tab /// character) then the array will map that byte to the first column the /// tab appears at and the next value in the map will have been incremented /// more than once. /// /// If a byte is the first in a sequence of bytes that together map to a single /// entity in the output, then the array will map that byte to the appropriate /// column while the subsequent bytes will be -1. /// /// The last element in the array does not correspond to any byte in the input /// and instead is the number of columns needed to display the source /// /// example: (given a tabstop of 8) /// /// "a \t \u3042" -> {0,1,2,8,9,-1,-1,11} /// /// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to /// display) static void byteToColumn(StringRef SourceLine, unsigned TabStop, SmallVectorImpl<int> &out) { out.clear(); if (SourceLine.empty()) { out.resize(1u,0); return; } out.resize(SourceLine.size()+1, -1); int columns = 0; size_t i = 0; while (i<SourceLine.size()) { out[i] = columns; std::pair<SmallString<16>,bool> res = printableTextForNextCharacter(SourceLine, &i, TabStop); columns += llvm::sys::locale::columnWidth(res.first); } out.back() = columns; } /// This function takes a raw source line and produces a mapping from columns /// to the byte of the source line that produced the character displaying at /// that column. This is the inverse of the mapping produced by byteToColumn() /// /// The last element in the array is the number of bytes in the source string /// /// example: (given a tabstop of 8) /// /// "a \t \u3042" -> {0,1,2,-1,-1,-1,-1,-1,3,4,-1,7} /// /// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to /// display) static void columnToByte(StringRef SourceLine, unsigned TabStop, SmallVectorImpl<int> &out) { out.clear(); if (SourceLine.empty()) { out.resize(1u, 0); return; } int columns = 0; size_t i = 0; while (i<SourceLine.size()) { out.resize(columns+1, -1); out.back() = i; std::pair<SmallString<16>,bool> res = printableTextForNextCharacter(SourceLine, &i, TabStop); columns += llvm::sys::locale::columnWidth(res.first); } out.resize(columns+1, -1); out.back() = i; } namespace { struct SourceColumnMap { SourceColumnMap(StringRef SourceLine, unsigned TabStop) : m_SourceLine(SourceLine) { ::byteToColumn(SourceLine, TabStop, m_byteToColumn); ::columnToByte(SourceLine, TabStop, m_columnToByte); assert(m_byteToColumn.size()==SourceLine.size()+1); assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size()); assert(m_byteToColumn.size() == static_cast<unsigned>(m_columnToByte.back()+1)); assert(static_cast<unsigned>(m_byteToColumn.back()+1) == m_columnToByte.size()); } int columns() const { return m_byteToColumn.back(); } int bytes() const { return m_columnToByte.back(); } /// Map a byte to the column which it is at the start of, or return -1 /// if it is not at the start of a column (for a UTF-8 trailing byte). int byteToColumn(int n) const { assert(0<=n && n<static_cast<int>(m_byteToColumn.size())); return m_byteToColumn[n]; } /// Map a byte to the first column which contains it. int byteToContainingColumn(int N) const { assert(0 <= N && N < static_cast<int>(m_byteToColumn.size())); while (m_byteToColumn[N] == -1) --N; return m_byteToColumn[N]; } /// Map a column to the byte which starts the column, or return -1 if /// the column is the second or subsequent column of an expanded tab or /// similar multi-column entity. int columnToByte(int n) const { assert(0<=n && n<static_cast<int>(m_columnToByte.size())); return m_columnToByte[n]; } /// Map from a byte index to the next byte which starts a column. int startOfNextColumn(int N) const { assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1)); while (byteToColumn(++N) == -1) {} return N; } /// Map from a byte index to the previous byte which starts a column.
int startOfPreviousColumn(int N) const { assert(0 < N && N < static_cast<int>(m_byteToColumn.size())); while (byteToColumn(--N) == -1) {} return N; } StringRef getSourceLine() const { return m_SourceLine; } private: const std::string m_SourceLine; SmallVector<int,200> m_byteToColumn; SmallVector<int,200> m_columnToByte; }; } // end anonymous namespace /// When the source code line we want to print is too long for /// the terminal, select the "interesting" region. static void selectInterestingSourceRegion(std::string &SourceLine, std::string &CaretLine, std::string &FixItInsertionLine, unsigned Columns, const SourceColumnMap &map) { unsigned CaretColumns = CaretLine.size(); unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine); unsigned MaxColumns = std::max(static_cast<unsigned>(map.columns()), std::max(CaretColumns, FixItColumns)); // if the number of columns is less than the desired number we're done if (MaxColumns <= Columns) return; // No special characters are allowed in CaretLine. assert(CaretLine.end() == std::find_if(CaretLine.begin(), CaretLine.end(), [](char c) { return c < ' ' || '~' < c; })); // Find the slice that we need to display the full caret line // correctly. unsigned CaretStart = 0, CaretEnd = CaretLine.size(); for (; CaretStart != CaretEnd; ++CaretStart) if (!isWhitespace(CaretLine[CaretStart])) break; for (; CaretEnd != CaretStart; --CaretEnd) if (!isWhitespace(CaretLine[CaretEnd - 1])) break; // caret has already been inserted into CaretLine so the above whitespace // check is guaranteed to include the caret // If we have a fix-it line, make sure the slice includes all of the // fix-it information. if (!FixItInsertionLine.empty()) { unsigned FixItStart = 0, FixItEnd = FixItInsertionLine.size(); for (; FixItStart != FixItEnd; ++FixItStart) if (!isWhitespace(FixItInsertionLine[FixItStart])) break; for (; FixItEnd != FixItStart; --FixItEnd) if (!isWhitespace(FixItInsertionLine[FixItEnd - 1])) break; // We can safely use the byte offset FixItStart as the column offset // because the characters up until FixItStart are all ASCII whitespace // characters. unsigned FixItStartCol = FixItStart; unsigned FixItEndCol = llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd)); CaretStart = std::min(FixItStartCol, CaretStart); CaretEnd = std::max(FixItEndCol, CaretEnd); } // CaretEnd may have been set at the middle of a character // If it's not at a character's first column then advance it past the current // character. while (static_cast<int>(CaretEnd) < map.columns() && -1 == map.columnToByte(CaretEnd)) ++CaretEnd; assert((static_cast<int>(CaretStart) > map.columns() || -1!=map.columnToByte(CaretStart)) && "CaretStart must not point to a column in the middle of a source" " line character"); assert((static_cast<int>(CaretEnd) > map.columns() || -1!=map.columnToByte(CaretEnd)) && "CaretEnd must not point to a column in the middle of a source line" " character"); // CaretLine[CaretStart, CaretEnd) contains all of the interesting // parts of the caret line. While this slice is smaller than the // number of columns we have, try to grow the slice to encompass // more context.
unsigned SourceStart = map.columnToByte(std::min<unsigned>(CaretStart, map.columns())); unsigned SourceEnd = map.columnToByte(std::min<unsigned>(CaretEnd, map.columns())); unsigned CaretColumnsOutsideSource = CaretEnd-CaretStart - (map.byteToColumn(SourceEnd)-map.byteToColumn(SourceStart)); char const *front_ellipse = "  ..."; char const *front_space = "     "; char const *back_ellipse = "..."; unsigned ellipses_space = strlen(front_ellipse) + strlen(back_ellipse); unsigned TargetColumns = Columns; // Give us extra room for the ellipses // and any of the caret line that extends past the source if (TargetColumns > ellipses_space+CaretColumnsOutsideSource) TargetColumns -= ellipses_space+CaretColumnsOutsideSource; while (SourceStart>0 || SourceEnd<SourceLine.size()) { bool ExpandedRegion = false; if (SourceStart>0) { unsigned NewStart = map.startOfPreviousColumn(SourceStart); // Skip over any whitespace we see here; we're looking for // another bit of interesting text. // FIXME: Detect non-ASCII whitespace characters too. while (NewStart && isWhitespace(SourceLine[NewStart])) NewStart = map.startOfPreviousColumn(NewStart); // Skip over this bit of "interesting" text. while (NewStart) { unsigned Prev = map.startOfPreviousColumn(NewStart); if (isWhitespace(SourceLine[Prev])) break; NewStart = Prev; } assert(map.byteToColumn(NewStart) != -1); unsigned NewColumns = map.byteToColumn(SourceEnd) - map.byteToColumn(NewStart); if (NewColumns <= TargetColumns) { SourceStart = NewStart; ExpandedRegion = true; } } if (SourceEnd<SourceLine.size()) { unsigned NewEnd = map.startOfNextColumn(SourceEnd); // Skip over any whitespace we see here; we're looking for // another bit of interesting text. // FIXME: Detect non-ASCII whitespace characters too. while (NewEnd != SourceLine.size() && isWhitespace(SourceLine[NewEnd])) NewEnd = map.startOfNextColumn(NewEnd); // Skip over this bit of "interesting" text. while (NewEnd != SourceLine.size() && !isWhitespace(SourceLine[NewEnd])) NewEnd = map.startOfNextColumn(NewEnd); assert(map.byteToColumn(NewEnd) != -1); unsigned NewColumns = map.byteToColumn(NewEnd) - map.byteToColumn(SourceStart); if (NewColumns <= TargetColumns) { SourceEnd = NewEnd; ExpandedRegion = true; } } if (!ExpandedRegion) break; } CaretStart = map.byteToColumn(SourceStart); CaretEnd = map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource; // [CaretStart, CaretEnd) is the slice we want. Update the various // output lines to show only this slice, with two-space padding // before the lines so that it looks nicer. assert(CaretStart!=(unsigned)-1 && CaretEnd!=(unsigned)-1 && SourceStart!=(unsigned)-1 && SourceEnd!=(unsigned)-1); assert(SourceStart <= SourceEnd); assert(CaretStart <= CaretEnd); unsigned BackColumnsRemoved = map.columns()-map.byteToColumn(SourceEnd); unsigned FrontColumnsRemoved = CaretStart; unsigned ColumnsKept = CaretEnd-CaretStart; // We checked up front that the line needed truncation assert(FrontColumnsRemoved+ColumnsKept+BackColumnsRemoved > Columns); // The line needs some truncation, and we'd prefer to keep the front // if possible, so remove the back if (BackColumnsRemoved > strlen(back_ellipse)) SourceLine.replace(SourceEnd, std::string::npos, back_ellipse); // If that's enough then we're done if (FrontColumnsRemoved+ColumnsKept <= Columns) return; // Otherwise remove the front as well if (FrontColumnsRemoved > strlen(front_ellipse)) { SourceLine.replace(0, SourceStart, front_ellipse); CaretLine.replace(0, CaretStart, front_space); if (!FixItInsertionLine.empty()) FixItInsertionLine.replace(0, CaretStart, front_space); } } /// Skip over whitespace in the string, starting at the given /// index. /// /// \returns The index of the first non-whitespace character that is /// greater than or equal to Idx or, if no such character exists, /// returns the end of the string. static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length) { while (Idx < Length && isWhitespace(Str[Idx])) ++Idx; return Idx; } /// If the given character is the start of some kind of /// balanced punctuation (e.g., quotes or parentheses), return the /// character that will terminate the punctuation. /// /// \returns The ending punctuation character, if any, or the NULL /// character if the input character does not start any punctuation. static inline char findMatchingPunctuation(char c) { switch (c) { case '\'': return '\''; case '`': return '\''; case '"': return '"'; case '(': return ')'; case '[': return ']'; case '{': return '}'; default: break; } return 0; } /// Find the end of the word starting at the given offset /// within a string. /// /// \returns the index pointing one character past the end of the /// word. static unsigned findEndOfWord(unsigned Start, StringRef Str, unsigned Length, unsigned Column, unsigned Columns) { assert(Start < Str.size() && "Invalid start position!"); unsigned End = Start + 1; // If we are already at the end of the string, take that as the word. if (End == Str.size()) return End; // Determine if the start of the string is actually opening // punctuation, e.g., a quote or parentheses.
char EndPunct = findMatchingPunctuation(Str[Start]); if (!EndPunct) { // This is a normal word. Just find the first space character. while (End < Length && !isWhitespace(Str[End])) ++End; return End; } // We have the start of a balanced punctuation sequence (quotes, // parentheses, etc.). Determine where the full sequence ends. SmallString<16> PunctuationEndStack; PunctuationEndStack.push_back(EndPunct); while (End < Length && !PunctuationEndStack.empty()) { if (Str[End] == PunctuationEndStack.back()) PunctuationEndStack.pop_back(); else if (char SubEndPunct = findMatchingPunctuation(Str[End])) PunctuationEndStack.push_back(SubEndPunct); ++End; } // Find the first space character after the punctuation ended. while (End < Length && !isWhitespace(Str[End])) ++End; unsigned PunctWordLength = End - Start; if (// If the word fits on this line Column + PunctWordLength <= Columns || // ... or the word is "short enough" to take up the next line // without too much ugly white space PunctWordLength < Columns/3) return End; // Take the whole thing as a single "word". // The whole quoted/parenthesized string is too long to print as a // single "word". Instead, find the "word" that starts just after // the punctuation and use that end-point instead. This will recurse // until it finds something small enough to consider a word. return findEndOfWord(Start + 1, Str, Length, Column + 1, Columns); } /// Print the given string to a stream, word-wrapping it to /// some number of columns in the process. /// /// \param OS the stream to which the word-wrapping string will be /// emitted. /// \param Str the string to word-wrap and output. /// \param Columns the number of columns to word-wrap to. /// \param Column the column number at which the first character of \p /// Str will be printed. This will be non-zero when part of the first /// line has already been printed. /// \param Bold if the current text should be bold /// \param Indentation the number of spaces to indent any lines beyond /// the first line. /// \returns true if word-wrapping was required, or false if the /// string fit on the first line. static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns, unsigned Column = 0, bool Bold = false, unsigned Indentation = WordWrapIndentation) { const unsigned Length = std::min(Str.find('\n'), Str.size()); bool TextNormal = true; // The string used to indent each line. SmallString<16> IndentStr; IndentStr.assign(Indentation, ' '); bool Wrapped = false; for (unsigned WordStart = 0, WordEnd; WordStart < Length; WordStart = WordEnd) { // Find the beginning of the next word. WordStart = skipWhitespace(WordStart, Str, Length); if (WordStart == Length) break; // Find the end of this word. WordEnd = findEndOfWord(WordStart, Str, Length, Column, Columns); // Does this word fit on the current line? unsigned WordLength = WordEnd - WordStart; if (Column + WordLength < Columns) { // This word fits on the current line; print it there. if (WordStart) { OS << ' '; Column += 1; } applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength), TextNormal, Bold); Column += WordLength; continue; } // This word does not fit on the current line, so wrap to the next // line. OS << '\n'; OS.write(&IndentStr[0], Indentation); applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength), TextNormal, Bold); Column = Indentation + WordLength; Wrapped = true; } // Append any remaining text from the message with its existing formatting.
applyTemplateHighlighting(OS, Str.substr(Length), TextNormal, Bold); assert(TextNormal && "Text highlighted at end of diagnostic message."); return Wrapped; } TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, DiagnosticOptions *DiagOpts) : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS) {} TextDiagnostic::~TextDiagnostic() {} void TextDiagnostic::emitDiagnosticMessage( FullSourceLoc Loc, PresumedLoc PLoc, DiagnosticsEngine::Level Level, StringRef Message, ArrayRef Ranges, DiagOrStoredDiag D) { uint64_t StartOfLocationInfo = OS.tell(); // Emit the location of this particular diagnostic. if (Loc.isValid()) emitDiagnosticLoc(Loc, PLoc, Level, Ranges); if (DiagOpts->ShowColors) OS.resetColor(); printDiagnosticLevel(OS, Level, DiagOpts->ShowColors, DiagOpts->CLFallbackMode); printDiagnosticMessage(OS, /*IsSupplemental*/ Level == DiagnosticsEngine::Note, Message, OS.tell() - StartOfLocationInfo, DiagOpts->MessageLength, DiagOpts->ShowColors); } /*static*/ void TextDiagnostic::printDiagnosticLevel(raw_ostream &OS, DiagnosticsEngine::Level Level, bool ShowColors, bool CLFallbackMode) { if (ShowColors) { // Print diagnostic category in bold and color switch (Level) { case DiagnosticsEngine::Ignored: llvm_unreachable("Invalid diagnostic type"); case DiagnosticsEngine::Note: OS.changeColor(noteColor, true); break; case DiagnosticsEngine::Remark: OS.changeColor(remarkColor, true); break; case DiagnosticsEngine::Warning: OS.changeColor(warningColor, true); break; case DiagnosticsEngine::Error: OS.changeColor(errorColor, true); break; case DiagnosticsEngine::Fatal: OS.changeColor(fatalColor, true); break; } } switch (Level) { case DiagnosticsEngine::Ignored: llvm_unreachable("Invalid diagnostic type"); case DiagnosticsEngine::Note: OS << "note"; break; case DiagnosticsEngine::Remark: OS << "remark"; break; case DiagnosticsEngine::Warning: OS << "warning"; break; case DiagnosticsEngine::Error: OS << "error"; break; case DiagnosticsEngine::Fatal: OS << "fatal error"; break; } // In clang-cl /fallback mode, print diagnostics as "error(clang):". This // makes it more clear whether a message is coming from clang or cl.exe, // and it prevents MSBuild from concluding that the build failed just because // there is an "error:" in the output. if (CLFallbackMode) OS << "(clang)"; OS << ": "; if (ShowColors) OS.resetColor(); } /*static*/ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS, bool IsSupplemental, StringRef Message, unsigned CurrentColumn, unsigned Columns, bool ShowColors) { bool Bold = false; if (ShowColors && !IsSupplemental) { // Print primary diagnostic messages in bold and without color, to visually // indicate the transition from continuation notes and other output. 
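An aside on findEndOfWord above: its balanced-punctuation scan is self-contained enough to exercise in isolation. The sketch below reimplements the same stack-based walk under hypothetical names (it is not the code in this patch, and it assumes the character at Start is known to be opening punctuation). Given "'foo (bar [baz])' qux" with Start at the quote, it returns the index just past the closing quote, so the whole quoted span wraps as a single word.

    #include <string>
    #include <vector>

    // Mirrors findMatchingPunctuation above: map an opener to its closer.
    static char closerFor(char c) {
      switch (c) {
      case '\'': case '`': return '\'';
      case '"': return '"';
      case '(': return ')';
      case '[': return ']';
      case '{': return '}';
      default: return 0;
      }
    }

    // Walk forward keeping a stack of expected closers, as findEndOfWord does.
    static size_t endOfBalancedSpan(const std::string &S, size_t Start) {
      std::vector<char> Stack = {closerFor(S[Start])};
      size_t I = Start + 1;
      while (I < S.size() && !Stack.empty()) {
        if (S[I] == Stack.back())
          Stack.pop_back();              // current group closed
        else if (char C = closerFor(S[I]))
          Stack.push_back(C);            // nested group opened
        ++I;
      }
      return I; // one past the closing punctuation (or end of string)
    }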
OS.changeColor(savedColor, true); Bold = true; } if (Columns) printWordWrapped(OS, Message, Columns, CurrentColumn, Bold); else { bool Normal = true; applyTemplateHighlighting(OS, Message, Normal, Bold); assert(Normal && "Formatting should have returned to normal"); } if (ShowColors) OS.resetColor(); OS << '\n'; } void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) { SmallVector<char, 128> AbsoluteFilename; if (DiagOpts->AbsolutePath) { const DirectoryEntry *Dir = SM.getFileManager().getDirectory( llvm::sys::path::parent_path(Filename)); if (Dir) { StringRef DirName = SM.getFileManager().getCanonicalName(Dir); llvm::sys::path::append(AbsoluteFilename, DirName, llvm::sys::path::filename(Filename)); Filename = StringRef(AbsoluteFilename.data(), AbsoluteFilename.size()); } } OS << Filename; } /// Print out the file/line/column information and include trace. /// /// This method handles the emission of the diagnostic location information. /// This includes extracting as much location information as is present for /// the diagnostic and printing it, as well as any include stack or source /// ranges necessary. void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc, DiagnosticsEngine::Level Level, ArrayRef<CharSourceRange> Ranges) { if (PLoc.isInvalid()) { // At least print the file name if available: FileID FID = Loc.getFileID(); if (FID.isValid()) { const FileEntry *FE = Loc.getFileEntry(); if (FE && FE->isValid()) { emitFilename(FE->getName(), Loc.getManager()); if (FE->isInPCH()) OS << " (in PCH)"; OS << ": "; } } return; } unsigned LineNo = PLoc.getLine(); if (!DiagOpts->ShowLocation) return; if (DiagOpts->ShowColors) OS.changeColor(savedColor, true); emitFilename(PLoc.getFilename(), Loc.getManager()); switch (DiagOpts->getFormat()) { case DiagnosticOptions::Clang: OS << ':' << LineNo; break; case DiagnosticOptions::MSVC: OS << '(' << LineNo; break; case DiagnosticOptions::Vi: OS << " +" << LineNo; break; } if (DiagOpts->ShowColumn) // Compute the column number. if (unsigned ColNo = PLoc.getColumn()) { if (DiagOpts->getFormat() == DiagnosticOptions::MSVC) { OS << ','; // Visual Studio 2010 or earlier expects column number to be off by one if (LangOpts.MSCompatibilityVersion && !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2012)) ColNo--; } else OS << ':'; OS << ColNo; } switch (DiagOpts->getFormat()) { case DiagnosticOptions::Clang: case DiagnosticOptions::Vi: OS << ':'; break; case DiagnosticOptions::MSVC: // MSVC2013 and before print 'file(4) : error'. MSVC2015 gets rid of the // space and prints 'file(4): error'. OS << ')'; if (LangOpts.MSCompatibilityVersion && !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015)) OS << ' '; - OS << ": "; + OS << ':'; break; } if (DiagOpts->ShowSourceRanges && !Ranges.empty()) { FileID CaretFileID = Loc.getExpansionLoc().getFileID(); bool PrintedRange = false; for (ArrayRef<CharSourceRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { // Ignore invalid ranges. if (!RI->isValid()) continue; auto &SM = Loc.getManager(); SourceLocation B = SM.getExpansionLoc(RI->getBegin()); CharSourceRange ERange = SM.getExpansionRange(RI->getEnd()); SourceLocation E = ERange.getEnd(); bool IsTokenRange = ERange.isTokenRange(); std::pair<FileID, unsigned> BInfo = SM.getDecomposedLoc(B); std::pair<FileID, unsigned> EInfo = SM.getDecomposedLoc(E); // If the start or end of the range is in another file, just discard // it. if (BInfo.first != CaretFileID || EInfo.first != CaretFileID) continue; // Add in the length of the token, so that we cover multi-char // tokens.
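The '- OS << ": ";' / '+ OS << ':';' pair above is the functional change in this hunk: for MSVC2015-style output the location prefix now ends in a bare ':', with the separating space emitted afterwards as before. For reference, the three -fdiagnostics-format styles reduce to the prefixes below (hypothetical free function, not part of the patch; the MSVC2010/2012 column compatibility quirks handled above are omitted):

    #include <cstdio>

    enum class DiagFormat { Clang, MSVC, Vi };

    // Shapes of the location prefix that emitDiagnosticLoc produces above.
    static void printLoc(const char *File, unsigned Line, unsigned Col,
                         DiagFormat F) {
      switch (F) {
      case DiagFormat::Clang: std::printf("%s:%u:%u: ", File, Line, Col); break;
      case DiagFormat::MSVC:  std::printf("%s(%u,%u): ", File, Line, Col); break;
      case DiagFormat::Vi:    std::printf("%s +%u:%u: ", File, Line, Col); break;
      }
    }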
unsigned TokSize = 0; if (IsTokenRange) TokSize = Lexer::MeasureTokenLength(E, SM, LangOpts); FullSourceLoc BF(B, SM), EF(E, SM); OS << '{' << BF.getLineNumber() << ':' << BF.getColumnNumber() << '-' << EF.getLineNumber() << ':' << (EF.getColumnNumber() + TokSize) << '}'; PrintedRange = true; } if (PrintedRange) OS << ':'; } OS << ' '; } void TextDiagnostic::emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) { if (DiagOpts->ShowLocation && PLoc.isValid()) OS << "In file included from " << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n"; else OS << "In included file:\n"; } void TextDiagnostic::emitImportLocation(FullSourceLoc Loc, PresumedLoc PLoc, StringRef ModuleName) { if (DiagOpts->ShowLocation && PLoc.isValid()) OS << "In module '" << ModuleName << "' imported from " << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n"; else OS << "In module '" << ModuleName << "':\n"; } void TextDiagnostic::emitBuildingModuleLocation(FullSourceLoc Loc, PresumedLoc PLoc, StringRef ModuleName) { if (DiagOpts->ShowLocation && PLoc.isValid()) OS << "While building module '" << ModuleName << "' imported from " << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n"; else OS << "While building module '" << ModuleName << "':\n"; } /// Find the suitable set of lines to show to include a set of ranges. static llvm::Optional<std::pair<unsigned, unsigned>> findLinesForRange(const CharSourceRange &R, FileID FID, const SourceManager &SM) { if (!R.isValid()) return None; SourceLocation Begin = R.getBegin(); SourceLocation End = R.getEnd(); if (SM.getFileID(Begin) != FID || SM.getFileID(End) != FID) return None; return std::make_pair(SM.getExpansionLineNumber(Begin), SM.getExpansionLineNumber(End)); } /// Add as much of range B into range A as possible without exceeding a maximum /// size of MaxRange. Ranges are inclusive. static std::pair<unsigned, unsigned> maybeAddRange(std::pair<unsigned, unsigned> A, std::pair<unsigned, unsigned> B, unsigned MaxRange) { // If A is already the maximum size, we're done. unsigned Slack = MaxRange - (A.second - A.first + 1); if (Slack == 0) return A; // Easy case: merge succeeds within MaxRange. unsigned Min = std::min(A.first, B.first); unsigned Max = std::max(A.second, B.second); if (Max - Min + 1 <= MaxRange) return {Min, Max}; // If we can't reach B from A within MaxRange, there's nothing to do. // Don't add lines to the range that contain nothing interesting. if ((B.first > A.first && B.first - A.first + 1 > MaxRange) || (B.second < A.second && A.second - B.second + 1 > MaxRange)) return A; // Otherwise, expand A towards B to produce a range of size MaxRange. We // attempt to expand by the same amount in both directions if B strictly // contains A. // Expand downwards by up to half the available amount, then upwards as // much as possible, then downwards as much as possible. A.second = std::min(A.second + (Slack + 1) / 2, Max); Slack = MaxRange - (A.second - A.first + 1); A.first = std::max(Min + Slack, A.first) - Slack; A.second = std::min(A.first + MaxRange - 1, Max); return A; } /// Highlight a SourceRange (with ~'s) for any characters on LineNo. static void highlightRange(const CharSourceRange &R, unsigned LineNo, FileID FID, const SourceColumnMap &map, std::string &CaretLine, const SourceManager &SM, const LangOptions &LangOpts) { if (!R.isValid()) return; SourceLocation Begin = R.getBegin(); SourceLocation End = R.getEnd(); unsigned StartLineNo = SM.getExpansionLineNumber(Begin); if (StartLineNo > LineNo || SM.getFileID(Begin) != FID) return; // No intersection.
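Stepping back to maybeAddRange above for a moment: the expansion arithmetic deserves a worked example. Since the function is file-static, the self-contained check below carries a local copy of the same logic (demonstration only, not the patched code) and verifies the two interesting cases: a union that fits the budget, and a B that can only be approached by growing A up to MaxRange.

    #include <algorithm>
    #include <cassert>
    #include <utility>

    using Range = std::pair<unsigned, unsigned>;

    // Local copy of the maybeAddRange logic above, for demonstration only.
    static Range addRange(Range A, Range B, unsigned MaxRange) {
      unsigned Slack = MaxRange - (A.second - A.first + 1);
      if (Slack == 0)
        return A;
      unsigned Min = std::min(A.first, B.first);
      unsigned Max = std::max(A.second, B.second);
      if (Max - Min + 1 <= MaxRange)
        return {Min, Max};
      if ((B.first > A.first && B.first - A.first + 1 > MaxRange) ||
          (B.second < A.second && A.second - B.second + 1 > MaxRange))
        return A;
      A.second = std::min(A.second + (Slack + 1) / 2, Max);
      Slack = MaxRange - (A.second - A.first + 1);
      A.first = std::max(Min + Slack, A.first) - Slack;
      A.second = std::min(A.first + MaxRange - 1, Max);
      return A;
    }

    int main() {
      assert(addRange({3, 4}, {5, 6}, 8) == Range(3, 6));       // union fits
      assert(addRange({10, 11}, {12, 20}, 4) == Range(10, 13)); // grow to budget
    }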
unsigned EndLineNo = SM.getExpansionLineNumber(End); if (EndLineNo < LineNo || SM.getFileID(End) != FID) return; // No intersection. // Compute the column number of the start. unsigned StartColNo = 0; if (StartLineNo == LineNo) { StartColNo = SM.getExpansionColumnNumber(Begin); if (StartColNo) --StartColNo; // Zero base the col #. } // Compute the column number of the end. unsigned EndColNo = map.getSourceLine().size(); if (EndLineNo == LineNo) { EndColNo = SM.getExpansionColumnNumber(End); if (EndColNo) { --EndColNo; // Zero base the col #. // Add in the length of the token, so that we cover multi-char tokens if // this is a token range. if (R.isTokenRange()) EndColNo += Lexer::MeasureTokenLength(End, SM, LangOpts); } else { EndColNo = CaretLine.size(); } } assert(StartColNo <= EndColNo && "Invalid range!"); // Check that a token range does not highlight only whitespace. if (R.isTokenRange()) { // Pick the first non-whitespace column. while (StartColNo < map.getSourceLine().size() && (map.getSourceLine()[StartColNo] == ' ' || map.getSourceLine()[StartColNo] == '\t')) StartColNo = map.startOfNextColumn(StartColNo); // Pick the last non-whitespace column. if (EndColNo > map.getSourceLine().size()) EndColNo = map.getSourceLine().size(); while (EndColNo && (map.getSourceLine()[EndColNo-1] == ' ' || map.getSourceLine()[EndColNo-1] == '\t')) EndColNo = map.startOfPreviousColumn(EndColNo); // If the start/end passed each other, then we are trying to highlight a // range that just exists in whitespace. That most likely means we have // a multi-line highlighting range that covers a blank line. if (StartColNo > EndColNo) { assert(StartLineNo != EndLineNo && "trying to highlight whitespace"); StartColNo = EndColNo; } } assert(StartColNo <= map.getSourceLine().size() && "Invalid range!"); assert(EndColNo <= map.getSourceLine().size() && "Invalid range!"); // Fill the range with ~'s. StartColNo = map.byteToContainingColumn(StartColNo); EndColNo = map.byteToContainingColumn(EndColNo); assert(StartColNo <= EndColNo && "Invalid range!"); if (CaretLine.size() < EndColNo) CaretLine.resize(EndColNo,' '); std::fill(CaretLine.begin()+StartColNo,CaretLine.begin()+EndColNo,'~'); } static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo, const SourceColumnMap &map, ArrayRef Hints, const SourceManager &SM, const DiagnosticOptions *DiagOpts) { std::string FixItInsertionLine; if (Hints.empty() || !DiagOpts->ShowFixits) return FixItInsertionLine; unsigned PrevHintEndCol = 0; for (ArrayRef::iterator I = Hints.begin(), E = Hints.end(); I != E; ++I) { if (!I->CodeToInsert.empty()) { // We have an insertion hint. Determine whether the inserted // code contains no newlines and is on the same line as the caret. std::pair HintLocInfo = SM.getDecomposedExpansionLoc(I->RemoveRange.getBegin()); if (FID == HintLocInfo.first && LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second) && StringRef(I->CodeToInsert).find_first_of("\n\r") == StringRef::npos) { // Insert the new code into the line just below the code // that the user wrote. // Note: When modifying this function, be very careful about what is a // "column" (printed width, platform-dependent) and what is a // "byte offset" (SourceManager "column"). 
unsigned HintByteOffset = SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1; // The hint must start inside the source or right at the end assert(HintByteOffset < static_cast<unsigned>(map.bytes())+1); unsigned HintCol = map.byteToContainingColumn(HintByteOffset); // If we inserted a long previous hint, push this one forwards, and add // an extra space to show that this is not part of the previous // completion. This is sort of the best we can do when two hints appear // to overlap. // // Note that if this hint is located immediately after the previous // hint, no space will be added, since the location is more important. if (HintCol < PrevHintEndCol) HintCol = PrevHintEndCol + 1; // This should NOT use HintByteOffset, because the source might have // Unicode characters in earlier columns. unsigned NewFixItLineSize = FixItInsertionLine.size() + (HintCol - PrevHintEndCol) + I->CodeToInsert.size(); if (NewFixItLineSize > FixItInsertionLine.size()) FixItInsertionLine.resize(NewFixItLineSize, ' '); std::copy(I->CodeToInsert.begin(), I->CodeToInsert.end(), FixItInsertionLine.end() - I->CodeToInsert.size()); PrevHintEndCol = HintCol + llvm::sys::locale::columnWidth(I->CodeToInsert); } } } expandTabs(FixItInsertionLine, DiagOpts->TabStop); return FixItInsertionLine; } /// Emit a code snippet and caret line. /// /// This routine emits a single line's code snippet and caret line. /// /// \param Loc The location for the caret. /// \param Ranges The underlined ranges for this code snippet. /// \param Hints The FixIt hints active for this diagnostic. void TextDiagnostic::emitSnippetAndCaret( FullSourceLoc Loc, DiagnosticsEngine::Level Level, SmallVectorImpl<CharSourceRange> &Ranges, ArrayRef<FixItHint> Hints) { assert(Loc.isValid() && "must have a valid source location here"); assert(Loc.isFileID() && "must have a file location here"); // If caret diagnostics are enabled and we have location, we want to // emit the caret. However, we only do this if the location moved // from the last diagnostic, if the last diagnostic was a note that // was part of a different warning or error diagnostic, or if the // diagnostic has ranges. We don't want to emit the same caret // multiple times if one loc has multiple diagnostics. if (!DiagOpts->ShowCarets) return; if (Loc == LastLoc && Ranges.empty() && Hints.empty() && (LastLevel != DiagnosticsEngine::Note || Level == LastLevel)) return; // Decompose the location into a FID/Offset pair. std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc(); FileID FID = LocInfo.first; const SourceManager &SM = Loc.getManager(); // Get information about the buffer it points into. bool Invalid = false; StringRef BufData = Loc.getBufferData(&Invalid); if (Invalid) return; unsigned CaretLineNo = Loc.getLineNumber(); unsigned CaretColNo = Loc.getColumnNumber(); // Arbitrarily stop showing snippets when the line is too long. static const size_t MaxLineLengthToPrint = 4096; if (CaretColNo > MaxLineLengthToPrint) return; // Find the set of lines to include. const unsigned MaxLines = DiagOpts->SnippetLineLimit; std::pair<unsigned, unsigned> Lines = {CaretLineNo, CaretLineNo}; for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) if (auto OptionalRange = findLinesForRange(*I, FID, SM)) Lines = maybeAddRange(Lines, *OptionalRange, MaxLines); for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1; ++LineNo) { const char *BufStart = BufData.data(); const char *BufEnd = BufStart + BufData.size(); // Rewind from the current position to the start of the line.
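The column-versus-byte warning in the comments above is not theoretical: a SourceManager "column" is really a byte offset, while the fix-it line is indexed by display column. The two diverge as soon as the line contains multibyte UTF-8, which is exactly why the code uses byteToContainingColumn and llvm::sys::locale::columnWidth. A small illustration (assuming LLVM's Support headers are on the include path):

    #include <string>
    #include "llvm/Support/Locale.h"

    // "né" is three bytes of UTF-8 but only two display columns; indexing
    // the fix-it line by byte offset would drift right of the intended spot.
    void columnVsByteDemo() {
      std::string S = "n\xc3\xa9";                    // "né"
      unsigned Bytes = S.size();                      // 3
      int Cols = llvm::sys::locale::columnWidth(S);   // 2
      (void)Bytes;
      (void)Cols;
    }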
const char *LineStart = BufStart + SM.getDecomposedLoc(SM.translateLineCol(FID, LineNo, 1)).second; if (LineStart == BufEnd) break; // Compute the line end. const char *LineEnd = LineStart; while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd != BufEnd) ++LineEnd; // Arbitrarily stop showing snippets when the line is too long. // FIXME: Don't print any lines in this case. if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint) return; // Trim trailing null-bytes. StringRef Line(LineStart, LineEnd - LineStart); while (!Line.empty() && Line.back() == '\0' && (LineNo != CaretLineNo || Line.size() > CaretColNo)) Line = Line.drop_back(); // Copy the line of code into an std::string for ease of manipulation. std::string SourceLine(Line.begin(), Line.end()); // Build the byte to column map. const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop); // Create a line for the caret that is filled with spaces that is the same // number of columns as the line of source code. std::string CaretLine(sourceColMap.columns(), ' '); // Highlight all of the characters covered by Ranges with ~ characters. for (SmallVectorImpl::iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) highlightRange(*I, LineNo, FID, sourceColMap, CaretLine, SM, LangOpts); // Next, insert the caret itself. if (CaretLineNo == LineNo) { CaretColNo = sourceColMap.byteToContainingColumn(CaretColNo - 1); if (CaretLine.size() < CaretColNo + 1) CaretLine.resize(CaretColNo + 1, ' '); CaretLine[CaretColNo] = '^'; } std::string FixItInsertionLine = buildFixItInsertionLine( FID, LineNo, sourceColMap, Hints, SM, DiagOpts.get()); // If the source line is too long for our terminal, select only the // "interesting" source region within that line. unsigned Columns = DiagOpts->MessageLength; if (Columns) selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine, Columns, sourceColMap); // If we are in -fdiagnostics-print-source-range-info mode, we are trying // to produce easily machine parsable output. Add a space before the // source line and the caret to make it trivial to tell the main diagnostic // line from what the user is intended to see. if (DiagOpts->ShowSourceRanges) { SourceLine = ' ' + SourceLine; CaretLine = ' ' + CaretLine; } // Finally, remove any blank spaces from the end of CaretLine. while (!CaretLine.empty() && CaretLine[CaretLine.size() - 1] == ' ') CaretLine.erase(CaretLine.end() - 1); // Emit what we have computed. emitSnippet(SourceLine); if (!CaretLine.empty()) { if (DiagOpts->ShowColors) OS.changeColor(caretColor, true); OS << CaretLine << '\n'; if (DiagOpts->ShowColors) OS.resetColor(); } if (!FixItInsertionLine.empty()) { if (DiagOpts->ShowColors) // Print fixit line in color OS.changeColor(fixitColor, false); if (DiagOpts->ShowSourceRanges) OS << ' '; OS << FixItInsertionLine << '\n'; if (DiagOpts->ShowColors) OS.resetColor(); } } // Print out any parseable fixit information requested by the options. 
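Put together, the loop above renders pairs of lines like the following toy reconstruction (pure ASCII, so byte offsets and columns coincide; this is an illustration, not the patched code):

    #include <iostream>
    #include <string>

    // Toy rendition of the snippet/caret pair assembled above.
    void caretDemo() {
      std::string SourceLine = "int x = bad();";
      std::string CaretLine(SourceLine.size(), ' ');
      // Underline the range covering "bad()" with '~', as highlightRange does.
      CaretLine.replace(8, 5, "~~~~~");
      // Then drop the caret itself on the first column of the range.
      CaretLine[8] = '^';
      std::cout << SourceLine << '\n' << CaretLine << '\n';
      // int x = bad();
      //         ^~~~~
    }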
emitParseableFixits(Hints, SM); } void TextDiagnostic::emitSnippet(StringRef line) { if (line.empty()) return; size_t i = 0; std::string to_print; bool print_reversed = false; while (i<line.size()) { std::pair<SmallString<16>,bool> res = printableTextForNextCharacter(line, &i, DiagOpts->TabStop); bool was_printable = res.second; if (DiagOpts->ShowColors && was_printable == print_reversed) { if (print_reversed) OS.reverseColor(); OS << to_print; to_print.clear(); if (DiagOpts->ShowColors) OS.resetColor(); } print_reversed = !was_printable; to_print += res.first.str(); } if (print_reversed && DiagOpts->ShowColors) OS.reverseColor(); OS << to_print; if (print_reversed && DiagOpts->ShowColors) OS.resetColor(); OS << '\n'; } void TextDiagnostic::emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM) { if (!DiagOpts->ShowParseableFixits) return; // We follow FixItRewriter's example in not (yet) handling // fix-its in macros. for (ArrayRef<FixItHint>::iterator I = Hints.begin(), E = Hints.end(); I != E; ++I) { if (I->RemoveRange.isInvalid() || I->RemoveRange.getBegin().isMacroID() || I->RemoveRange.getEnd().isMacroID()) return; } for (ArrayRef<FixItHint>::iterator I = Hints.begin(), E = Hints.end(); I != E; ++I) { SourceLocation BLoc = I->RemoveRange.getBegin(); SourceLocation ELoc = I->RemoveRange.getEnd(); std::pair<FileID, unsigned> BInfo = SM.getDecomposedLoc(BLoc); std::pair<FileID, unsigned> EInfo = SM.getDecomposedLoc(ELoc); // Adjust for token ranges. if (I->RemoveRange.isTokenRange()) EInfo.second += Lexer::MeasureTokenLength(ELoc, SM, LangOpts); // We specifically do not do word-wrapping or tab-expansion here, // because this is supposed to be easy to parse. PresumedLoc PLoc = SM.getPresumedLoc(BLoc); if (PLoc.isInvalid()) break; OS << "fix-it:\""; OS.write_escaped(PLoc.getFilename()); OS << "\":{" << SM.getLineNumber(BInfo.first, BInfo.second) << ':' << SM.getColumnNumber(BInfo.first, BInfo.second) << '-' << SM.getLineNumber(EInfo.first, EInfo.second) << ':' << SM.getColumnNumber(EInfo.first, EInfo.second) << "}:\""; OS.write_escaped(I->CodeToInsert); OS << "\"\n"; } } Index: projects/clang800-import/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp =================================================================== --- projects/clang800-import/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp (revision 344547) +++ projects/clang800-import/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp (revision 344548) @@ -1,16850 +1,16868 @@ //===--- SemaExpr.cpp - Semantic Analysis for Expressions -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements semantic analysis for expressions.
// //===----------------------------------------------------------------------===// #include "TreeTransform.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/FixedPoint.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/DelayedDiagnostic.h" #include "clang/Sema/Designator.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/Overload.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaFixItUtils.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" #include "llvm/Support/ConvertUTF.h" using namespace clang; using namespace sema; /// Determine whether the use of this declaration is valid, without /// emitting diagnostics. bool Sema::CanUseDecl(NamedDecl *D, bool TreatUnavailableAsInvalid) { // See if this is an auto-typed variable whose initializer we are parsing. if (ParsingInitForAutoVars.count(D)) return false; // See if this is a deleted function. if (FunctionDecl *FD = dyn_cast(D)) { if (FD->isDeleted()) return false; // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, SourceLocation(), /*Diagnose*/ false)) return false; // See if this is an aligned allocation/deallocation function that is // unavailable. if (TreatUnavailableAsInvalid && isUnavailableAlignedAllocationFunction(*FD)) return false; } // See if this function is unavailable. if (TreatUnavailableAsInvalid && D->getAvailability() == AR_Unavailable && cast(CurContext)->getAvailability() != AR_Unavailable) return false; return true; } static void DiagnoseUnusedOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc) { // Warn if this is used but marked unused. if (const auto *A = D->getAttr()) { // [[maybe_unused]] should not diagnose uses, but __attribute__((unused)) // should diagnose them. if (A->getSemanticSpelling() != UnusedAttr::CXX11_maybe_unused && A->getSemanticSpelling() != UnusedAttr::C2x_maybe_unused) { const Decl *DC = cast_or_null(S.getCurObjCLexicalContext()); if (DC && !DC->hasAttr()) S.Diag(Loc, diag::warn_used_but_marked_unused) << D->getDeclName(); } } } /// Emit a note explaining that this function is deleted. void Sema::NoteDeletedFunction(FunctionDecl *Decl) { assert(Decl->isDeleted()); CXXMethodDecl *Method = dyn_cast(Decl); if (Method && Method->isDeleted() && Method->isDefaulted()) { // If the method was explicitly defaulted, point at that declaration. if (!Method->isImplicit()) Diag(Decl->getLocation(), diag::note_implicitly_deleted); // Try to diagnose why this special member function was implicitly // deleted. This might fail, if that reason no longer applies. 
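DiagnoseUnusedOfDecl above encodes a deliberate asymmetry between the two spellings of "unused": uses of a declaration carrying the GNU attribute warn, while the standard [[maybe_unused]] spelling stays silent. A minimal illustration (not part of the patch):

    // warn_used_but_marked_unused fires only for the GNU spelling.
    __attribute__((unused)) static int GnuUnused = 1;
    [[maybe_unused]] static int StdUnused = 2;

    int useThem() {
      return GnuUnused    // warning: 'GnuUnused' was marked unused but was used
             + StdUnused; // no warning: [[maybe_unused]] does not flag uses
    }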
CXXSpecialMember CSM = getSpecialMember(Method); if (CSM != CXXInvalid) ShouldDeleteSpecialMember(Method, CSM, nullptr, /*Diagnose=*/true); return; } auto *Ctor = dyn_cast(Decl); if (Ctor && Ctor->isInheritingConstructor()) return NoteDeletedInheritingConstructor(Ctor); Diag(Decl->getLocation(), diag::note_availability_specified_here) << Decl << 1; } /// Determine whether a FunctionDecl was ever declared with an /// explicit storage class. static bool hasAnyExplicitStorageClass(const FunctionDecl *D) { for (auto I : D->redecls()) { if (I->getStorageClass() != SC_None) return true; } return false; } /// Check whether we're in an extern inline function and referring to a /// variable or function with internal linkage (C11 6.7.4p3). /// /// This is only a warning because we used to silently accept this code, but /// in many cases it will not behave correctly. This is not enabled in C++ mode /// because the restriction language is a bit weaker (C++11 [basic.def.odr]p6) /// and so while there may still be user mistakes, most of the time we can't /// prove that there are errors. static void diagnoseUseOfInternalDeclInInlineFunction(Sema &S, const NamedDecl *D, SourceLocation Loc) { // This is disabled under C++; there are too many ways for this to fire in // contexts where the warning is a false positive, or where it is technically // correct but benign. if (S.getLangOpts().CPlusPlus) return; // Check if this is an inlined function or method. FunctionDecl *Current = S.getCurFunctionDecl(); if (!Current) return; if (!Current->isInlined()) return; if (!Current->isExternallyVisible()) return; // Check if the decl has internal linkage. if (D->getFormalLinkage() != InternalLinkage) return; // Downgrade from ExtWarn to Extension if // (1) the supposedly external inline function is in the main file, // and probably won't be included anywhere else. // (2) the thing we're referencing is a pure function. // (3) the thing we're referencing is another inline function. // This last can give us false negatives, but it's better than warning on // wrappers for simple C library functions. const FunctionDecl *UsedFn = dyn_cast(D); bool DowngradeWarning = S.getSourceManager().isInMainFile(Loc); if (!DowngradeWarning && UsedFn) DowngradeWarning = UsedFn->isInlined() || UsedFn->hasAttr(); S.Diag(Loc, DowngradeWarning ? diag::ext_internal_in_extern_inline_quiet : diag::ext_internal_in_extern_inline) << /*IsVar=*/!UsedFn << D; S.MaybeSuggestAddingStaticToDecl(Current); S.Diag(D->getCanonicalDecl()->getLocation(), diag::note_entity_declared_at) << D; } void Sema::MaybeSuggestAddingStaticToDecl(const FunctionDecl *Cur) { const FunctionDecl *First = Cur->getFirstDecl(); // Suggest "static" on the function, if possible. if (!hasAnyExplicitStorageClass(First)) { SourceLocation DeclBegin = First->getSourceRange().getBegin(); Diag(DeclBegin, diag::note_convert_inline_to_static) << Cur << FixItHint::CreateInsertion(DeclBegin, "static "); } } /// Determine whether the use of this declaration is valid, and /// emit any corresponding diagnostics. /// /// This routine diagnoses various problems with referencing /// declarations that can occur when using a declaration. For example, /// it might warn if a deprecated or unavailable declaration is being /// used, or produce an error (and return true) if a C++0x deleted /// function is being used. /// /// \returns true if there was an error (this declaration cannot be /// referenced), false otherwise. 
/// bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, const ObjCInterfaceDecl *UnknownObjCClass, bool ObjCPropertyAccess, bool AvoidPartialAvailabilityChecks, ObjCInterfaceDecl *ClassReceiver) { SourceLocation Loc = Locs.front(); if (getLangOpts().CPlusPlus && isa(D)) { // If there were any diagnostics suppressed by template argument deduction, // emit them now. auto Pos = SuppressedDiagnostics.find(D->getCanonicalDecl()); if (Pos != SuppressedDiagnostics.end()) { for (const PartialDiagnosticAt &Suppressed : Pos->second) Diag(Suppressed.first, Suppressed.second); // Clear out the list of suppressed diagnostics, so that we don't emit // them again for this specialization. However, we don't obsolete this // entry from the table, because we want to avoid ever emitting these // diagnostics again. Pos->second.clear(); } // C++ [basic.start.main]p3: // The function 'main' shall not be used within a program. if (cast(D)->isMain()) Diag(Loc, diag::ext_main_used); diagnoseUnavailableAlignedAllocation(*cast(D), Loc); } // See if this is an auto-typed variable whose initializer we are parsing. if (ParsingInitForAutoVars.count(D)) { if (isa(D)) { Diag(Loc, diag::err_binding_cannot_appear_in_own_initializer) << D->getDeclName(); } else { Diag(Loc, diag::err_auto_variable_cannot_appear_in_own_initializer) << D->getDeclName() << cast(D)->getType(); } return true; } // See if this is a deleted function. if (FunctionDecl *FD = dyn_cast(D)) { if (FD->isDeleted()) { auto *Ctor = dyn_cast(FD); if (Ctor && Ctor->isInheritingConstructor()) Diag(Loc, diag::err_deleted_inherited_ctor_use) << Ctor->getParent() << Ctor->getInheritedConstructor().getConstructor()->getParent(); else Diag(Loc, diag::err_deleted_function_use); NoteDeletedFunction(FD); return true; } // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, Loc)) return true; if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD)) return true; } if (auto *MD = dyn_cast(D)) { // Lambdas are only default-constructible or assignable in C++2a onwards. if (MD->getParent()->isLambda() && ((isa(MD) && cast(MD)->isDefaultConstructor()) || MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator())) { Diag(Loc, diag::warn_cxx17_compat_lambda_def_ctor_assign) << !isa(MD); } } auto getReferencedObjCProp = [](const NamedDecl *D) -> const ObjCPropertyDecl * { if (const auto *MD = dyn_cast(D)) return MD->findPropertyDecl(); return nullptr; }; if (const ObjCPropertyDecl *ObjCPDecl = getReferencedObjCProp(D)) { if (diagnoseArgIndependentDiagnoseIfAttrs(ObjCPDecl, Loc)) return true; } else if (diagnoseArgIndependentDiagnoseIfAttrs(D, Loc)) { return true; } // [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions // Only the variables omp_in and omp_out are allowed in the combiner. // Only the variables omp_priv and omp_orig are allowed in the // initializer-clause. 
auto *DRD = dyn_cast(CurContext); if (LangOpts.OpenMP && DRD && !CurContext->containsDecl(D) && isa(D)) { Diag(Loc, diag::err_omp_wrong_var_in_declare_reduction) << getCurFunction()->HasOMPDeclareReductionCombiner; Diag(D->getLocation(), diag::note_entity_declared_at) << D; return true; } DiagnoseAvailabilityOfDecl(D, Locs, UnknownObjCClass, ObjCPropertyAccess, AvoidPartialAvailabilityChecks, ClassReceiver); DiagnoseUnusedOfDecl(*this, D, Loc); diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc); return false; } /// Retrieve the message suffix that should be added to a /// diagnostic complaining about the given function being deleted or /// unavailable. std::string Sema::getDeletedOrUnavailableSuffix(const FunctionDecl *FD) { std::string Message; if (FD->getAvailability(&Message)) return ": " + Message; return std::string(); } /// DiagnoseSentinelCalls - This routine checks whether a call or /// message-send is to a declaration with the sentinel attribute, and /// if so, it checks that the requirements of the sentinel are /// satisfied. void Sema::DiagnoseSentinelCalls(NamedDecl *D, SourceLocation Loc, ArrayRef Args) { const SentinelAttr *attr = D->getAttr(); if (!attr) return; // The number of formal parameters of the declaration. unsigned numFormalParams; // The kind of declaration. This is also an index into a %select in // the diagnostic. enum CalleeType { CT_Function, CT_Method, CT_Block } calleeType; if (ObjCMethodDecl *MD = dyn_cast(D)) { numFormalParams = MD->param_size(); calleeType = CT_Method; } else if (FunctionDecl *FD = dyn_cast(D)) { numFormalParams = FD->param_size(); calleeType = CT_Function; } else if (isa(D)) { QualType type = cast(D)->getType(); const FunctionType *fn = nullptr; if (const PointerType *ptr = type->getAs()) { fn = ptr->getPointeeType()->getAs(); if (!fn) return; calleeType = CT_Function; } else if (const BlockPointerType *ptr = type->getAs()) { fn = ptr->getPointeeType()->castAs(); calleeType = CT_Block; } else { return; } if (const FunctionProtoType *proto = dyn_cast(fn)) { numFormalParams = proto->getNumParams(); } else { numFormalParams = 0; } } else { return; } // "nullPos" is the number of formal parameters at the end which // effectively count as part of the variadic arguments. This is // useful if you would prefer to not have *any* formal parameters, // but the language forces you to have at least one. unsigned nullPos = attr->getNullPos(); assert((nullPos == 0 || nullPos == 1) && "invalid null position on sentinel"); numFormalParams = (nullPos > numFormalParams ? 0 : numFormalParams - nullPos); // The number of arguments which should follow the sentinel. unsigned numArgsAfterSentinel = attr->getSentinel(); // If there aren't enough arguments for all the formal parameters, // the sentinel, and the args after the sentinel, complain. if (Args.size() < numFormalParams + numArgsAfterSentinel + 1) { Diag(Loc, diag::warn_not_enough_argument) << D->getDeclName(); Diag(D->getLocation(), diag::note_sentinel_here) << int(calleeType); return; } // Otherwise, find the sentinel expression. Expr *sentinelExpr = Args[Args.size() - numArgsAfterSentinel - 1]; if (!sentinelExpr) return; if (sentinelExpr->isValueDependent()) return; if (Context.isSentinelNullExpr(sentinelExpr)) return; // Pick a reasonable string to insert. Optimistically use 'nil', 'nullptr', // or 'NULL' if those are actually defined in the context. Only use // 'nil' for ObjC methods, where it's much more likely that the // variadic arguments form a list of object pointers. 
SourceLocation MissingNilLoc = getLocForEndOfToken(sentinelExpr->getEndLoc()); std::string NullValue; if (calleeType == CT_Method && PP.isMacroDefined("nil")) NullValue = "nil"; else if (getLangOpts().CPlusPlus11) NullValue = "nullptr"; else if (PP.isMacroDefined("NULL")) NullValue = "NULL"; else NullValue = "(void*) 0"; if (MissingNilLoc.isInvalid()) Diag(Loc, diag::warn_missing_sentinel) << int(calleeType); else Diag(MissingNilLoc, diag::warn_missing_sentinel) << int(calleeType) << FixItHint::CreateInsertion(MissingNilLoc, ", " + NullValue); Diag(D->getLocation(), diag::note_sentinel_here) << int(calleeType); } SourceRange Sema::getExprRange(Expr *E) const { return E ? E->getSourceRange() : SourceRange(); } //===----------------------------------------------------------------------===// // Standard Promotions and Conversions //===----------------------------------------------------------------------===// /// DefaultFunctionArrayConversion (C99 6.3.2.1p3, C99 6.3.2.1p4). ExprResult Sema::DefaultFunctionArrayConversion(Expr *E, bool Diagnose) { // Handle any placeholder expressions which made it here. if (E->getType()->isPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); } QualType Ty = E->getType(); assert(!Ty.isNull() && "DefaultFunctionArrayConversion - missing type"); if (Ty->isFunctionType()) { if (auto *DRE = dyn_cast(E->IgnoreParenCasts())) if (auto *FD = dyn_cast(DRE->getDecl())) if (!checkAddressOfFunctionIsAvailable(FD, Diagnose, E->getExprLoc())) return ExprError(); E = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay).get(); } else if (Ty->isArrayType()) { // In C90 mode, arrays only promote to pointers if the array expression is // an lvalue. The relevant legalese is C90 6.2.2.1p3: "an lvalue that has // type 'array of type' is converted to an expression that has type 'pointer // to type'...". In C99 this was changed to: C99 6.3.2.1p3: "an expression // that has type 'array of type' ...". The relevant change is "an lvalue" // (C90) to "an expression" (C99). // // C++ 4.2p1: // An lvalue or rvalue of type "array of N T" or "array of unknown bound of // T" can be converted to an rvalue of type "pointer to T". // if (getLangOpts().C99 || getLangOpts().CPlusPlus || E->isLValue()) E = ImpCastExprToType(E, Context.getArrayDecayedType(Ty), CK_ArrayToPointerDecay).get(); } return E; } static void CheckForNullPointerDereference(Sema &S, Expr *E) { // Check to see if we are dereferencing a null pointer. If so, // and if not volatile-qualified, this is undefined behavior that the // optimizer will delete, so warn about it. People sometimes try to use this // to get a deterministic trap and are surprised by clang's behavior. This // only handles the pattern "*null", which is a very syntactic check. 
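From the caller's side, the sentinel machinery above looks like this (GNU attribute, accepted by clang in both C and C++; hypothetical function name). The fix-it built below appends ", nil", ", nullptr" or ", NULL" according to the NullValue selection just described:

    // A variadic function that requires a trailing null sentinel.
    void execl_like(const char *path, ...) __attribute__((sentinel));

    void sentinelDemo() {
      execl_like("/bin/sh", "-c", "date", (char *)nullptr); // OK
      // execl_like("/bin/sh", "-c", "date");
      // warning: missing sentinel in function call;
      // the fix-it inserts ", nullptr" before the ')'
    }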
if (UnaryOperator *UO = dyn_cast(E->IgnoreParenCasts())) if (UO->getOpcode() == UO_Deref && UO->getSubExpr()->IgnoreParenCasts()-> isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull) && !UO->getType().isVolatileQualified()) { S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO, S.PDiag(diag::warn_indirection_through_null) << UO->getSubExpr()->getSourceRange()); S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO, S.PDiag(diag::note_indirection_through_null)); } } static void DiagnoseDirectIsaAccess(Sema &S, const ObjCIvarRefExpr *OIRE, SourceLocation AssignLoc, const Expr* RHS) { const ObjCIvarDecl *IV = OIRE->getDecl(); if (!IV) return; DeclarationName MemberName = IV->getDeclName(); IdentifierInfo *Member = MemberName.getAsIdentifierInfo(); if (!Member || !Member->isStr("isa")) return; const Expr *Base = OIRE->getBase(); QualType BaseType = Base->getType(); if (OIRE->isArrow()) BaseType = BaseType->getPointeeType(); if (const ObjCObjectType *OTy = BaseType->getAs()) if (ObjCInterfaceDecl *IDecl = OTy->getInterface()) { ObjCInterfaceDecl *ClassDeclared = nullptr; ObjCIvarDecl *IV = IDecl->lookupInstanceVariable(Member, ClassDeclared); if (!ClassDeclared->getSuperClass() && (*ClassDeclared->ivar_begin()) == IV) { if (RHS) { NamedDecl *ObjectSetClass = S.LookupSingleName(S.TUScope, &S.Context.Idents.get("object_setClass"), SourceLocation(), S.LookupOrdinaryName); if (ObjectSetClass) { SourceLocation RHSLocEnd = S.getLocForEndOfToken(RHS->getEndLoc()); S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_assign) << FixItHint::CreateInsertion(OIRE->getBeginLoc(), "object_setClass(") << FixItHint::CreateReplacement( SourceRange(OIRE->getOpLoc(), AssignLoc), ",") << FixItHint::CreateInsertion(RHSLocEnd, ")"); } else S.Diag(OIRE->getLocation(), diag::warn_objc_isa_assign); } else { NamedDecl *ObjectGetClass = S.LookupSingleName(S.TUScope, &S.Context.Idents.get("object_getClass"), SourceLocation(), S.LookupOrdinaryName); if (ObjectGetClass) S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_use) << FixItHint::CreateInsertion(OIRE->getBeginLoc(), "object_getClass(") << FixItHint::CreateReplacement( SourceRange(OIRE->getOpLoc(), OIRE->getEndLoc()), ")"); else S.Diag(OIRE->getLocation(), diag::warn_objc_isa_use); } S.Diag(IV->getLocation(), diag::note_ivar_decl); } } } ExprResult Sema::DefaultLvalueConversion(Expr *E) { // Handle any placeholder expressions which made it here. if (E->getType()->isPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); } // C++ [conv.lval]p1: // A glvalue of a non-function, non-array type T can be // converted to a prvalue. if (!E->isGLValue()) return E; QualType T = E->getType(); assert(!T.isNull() && "r-value conversion on typeless expression?"); // We don't want to throw lvalue-to-rvalue casts on top of // expressions of certain types in C++. if (getLangOpts().CPlusPlus && (E->getType() == Context.OverloadTy || T->isDependentType() || T->isRecordType())) return E; // The C standard is actually really unclear on this point, and // DR106 tells us what the result should be but not why. It's // generally best to say that void types just doesn't undergo // lvalue-to-rvalue at all. Note that expressions of unqualified // 'void' type are never l-values, but qualified void can be. if (T->isVoidType()) return E; // OpenCL usually rejects direct accesses to values of 'half' type. 
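CheckForNullPointerDereference above matches only the literal "*null" pattern and deliberately skips volatile-qualified accesses, since those must be performed. Both halves in one sketch (illustration only; diagnostic wording paraphrased):

    void nullDerefDemo() {
      int *p = nullptr;
      volatile int *vp = nullptr;
      int a = *p;  // warning: indirection of non-volatile null pointer will be
                   // deleted, not trap; note suggests __builtin_trap() or a
                   // 'volatile' qualifier
      int b = *vp; // no warning: the volatile load must actually happen
      (void)a;
      (void)b;
    }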
if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp16") && T->isHalfType()) { Diag(E->getExprLoc(), diag::err_opencl_half_load_store) << 0 << T; return ExprError(); } CheckForNullPointerDereference(*this, E); if (const ObjCIsaExpr *OISA = dyn_cast(E->IgnoreParenCasts())) { NamedDecl *ObjectGetClass = LookupSingleName(TUScope, &Context.Idents.get("object_getClass"), SourceLocation(), LookupOrdinaryName); if (ObjectGetClass) Diag(E->getExprLoc(), diag::warn_objc_isa_use) << FixItHint::CreateInsertion(OISA->getBeginLoc(), "object_getClass(") << FixItHint::CreateReplacement( SourceRange(OISA->getOpLoc(), OISA->getIsaMemberLoc()), ")"); else Diag(E->getExprLoc(), diag::warn_objc_isa_use); } else if (const ObjCIvarRefExpr *OIRE = dyn_cast(E->IgnoreParenCasts())) DiagnoseDirectIsaAccess(*this, OIRE, SourceLocation(), /* Expr*/nullptr); // C++ [conv.lval]p1: // [...] If T is a non-class type, the type of the prvalue is the // cv-unqualified version of T. Otherwise, the type of the // rvalue is T. // // C99 6.3.2.1p2: // If the lvalue has qualified type, the value has the unqualified // version of the type of the lvalue; otherwise, the value has the // type of the lvalue. if (T.hasQualifiers()) T = T.getUnqualifiedType(); // Under the MS ABI, lock down the inheritance model now. if (T->isMemberPointerType() && Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(E->getExprLoc(), T); UpdateMarkingForLValueToRValue(E); // Loading a __weak object implicitly retains the value, so we need a cleanup to // balance that. if (E->getType().getObjCLifetime() == Qualifiers::OCL_Weak) Cleanup.setExprNeedsCleanups(true); ExprResult Res = ImplicitCastExpr::Create(Context, T, CK_LValueToRValue, E, nullptr, VK_RValue); // C11 6.3.2.1p2: // ... if the lvalue has atomic type, the value has the non-atomic version // of the type of the lvalue ... if (const AtomicType *Atomic = T->getAs()) { T = Atomic->getValueType().getUnqualifiedType(); Res = ImplicitCastExpr::Create(Context, T, CK_AtomicToNonAtomic, Res.get(), nullptr, VK_RValue); } return Res; } ExprResult Sema::DefaultFunctionArrayLvalueConversion(Expr *E, bool Diagnose) { ExprResult Res = DefaultFunctionArrayConversion(E, Diagnose); if (Res.isInvalid()) return ExprError(); Res = DefaultLvalueConversion(Res.get()); if (Res.isInvalid()) return ExprError(); return Res; } /// CallExprUnaryConversions - a special case of an unary conversion /// performed on a function designator of a call expression. ExprResult Sema::CallExprUnaryConversions(Expr *E) { QualType Ty = E->getType(); ExprResult Res = E; // Only do implicit cast for a function type, but not for a pointer // to function type. if (Ty->isFunctionType()) { Res = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay).get(); if (Res.isInvalid()) return ExprError(); } Res = DefaultLvalueConversion(Res.get()); if (Res.isInvalid()) return ExprError(); return Res.get(); } /// UsualUnaryConversions - Performs various conversions that are common to most /// operators (C99 6.3). The conversions of array and function types are /// sometimes suppressed. For example, the array->pointer conversion doesn't /// apply if the array is an argument to the sizeof or address (&) operators. /// In these instances, this routine should *not* be called. ExprResult Sema::UsualUnaryConversions(Expr *E) { // First, convert to an r-value. 
ExprResult Res = DefaultFunctionArrayLvalueConversion(E); if (Res.isInvalid()) return ExprError(); E = Res.get(); QualType Ty = E->getType(); assert(!Ty.isNull() && "UsualUnaryConversions - missing type"); // Half FP have to be promoted to float unless it is natively supported if (Ty->isHalfType() && !getLangOpts().NativeHalfType) return ImpCastExprToType(Res.get(), Context.FloatTy, CK_FloatingCast); // Try to perform integral promotions if the object has a theoretically // promotable type. if (Ty->isIntegralOrUnscopedEnumerationType()) { // C99 6.3.1.1p2: // // The following may be used in an expression wherever an int or // unsigned int may be used: // - an object or expression with an integer type whose integer // conversion rank is less than or equal to the rank of int // and unsigned int. // - A bit-field of type _Bool, int, signed int, or unsigned int. // // If an int can represent all values of the original type, the // value is converted to an int; otherwise, it is converted to an // unsigned int. These are called the integer promotions. All // other types are unchanged by the integer promotions. QualType PTy = Context.isPromotableBitField(E); if (!PTy.isNull()) { E = ImpCastExprToType(E, PTy, CK_IntegralCast).get(); return E; } if (Ty->isPromotableIntegerType()) { QualType PT = Context.getPromotedIntegerType(Ty); E = ImpCastExprToType(E, PT, CK_IntegralCast).get(); return E; } } return E; } /// DefaultArgumentPromotion (C99 6.5.2.2p6). Used for function calls that /// do not have a prototype. Arguments that have type float or __fp16 /// are promoted to double. All other argument types are converted by /// UsualUnaryConversions(). ExprResult Sema::DefaultArgumentPromotion(Expr *E) { QualType Ty = E->getType(); assert(!Ty.isNull() && "DefaultArgumentPromotion - missing type"); ExprResult Res = UsualUnaryConversions(E); if (Res.isInvalid()) return ExprError(); E = Res.get(); // If this is a 'float' or '__fp16' (CVR qualified or typedef) // promote to double. // Note that default argument promotion applies only to float (and // half/fp16); it does not apply to _Float16. const BuiltinType *BTy = Ty->getAs(); if (BTy && (BTy->getKind() == BuiltinType::Half || BTy->getKind() == BuiltinType::Float)) { if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp64")) { if (BTy->getKind() == BuiltinType::Half) { E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get(); } } else { E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get(); } } // C++ performs lvalue-to-rvalue conversion as a default argument // promotion, even on class types, but note: // C++11 [conv.lval]p2: // When an lvalue-to-rvalue conversion occurs in an unevaluated // operand or a subexpression thereof the value contained in the // referenced object is not accessed. Otherwise, if the glvalue // has a class type, the conversion copy-initializes a temporary // of type T from the glvalue and the result of the conversion // is a prvalue for the temporary. // FIXME: add some way to gate this entire thing for correctness in // potentially potentially evaluated contexts. if (getLangOpts().CPlusPlus && E->isGLValue() && !isUnevaluatedContext()) { ExprResult Temp = PerformCopyInitialization( InitializedEntity::InitializeTemporary(E->getType()), E->getExprLoc(), E); if (Temp.isInvalid()) return ExprError(); E = Temp.get(); } return E; } /// Determine the degree of POD-ness for an expression. 
/// Incomplete types are considered POD, since this check can be performed /// when we're in an unevaluated context. Sema::VarArgKind Sema::isValidVarArgType(const QualType &Ty) { if (Ty->isIncompleteType()) { // C++11 [expr.call]p7: // After these conversions, if the argument does not have arithmetic, // enumeration, pointer, pointer to member, or class type, the program // is ill-formed. // // Since we've already performed array-to-pointer and function-to-pointer // decay, the only such type in C++ is cv void. This also handles // initializer lists as variadic arguments. if (Ty->isVoidType()) return VAK_Invalid; if (Ty->isObjCObjectType()) return VAK_Invalid; return VAK_Valid; } if (Ty.isDestructedType() == QualType::DK_nontrivial_c_struct) return VAK_Invalid; if (Ty.isCXX98PODType(Context)) return VAK_Valid; // C++11 [expr.call]p7: // Passing a potentially-evaluated argument of class type (Clause 9) // having a non-trivial copy constructor, a non-trivial move constructor, // or a non-trivial destructor, with no corresponding parameter, // is conditionally-supported with implementation-defined semantics. if (getLangOpts().CPlusPlus11 && !Ty->isDependentType()) if (CXXRecordDecl *Record = Ty->getAsCXXRecordDecl()) if (!Record->hasNonTrivialCopyConstructor() && !Record->hasNonTrivialMoveConstructor() && !Record->hasNonTrivialDestructor()) return VAK_ValidInCXX11; if (getLangOpts().ObjCAutoRefCount && Ty->isObjCLifetimeType()) return VAK_Valid; if (Ty->isObjCObjectType()) return VAK_Invalid; if (getLangOpts().MSVCCompat) return VAK_MSVCUndefined; // FIXME: In C++11, these cases are conditionally-supported, meaning we're // permitted to reject them. We should consider doing so. return VAK_Undefined; } void Sema::checkVariadicArgument(const Expr *E, VariadicCallType CT) { // Don't allow one to pass an Objective-C interface to a vararg. const QualType &Ty = E->getType(); VarArgKind VAK = isValidVarArgType(Ty); // Complain about passing non-POD types through varargs. switch (VAK) { case VAK_ValidInCXX11: DiagRuntimeBehavior( E->getBeginLoc(), nullptr, PDiag(diag::warn_cxx98_compat_pass_non_pod_arg_to_vararg) << Ty << CT); LLVM_FALLTHROUGH; case VAK_Valid: if (Ty->isRecordType()) { // This is unlikely to be what the user intended. If the class has a // 'c_str' member function, the user probably meant to call that. DiagRuntimeBehavior(E->getBeginLoc(), nullptr, PDiag(diag::warn_pass_class_arg_to_vararg) << Ty << CT << hasCStrMethod(E) << ".c_str()"); } break; case VAK_Undefined: case VAK_MSVCUndefined: DiagRuntimeBehavior(E->getBeginLoc(), nullptr, PDiag(diag::warn_cannot_pass_non_pod_arg_to_vararg) << getLangOpts().CPlusPlus11 << Ty << CT); break; case VAK_Invalid: if (Ty.isDestructedType() == QualType::DK_nontrivial_c_struct) Diag(E->getBeginLoc(), diag::err_cannot_pass_non_trivial_c_struct_to_vararg) << Ty << CT; else if (Ty->isObjCObjectType()) DiagRuntimeBehavior(E->getBeginLoc(), nullptr, PDiag(diag::err_cannot_pass_objc_interface_to_vararg) << Ty << CT); else Diag(E->getBeginLoc(), diag::err_cannot_pass_to_vararg) << isa(E) << Ty << CT; break; } } /// DefaultVariadicArgumentPromotion - Like DefaultArgumentPromotion, but /// will create a trap if the resulting type is not a POD type. ExprResult Sema::DefaultVariadicArgumentPromotion(Expr *E, VariadicCallType CT, FunctionDecl *FDecl) { if (const BuiltinType *PlaceholderTy = E->getType()->getAsPlaceholderType()) { // Strip the unbridged-cast placeholder expression off, if applicable. 
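checkVariadicArgument above is the logic behind clang's complaints about class types reaching '...', including the ".c_str()" suggestion, and DefaultArgumentPromotion earlier supplies the promoted types it inspects. Roughly (illustration only; exact diagnostic wording varies by language mode):

    #include <cstdio>
    #include <string>

    void varargDemo(short s, float f, const std::string &name) {
      // 's' gets the integer promotions (short -> int) and 'f' default
      // argument promotion (float -> double) before reaching the '...'.
      std::printf("%d %f %s\n", s, f, name.c_str()); // OK, with .c_str()
      // std::printf("%s\n", name);
      // rejected: non-trivial class type through '...'; clang points at the
      // class argument and suggests calling .c_str()
    }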
if (PlaceholderTy->getKind() == BuiltinType::ARCUnbridgedCast && (CT == VariadicMethod || (FDecl && FDecl->hasAttr()))) { E = stripARCUnbridgedCast(E); // Otherwise, do normal placeholder checking. } else { ExprResult ExprRes = CheckPlaceholderExpr(E); if (ExprRes.isInvalid()) return ExprError(); E = ExprRes.get(); } } ExprResult ExprRes = DefaultArgumentPromotion(E); if (ExprRes.isInvalid()) return ExprError(); E = ExprRes.get(); // Diagnostics regarding non-POD argument types are // emitted along with format string checking in Sema::CheckFunctionCall(). if (isValidVarArgType(E->getType()) == VAK_Undefined) { // Turn this into a trap. CXXScopeSpec SS; SourceLocation TemplateKWLoc; UnqualifiedId Name; Name.setIdentifier(PP.getIdentifierInfo("__builtin_trap"), E->getBeginLoc()); ExprResult TrapFn = ActOnIdExpression(TUScope, SS, TemplateKWLoc, Name, true, false); if (TrapFn.isInvalid()) return ExprError(); ExprResult Call = ActOnCallExpr(TUScope, TrapFn.get(), E->getBeginLoc(), None, E->getEndLoc()); if (Call.isInvalid()) return ExprError(); ExprResult Comma = ActOnBinOp(TUScope, E->getBeginLoc(), tok::comma, Call.get(), E); if (Comma.isInvalid()) return ExprError(); return Comma.get(); } if (!getLangOpts().CPlusPlus && RequireCompleteType(E->getExprLoc(), E->getType(), diag::err_call_incomplete_argument)) return ExprError(); return E; } /// Converts an integer to complex float type. Helper function of /// UsualArithmeticConversions() /// /// \return false if the integer expression is an integer type and is /// successfully converted to the complex type. static bool handleIntegerToComplexFloatConversion(Sema &S, ExprResult &IntExpr, ExprResult &ComplexExpr, QualType IntTy, QualType ComplexTy, bool SkipCast) { if (IntTy->isComplexType() || IntTy->isRealFloatingType()) return true; if (SkipCast) return false; if (IntTy->isIntegerType()) { QualType fpTy = cast(ComplexTy)->getElementType(); IntExpr = S.ImpCastExprToType(IntExpr.get(), fpTy, CK_IntegralToFloating); IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy, CK_FloatingRealToComplex); } else { assert(IntTy->isComplexIntegerType()); IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy, CK_IntegralComplexToFloatingComplex); } return false; } /// Handle arithmetic conversion with complex types. Helper function of /// UsualArithmeticConversions() static QualType handleComplexFloatConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { // if we have an integer operand, the result is the complex type. if (!handleIntegerToComplexFloatConversion(S, RHS, LHS, RHSType, LHSType, /*skipCast*/false)) return LHSType; if (!handleIntegerToComplexFloatConversion(S, LHS, RHS, LHSType, RHSType, /*skipCast*/IsCompAssign)) return RHSType; // This handles complex/complex, complex/float, or float/complex. // When both operands are complex, the shorter operand is converted to the // type of the longer, and that is the type of the result. This corresponds // to what is done when combining two real floating-point operands. // The fun begins when size promotion occur across type domains. // From H&S 6.3.4: When one operand is complex and the other is a real // floating-point type, the less precise type is converted, within it's // real or complex domain, to the precision of the other type. For example, // when combining a "long double" with a "double _Complex", the // "double _Complex" is promoted to "long double _Complex". // Compute the rank of the two types, regardless of whether they are complex. 
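The H&S 6.3.4 rule quoted above can be observed directly with clang's GNU _Complex extension: combining a long double with a double _Complex promotes the complex operand within its own domain. A small sketch (GNU extensions, compiles with clang):

    // double _Complex + long double -> long double _Complex
    void complexRankDemo() {
      double _Complex dc = 1.0;
      long double ld = 2.0L;
      __typeof__(dc + ld) r = dc + ld; // deduced as long double _Complex
      (void)r;
    }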
int Order = S.Context.getFloatingTypeOrder(LHSType, RHSType); auto *LHSComplexType = dyn_cast(LHSType); auto *RHSComplexType = dyn_cast(RHSType); QualType LHSElementType = LHSComplexType ? LHSComplexType->getElementType() : LHSType; QualType RHSElementType = RHSComplexType ? RHSComplexType->getElementType() : RHSType; QualType ResultType = S.Context.getComplexType(LHSElementType); if (Order < 0) { // Promote the precision of the LHS if not an assignment. ResultType = S.Context.getComplexType(RHSElementType); if (!IsCompAssign) { if (LHSComplexType) LHS = S.ImpCastExprToType(LHS.get(), ResultType, CK_FloatingComplexCast); else LHS = S.ImpCastExprToType(LHS.get(), RHSElementType, CK_FloatingCast); } } else if (Order > 0) { // Promote the precision of the RHS. if (RHSComplexType) RHS = S.ImpCastExprToType(RHS.get(), ResultType, CK_FloatingComplexCast); else RHS = S.ImpCastExprToType(RHS.get(), LHSElementType, CK_FloatingCast); } return ResultType; } /// Handle arithmetic conversion from integer to float. Helper function /// of UsualArithmeticConversions() static QualType handleIntToFloatConversion(Sema &S, ExprResult &FloatExpr, ExprResult &IntExpr, QualType FloatTy, QualType IntTy, bool ConvertFloat, bool ConvertInt) { if (IntTy->isIntegerType()) { if (ConvertInt) // Convert intExpr to the lhs floating point type. IntExpr = S.ImpCastExprToType(IntExpr.get(), FloatTy, CK_IntegralToFloating); return FloatTy; } // Convert both sides to the appropriate complex float. assert(IntTy->isComplexIntegerType()); QualType result = S.Context.getComplexType(FloatTy); // _Complex int -> _Complex float if (ConvertInt) IntExpr = S.ImpCastExprToType(IntExpr.get(), result, CK_IntegralComplexToFloatingComplex); // float -> _Complex float if (ConvertFloat) FloatExpr = S.ImpCastExprToType(FloatExpr.get(), result, CK_FloatingRealToComplex); return result; } /// Handle arithmethic conversion with floating point types. Helper /// function of UsualArithmeticConversions() static QualType handleFloatConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { bool LHSFloat = LHSType->isRealFloatingType(); bool RHSFloat = RHSType->isRealFloatingType(); // If we have two real floating types, convert the smaller operand // to the bigger result. if (LHSFloat && RHSFloat) { int order = S.Context.getFloatingTypeOrder(LHSType, RHSType); if (order > 0) { RHS = S.ImpCastExprToType(RHS.get(), LHSType, CK_FloatingCast); return LHSType; } assert(order < 0 && "illegal float comparison"); if (!IsCompAssign) LHS = S.ImpCastExprToType(LHS.get(), RHSType, CK_FloatingCast); return RHSType; } if (LHSFloat) { // Half FP has to be promoted to float unless it is natively supported if (LHSType->isHalfType() && !S.getLangOpts().NativeHalfType) LHSType = S.Context.FloatTy; return handleIntToFloatConversion(S, LHS, RHS, LHSType, RHSType, /*convertFloat=*/!IsCompAssign, /*convertInt=*/ true); } assert(RHSFloat); return handleIntToFloatConversion(S, RHS, LHS, RHSType, LHSType, /*convertInt=*/ true, /*convertFloat=*/!IsCompAssign); } /// Diagnose attempts to convert between __float128 and long double if /// there is no support for such conversion. Helper function of /// UsualArithmeticConversions(). static bool unsupportedTypeConversion(const Sema &S, QualType LHSType, QualType RHSType) { /* No issue converting if at least one of the types is not a floating point type or the two types have the same rank. 
*/ if (!LHSType->isFloatingType() || !RHSType->isFloatingType() || S.Context.getFloatingTypeOrder(LHSType, RHSType) == 0) return false; assert(LHSType->isFloatingType() && RHSType->isFloatingType() && "The remaining types must be floating point types."); auto *LHSComplex = LHSType->getAs(); auto *RHSComplex = RHSType->getAs(); QualType LHSElemType = LHSComplex ? LHSComplex->getElementType() : LHSType; QualType RHSElemType = RHSComplex ? RHSComplex->getElementType() : RHSType; // No issue if the two types have the same representation if (&S.Context.getFloatTypeSemantics(LHSElemType) == &S.Context.getFloatTypeSemantics(RHSElemType)) return false; bool Float128AndLongDouble = (LHSElemType == S.Context.Float128Ty && RHSElemType == S.Context.LongDoubleTy); Float128AndLongDouble |= (LHSElemType == S.Context.LongDoubleTy && RHSElemType == S.Context.Float128Ty); // We've handled the situation where __float128 and long double have the same // representation. We allow all conversions for all possible long double types // except PPC's double double. return Float128AndLongDouble && (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) == &llvm::APFloat::PPCDoubleDouble()); } typedef ExprResult PerformCastFn(Sema &S, Expr *operand, QualType toType); namespace { /// These helper callbacks are placed in an anonymous namespace to /// permit their use as function template parameters. ExprResult doIntegralCast(Sema &S, Expr *op, QualType toType) { return S.ImpCastExprToType(op, toType, CK_IntegralCast); } ExprResult doComplexIntegralCast(Sema &S, Expr *op, QualType toType) { return S.ImpCastExprToType(op, S.Context.getComplexType(toType), CK_IntegralComplexCast); } } /// Handle integer arithmetic conversions. Helper function of /// UsualArithmeticConversions() template static QualType handleIntegerConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { // The rules for this case are in C99 6.3.1.8 int order = S.Context.getIntegerTypeOrder(LHSType, RHSType); bool LHSSigned = LHSType->hasSignedIntegerRepresentation(); bool RHSSigned = RHSType->hasSignedIntegerRepresentation(); if (LHSSigned == RHSSigned) { // Same signedness; use the higher-ranked type if (order >= 0) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else if (order != (LHSSigned ? 1 : -1)) { // The unsigned type has greater than or equal rank to the // signed type, so use the unsigned type if (RHSSigned) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else if (S.Context.getIntWidth(LHSType) != S.Context.getIntWidth(RHSType)) { // The two types are different widths; if we are here, that // means the signed type is larger than the unsigned type, so // use the signed type. if (LHSSigned) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else { // The signed type is higher-ranked than the unsigned type, // but isn't actually any bigger (like unsigned int and long // on most 32-bit systems). Use the unsigned type corresponding // to the signed type. QualType result = S.Context.getCorrespondingUnsignedType(LHSSigned ? 
LHSType : RHSType); RHS = (*doRHSCast)(S, RHS.get(), result); if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), result); return result; } } /// Handle conversions with GCC complex int extension. Helper function /// of UsualArithmeticConversions() static QualType handleComplexIntConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { const ComplexType *LHSComplexInt = LHSType->getAsComplexIntegerType(); const ComplexType *RHSComplexInt = RHSType->getAsComplexIntegerType(); if (LHSComplexInt && RHSComplexInt) { QualType LHSEltType = LHSComplexInt->getElementType(); QualType RHSEltType = RHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSEltType, RHSEltType, IsCompAssign); return S.Context.getComplexType(ScalarType); } if (LHSComplexInt) { QualType LHSEltType = LHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSEltType, RHSType, IsCompAssign); QualType ComplexType = S.Context.getComplexType(ScalarType); RHS = S.ImpCastExprToType(RHS.get(), ComplexType, CK_IntegralRealToComplex); return ComplexType; } assert(RHSComplexInt); QualType RHSEltType = RHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSType, RHSEltType, IsCompAssign); QualType ComplexType = S.Context.getComplexType(ScalarType); if (!IsCompAssign) LHS = S.ImpCastExprToType(LHS.get(), ComplexType, CK_IntegralRealToComplex); return ComplexType; } /// UsualArithmeticConversions - Performs various conversions that are common to /// binary operators (C99 6.3.1.8). If both operands aren't arithmetic, this /// routine returns the first non-arithmetic type found. The client is /// responsible for emitting appropriate error diagnostics. QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, bool IsCompAssign) { if (!IsCompAssign) { LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType(); QualType RHSType = Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType(); // For conversion purposes, we ignore any atomic qualifier on the LHS. if (const AtomicType *AtomicLHS = LHSType->getAs()) LHSType = AtomicLHS->getValueType(); // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // If either side is a non-arithmetic type (e.g. a pointer), we are done. // The caller can deal with this (e.g. pointer + int). if (!LHSType->isArithmeticType() || !RHSType->isArithmeticType()) return QualType(); // Apply unary and bitfield promotions to the LHS's type. QualType LHSUnpromotedType = LHSType; if (LHSType->isPromotableIntegerType()) LHSType = Context.getPromotedIntegerType(LHSType); QualType LHSBitfieldPromoteTy = Context.isPromotableBitField(LHS.get()); if (!LHSBitfieldPromoteTy.isNull()) LHSType = LHSBitfieldPromoteTy; if (LHSType != LHSUnpromotedType && !IsCompAssign) LHS = ImpCastExprToType(LHS.get(), LHSType, CK_IntegralCast); // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // At this point, we have two different arithmetic types. 
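  // Illustrative outcomes of the dispatch below (hedged: assumes an
  // LP64-style target with 32-bit int and 64-bit long; `2.0i` is the GNU
  // imaginary-constant extension):
  //   1   + 2.0f   -> float            (real floating conversion)
  //   1   + 2.0i   -> double _Complex  (complex conversion)
  //   1u  + 1L     -> long             (long can hold every unsigned int)
  //   1uL + 1L     -> unsigned long    (same rank; unsigned wins)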
  // Diagnose attempts to convert between __float128 and long double where
  // such conversions currently can't be handled.
  if (unsupportedTypeConversion(*this, LHSType, RHSType))
    return QualType();

  // Handle complex types first (C99 6.3.1.8p1).
  if (LHSType->isComplexType() || RHSType->isComplexType())
    return handleComplexFloatConversion(*this, LHS, RHS, LHSType, RHSType,
                                        IsCompAssign);

  // Now handle "real" floating types (i.e. float, double, long double).
  if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType())
    return handleFloatConversion(*this, LHS, RHS, LHSType, RHSType,
                                 IsCompAssign);

  // Handle GCC complex int extension.
  if (LHSType->isComplexIntegerType() || RHSType->isComplexIntegerType())
    return handleComplexIntConversion(*this, LHS, RHS, LHSType, RHSType,
                                      IsCompAssign);

  // Finally, we have two differing integer types.
  return handleIntegerConversion<doIntegralCast, doIntegralCast>
           (*this, LHS, RHS, LHSType, RHSType, IsCompAssign);
}

//===----------------------------------------------------------------------===//
//  Semantic Analysis for various Expression Types
//===----------------------------------------------------------------------===//

ExprResult
Sema::ActOnGenericSelectionExpr(SourceLocation KeyLoc,
                                SourceLocation DefaultLoc,
                                SourceLocation RParenLoc,
                                Expr *ControllingExpr,
                                ArrayRef<ParsedType> ArgTypes,
                                ArrayRef<Expr *> ArgExprs) {
  unsigned NumAssocs = ArgTypes.size();
  assert(NumAssocs == ArgExprs.size());

  TypeSourceInfo **Types = new TypeSourceInfo*[NumAssocs];
  for (unsigned i = 0; i < NumAssocs; ++i) {
    if (ArgTypes[i])
      (void) GetTypeFromParser(ArgTypes[i], &Types[i]);
    else
      Types[i] = nullptr;
  }

  ExprResult ER = CreateGenericSelectionExpr(KeyLoc, DefaultLoc, RParenLoc,
                                             ControllingExpr,
                                             llvm::makeArrayRef(Types, NumAssocs),
                                             ArgExprs);
  delete [] Types;
  return ER;
}

ExprResult
Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc,
                                 SourceLocation DefaultLoc,
                                 SourceLocation RParenLoc,
                                 Expr *ControllingExpr,
                                 ArrayRef<TypeSourceInfo *> Types,
                                 ArrayRef<Expr *> Exprs) {
  unsigned NumAssocs = Types.size();
  assert(NumAssocs == Exprs.size());

  // Decay and strip qualifiers for the controlling expression type, and handle
  // placeholder type replacement. See committee discussion from WG14 DR423.
  {
    EnterExpressionEvaluationContext Unevaluated(
        *this, Sema::ExpressionEvaluationContext::Unevaluated);
    ExprResult R = DefaultFunctionArrayLvalueConversion(ControllingExpr);
    if (R.isInvalid())
      return ExprError();
    ControllingExpr = R.get();
  }

  // The controlling expression is an unevaluated operand, so side effects are
  // likely unintended.
  if (!inTemplateInstantiation() &&
      ControllingExpr->HasSideEffects(Context, false))
    Diag(ControllingExpr->getExprLoc(),
         diag::warn_side_effects_unevaluated_context);

  bool TypeErrorFound = false,
       IsResultDependent = ControllingExpr->isTypeDependent(),
       ContainsUnexpandedParameterPack
         = ControllingExpr->containsUnexpandedParameterPack();

  for (unsigned i = 0; i < NumAssocs; ++i) {
    if (Exprs[i]->containsUnexpandedParameterPack())
      ContainsUnexpandedParameterPack = true;

    if (Types[i]) {
      if (Types[i]->getType()->containsUnexpandedParameterPack())
        ContainsUnexpandedParameterPack = true;

      if (Types[i]->getType()->isDependentType()) {
        IsResultDependent = true;
      } else {
        // C11 6.5.1.1p2 "The type name in a generic association shall specify a
        // complete object type other than a variably modified type."
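        // Illustrative sketch (generic code, for exposition only): in
        //   _Generic(x, void: 1, int (*)[n]: 2, default: 0)
        // the `void` association names an incomplete type and the array
        // pointee of `int (*)[n]` is variably modified, so both kinds of
        // association are rejected by the checks below.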
unsigned D = 0; if (Types[i]->getType()->isIncompleteType()) D = diag::err_assoc_type_incomplete; else if (!Types[i]->getType()->isObjectType()) D = diag::err_assoc_type_nonobject; else if (Types[i]->getType()->isVariablyModifiedType()) D = diag::err_assoc_type_variably_modified; if (D != 0) { Diag(Types[i]->getTypeLoc().getBeginLoc(), D) << Types[i]->getTypeLoc().getSourceRange() << Types[i]->getType(); TypeErrorFound = true; } // C11 6.5.1.1p2 "No two generic associations in the same generic // selection shall specify compatible types." for (unsigned j = i+1; j < NumAssocs; ++j) if (Types[j] && !Types[j]->getType()->isDependentType() && Context.typesAreCompatible(Types[i]->getType(), Types[j]->getType())) { Diag(Types[j]->getTypeLoc().getBeginLoc(), diag::err_assoc_compatible_types) << Types[j]->getTypeLoc().getSourceRange() << Types[j]->getType() << Types[i]->getType(); Diag(Types[i]->getTypeLoc().getBeginLoc(), diag::note_compat_assoc) << Types[i]->getTypeLoc().getSourceRange() << Types[i]->getType(); TypeErrorFound = true; } } } } if (TypeErrorFound) return ExprError(); // If we determined that the generic selection is result-dependent, don't // try to compute the result expression. if (IsResultDependent) return new (Context) GenericSelectionExpr( Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc, ContainsUnexpandedParameterPack); SmallVector CompatIndices; unsigned DefaultIndex = -1U; for (unsigned i = 0; i < NumAssocs; ++i) { if (!Types[i]) DefaultIndex = i; else if (Context.typesAreCompatible(ControllingExpr->getType(), Types[i]->getType())) CompatIndices.push_back(i); } // C11 6.5.1.1p2 "The controlling expression of a generic selection shall have // type compatible with at most one of the types named in its generic // association list." if (CompatIndices.size() > 1) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); Diag(ControllingExpr->getBeginLoc(), diag::err_generic_sel_multi_match) << ControllingExpr->getSourceRange() << ControllingExpr->getType() << (unsigned)CompatIndices.size(); for (unsigned I : CompatIndices) { Diag(Types[I]->getTypeLoc().getBeginLoc(), diag::note_compat_assoc) << Types[I]->getTypeLoc().getSourceRange() << Types[I]->getType(); } return ExprError(); } // C11 6.5.1.1p2 "If a generic selection has no default generic association, // its controlling expression shall have type compatible with exactly one of // the types named in its generic association list." if (DefaultIndex == -1U && CompatIndices.size() == 0) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); Diag(ControllingExpr->getBeginLoc(), diag::err_generic_sel_no_match) << ControllingExpr->getSourceRange() << ControllingExpr->getType(); return ExprError(); } // C11 6.5.1.1p3 "If a generic selection has a generic association with a // type name that is compatible with the type of the controlling expression, // then the result expression of the generic selection is the expression // in that generic association. Otherwise, the result expression of the // generic selection is the expression in the default generic association." unsigned ResultIndex = CompatIndices.size() ? 
CompatIndices[0] : DefaultIndex; return new (Context) GenericSelectionExpr( Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc, ContainsUnexpandedParameterPack, ResultIndex); } /// getUDSuffixLoc - Create a SourceLocation for a ud-suffix, given the /// location of the token and the offset of the ud-suffix within it. static SourceLocation getUDSuffixLoc(Sema &S, SourceLocation TokLoc, unsigned Offset) { return Lexer::AdvanceToTokenCharacter(TokLoc, Offset, S.getSourceManager(), S.getLangOpts()); } /// BuildCookedLiteralOperatorCall - A user-defined literal was found. Look up /// the corresponding cooked (non-raw) literal operator, and build a call to it. static ExprResult BuildCookedLiteralOperatorCall(Sema &S, Scope *Scope, IdentifierInfo *UDSuffix, SourceLocation UDSuffixLoc, ArrayRef Args, SourceLocation LitEndLoc) { assert(Args.size() <= 2 && "too many arguments for literal operator"); QualType ArgTy[2]; for (unsigned ArgIdx = 0; ArgIdx != Args.size(); ++ArgIdx) { ArgTy[ArgIdx] = Args[ArgIdx]->getType(); if (ArgTy[ArgIdx]->isArrayType()) ArgTy[ArgIdx] = S.Context.getArrayDecayedType(ArgTy[ArgIdx]); } DeclarationName OpName = S.Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); LookupResult R(S, OpName, UDSuffixLoc, Sema::LookupOrdinaryName); if (S.LookupLiteralOperator(Scope, R, llvm::makeArrayRef(ArgTy, Args.size()), /*AllowRaw*/ false, /*AllowTemplate*/ false, /*AllowStringTemplate*/ false, /*DiagnoseMissing*/ true) == Sema::LOLR_Error) return ExprError(); return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc); } /// ActOnStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from /// multiple tokens. However, the common case is that StringToks points to one /// string. /// ExprResult Sema::ActOnStringLiteral(ArrayRef StringToks, Scope *UDLScope) { assert(!StringToks.empty() && "Must have at least one string!"); StringLiteralParser Literal(StringToks, PP); if (Literal.hadError) return ExprError(); SmallVector StringTokLocs; for (const Token &Tok : StringToks) StringTokLocs.push_back(Tok.getLocation()); QualType CharTy = Context.CharTy; StringLiteral::StringKind Kind = StringLiteral::Ascii; if (Literal.isWide()) { CharTy = Context.getWideCharType(); Kind = StringLiteral::Wide; } else if (Literal.isUTF8()) { if (getLangOpts().Char8) CharTy = Context.Char8Ty; Kind = StringLiteral::UTF8; } else if (Literal.isUTF16()) { CharTy = Context.Char16Ty; Kind = StringLiteral::UTF16; } else if (Literal.isUTF32()) { CharTy = Context.Char32Ty; Kind = StringLiteral::UTF32; } else if (Literal.isPascal()) { CharTy = Context.UnsignedCharTy; } // Warn on initializing an array of char from a u8 string literal; this // becomes ill-formed in C++2a. if (getLangOpts().CPlusPlus && !getLangOpts().CPlusPlus2a && !getLangOpts().Char8 && Kind == StringLiteral::UTF8) { Diag(StringTokLocs.front(), diag::warn_cxx2a_compat_utf8_string); // Create removals for all 'u8' prefixes in the string literal(s). This // ensures C++2a compatibility (but may change the program behavior when // built by non-Clang compilers for which the execution character set is // not always UTF-8). 
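  // Illustrative example (assumed behavior, generic code): for
  //   const char a[] = u8"x" u8"y";
  // the loop below creates one removal fix-it per u8 token, since the
  // prefix may appear on any of the concatenated string tokens.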
auto RemovalDiag = PDiag(diag::note_cxx2a_compat_utf8_string_remove_u8); SourceLocation RemovalDiagLoc; for (const Token &Tok : StringToks) { if (Tok.getKind() == tok::utf8_string_literal) { if (RemovalDiagLoc.isInvalid()) RemovalDiagLoc = Tok.getLocation(); RemovalDiag << FixItHint::CreateRemoval(CharSourceRange::getCharRange( Tok.getLocation(), Lexer::AdvanceToTokenCharacter(Tok.getLocation(), 2, getSourceManager(), getLangOpts()))); } } Diag(RemovalDiagLoc, RemovalDiag); } QualType CharTyConst = CharTy; // A C++ string literal has a const-qualified element type (C++ 2.13.4p1). if (getLangOpts().CPlusPlus || getLangOpts().ConstStrings) CharTyConst.addConst(); CharTyConst = Context.adjustStringLiteralBaseType(CharTyConst); // Get an array type for the string, according to C99 6.4.5. This includes // the nul terminator character as well as the string length for pascal // strings. QualType StrTy = Context.getConstantArrayType( CharTyConst, llvm::APInt(32, Literal.GetNumStringChars() + 1), ArrayType::Normal, 0); // Pass &StringTokLocs[0], StringTokLocs.size() to factory! StringLiteral *Lit = StringLiteral::Create(Context, Literal.GetString(), Kind, Literal.Pascal, StrTy, &StringTokLocs[0], StringTokLocs.size()); if (Literal.getUDSuffix().empty()) return Lit; // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()], Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl)); // C++11 [lex.ext]p5: The literal L is treated as a call of the form // operator "" X (str, len) QualType SizeType = Context.getSizeType(); DeclarationName OpName = Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); QualType ArgTy[] = { Context.getArrayDecayedType(StrTy), SizeType }; LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName); switch (LookupLiteralOperator(UDLScope, R, ArgTy, /*AllowRaw*/ false, /*AllowTemplate*/ false, /*AllowStringTemplate*/ true, /*DiagnoseMissing*/ true)) { case LOLR_Cooked: { llvm::APInt Len(Context.getIntWidth(SizeType), Literal.GetNumStringChars()); IntegerLiteral *LenArg = IntegerLiteral::Create(Context, Len, SizeType, StringTokLocs[0]); Expr *Args[] = { Lit, LenArg }; return BuildLiteralOperatorCall(R, OpNameInfo, Args, StringTokLocs.back()); } case LOLR_StringTemplate: { TemplateArgumentListInfo ExplicitArgs; unsigned CharBits = Context.getIntWidth(CharTy); bool CharIsUnsigned = CharTy->isUnsignedIntegerType(); llvm::APSInt Value(CharBits, CharIsUnsigned); TemplateArgument TypeArg(CharTy); TemplateArgumentLocInfo TypeArgInfo(Context.getTrivialTypeSourceInfo(CharTy)); ExplicitArgs.addArgument(TemplateArgumentLoc(TypeArg, TypeArgInfo)); for (unsigned I = 0, N = Lit->getLength(); I != N; ++I) { Value = Lit->getCodeUnit(I); TemplateArgument Arg(Context, Value, CharTy); TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } return BuildLiteralOperatorCall(R, OpNameInfo, None, StringTokLocs.back(), &ExplicitArgs); } case LOLR_Raw: case LOLR_Template: case LOLR_ErrorNoDiagnostic: llvm_unreachable("unexpected literal operator lookup result"); case LOLR_Error: return ExprError(); } llvm_unreachable("unexpected literal operator lookup result"); } ExprResult 
Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, SourceLocation Loc, const CXXScopeSpec *SS) { DeclarationNameInfo NameInfo(D->getDeclName(), Loc); return BuildDeclRefExpr(D, Ty, VK, NameInfo, SS); } /// BuildDeclRefExpr - Build an expression that references a /// declaration that does not require a closure capture. ExprResult Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, const DeclarationNameInfo &NameInfo, const CXXScopeSpec *SS, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs) { bool RefersToCapturedVariable = isa(D) && NeedToCaptureVariable(cast(D), NameInfo.getLoc()); DeclRefExpr *E; if (isa(D)) { VarTemplateSpecializationDecl *VarSpec = cast(D); E = DeclRefExpr::Create(Context, SS ? SS->getWithLocInContext(Context) : NestedNameSpecifierLoc(), VarSpec->getTemplateKeywordLoc(), D, RefersToCapturedVariable, NameInfo.getLoc(), Ty, VK, FoundD, TemplateArgs); } else { assert(!TemplateArgs && "No template arguments for non-variable" " template specialization references"); E = DeclRefExpr::Create(Context, SS ? SS->getWithLocInContext(Context) : NestedNameSpecifierLoc(), SourceLocation(), D, RefersToCapturedVariable, NameInfo, Ty, VK, FoundD); } MarkDeclRefReferenced(E); if (getLangOpts().ObjCWeak && isa(D) && Ty.getObjCLifetime() == Qualifiers::OCL_Weak && !isUnevaluatedContext() && !Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, E->getBeginLoc())) getCurFunction()->recordUseOfWeak(E); FieldDecl *FD = dyn_cast(D); if (IndirectFieldDecl *IFD = dyn_cast(D)) FD = IFD->getAnonField(); if (FD) { UnusedPrivateFields.remove(FD); // Just in case we're building an illegal pointer-to-member. if (FD->isBitField()) E->setObjectKind(OK_BitField); } // C++ [expr.prim]/8: The expression [...] is a bit-field if the identifier // designates a bit-field. if (auto *BD = dyn_cast(D)) if (auto *BE = BD->getBinding()) E->setObjectKind(BE->getObjectKind()); return E; } /// Decomposes the given name into a DeclarationNameInfo, its location, and /// possibly a list of template arguments. /// /// If this produces template arguments, it is permitted to call /// DecomposeTemplateName. /// /// This actually loses a lot of source location information for /// non-standard name kinds; we should consider preserving that in /// some way. void Sema::DecomposeUnqualifiedId(const UnqualifiedId &Id, TemplateArgumentListInfo &Buffer, DeclarationNameInfo &NameInfo, const TemplateArgumentListInfo *&TemplateArgs) { if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId) { Buffer.setLAngleLoc(Id.TemplateId->LAngleLoc); Buffer.setRAngleLoc(Id.TemplateId->RAngleLoc); ASTTemplateArgsPtr TemplateArgsPtr(Id.TemplateId->getTemplateArgs(), Id.TemplateId->NumArgs); translateTemplateArguments(TemplateArgsPtr, Buffer); TemplateName TName = Id.TemplateId->Template.get(); SourceLocation TNameLoc = Id.TemplateId->TemplateNameLoc; NameInfo = Context.getNameForTemplate(TName, TNameLoc); TemplateArgs = &Buffer; } else { NameInfo = GetNameFromUnqualifiedId(Id); TemplateArgs = nullptr; } } static void emitEmptyLookupTypoDiagnostic( const TypoCorrection &TC, Sema &SemaRef, const CXXScopeSpec &SS, DeclarationName Typo, SourceLocation TypoLoc, ArrayRef Args, unsigned DiagnosticID, unsigned DiagnosticSuggestID) { DeclContext *Ctx = SS.isEmpty() ? nullptr : SemaRef.computeDeclContext(SS, false); if (!TC) { // Emit a special diagnostic for failed member lookups. // FIXME: computing the declaration context might fail here (?) 
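    // Illustrative sketch (hypothetical names): a failed lookup of
    // `NS::fobar` with no usable correction lands here; with a qualifier it
    // becomes "no member named 'fobar' in 'NS'", otherwise the generic
    // undeclared-identifier diagnostic fires.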
if (Ctx) SemaRef.Diag(TypoLoc, diag::err_no_member) << Typo << Ctx << SS.getRange(); else SemaRef.Diag(TypoLoc, DiagnosticID) << Typo; return; } std::string CorrectedStr = TC.getAsString(SemaRef.getLangOpts()); bool DroppedSpecifier = TC.WillReplaceSpecifier() && Typo.getAsString() == CorrectedStr; unsigned NoteID = TC.getCorrectionDeclAs() ? diag::note_implicit_param_decl : diag::note_previous_decl; if (!Ctx) SemaRef.diagnoseTypo(TC, SemaRef.PDiag(DiagnosticSuggestID) << Typo, SemaRef.PDiag(NoteID)); else SemaRef.diagnoseTypo(TC, SemaRef.PDiag(diag::err_no_member_suggest) << Typo << Ctx << DroppedSpecifier << SS.getRange(), SemaRef.PDiag(NoteID)); } /// Diagnose an empty lookup. /// /// \return false if new lookup candidates were found bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, std::unique_ptr CCC, TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef Args, TypoExpr **Out) { DeclarationName Name = R.getLookupName(); unsigned diagnostic = diag::err_undeclared_var_use; unsigned diagnostic_suggest = diag::err_undeclared_var_use_suggest; if (Name.getNameKind() == DeclarationName::CXXOperatorName || Name.getNameKind() == DeclarationName::CXXLiteralOperatorName || Name.getNameKind() == DeclarationName::CXXConversionFunctionName) { diagnostic = diag::err_undeclared_use; diagnostic_suggest = diag::err_undeclared_use_suggest; } // If the original lookup was an unqualified lookup, fake an // unqualified lookup. This is useful when (for example) the // original lookup would not have found something because it was a // dependent name. DeclContext *DC = SS.isEmpty() ? CurContext : nullptr; while (DC) { if (isa(DC)) { LookupQualifiedName(R, DC); if (!R.empty()) { // Don't give errors about ambiguities in this lookup. R.suppressDiagnostics(); // During a default argument instantiation the CurContext points // to a CXXMethodDecl; but we can't apply a this-> fixit inside a // function parameter list, hence add an explicit check. bool isDefaultArgument = !CodeSynthesisContexts.empty() && CodeSynthesisContexts.back().Kind == CodeSynthesisContext::DefaultFunctionArgumentInstantiation; CXXMethodDecl *CurMethod = dyn_cast(CurContext); bool isInstance = CurMethod && CurMethod->isInstance() && DC == CurMethod->getParent() && !isDefaultArgument; // Give a code modification hint to insert 'this->'. // TODO: fixit for inserting 'Base::' in the other cases. // Actually quite difficult! if (getLangOpts().MSVCCompat) diagnostic = diag::ext_found_via_dependent_bases_lookup; if (isInstance) { Diag(R.getNameLoc(), diagnostic) << Name << FixItHint::CreateInsertion(R.getNameLoc(), "this->"); CheckCXXThisCapture(R.getNameLoc()); } else { Diag(R.getNameLoc(), diagnostic) << Name; } // Do we really want to note all of these? for (NamedDecl *D : R) Diag(D->getLocation(), diag::note_dependent_var_use); // Return true if we are inside a default argument instantiation // and the found name refers to an instance member function, otherwise // the function calling DiagnoseEmptyLookup will try to create an // implicit member call and this is wrong for default argument. if (isDefaultArgument && ((*R.begin())->isCXXInstanceMember())) { Diag(R.getNameLoc(), diag::err_member_call_without_object); return true; } // Tell the callee to try to recover. return false; } R.clear(); } // In Microsoft mode, if we are performing lookup from within a friend // function definition declared at class scope then we must set // DC to the lexical parent to be able to search into the parent // class. 
if (getLangOpts().MSVCCompat && isa(DC) && cast(DC)->getFriendObjectKind() && DC->getLexicalParent()->isRecord()) DC = DC->getLexicalParent(); else DC = DC->getParent(); } // We didn't find anything, so try to correct for a typo. TypoCorrection Corrected; if (S && Out) { SourceLocation TypoLoc = R.getNameLoc(); assert(!ExplicitTemplateArgs && "Diagnosing an empty lookup with explicit template args!"); *Out = CorrectTypoDelayed( R.getLookupNameInfo(), R.getLookupKind(), S, &SS, std::move(CCC), [=](const TypoCorrection &TC) { emitEmptyLookupTypoDiagnostic(TC, *this, SS, Name, TypoLoc, Args, diagnostic, diagnostic_suggest); }, nullptr, CTK_ErrorRecovery); if (*Out) return true; } else if (S && (Corrected = CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S, &SS, std::move(CCC), CTK_ErrorRecovery))) { std::string CorrectedStr(Corrected.getAsString(getLangOpts())); bool DroppedSpecifier = Corrected.WillReplaceSpecifier() && Name.getAsString() == CorrectedStr; R.setLookupName(Corrected.getCorrection()); bool AcceptableWithRecovery = false; bool AcceptableWithoutRecovery = false; NamedDecl *ND = Corrected.getFoundDecl(); if (ND) { if (Corrected.isOverloaded()) { OverloadCandidateSet OCS(R.getNameLoc(), OverloadCandidateSet::CSK_Normal); OverloadCandidateSet::iterator Best; for (NamedDecl *CD : Corrected) { if (FunctionTemplateDecl *FTD = dyn_cast(CD)) AddTemplateOverloadCandidate( FTD, DeclAccessPair::make(FTD, AS_none), ExplicitTemplateArgs, Args, OCS); else if (FunctionDecl *FD = dyn_cast(CD)) if (!ExplicitTemplateArgs || ExplicitTemplateArgs->size() == 0) AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args, OCS); } switch (OCS.BestViableFunction(*this, R.getNameLoc(), Best)) { case OR_Success: ND = Best->FoundDecl; Corrected.setCorrectionDecl(ND); break; default: // FIXME: Arbitrarily pick the first declaration for the note. Corrected.setCorrectionDecl(ND); break; } } R.addDecl(ND); if (getLangOpts().CPlusPlus && ND->isCXXClassMember()) { CXXRecordDecl *Record = nullptr; if (Corrected.getCorrectionSpecifier()) { const Type *Ty = Corrected.getCorrectionSpecifier()->getAsType(); Record = Ty->getAsCXXRecordDecl(); } if (!Record) Record = cast( ND->getDeclContext()->getRedeclContext()); R.setNamingClass(Record); } auto *UnderlyingND = ND->getUnderlyingDecl(); AcceptableWithRecovery = isa(UnderlyingND) || isa(UnderlyingND); // FIXME: If we ended up with a typo for a type name or // Objective-C class name, we're in trouble because the parser // is in the wrong place to recover. Suggest the typo // correction, but don't make it a fix-it since we're not going // to recover well anyway. AcceptableWithoutRecovery = isa(UnderlyingND) || isa(UnderlyingND); } else { // FIXME: We found a keyword. Suggest it, but don't provide a fix-it // because we aren't able to recover. AcceptableWithoutRecovery = true; } if (AcceptableWithRecovery || AcceptableWithoutRecovery) { unsigned NoteID = Corrected.getCorrectionDeclAs() ? diag::note_implicit_param_decl : diag::note_previous_decl; if (SS.isEmpty()) diagnoseTypo(Corrected, PDiag(diagnostic_suggest) << Name, PDiag(NoteID), AcceptableWithRecovery); else diagnoseTypo(Corrected, PDiag(diag::err_no_member_suggest) << Name << computeDeclContext(SS, false) << DroppedSpecifier << SS.getRange(), PDiag(NoteID), AcceptableWithRecovery); // Tell the callee whether to try to recover. return !AcceptableWithRecovery; } } R.clear(); // Emit a special diagnostic for failed member lookups. // FIXME: computing the declaration context might fail here (?) 
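  // Illustrative sketch (hypothetical name): once typo correction has been
  // exhausted, a qualified `A::mispeled` is reported below as a missing
  // member of 'A', while an unqualified use falls through to the plain
  // undeclared diagnostic.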
if (!SS.isEmpty()) { Diag(R.getNameLoc(), diag::err_no_member) << Name << computeDeclContext(SS, false) << SS.getRange(); return true; } // Give up, we can't recover. Diag(R.getNameLoc(), diagnostic) << Name; return true; } /// In Microsoft mode, if we are inside a template class whose parent class has /// dependent base classes, and we can't resolve an unqualified identifier, then /// assume the identifier is a member of a dependent base class. We can only /// recover successfully in static methods, instance methods, and other contexts /// where 'this' is available. This doesn't precisely match MSVC's /// instantiation model, but it's close enough. static Expr * recoverFromMSUnqualifiedLookup(Sema &S, ASTContext &Context, DeclarationNameInfo &NameInfo, SourceLocation TemplateKWLoc, const TemplateArgumentListInfo *TemplateArgs) { // Only try to recover from lookup into dependent bases in static methods or // contexts where 'this' is available. QualType ThisType = S.getCurrentThisType(); const CXXRecordDecl *RD = nullptr; if (!ThisType.isNull()) RD = ThisType->getPointeeType()->getAsCXXRecordDecl(); else if (auto *MD = dyn_cast(S.CurContext)) RD = MD->getParent(); if (!RD || !RD->hasAnyDependentBases()) return nullptr; // Diagnose this as unqualified lookup into a dependent base class. If 'this' // is available, suggest inserting 'this->' as a fixit. SourceLocation Loc = NameInfo.getLoc(); auto DB = S.Diag(Loc, diag::ext_undeclared_unqual_id_with_dependent_base); DB << NameInfo.getName() << RD; if (!ThisType.isNull()) { DB << FixItHint::CreateInsertion(Loc, "this->"); return CXXDependentScopeMemberExpr::Create( Context, /*This=*/nullptr, ThisType, /*IsArrow=*/true, /*Op=*/SourceLocation(), NestedNameSpecifierLoc(), TemplateKWLoc, /*FirstQualifierInScope=*/nullptr, NameInfo, TemplateArgs); } // Synthesize a fake NNS that points to the derived class. This will // perform name lookup during template instantiation. CXXScopeSpec SS; auto *NNS = NestedNameSpecifier::Create(Context, nullptr, true, RD->getTypeForDecl()); SS.MakeTrivial(Context, NNS, SourceRange(Loc, Loc)); return DependentScopeDeclRefExpr::Create( Context, SS.getWithLocInContext(Context), TemplateKWLoc, NameInfo, TemplateArgs); } ExprResult Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS, SourceLocation TemplateKWLoc, UnqualifiedId &Id, bool HasTrailingLParen, bool IsAddressOfOperand, std::unique_ptr CCC, bool IsInlineAsmIdentifier, Token *KeywordReplacement) { assert(!(IsAddressOfOperand && HasTrailingLParen) && "cannot be direct & operand and have a trailing lparen"); if (SS.isInvalid()) return ExprError(); TemplateArgumentListInfo TemplateArgsBuffer; // Decompose the UnqualifiedId into the following data. DeclarationNameInfo NameInfo; const TemplateArgumentListInfo *TemplateArgs; DecomposeUnqualifiedId(Id, TemplateArgsBuffer, NameInfo, TemplateArgs); DeclarationName Name = NameInfo.getName(); IdentifierInfo *II = Name.getAsIdentifierInfo(); SourceLocation NameLoc = NameInfo.getLoc(); if (II && II->isEditorPlaceholder()) { // FIXME: When typed placeholders are supported we can create a typed // placeholder expression node. 
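    // Illustrative note (assumed behavior): identifiers lexed from editor
    // placeholder tokens, e.g. an Xcode-style <#expression#>, take this
    // path and are rejected without building an expression.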
return ExprError(); } // C++ [temp.dep.expr]p3: // An id-expression is type-dependent if it contains: // -- an identifier that was declared with a dependent type, // (note: handled after lookup) // -- a template-id that is dependent, // (note: handled in BuildTemplateIdExpr) // -- a conversion-function-id that specifies a dependent type, // -- a nested-name-specifier that contains a class-name that // names a dependent type. // Determine whether this is a member of an unknown specialization; // we need to handle these differently. bool DependentID = false; if (Name.getNameKind() == DeclarationName::CXXConversionFunctionName && Name.getCXXNameType()->isDependentType()) { DependentID = true; } else if (SS.isSet()) { if (DeclContext *DC = computeDeclContext(SS, false)) { if (RequireCompleteDeclContext(SS, DC)) return ExprError(); } else { DependentID = true; } } if (DependentID) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); // Perform the required lookup. LookupResult R(*this, NameInfo, (Id.getKind() == UnqualifiedIdKind::IK_ImplicitSelfParam) ? LookupObjCImplicitSelfParam : LookupOrdinaryName); if (TemplateKWLoc.isValid() || TemplateArgs) { // Lookup the template name again to correctly establish the context in // which it was found. This is really unfortunate as we already did the // lookup to determine that it was a template name in the first place. If // this becomes a performance hit, we can work harder to preserve those // results until we get here but it's likely not worth it. bool MemberOfUnknownSpecialization; if (LookupTemplateName(R, S, SS, QualType(), /*EnteringContext=*/false, MemberOfUnknownSpecialization, TemplateKWLoc)) return ExprError(); if (MemberOfUnknownSpecialization || (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation)) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); } else { bool IvarLookupFollowUp = II && !SS.isSet() && getCurMethodDecl(); LookupParsedName(R, S, &SS, !IvarLookupFollowUp); // If the result might be in a dependent base class, this is a dependent // id-expression. if (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); // If this reference is in an Objective-C method, then we need to do // some special Objective-C lookup, too. if (IvarLookupFollowUp) { ExprResult E(LookupInObjCMethod(R, S, II, true)); if (E.isInvalid()) return ExprError(); if (Expr *Ex = E.getAs()) return Ex; } } if (R.isAmbiguous()) return ExprError(); // This could be an implicitly declared function reference (legal in C90, // extension in C99, forbidden in C++). if (R.empty() && HasTrailingLParen && II && !getLangOpts().CPlusPlus) { NamedDecl *D = ImplicitlyDefineFunction(NameLoc, *II, S); if (D) R.addDecl(D); } // Determine whether this name might be a candidate for // argument-dependent lookup. bool ADL = UseArgumentDependentLookup(SS, R, HasTrailingLParen); if (R.empty() && !ADL) { if (SS.isEmpty() && getLangOpts().MSVCCompat) { if (Expr *E = recoverFromMSUnqualifiedLookup(*this, Context, NameInfo, TemplateKWLoc, TemplateArgs)) return E; } // Don't diagnose an empty lookup for inline assembly. if (IsInlineAsmIdentifier) return ExprError(); // If this name wasn't predeclared and if this is not a function // call, diagnose the problem. TypoExpr *TE = nullptr; auto DefaultValidator = llvm::make_unique( II, SS.isValid() ? 
SS.getScopeRep() : nullptr); DefaultValidator->IsAddressOfOperand = IsAddressOfOperand; assert((!CCC || CCC->IsAddressOfOperand == IsAddressOfOperand) && "Typo correction callback misconfigured"); if (CCC) { // Make sure the callback knows what the typo being diagnosed is. CCC->setTypoName(II); if (SS.isValid()) CCC->setTypoNNS(SS.getScopeRep()); } // FIXME: DiagnoseEmptyLookup produces bad diagnostics if we're looking for // a template name, but we happen to have always already looked up the name // before we get here if it must be a template name. if (DiagnoseEmptyLookup(S, SS, R, CCC ? std::move(CCC) : std::move(DefaultValidator), nullptr, None, &TE)) { if (TE && KeywordReplacement) { auto &State = getTypoExprState(TE); auto BestTC = State.Consumer->getNextCorrection(); if (BestTC.isKeyword()) { auto *II = BestTC.getCorrectionAsIdentifierInfo(); if (State.DiagHandler) State.DiagHandler(BestTC); KeywordReplacement->startToken(); KeywordReplacement->setKind(II->getTokenID()); KeywordReplacement->setIdentifierInfo(II); KeywordReplacement->setLocation(BestTC.getCorrectionRange().getBegin()); // Clean up the state associated with the TypoExpr, since it has // now been diagnosed (without a call to CorrectDelayedTyposInExpr). clearDelayedTypo(TE); // Signal that a correction to a keyword was performed by returning a // valid-but-null ExprResult. return (Expr*)nullptr; } State.Consumer->resetCorrectionStream(); } return TE ? TE : ExprError(); } assert(!R.empty() && "DiagnoseEmptyLookup returned false but added no results"); // If we found an Objective-C instance variable, let // LookupInObjCMethod build the appropriate expression to // reference the ivar. if (ObjCIvarDecl *Ivar = R.getAsSingle()) { R.clear(); ExprResult E(LookupInObjCMethod(R, S, Ivar->getIdentifier())); // In a hopelessly buggy code, Objective-C instance variable // lookup fails and no expression will be built to reference it. if (!E.isInvalid() && !E.get()) return ExprError(); return E; } } // This is guaranteed from this point on. assert(!R.empty() || ADL); // Check whether this might be a C++ implicit instance member access. // C++ [class.mfct.non-static]p3: // When an id-expression that is not part of a class member access // syntax and not used to form a pointer to member is used in the // body of a non-static member function of class X, if name lookup // resolves the name in the id-expression to a non-static non-type // member of some class C, the id-expression is transformed into a // class member access expression using (*this) as the // postfix-expression to the left of the . operator. // // But we don't actually need to do this for '&' operands if R // resolved to a function or overloaded function set, because the // expression is ill-formed if it actually works out to be a // non-static member function: // // C++ [expr.ref]p4: // Otherwise, if E1.E2 refers to a non-static member function. . . // [t]he expression can be used only as the left-hand operand of a // member function call. // // There are other safeguards against such uses, but it's important // to get this right here so that we don't end up making a // spuriously dependent expression if we're inside a dependent // instance method. 
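  // Illustrative example of the transformation below (generic names):
  //
  //   struct S {
  //     int m;
  //     void f() { m = 1; }   // 'm' is rebuilt as (*this).m
  //   };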
if (!R.empty() && (*R.begin())->isCXXClassMember()) { bool MightBeImplicitMember; if (!IsAddressOfOperand) MightBeImplicitMember = true; else if (!SS.isEmpty()) MightBeImplicitMember = false; else if (R.isOverloadedResult()) MightBeImplicitMember = false; else if (R.isUnresolvableResult()) MightBeImplicitMember = true; else MightBeImplicitMember = isa(R.getFoundDecl()) || isa(R.getFoundDecl()) || isa(R.getFoundDecl()); if (MightBeImplicitMember) return BuildPossibleImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, S); } if (TemplateArgs || TemplateKWLoc.isValid()) { // In C++1y, if this is a variable template id, then check it // in BuildTemplateIdExpr(). // The single lookup result must be a variable template declaration. if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId && Id.TemplateId && Id.TemplateId->Kind == TNK_Var_template) { assert(R.getAsSingle() && "There should only be one declaration found."); } return BuildTemplateIdExpr(SS, TemplateKWLoc, R, ADL, TemplateArgs); } return BuildDeclarationNameExpr(SS, R, ADL); } /// BuildQualifiedDeclarationNameExpr - Build a C++ qualified /// declaration name, generally during template instantiation. /// There's a large number of things which don't need to be done along /// this path. ExprResult Sema::BuildQualifiedDeclarationNameExpr( CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, bool IsAddressOfOperand, const Scope *S, TypeSourceInfo **RecoveryTSI) { DeclContext *DC = computeDeclContext(SS, false); if (!DC) return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(), NameInfo, /*TemplateArgs=*/nullptr); if (RequireCompleteDeclContext(SS, DC)) return ExprError(); LookupResult R(*this, NameInfo, LookupOrdinaryName); LookupQualifiedName(R, DC); if (R.isAmbiguous()) return ExprError(); if (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation) return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(), NameInfo, /*TemplateArgs=*/nullptr); if (R.empty()) { Diag(NameInfo.getLoc(), diag::err_no_member) << NameInfo.getName() << DC << SS.getRange(); return ExprError(); } if (const TypeDecl *TD = R.getAsSingle()) { // Diagnose a missing typename if this resolved unambiguously to a type in // a dependent context. If we can recover with a type, downgrade this to // a warning in Microsoft compatibility mode. unsigned DiagID = diag::err_typename_missing; if (RecoveryTSI && getLangOpts().MSVCCompat) DiagID = diag::ext_typename_missing; SourceLocation Loc = SS.getBeginLoc(); auto D = Diag(Loc, DiagID); D << SS.getScopeRep() << NameInfo.getName().getAsString() << SourceRange(Loc, NameInfo.getEndLoc()); // Don't recover if the caller isn't expecting us to or if we're in a SFINAE // context. if (!RecoveryTSI) return ExprError(); // Only issue the fixit if we're prepared to recover. D << FixItHint::CreateInsertion(Loc, "typename "); // Recover by pretending this was an elaborated type. QualType Ty = Context.getTypeDeclType(TD); TypeLocBuilder TLB; TLB.pushTypeSpec(Ty).setNameLoc(NameInfo.getLoc()); QualType ET = getElaboratedType(ETK_None, SS, Ty); ElaboratedTypeLoc QTL = TLB.push(ET); QTL.setElaboratedKeywordLoc(SourceLocation()); QTL.setQualifierLoc(SS.getWithLocInContext(Context)); *RecoveryTSI = TLB.getTypeSourceInfo(Context, ET); return ExprEmpty(); } // Defend against this resolving to an implicit member access. 
We usually
  // won't get here if this might be a legitimate class member (we end up in
  // BuildMemberReferenceExpr instead), but this can be valid if we're forming
  // a pointer-to-member or in an unevaluated context in C++11.
  if (!R.empty() && (*R.begin())->isCXXClassMember() && !IsAddressOfOperand)
    return BuildPossibleImplicitMemberExpr(SS,
                                           /*TemplateKWLoc=*/SourceLocation(),
                                           R, /*TemplateArgs=*/nullptr, S);

  return BuildDeclarationNameExpr(SS, R, /* ADL */ false);
}

/// LookupInObjCMethod - The parser has read a name in, and Sema has
/// detected that we're currently inside an ObjC method. Perform some
/// additional lookup.
///
/// Ideally, most of this would be done by lookup, but there's
/// actually quite a lot of extra work involved.
///
/// Returns a null sentinel to indicate trivial success.
ExprResult
Sema::LookupInObjCMethod(LookupResult &Lookup, Scope *S,
                         IdentifierInfo *II, bool AllowBuiltinCreation) {
  SourceLocation Loc = Lookup.getNameLoc();
  ObjCMethodDecl *CurMethod = getCurMethodDecl();

  // Check for error condition which is already reported.
  if (!CurMethod)
    return ExprError();

  // There are two cases to handle here. 1) scoped lookup could have failed,
  // in which case we should look for an ivar. 2) scoped lookup could have
  // found a decl, but that decl is outside the current instance method (i.e.
  // a global variable). In these two cases, we do a lookup for an ivar with
  // this name; if the lookup succeeds, we replace it with our current decl.

  // If we're in a class method, we don't normally want to look for
  // ivars. But if we don't find anything else, and there's an
  // ivar, that's an error.
  bool IsClassMethod = CurMethod->isClassMethod();

  bool LookForIvars;
  if (Lookup.empty())
    LookForIvars = true;
  else if (IsClassMethod)
    LookForIvars = false;
  else
    LookForIvars = (Lookup.isSingleResult() &&
                    Lookup.getFoundDecl()->isDefinedOutsideFunctionOrMethod());
  ObjCInterfaceDecl *IFace = nullptr;
  if (LookForIvars) {
    IFace = CurMethod->getClassInterface();
    ObjCInterfaceDecl *ClassDeclared;
    ObjCIvarDecl *IV = nullptr;
    if (IFace && (IV = IFace->lookupInstanceVariable(II, ClassDeclared))) {
      // Diagnose using an ivar in a class method.
      if (IsClassMethod)
        return ExprError(Diag(Loc, diag::err_ivar_use_in_class_method)
                         << IV->getDeclName());

      // If we're referencing an invalid decl, just return this as a silent
      // error node. The error diagnostic was already emitted on the decl.
      if (IV->isInvalidDecl())
        return ExprError();

      // Check if referencing a field with __attribute__((deprecated)).
      if (DiagnoseUseOfDecl(IV, Loc))
        return ExprError();

      // Diagnose the use of an ivar outside of the declaring class.
      if (IV->getAccessControl() == ObjCIvarDecl::Private &&
          !declaresSameEntity(ClassDeclared, IFace) &&
          !getLangOpts().DebuggerSupport)
        Diag(Loc, diag::err_private_ivar_access) << IV->getDeclName();

      // FIXME: This should use a new expr for a direct reference, don't
      // turn this into Self->ivar, just return a BareIVarExpr or something.
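      // Illustrative example: inside
      //   @implementation C
      //   - (void)m { _ivar = 0; }
      //   @end
      // the bare reference to '_ivar' is rebuilt below as 'self->_ivar'.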
IdentifierInfo &II = Context.Idents.get("self"); UnqualifiedId SelfName; SelfName.setIdentifier(&II, SourceLocation()); SelfName.setKind(UnqualifiedIdKind::IK_ImplicitSelfParam); CXXScopeSpec SelfScopeSpec; SourceLocation TemplateKWLoc; ExprResult SelfExpr = ActOnIdExpression(S, SelfScopeSpec, TemplateKWLoc, SelfName, false, false); if (SelfExpr.isInvalid()) return ExprError(); SelfExpr = DefaultLvalueConversion(SelfExpr.get()); if (SelfExpr.isInvalid()) return ExprError(); MarkAnyDeclReferenced(Loc, IV, true); ObjCMethodFamily MF = CurMethod->getMethodFamily(); if (MF != OMF_init && MF != OMF_dealloc && MF != OMF_finalize && !IvarBacksCurrentMethodAccessor(IFace, CurMethod, IV)) Diag(Loc, diag::warn_direct_ivar_access) << IV->getDeclName(); ObjCIvarRefExpr *Result = new (Context) ObjCIvarRefExpr(IV, IV->getUsageType(SelfExpr.get()->getType()), Loc, IV->getLocation(), SelfExpr.get(), true, true); if (IV->getType().getObjCLifetime() == Qualifiers::OCL_Weak) { if (!isUnevaluatedContext() && !Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, Loc)) getCurFunction()->recordUseOfWeak(Result); } if (getLangOpts().ObjCAutoRefCount) { if (CurContext->isClosure()) Diag(Loc, diag::warn_implicitly_retains_self) << FixItHint::CreateInsertion(Loc, "self->"); } return Result; } } else if (CurMethod->isInstanceMethod()) { // We should warn if a local variable hides an ivar. if (ObjCInterfaceDecl *IFace = CurMethod->getClassInterface()) { ObjCInterfaceDecl *ClassDeclared; if (ObjCIvarDecl *IV = IFace->lookupInstanceVariable(II, ClassDeclared)) { if (IV->getAccessControl() != ObjCIvarDecl::Private || declaresSameEntity(IFace, ClassDeclared)) Diag(Loc, diag::warn_ivar_use_hidden) << IV->getDeclName(); } } } else if (Lookup.isSingleResult() && Lookup.getFoundDecl()->isDefinedOutsideFunctionOrMethod()) { // If accessing a stand-alone ivar in a class method, this is an error. if (const ObjCIvarDecl *IV = dyn_cast(Lookup.getFoundDecl())) return ExprError(Diag(Loc, diag::err_ivar_use_in_class_method) << IV->getDeclName()); } if (Lookup.empty() && II && AllowBuiltinCreation) { // FIXME. Consolidate this with similar code in LookupName. if (unsigned BuiltinID = II->getBuiltinID()) { if (!(getLangOpts().CPlusPlus && Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))) { NamedDecl *D = LazilyCreateBuiltin((IdentifierInfo *)II, BuiltinID, S, Lookup.isForRedeclaration(), Lookup.getNameLoc()); if (D) Lookup.addDecl(D); } } } // Sentinel value saying that we didn't do anything special. return ExprResult((Expr *)nullptr); } /// Cast a base object to a member's actual type. /// /// Logically this happens in three phases: /// /// * First we cast from the base type to the naming class. /// The naming class is the class into which we were looking /// when we found the member; it's the qualifier type if a /// qualifier was provided, and otherwise it's the base type. /// /// * Next we cast from the naming class to the declaring class. /// If the member we found was brought into a class's scope by /// a using declaration, this is that class; otherwise it's /// the class declaring the member. /// /// * Finally we cast from the declaring class to the "true" /// declaring class of the member. This conversion does not /// obey access control. 
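///
/// For example (illustrative, generic names): given
///   struct A { int x; };
///   struct B : A { };
///   struct C : B { void f(); };
/// a use of 'B::x' inside C::f() casts the object first to 'B' (the naming
/// class) and then to 'A' (the declaring class).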
ExprResult Sema::PerformObjectMemberConversion(Expr *From, NestedNameSpecifier *Qualifier, NamedDecl *FoundDecl, NamedDecl *Member) { CXXRecordDecl *RD = dyn_cast(Member->getDeclContext()); if (!RD) return From; QualType DestRecordType; QualType DestType; QualType FromRecordType; QualType FromType = From->getType(); bool PointerConversions = false; if (isa(Member)) { DestRecordType = Context.getCanonicalType(Context.getTypeDeclType(RD)); if (FromType->getAs()) { DestType = Context.getPointerType(DestRecordType); FromRecordType = FromType->getPointeeType(); PointerConversions = true; } else { DestType = DestRecordType; FromRecordType = FromType; } } else if (CXXMethodDecl *Method = dyn_cast(Member)) { if (Method->isStatic()) return From; DestType = Method->getThisType(); DestRecordType = DestType->getPointeeType(); if (FromType->getAs()) { FromRecordType = FromType->getPointeeType(); PointerConversions = true; } else { FromRecordType = FromType; DestType = DestRecordType; } } else { // No conversion necessary. return From; } if (DestType->isDependentType() || FromType->isDependentType()) return From; // If the unqualified types are the same, no conversion is necessary. if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType)) return From; SourceRange FromRange = From->getSourceRange(); SourceLocation FromLoc = FromRange.getBegin(); ExprValueKind VK = From->getValueKind(); // C++ [class.member.lookup]p8: // [...] Ambiguities can often be resolved by qualifying a name with its // class name. // // If the member was a qualified name and the qualified referred to a // specific base subobject type, we'll cast to that intermediate type // first and then to the object in which the member is declared. That allows // one to resolve ambiguities in, e.g., a diamond-shaped hierarchy such as: // // class Base { public: int x; }; // class Derived1 : public Base { }; // class Derived2 : public Base { }; // class VeryDerived : public Derived1, public Derived2 { void f(); }; // // void VeryDerived::f() { // x = 17; // error: ambiguous base subobjects // Derived1::x = 17; // okay, pick the Base subobject of Derived1 // } if (Qualifier && Qualifier->getAsType()) { QualType QType = QualType(Qualifier->getAsType(), 0); assert(QType->isRecordType() && "lookup done with non-record type"); QualType QRecordType = QualType(QType->getAs(), 0); // In C++98, the qualifier type doesn't actually have to be a base // type of the object type, in which case we just ignore it. // Otherwise build the appropriate casts. if (IsDerivedFrom(FromLoc, FromRecordType, QRecordType)) { CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, QRecordType, FromLoc, FromRange, &BasePath)) return ExprError(); if (PointerConversions) QType = Context.getPointerType(QType); From = ImpCastExprToType(From, QType, CK_UncheckedDerivedToBase, VK, &BasePath).get(); FromType = QType; FromRecordType = QRecordType; // If the qualifier type was the same as the destination type, // we're done. if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType)) return From; } } bool IgnoreAccess = false; // If we actually found the member through a using declaration, cast // down to the using declaration's type. // // Pointer equality is fine here because only one declaration of a // class ever has member declarations. 
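  // Illustrative sketch (generic names): with
  //   struct A { int x; };
  //   struct B : A { using A::x; };
  // a member found through B's using-declaration is first cast to 'B' here
  // before the final cast down to the declaring class 'A'.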
if (FoundDecl->getDeclContext() != Member->getDeclContext()) { assert(isa(FoundDecl)); QualType URecordType = Context.getTypeDeclType( cast(FoundDecl->getDeclContext())); // We only need to do this if the naming-class to declaring-class // conversion is non-trivial. if (!Context.hasSameUnqualifiedType(FromRecordType, URecordType)) { assert(IsDerivedFrom(FromLoc, FromRecordType, URecordType)); CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, URecordType, FromLoc, FromRange, &BasePath)) return ExprError(); QualType UType = URecordType; if (PointerConversions) UType = Context.getPointerType(UType); From = ImpCastExprToType(From, UType, CK_UncheckedDerivedToBase, VK, &BasePath).get(); FromType = UType; FromRecordType = URecordType; } // We don't do access control for the conversion from the // declaring class to the true declaring class. IgnoreAccess = true; } CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, DestRecordType, FromLoc, FromRange, &BasePath, IgnoreAccess)) return ExprError(); return ImpCastExprToType(From, DestType, CK_UncheckedDerivedToBase, VK, &BasePath); } bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS, const LookupResult &R, bool HasTrailingLParen) { // Only when used directly as the postfix-expression of a call. if (!HasTrailingLParen) return false; // Never if a scope specifier was provided. if (SS.isSet()) return false; // Only in C++ or ObjC++. if (!getLangOpts().CPlusPlus) return false; // Turn off ADL when we find certain kinds of declarations during // normal lookup: for (NamedDecl *D : R) { // C++0x [basic.lookup.argdep]p3: // -- a declaration of a class member // Since using decls preserve this property, we check this on the // original decl. if (D->isCXXClassMember()) return false; // C++0x [basic.lookup.argdep]p3: // -- a block-scope function declaration that is not a // using-declaration // NOTE: we also trigger this for function templates (in fact, we // don't check the decl type at all, since all other decl types // turn off ADL anyway). if (isa(D)) D = cast(D)->getTargetDecl(); else if (D->getLexicalDeclContext()->isFunctionOrMethod()) return false; // C++0x [basic.lookup.argdep]p3: // -- a declaration that is neither a function or a function // template // And also for builtin functions. if (isa(D)) { FunctionDecl *FDecl = cast(D); // But also builtin functions. if (FDecl->getBuiltinID() && FDecl->isImplicit()) return false; } else if (!isa(D)) return false; } return true; } /// Diagnoses obvious problems with the use of the given declaration /// as an expression. This is only actually called for lookups that /// were not overloaded, and it doesn't promise that the declaration /// will in fact be used. static bool CheckDeclInExpr(Sema &S, SourceLocation Loc, NamedDecl *D) { if (D->isInvalidDecl()) return true; if (isa(D)) { S.Diag(Loc, diag::err_unexpected_typedef) << D->getDeclName(); return true; } if (isa(D)) { S.Diag(Loc, diag::err_unexpected_interface) << D->getDeclName(); return true; } if (isa(D)) { S.Diag(Loc, diag::err_unexpected_namespace) << D->getDeclName(); return true; } return false; } // Certain multiversion types should be treated as overloaded even when there is // only one result. 
static bool ShouldLookupResultBeMultiVersionOverload(const LookupResult &R) { assert(R.isSingleResult() && "Expected only a single result"); const auto *FD = dyn_cast(R.getFoundDecl()); return FD && (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion()); } ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS, LookupResult &R, bool NeedsADL, bool AcceptInvalidDecl) { // If this is a single, fully-resolved result and we don't need ADL, // just build an ordinary singleton decl ref. if (!NeedsADL && R.isSingleResult() && !R.getAsSingle() && !ShouldLookupResultBeMultiVersionOverload(R)) return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), R.getFoundDecl(), R.getRepresentativeDecl(), nullptr, AcceptInvalidDecl); // We only need to check the declaration if there's exactly one // result, because in the overloaded case the results can only be // functions and function templates. if (R.isSingleResult() && !ShouldLookupResultBeMultiVersionOverload(R) && CheckDeclInExpr(*this, R.getNameLoc(), R.getFoundDecl())) return ExprError(); // Otherwise, just build an unresolved lookup expression. Suppress // any lookup-related diagnostics; we'll hash these out later, when // we've picked a target. R.suppressDiagnostics(); UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create(Context, R.getNamingClass(), SS.getWithLocInContext(Context), R.getLookupNameInfo(), NeedsADL, R.isOverloadedResult(), R.begin(), R.end()); return ULE; } static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, ValueDecl *var, DeclContext *DC); /// Complete semantic analysis for a reference to the given declaration. ExprResult Sema::BuildDeclarationNameExpr( const CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, NamedDecl *D, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs, bool AcceptInvalidDecl) { assert(D && "Cannot refer to a NULL declaration"); assert(!isa(D) && "Cannot refer unambiguously to a function template"); SourceLocation Loc = NameInfo.getLoc(); if (CheckDeclInExpr(*this, Loc, D)) return ExprError(); if (TemplateDecl *Template = dyn_cast(D)) { // Specifically diagnose references to class templates that are missing // a template argument list. diagnoseMissingTemplateArguments(TemplateName(Template), Loc); return ExprError(); } // Make sure that we're referring to a value. ValueDecl *VD = dyn_cast(D); if (!VD) { Diag(Loc, diag::err_ref_non_value) << D << SS.getRange(); Diag(D->getLocation(), diag::note_declared_at); return ExprError(); } // Check whether this declaration can be used. Note that we suppress // this check when we're going to perform argument-dependent lookup // on this function name, because this might not be the function // that overload resolution actually selects. if (DiagnoseUseOfDecl(VD, Loc)) return ExprError(); // Only create DeclRefExpr's for valid Decl's. if (VD->isInvalidDecl() && !AcceptInvalidDecl) return ExprError(); // Handle members of anonymous structs and unions. If we got here, // and the reference is to a class member indirect field, then this // must be the subject of a pointer-to-member expression. 
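  //
  // A sketch of what this distinguishes (illustrative, not from the original
  // source): an indirect field that is not a C++ class member, such as a
  // member of a file-scope anonymous union,
  //
  //   static union { int i; float f; };
  //   int g() { return i; }   // 'i' is an IndirectFieldDecl outside any class
  //
  // takes the anonymous-member path below; a class-member indirect field that
  // still reaches this point can only come from a pointer-to-member
  // expression such as '&S::x' naming an anonymous-struct member.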
if (IndirectFieldDecl *indirectField = dyn_cast(VD)) if (!indirectField->isCXXClassMember()) return BuildAnonymousStructUnionMemberReference(SS, NameInfo.getLoc(), indirectField); { QualType type = VD->getType(); if (type.isNull()) return ExprError(); if (auto *FPT = type->getAs()) { // C++ [except.spec]p17: // An exception-specification is considered to be needed when: // - in an expression, the function is the unique lookup result or // the selected member of a set of overloaded functions. ResolveExceptionSpec(Loc, FPT); type = VD->getType(); } ExprValueKind valueKind = VK_RValue; switch (D->getKind()) { // Ignore all the non-ValueDecl kinds. #define ABSTRACT_DECL(kind) #define VALUE(type, base) #define DECL(type, base) \ case Decl::type: #include "clang/AST/DeclNodes.inc" llvm_unreachable("invalid value decl kind"); // These shouldn't make it here. case Decl::ObjCAtDefsField: case Decl::ObjCIvar: llvm_unreachable("forming non-member reference to ivar?"); // Enum constants are always r-values and never references. // Unresolved using declarations are dependent. case Decl::EnumConstant: case Decl::UnresolvedUsingValue: case Decl::OMPDeclareReduction: valueKind = VK_RValue; break; // Fields and indirect fields that got here must be for // pointer-to-member expressions; we just call them l-values for // internal consistency, because this subexpression doesn't really // exist in the high-level semantics. case Decl::Field: case Decl::IndirectField: assert(getLangOpts().CPlusPlus && "building reference to field in C?"); // These can't have reference type in well-formed programs, but // for internal consistency we do this anyway. type = type.getNonReferenceType(); valueKind = VK_LValue; break; // Non-type template parameters are either l-values or r-values // depending on the type. case Decl::NonTypeTemplateParm: { if (const ReferenceType *reftype = type->getAs()) { type = reftype->getPointeeType(); valueKind = VK_LValue; // even if the parameter is an r-value reference break; } // For non-references, we need to strip qualifiers just in case // the template parameter was declared as 'const int' or whatever. valueKind = VK_RValue; type = type.getUnqualifiedType(); break; } case Decl::Var: case Decl::VarTemplateSpecialization: case Decl::VarTemplatePartialSpecialization: case Decl::Decomposition: case Decl::OMPCapturedExpr: // In C, "extern void blah;" is valid and is an r-value. if (!getLangOpts().CPlusPlus && !type.hasQualifiers() && type->isVoidType()) { valueKind = VK_RValue; break; } LLVM_FALLTHROUGH; case Decl::ImplicitParam: case Decl::ParmVar: { // These are always l-values. valueKind = VK_LValue; type = type.getNonReferenceType(); // FIXME: Does the addition of const really only apply in // potentially-evaluated contexts? Since the variable isn't actually // captured in an unevaluated context, it seems that the answer is no. if (!isUnevaluatedContext()) { QualType CapturedType = getCapturedDeclRefType(cast(VD), Loc); if (!CapturedType.isNull()) type = CapturedType; } break; } case Decl::Binding: { // These are always lvalues. valueKind = VK_LValue; type = type.getNonReferenceType(); // FIXME: Support lambda-capture of BindingDecls, once CWG actually // decides how that's supposed to work. 
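      //
      // Illustrative example (not from the original source) of the capture
      // case left open above:
      //
      //   auto [a, b] = std::pair<int, int>(1, 2);
      //   auto f = [&] { return a; };  // referencing the BindingDecl 'a'
      //                                // from a capturing context is
      //                                // diagnosed rather than captured.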
auto *BD = cast(VD); if (BD->getDeclContext()->isFunctionOrMethod() && BD->getDeclContext() != CurContext) diagnoseUncapturableValueReference(*this, Loc, BD, CurContext); break; } case Decl::Function: { if (unsigned BID = cast(VD)->getBuiltinID()) { if (!Context.BuiltinInfo.isPredefinedLibFunction(BID)) { type = Context.BuiltinFnTy; valueKind = VK_RValue; break; } } const FunctionType *fty = type->castAs(); // If we're referring to a function with an __unknown_anytype // result type, make the entire expression __unknown_anytype. if (fty->getReturnType() == Context.UnknownAnyTy) { type = Context.UnknownAnyTy; valueKind = VK_RValue; break; } // Functions are l-values in C++. if (getLangOpts().CPlusPlus) { valueKind = VK_LValue; break; } // C99 DR 316 says that, if a function type comes from a // function definition (without a prototype), that type is only // used for checking compatibility. Therefore, when referencing // the function, we pretend that we don't have the full function // type. if (!cast(VD)->hasPrototype() && isa(fty)) type = Context.getFunctionNoProtoType(fty->getReturnType(), fty->getExtInfo()); // Functions are r-values in C. valueKind = VK_RValue; break; } case Decl::CXXDeductionGuide: llvm_unreachable("building reference to deduction guide"); case Decl::MSProperty: valueKind = VK_LValue; break; case Decl::CXXMethod: // If we're referring to a method with an __unknown_anytype // result type, make the entire expression __unknown_anytype. // This should only be possible with a type written directly. if (const FunctionProtoType *proto = dyn_cast(VD->getType())) if (proto->getReturnType() == Context.UnknownAnyTy) { type = Context.UnknownAnyTy; valueKind = VK_RValue; break; } // C++ methods are l-values if static, r-values if non-static. if (cast(VD)->isStatic()) { valueKind = VK_LValue; break; } LLVM_FALLTHROUGH; case Decl::CXXConversion: case Decl::CXXDestructor: case Decl::CXXConstructor: valueKind = VK_RValue; break; } return BuildDeclRefExpr(VD, type, valueKind, NameInfo, &SS, FoundD, TemplateArgs); } } static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source, SmallString<32> &Target) { Target.resize(CharByteWidth * (Source.size() + 1)); char *ResultPtr = &Target[0]; const llvm::UTF8 *ErrorPtr; bool success = llvm::ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr); (void)success; assert(success); Target.resize(ResultPtr - &Target[0]); } ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc, PredefinedExpr::IdentKind IK) { // Pick the current block, lambda, captured statement or function. Decl *currentDecl = nullptr; if (const BlockScopeInfo *BSI = getCurBlock()) currentDecl = BSI->TheDecl; else if (const LambdaScopeInfo *LSI = getCurLambda()) currentDecl = LSI->CallOperator; else if (const CapturedRegionScopeInfo *CSI = getCurCapturedRegion()) currentDecl = CSI->TheCapturedDecl; else currentDecl = getCurFunctionOrMethodDecl(); if (!currentDecl) { Diag(Loc, diag::ext_predef_outside_function); currentDecl = Context.getTranslationUnitDecl(); } QualType ResTy; StringLiteral *SL = nullptr; if (cast(currentDecl)->isDependentContext()) ResTy = Context.DependentTy; else { // Pre-defined identifiers are of type char[x], where x is the length of // the string. 
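  //
  // For example (an illustrative sketch, not from the original source),
  // inside 'void f()' the identifier __func__ behaves as if declared:
  //
  //   static const char __func__[] = "f";   // type char[2], i.e. length + 1
  //
  // which is why the array extent below is computed as Str.length() + 1.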
auto Str = PredefinedExpr::ComputeName(IK, currentDecl); unsigned Length = Str.length(); llvm::APInt LengthI(32, Length + 1); if (IK == PredefinedExpr::LFunction || IK == PredefinedExpr::LFuncSig) { ResTy = Context.adjustStringLiteralBaseType(Context.WideCharTy.withConst()); SmallString<32> RawChars; ConvertUTF8ToWideString(Context.getTypeSizeInChars(ResTy).getQuantity(), Str, RawChars); ResTy = Context.getConstantArrayType(ResTy, LengthI, ArrayType::Normal, /*IndexTypeQuals*/ 0); SL = StringLiteral::Create(Context, RawChars, StringLiteral::Wide, /*Pascal*/ false, ResTy, Loc); } else { ResTy = Context.adjustStringLiteralBaseType(Context.CharTy.withConst()); ResTy = Context.getConstantArrayType(ResTy, LengthI, ArrayType::Normal, /*IndexTypeQuals*/ 0); SL = StringLiteral::Create(Context, Str, StringLiteral::Ascii, /*Pascal*/ false, ResTy, Loc); } } return PredefinedExpr::Create(Context, Loc, ResTy, IK, SL); } ExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind) { PredefinedExpr::IdentKind IK; switch (Kind) { default: llvm_unreachable("Unknown simple primary expr!"); case tok::kw___func__: IK = PredefinedExpr::Func; break; // [C99 6.4.2.2] case tok::kw___FUNCTION__: IK = PredefinedExpr::Function; break; case tok::kw___FUNCDNAME__: IK = PredefinedExpr::FuncDName; break; // [MS] case tok::kw___FUNCSIG__: IK = PredefinedExpr::FuncSig; break; // [MS] case tok::kw_L__FUNCTION__: IK = PredefinedExpr::LFunction; break; // [MS] case tok::kw_L__FUNCSIG__: IK = PredefinedExpr::LFuncSig; break; // [MS] case tok::kw___PRETTY_FUNCTION__: IK = PredefinedExpr::PrettyFunction; break; } return BuildPredefinedExpr(Loc, IK); } ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) { SmallString<16> CharBuffer; bool Invalid = false; StringRef ThisTok = PP.getSpelling(Tok, CharBuffer, &Invalid); if (Invalid) return ExprError(); CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), Tok.getLocation(), PP, Tok.getKind()); if (Literal.hadError()) return ExprError(); QualType Ty; if (Literal.isWide()) Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++. else if (Literal.isUTF8() && getLangOpts().Char8) Ty = Context.Char8Ty; // u8'x' -> char8_t when it exists. else if (Literal.isUTF16()) Ty = Context.Char16Ty; // u'x' -> char16_t in C11 and C++11. else if (Literal.isUTF32()) Ty = Context.Char32Ty; // U'x' -> char32_t in C11 and C++11. else if (!getLangOpts().CPlusPlus || Literal.isMultiChar()) Ty = Context.IntTy; // 'x' -> int in C, 'wxyz' -> int in C++. else Ty = Context.CharTy; // 'x' -> char in C++ CharacterLiteral::CharacterKind Kind = CharacterLiteral::Ascii; if (Literal.isWide()) Kind = CharacterLiteral::Wide; else if (Literal.isUTF16()) Kind = CharacterLiteral::UTF16; else if (Literal.isUTF32()) Kind = CharacterLiteral::UTF32; else if (Literal.isUTF8()) Kind = CharacterLiteral::UTF8; Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty, Tok.getLocation()); if (Literal.getUDSuffix().empty()) return Lit; // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. 
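  //
  // A minimal sketch of the construct being checked (hypothetical, not from
  // the original source):
  //
  //   constexpr int operator"" _ord(char c) { return c; }
  //   int n = 'A'_ord;   // valid only where a UDLScope exists; treated as
  //                      // operator "" _ord('A')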
if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_character_udl)); // C++11 [lex.ext]p6: The literal L is treated as a call of the form // operator "" X (ch) return BuildCookedLiteralOperatorCall(*this, UDLScope, UDSuffix, UDSuffixLoc, Lit, Tok.getLocation()); } ExprResult Sema::ActOnIntegerConstant(SourceLocation Loc, uint64_t Val) { unsigned IntSize = Context.getTargetInfo().getIntWidth(); return IntegerLiteral::Create(Context, llvm::APInt(IntSize, Val), Context.IntTy, Loc); } static Expr *BuildFloatingLiteral(Sema &S, NumericLiteralParser &Literal, QualType Ty, SourceLocation Loc) { const llvm::fltSemantics &Format = S.Context.getFloatTypeSemantics(Ty); using llvm::APFloat; APFloat Val(Format); APFloat::opStatus result = Literal.GetFloatValue(Val); // Overflow is always an error, but underflow is only an error if // we underflowed to zero (APFloat reports denormals as underflow). if ((result & APFloat::opOverflow) || ((result & APFloat::opUnderflow) && Val.isZero())) { unsigned diagnostic; SmallString<20> buffer; if (result & APFloat::opOverflow) { diagnostic = diag::warn_float_overflow; APFloat::getLargest(Format).toString(buffer); } else { diagnostic = diag::warn_float_underflow; APFloat::getSmallest(Format).toString(buffer); } S.Diag(Loc, diagnostic) << Ty << StringRef(buffer.data(), buffer.size()); } bool isExact = (result == APFloat::opOK); return FloatingLiteral::Create(S.Context, Val, isExact, Ty, Loc); } bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc) { assert(E && "Invalid expression"); if (E->isValueDependent()) return false; QualType QT = E->getType(); if (!QT->isIntegerType() || QT->isBooleanType() || QT->isCharType()) { Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_type) << QT; return true; } llvm::APSInt ValueAPS; ExprResult R = VerifyIntegerConstantExpression(E, &ValueAPS); if (R.isInvalid()) return true; bool ValueIsPositive = ValueAPS.isStrictlyPositive(); if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) { Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_value) << ValueAPS.toString(10) << ValueIsPositive; return true; } return false; } ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { // Fast path for a single digit (which is quite common). A single digit // cannot have a trigraph, escaped newline, radix prefix, or suffix. if (Tok.getLength() == 1) { const char Val = PP.getSpellingOfSingleCharacterNumericConstant(Tok); return ActOnIntegerConstant(Tok.getLocation(), Val-'0'); } SmallString<128> SpellingBuffer; // NumericLiteralParser wants to overread by one character. Add padding to // the buffer in case the token is copied to the buffer. If getSpelling() // returns a StringRef to the memory buffer, it should have a null char at // the EOF, so it is also safe. SpellingBuffer.resize(Tok.getLength() + 1); // Get the spelling of the token, which eliminates trigraphs, etc. bool Invalid = false; StringRef TokSpelling = PP.getSpelling(Tok, SpellingBuffer, &Invalid); if (Invalid) return ExprError(); NumericLiteralParser Literal(TokSpelling, Tok.getLocation(), PP); if (Literal.hadError) return ExprError(); if (Literal.hasUDSuffix()) { // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. 
    if (!UDLScope)
      return ExprError(Diag(UDSuffixLoc, diag::err_invalid_numeric_udl));

    QualType CookedTy;
    if (Literal.isFloatingLiteral()) {
      // C++11 [lex.ext]p4: If S contains a literal operator with parameter
      // type long double, the literal is treated as a call of the form
      //   operator "" X (f L)
      CookedTy = Context.LongDoubleTy;
    } else {
      // C++11 [lex.ext]p3: If S contains a literal operator with parameter
      // type unsigned long long, the literal is treated as a call of the form
      //   operator "" X (n ULL)
      CookedTy = Context.UnsignedLongLongTy;
    }

    DeclarationName OpName =
      Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix);
    DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc);
    OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc);

    SourceLocation TokLoc = Tok.getLocation();

    // Perform literal operator lookup to determine if we're building a raw
    // literal or a cooked one.
    LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName);
    switch (LookupLiteralOperator(UDLScope, R, CookedTy,
                                  /*AllowRaw*/ true, /*AllowTemplate*/ true,
                                  /*AllowStringTemplate*/ false,
                                  /*DiagnoseMissing*/ !Literal.isImaginary)) {
    case LOLR_ErrorNoDiagnostic:
      // Lookup failure for imaginary constants isn't fatal, there's still the
      // GNU extension producing _Complex types.
      break;
    case LOLR_Error:
      return ExprError();
    case LOLR_Cooked: {
      Expr *Lit;
      if (Literal.isFloatingLiteral()) {
        Lit = BuildFloatingLiteral(*this, Literal, CookedTy, Tok.getLocation());
      } else {
        llvm::APInt ResultVal(Context.getTargetInfo().getLongLongWidth(), 0);
        if (Literal.GetIntegerValue(ResultVal))
          Diag(Tok.getLocation(), diag::err_integer_literal_too_large)
              << /* Unsigned */ 1;
        Lit = IntegerLiteral::Create(Context, ResultVal, CookedTy,
                                     Tok.getLocation());
      }
      return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc);
    }

    case LOLR_Raw: {
      // C++11 [lex.ext]p3, p4: If S contains a raw literal operator, the
      // literal is treated as a call of the form
      //   operator "" X ("n")
      unsigned Length = Literal.getUDSuffixOffset();
      QualType StrTy = Context.getConstantArrayType(
          Context.adjustStringLiteralBaseType(Context.CharTy.withConst()),
          llvm::APInt(32, Length + 1), ArrayType::Normal, 0);
      Expr *Lit = StringLiteral::Create(
          Context, StringRef(TokSpelling.data(), Length), StringLiteral::Ascii,
          /*Pascal*/false, StrTy, &TokLoc, 1);
      return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc);
    }

    case LOLR_Template: {
      // C++11 [lex.ext]p3, p4: Otherwise (S contains a literal operator
      // template), L is treated as a call of the form
      //   operator "" X <'c1', 'c2', ... 'ck'>()
      // where n is the source character sequence c1 c2 ... ck.
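      //
      // For instance (illustrative, not from the original source):
      //
      //   template <char... Cs>
      //   constexpr int operator"" _len() { return sizeof...(Cs); }
      //
      //   static_assert(123_len == 3, "");  // instantiated as
      //                                     // operator "" _len<'1','2','3'>()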
TemplateArgumentListInfo ExplicitArgs; unsigned CharBits = Context.getIntWidth(Context.CharTy); bool CharIsUnsigned = Context.CharTy->isUnsignedIntegerType(); llvm::APSInt Value(CharBits, CharIsUnsigned); for (unsigned I = 0, N = Literal.getUDSuffixOffset(); I != N; ++I) { Value = TokSpelling[I]; TemplateArgument Arg(Context, Value, Context.CharTy); TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } return BuildLiteralOperatorCall(R, OpNameInfo, None, TokLoc, &ExplicitArgs); } case LOLR_StringTemplate: llvm_unreachable("unexpected literal operator lookup result"); } } Expr *Res; if (Literal.isFixedPointLiteral()) { QualType Ty; if (Literal.isAccum) { if (Literal.isHalf) { Ty = Context.ShortAccumTy; } else if (Literal.isLong) { Ty = Context.LongAccumTy; } else { Ty = Context.AccumTy; } } else if (Literal.isFract) { if (Literal.isHalf) { Ty = Context.ShortFractTy; } else if (Literal.isLong) { Ty = Context.LongFractTy; } else { Ty = Context.FractTy; } } if (Literal.isUnsigned) Ty = Context.getCorrespondingUnsignedType(Ty); bool isSigned = !Literal.isUnsigned; unsigned scale = Context.getFixedPointScale(Ty); unsigned bit_width = Context.getTypeInfo(Ty).Width; llvm::APInt Val(bit_width, 0, isSigned); bool Overflowed = Literal.GetFixedPointValue(Val, scale); bool ValIsZero = Val.isNullValue() && !Overflowed; auto MaxVal = Context.getFixedPointMax(Ty).getValue(); if (Literal.isFract && Val == MaxVal + 1 && !ValIsZero) // Clause 6.4.4 - The value of a constant shall be in the range of // representable values for its type, with exception for constants of a // fract type with a value of exactly 1; such a constant shall denote // the maximal value for the type. --Val; else if (Val.ugt(MaxVal) || Overflowed) Diag(Tok.getLocation(), diag::err_too_large_for_fixed_point); Res = FixedPointLiteral::CreateFromRawInt(Context, Val, Ty, Tok.getLocation(), scale); } else if (Literal.isFloatingLiteral()) { QualType Ty; if (Literal.isHalf){ if (getOpenCLOptions().isEnabled("cl_khr_fp16")) Ty = Context.HalfTy; else { Diag(Tok.getLocation(), diag::err_half_const_requires_fp16); return ExprError(); } } else if (Literal.isFloat) Ty = Context.FloatTy; else if (Literal.isLong) Ty = Context.LongDoubleTy; else if (Literal.isFloat16) Ty = Context.Float16Ty; else if (Literal.isFloat128) Ty = Context.Float128Ty; else Ty = Context.DoubleTy; Res = BuildFloatingLiteral(*this, Literal, Ty, Tok.getLocation()); if (Ty == Context.DoubleTy) { if (getLangOpts().SinglePrecisionConstants) { const BuiltinType *BTy = Ty->getAs(); if (BTy->getKind() != BuiltinType::Float) { Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); } } else if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp64")) { // Impose single-precision float type when cl_khr_fp64 is not enabled. Diag(Tok.getLocation(), diag::warn_double_const_requires_fp64); Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); } } } else if (!Literal.isIntegerLiteral()) { return ExprError(); } else { QualType Ty; // 'long long' is a C99 or C++11 feature. if (!getLangOpts().C99 && Literal.isLongLong) { if (getLangOpts().CPlusPlus) Diag(Tok.getLocation(), getLangOpts().CPlusPlus11 ? diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong); else Diag(Tok.getLocation(), diag::ext_c99_longlong); } // Get the value in the widest-possible width. 
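    //
    // E.g. (illustrative, assuming a typical target where uintmax_t is
    // 64 bits wide), both of these are first materialized in a 64-bit APInt:
    //
    //   18446744073709551615u   // fits exactly in 64 bits
    //   99999999999999999999    // does not fit: diagnosed below and forced
    //                           // to unsigned long long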
unsigned MaxWidth = Context.getTargetInfo().getIntMaxTWidth(); llvm::APInt ResultVal(MaxWidth, 0); if (Literal.GetIntegerValue(ResultVal)) { // If this value didn't fit into uintmax_t, error and force to ull. Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << /* Unsigned */ 1; Ty = Context.UnsignedLongLongTy; assert(Context.getTypeSize(Ty) == ResultVal.getBitWidth() && "long long is not intmax_t?"); } else { // If this value fits into a ULL, try to figure out what else it fits into // according to the rules of C99 6.4.4.1p5. // Octal, Hexadecimal, and integers with a U suffix are allowed to // be an unsigned int. bool AllowUnsigned = Literal.isUnsigned || Literal.getRadix() != 10; // Check from smallest to largest, picking the smallest type we can. unsigned Width = 0; // Microsoft specific integer suffixes are explicitly sized. if (Literal.MicrosoftInteger) { if (Literal.MicrosoftInteger == 8 && !Literal.isUnsigned) { Width = 8; Ty = Context.CharTy; } else { Width = Literal.MicrosoftInteger; Ty = Context.getIntTypeForBitwidth(Width, /*Signed=*/!Literal.isUnsigned); } } if (Ty.isNull() && !Literal.isLong && !Literal.isLongLong) { // Are int/unsigned possibilities? unsigned IntSize = Context.getTargetInfo().getIntWidth(); // Does it fit in a unsigned int? if (ResultVal.isIntN(IntSize)) { // Does it fit in a signed int? if (!Literal.isUnsigned && ResultVal[IntSize-1] == 0) Ty = Context.IntTy; else if (AllowUnsigned) Ty = Context.UnsignedIntTy; Width = IntSize; } } // Are long/unsigned long possibilities? if (Ty.isNull() && !Literal.isLongLong) { unsigned LongSize = Context.getTargetInfo().getLongWidth(); // Does it fit in a unsigned long? if (ResultVal.isIntN(LongSize)) { // Does it fit in a signed long? if (!Literal.isUnsigned && ResultVal[LongSize-1] == 0) Ty = Context.LongTy; else if (AllowUnsigned) Ty = Context.UnsignedLongTy; // Check according to the rules of C90 6.1.3.2p5. C++03 [lex.icon]p2 // is compatible. else if (!getLangOpts().C99 && !getLangOpts().CPlusPlus11) { const unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth(); Diag(Tok.getLocation(), getLangOpts().CPlusPlus ? Literal.isLong ? diag::warn_old_implicitly_unsigned_long_cxx : /*C++98 UB*/ diag:: ext_old_implicitly_unsigned_long_cxx : diag::warn_old_implicitly_unsigned_long) << (LongLongSize > LongSize ? /*will have type 'long long'*/ 0 : /*will be ill-formed*/ 1); Ty = Context.UnsignedLongTy; } Width = LongSize; } } // Check long long if needed. if (Ty.isNull()) { unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth(); // Does it fit in a unsigned long long? if (ResultVal.isIntN(LongLongSize)) { // Does it fit in a signed long long? // To be compatible with MSVC, hex integer literals ending with the // LL or i64 suffix are always signed in Microsoft mode. if (!Literal.isUnsigned && (ResultVal[LongLongSize-1] == 0 || (getLangOpts().MSVCCompat && Literal.isLongLong))) Ty = Context.LongLongTy; else if (AllowUnsigned) Ty = Context.UnsignedLongLongTy; Width = LongLongSize; } } // If we still couldn't decide a type, we probably have something that // does not fit in a signed long long, but has no U suffix. 
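      //
      // Illustrative examples of the ladder above (not from the original
      // source), assuming 32-bit int and long and 64-bit long long:
      //
      //   2147483647            -> int
      //   0x80000000            -> unsigned int (non-decimal literals may
      //                            take unsigned types, C99 6.4.4.1p5)
      //   2147483648            -> long long (decimal, no U suffix)
      //   18446744073709551615  -> lands here: too large for signed long
      //                            long and unsuffixed, so diagnosed below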
if (Ty.isNull()) { Diag(Tok.getLocation(), diag::ext_integer_literal_too_large_for_signed); Ty = Context.UnsignedLongLongTy; Width = Context.getTargetInfo().getLongLongWidth(); } if (ResultVal.getBitWidth() != Width) ResultVal = ResultVal.trunc(Width); } Res = IntegerLiteral::Create(Context, ResultVal, Ty, Tok.getLocation()); } // If this is an imaginary literal, create the ImaginaryLiteral wrapper. if (Literal.isImaginary) { Res = new (Context) ImaginaryLiteral(Res, Context.getComplexType(Res->getType())); Diag(Tok.getLocation(), diag::ext_imaginary_constant); } return Res; } ExprResult Sema::ActOnParenExpr(SourceLocation L, SourceLocation R, Expr *E) { assert(E && "ActOnParenExpr() missing expr"); return new (Context) ParenExpr(L, R, E); } static bool CheckVecStepTraitOperandType(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange) { // [OpenCL 1.1 6.11.12] "The vec_step built-in function takes a built-in // scalar or vector data type argument..." // Every built-in scalar type (OpenCL 1.1 6.1.1) is either an arithmetic // type (C99 6.2.5p18) or void. if (!(T->isArithmeticType() || T->isVoidType() || T->isVectorType())) { S.Diag(Loc, diag::err_vecstep_non_scalar_vector_type) << T << ArgRange; return true; } assert((T->isVoidType() || !T->isIncompleteType()) && "Scalar types should always be complete"); return false; } static bool CheckExtensionTraitOperandType(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange, UnaryExprOrTypeTrait TraitKind) { // Invalid types must be hard errors for SFINAE in C++. if (S.LangOpts.CPlusPlus) return true; // C99 6.5.3.4p1: if (T->isFunctionType() && (TraitKind == UETT_SizeOf || TraitKind == UETT_AlignOf || TraitKind == UETT_PreferredAlignOf)) { // sizeof(function)/alignof(function) is allowed as an extension. S.Diag(Loc, diag::ext_sizeof_alignof_function_type) << TraitKind << ArgRange; return false; } // Allow sizeof(void)/alignof(void) as an extension, unless in OpenCL where // this is an error (OpenCL v1.1 s6.3.k) if (T->isVoidType()) { unsigned DiagID = S.LangOpts.OpenCL ? diag::err_opencl_sizeof_alignof_type : diag::ext_sizeof_alignof_void_type; S.Diag(Loc, DiagID) << TraitKind << ArgRange; return false; } return true; } static bool CheckObjCTraitOperandConstraints(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange, UnaryExprOrTypeTrait TraitKind) { // Reject sizeof(interface) and sizeof(interface) if the // runtime doesn't allow it. if (!S.LangOpts.ObjCRuntime.allowsSizeofAlignof() && T->isObjCObjectType()) { S.Diag(Loc, diag::err_sizeof_nonfragile_interface) << T << (TraitKind == UETT_SizeOf) << ArgRange; return true; } return false; } /// Check whether E is a pointer from a decayed array type (the decayed /// pointer type is equal to T) and emit a warning if it is. static void warnOnSizeofOnArrayDecay(Sema &S, SourceLocation Loc, QualType T, Expr *E) { // Don't warn if the operation changed the type. if (T != E->getType()) return; // Now look for array decays. ImplicitCastExpr *ICE = dyn_cast(E); if (!ICE || ICE->getCastKind() != CK_ArrayToPointerDecay) return; S.Diag(Loc, diag::warn_sizeof_array_decay) << ICE->getSourceRange() << ICE->getType() << ICE->getSubExpr()->getType(); } /// Check the constraints on expression operands to unary type expression /// and type traits. /// /// Completes any types necessary and validates the constraints on the operand /// expression. 
/// The logic mostly mirrors the type-based overload, but may modify
/// the expression as it completes the type for that expression through
/// template instantiation, etc.
bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E,
                                            UnaryExprOrTypeTrait ExprKind) {
  QualType ExprTy = E->getType();
  assert(!ExprTy->isReferenceType());

  if (ExprKind == UETT_VecStep)
    return CheckVecStepTraitOperandType(*this, ExprTy, E->getExprLoc(),
                                        E->getSourceRange());

  // Whitelist some types as extensions
  if (!CheckExtensionTraitOperandType(*this, ExprTy, E->getExprLoc(),
                                      E->getSourceRange(), ExprKind))
    return false;

  // 'alignof' applied to an expression only requires the base element type of
  // the expression to be complete. 'sizeof' requires the expression's type to
  // be complete (and will attempt to complete it if it's an array of unknown
  // bound).
  if (ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf) {
    if (RequireCompleteType(E->getExprLoc(),
                            Context.getBaseElementType(E->getType()),
                            diag::err_sizeof_alignof_incomplete_type, ExprKind,
                            E->getSourceRange()))
      return true;
  } else {
    if (RequireCompleteExprType(E, diag::err_sizeof_alignof_incomplete_type,
                                ExprKind, E->getSourceRange()))
      return true;
  }

  // Completing the expression's type may have changed it.
  ExprTy = E->getType();
  assert(!ExprTy->isReferenceType());

  if (ExprTy->isFunctionType()) {
    Diag(E->getExprLoc(), diag::err_sizeof_alignof_function_type)
        << ExprKind << E->getSourceRange();
    return true;
  }

  // The operand for sizeof and alignof is in an unevaluated expression
  // context, so side effects could result in unintended consequences.
  if ((ExprKind == UETT_SizeOf || ExprKind == UETT_AlignOf ||
       ExprKind == UETT_PreferredAlignOf) &&
      !inTemplateInstantiation() && E->HasSideEffects(Context, false))
    Diag(E->getExprLoc(), diag::warn_side_effects_unevaluated_context);

  if (CheckObjCTraitOperandConstraints(*this, ExprTy, E->getExprLoc(),
                                       E->getSourceRange(), ExprKind))
    return true;

  if (ExprKind == UETT_SizeOf) {
    if (DeclRefExpr *DeclRef = dyn_cast<DeclRefExpr>(E->IgnoreParens())) {
      if (ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(DeclRef->getFoundDecl())) {
        QualType OType = PVD->getOriginalType();
        QualType Type = PVD->getType();
        if (Type->isPointerType() && OType->isArrayType()) {
          Diag(E->getExprLoc(), diag::warn_sizeof_array_param)
            << Type << OType;
          Diag(PVD->getLocation(), diag::note_declared_at);
        }
      }
    }

    // Warn on "sizeof(array op x)" and "sizeof(x op array)", where the array
    // decays into a pointer and returns an unintended result. This is most
    // likely a typo for "sizeof(array) op x".
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E->IgnoreParens())) {
      warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(),
                               BO->getLHS());
      warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(),
                               BO->getRHS());
    }
  }

  return false;
}

/// Check the constraints on operands to unary expression and type
/// traits.
///
/// This will complete any types necessary, and validate the various
/// constraints on those operands.
///
/// The UsualUnaryConversions() function is *not* called by this routine.
/// C99 6.3.2.1p[2-4] all state:
///   Except when it is the operand of the sizeof operator ...
///
/// C++ [expr.sizeof]p4
///   The lvalue-to-rvalue, array-to-pointer, and function-to-pointer
///   standard conversions are not applied to the operand of sizeof.
///
/// This policy is followed for all of the unary trait expressions.
bool Sema::CheckUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation OpLoc, SourceRange ExprRange, UnaryExprOrTypeTrait ExprKind) { if (ExprType->isDependentType()) return false; // C++ [expr.sizeof]p2: // When applied to a reference or a reference type, the result // is the size of the referenced type. // C++11 [expr.alignof]p3: // When alignof is applied to a reference type, the result // shall be the alignment of the referenced type. if (const ReferenceType *Ref = ExprType->getAs()) ExprType = Ref->getPointeeType(); // C11 6.5.3.4/3, C++11 [expr.alignof]p3: // When alignof or _Alignof is applied to an array type, the result // is the alignment of the element type. if (ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf || ExprKind == UETT_OpenMPRequiredSimdAlign) ExprType = Context.getBaseElementType(ExprType); if (ExprKind == UETT_VecStep) return CheckVecStepTraitOperandType(*this, ExprType, OpLoc, ExprRange); // Whitelist some types as extensions if (!CheckExtensionTraitOperandType(*this, ExprType, OpLoc, ExprRange, ExprKind)) return false; if (RequireCompleteType(OpLoc, ExprType, diag::err_sizeof_alignof_incomplete_type, ExprKind, ExprRange)) return true; if (ExprType->isFunctionType()) { Diag(OpLoc, diag::err_sizeof_alignof_function_type) << ExprKind << ExprRange; return true; } if (CheckObjCTraitOperandConstraints(*this, ExprType, OpLoc, ExprRange, ExprKind)) return true; return false; } static bool CheckAlignOfExpr(Sema &S, Expr *E, UnaryExprOrTypeTrait ExprKind) { E = E->IgnoreParens(); // Cannot know anything else if the expression is dependent. if (E->isTypeDependent()) return false; if (E->getObjectKind() == OK_BitField) { S.Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 1 << E->getSourceRange(); return true; } ValueDecl *D = nullptr; if (DeclRefExpr *DRE = dyn_cast(E)) { D = DRE->getDecl(); } else if (MemberExpr *ME = dyn_cast(E)) { D = ME->getMemberDecl(); } // If it's a field, require the containing struct to have a // complete definition so that we can compute the layout. // // This can happen in C++11 onwards, either by naming the member // in a way that is not transformed into a member access expression // (in an unevaluated operand, for instance), or by naming the member // in a trailing-return-type. // // For the record, since __alignof__ on expressions is a GCC // extension, GCC seems to permit this but always gives the // nonsensical answer 0. // // We don't really need the layout here --- we could instead just // directly check for all the appropriate alignment-lowing // attributes --- but that would require duplicating a lot of // logic that just isn't worth duplicating for such a marginal // use-case. if (FieldDecl *FD = dyn_cast_or_null(D)) { // Fast path this check, since we at least know the record has a // definition if we can find a member of it. if (!FD->getParent()->isCompleteDefinition()) { S.Diag(E->getExprLoc(), diag::err_alignof_member_of_incomplete_type) << E->getSourceRange(); return true; } // Otherwise, if it's a field, and the field doesn't have // reference type, then it must have a complete type (or be a // flexible array member, which we explicitly want to // white-list anyway), which makes the following checks trivial. if (!FD->getType()->isReferenceType()) return false; } return S.CheckUnaryExprOrTypeTraitOperand(E, ExprKind); } bool Sema::CheckVecStepExpr(Expr *E) { E = E->IgnoreParens(); // Cannot know anything else if the expression is dependent. 
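  //
  // E.g. (illustrative): an operand whose type names a template parameter is
  // type-dependent, so nothing can be verified until the surrounding
  // template is instantiated with a concrete type.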
if (E->isTypeDependent()) return false; return CheckUnaryExprOrTypeTraitOperand(E, UETT_VecStep); } static void captureVariablyModifiedType(ASTContext &Context, QualType T, CapturingScopeInfo *CSI) { assert(T->isVariablyModifiedType()); assert(CSI != nullptr); // We're going to walk down into the type and look for VLA expressions. do { const Type *Ty = T.getTypePtr(); switch (Ty->getTypeClass()) { #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define NON_CANONICAL_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) #include "clang/AST/TypeNodes.def" T = QualType(); break; // These types are never variably-modified. case Type::Builtin: case Type::Complex: case Type::Vector: case Type::ExtVector: case Type::Record: case Type::Enum: case Type::Elaborated: case Type::TemplateSpecialization: case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::ObjCTypeParam: case Type::Pipe: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: T = cast(Ty)->getOriginalType(); break; case Type::Decayed: T = cast(Ty)->getPointeeType(); break; case Type::Pointer: T = cast(Ty)->getPointeeType(); break; case Type::BlockPointer: T = cast(Ty)->getPointeeType(); break; case Type::LValueReference: case Type::RValueReference: T = cast(Ty)->getPointeeType(); break; case Type::MemberPointer: T = cast(Ty)->getPointeeType(); break; case Type::ConstantArray: case Type::IncompleteArray: // Losing element qualification here is fine. T = cast(Ty)->getElementType(); break; case Type::VariableArray: { // Losing element qualification here is fine. const VariableArrayType *VAT = cast(Ty); // Unknown size indication requires no size computation. // Otherwise, evaluate and record it. if (auto Size = VAT->getSizeExpr()) { if (!CSI->isVLATypeCaptured(VAT)) { RecordDecl *CapRecord = nullptr; if (auto LSI = dyn_cast(CSI)) { CapRecord = LSI->Lambda; } else if (auto CRSI = dyn_cast(CSI)) { CapRecord = CRSI->TheRecordDecl; } if (CapRecord) { auto ExprLoc = Size->getExprLoc(); auto SizeType = Context.getSizeType(); // Build the non-static data member. auto Field = FieldDecl::Create(Context, CapRecord, ExprLoc, ExprLoc, /*Id*/ nullptr, SizeType, /*TInfo*/ nullptr, /*BW*/ nullptr, /*Mutable*/ false, /*InitStyle*/ ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); Field->setCapturedVLAType(VAT); CapRecord->addDecl(Field); CSI->addVLATypeCapture(ExprLoc, SizeType); } } } T = VAT->getElementType(); break; } case Type::FunctionProto: case Type::FunctionNoProto: T = cast(Ty)->getReturnType(); break; case Type::Paren: case Type::TypeOf: case Type::UnaryTransform: case Type::Attributed: case Type::SubstTemplateTypeParm: case Type::PackExpansion: // Keep walking after single level desugaring. T = T.getSingleStepDesugaredType(Context); break; case Type::Typedef: T = cast(Ty)->desugar(); break; case Type::Decltype: T = cast(Ty)->desugar(); break; case Type::Auto: case Type::DeducedTemplateSpecialization: T = cast(Ty)->getDeducedType(); break; case Type::TypeOfExpr: T = cast(Ty)->getUnderlyingExpr()->getType(); break; case Type::Atomic: T = cast(Ty)->getValueType(); break; } } while (!T.isNull() && T->isVariablyModifiedType()); } /// Build a sizeof or alignof expression given a type operand. 
ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(TypeSourceInfo *TInfo, SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind, SourceRange R) { if (!TInfo) return ExprError(); QualType T = TInfo->getType(); if (!T->isDependentType() && CheckUnaryExprOrTypeTraitOperand(T, OpLoc, R, ExprKind)) return ExprError(); if (T->isVariablyModifiedType() && FunctionScopes.size() > 1) { if (auto *TT = T->getAs()) { for (auto I = FunctionScopes.rbegin(), E = std::prev(FunctionScopes.rend()); I != E; ++I) { auto *CSI = dyn_cast(*I); if (CSI == nullptr) break; DeclContext *DC = nullptr; if (auto *LSI = dyn_cast(CSI)) DC = LSI->CallOperator; else if (auto *CRSI = dyn_cast(CSI)) DC = CRSI->TheCapturedDecl; else if (auto *BSI = dyn_cast(CSI)) DC = BSI->TheDecl; if (DC) { if (DC->containsDecl(TT->getDecl())) break; captureVariablyModifiedType(Context, T, CSI); } } } } // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. return new (Context) UnaryExprOrTypeTraitExpr( ExprKind, TInfo, Context.getSizeType(), OpLoc, R.getEnd()); } /// Build a sizeof or alignof expression given an expression /// operand. ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(Expr *E, SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind) { ExprResult PE = CheckPlaceholderExpr(E); if (PE.isInvalid()) return ExprError(); E = PE.get(); // Verify that the operand is valid. bool isInvalid = false; if (E->isTypeDependent()) { // Delay type-checking for type-dependent expressions. } else if (ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf) { isInvalid = CheckAlignOfExpr(*this, E, ExprKind); } else if (ExprKind == UETT_VecStep) { isInvalid = CheckVecStepExpr(E); } else if (ExprKind == UETT_OpenMPRequiredSimdAlign) { Diag(E->getExprLoc(), diag::err_openmp_default_simd_align_expr); isInvalid = true; } else if (E->refersToBitField()) { // C99 6.5.3.4p1. Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 0; isInvalid = true; } else { isInvalid = CheckUnaryExprOrTypeTraitOperand(E, UETT_SizeOf); } if (isInvalid) return ExprError(); if (ExprKind == UETT_SizeOf && E->getType()->isVariableArrayType()) { PE = TransformToPotentiallyEvaluated(E); if (PE.isInvalid()) return ExprError(); E = PE.get(); } // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. return new (Context) UnaryExprOrTypeTraitExpr( ExprKind, E, Context.getSizeType(), OpLoc, E->getSourceRange().getEnd()); } /// ActOnUnaryExprOrTypeTraitExpr - Handle @c sizeof(type) and @c sizeof @c /// expr and the same for @c alignof and @c __alignof /// Note that the ArgRange is invalid if isType is false. ExprResult Sema::ActOnUnaryExprOrTypeTraitExpr(SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind, bool IsType, void *TyOrEx, SourceRange ArgRange) { // If error parsing type, ignore. if (!TyOrEx) return ExprError(); if (IsType) { TypeSourceInfo *TInfo; (void) GetTypeFromParser(ParsedType::getFromOpaquePtr(TyOrEx), &TInfo); return CreateUnaryExprOrTypeTraitExpr(TInfo, OpLoc, ExprKind, ArgRange); } Expr *ArgEx = (Expr *)TyOrEx; ExprResult Result = CreateUnaryExprOrTypeTraitExpr(ArgEx, OpLoc, ExprKind); return Result; } static QualType CheckRealImagOperand(Sema &S, ExprResult &V, SourceLocation Loc, bool IsReal) { if (V.get()->isTypeDependent()) return S.Context.DependentTy; // _Real and _Imag are only l-values for normal l-values. if (V.get()->getObjectKind() != OK_Ordinary) { V = S.DefaultLvalueConversion(V.get()); if (V.isInvalid()) return QualType(); } // These operators return the element type of a complex type. 
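  //
  // For example (illustrative, not from the original source):
  //
  //   _Complex double z;
  //   __real z;   // has type double
  //   __imag z;   // has type double
  //
  // while '__real i' on a plain 'int i' simply yields 'int' below.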
  if (const ComplexType *CT = V.get()->getType()->getAs<ComplexType>())
    return CT->getElementType();

  // Otherwise they pass through real integer and floating point types here.
  if (V.get()->getType()->isArithmeticType())
    return V.get()->getType();

  // Test for placeholders.
  ExprResult PR = S.CheckPlaceholderExpr(V.get());
  if (PR.isInvalid()) return QualType();
  if (PR.get() != V.get()) {
    V = PR;
    return CheckRealImagOperand(S, V, Loc, IsReal);
  }

  // Reject anything else.
  S.Diag(Loc, diag::err_realimag_invalid_type) << V.get()->getType()
    << (IsReal ? "__real" : "__imag");
  return QualType();
}

ExprResult
Sema::ActOnPostfixUnaryOp(Scope *S, SourceLocation OpLoc,
                          tok::TokenKind Kind, Expr *Input) {
  UnaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown unary op!");
  case tok::plusplus:   Opc = UO_PostInc; break;
  case tok::minusminus: Opc = UO_PostDec; break;
  }

  // Since this might be a postfix expression, get rid of ParenListExprs.
  ExprResult Result = MaybeConvertParenListExprToParenExpr(S, Input);
  if (Result.isInvalid()) return ExprError();
  Input = Result.get();

  return BuildUnaryOp(S, OpLoc, Opc, Input);
}

/// Diagnose if arithmetic on the given ObjC pointer is illegal.
///
/// \return true on error
static bool checkArithmeticOnObjCPointer(Sema &S,
                                         SourceLocation opLoc,
                                         Expr *op) {
  assert(op->getType()->isObjCObjectPointerType());
  if (S.LangOpts.ObjCRuntime.allowsPointerArithmetic() &&
      !S.LangOpts.ObjCSubscriptingLegacyRuntime)
    return false;

  S.Diag(opLoc, diag::err_arithmetic_nonfragile_interface)
    << op->getType()->castAs<ObjCObjectPointerType>()->getPointeeType()
    << op->getSourceRange();

  return true;
}

static bool isMSPropertySubscriptExpr(Sema &S, Expr *Base) {
  auto *BaseNoParens = Base->IgnoreParens();
  if (auto *MSProp = dyn_cast<MSPropertyRefExpr>(BaseNoParens))
    return MSProp->getPropertyDecl()->getType()->isArrayType();
  return isa<MSPropertySubscriptExpr>(BaseNoParens);
}

ExprResult
Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc,
                              Expr *idx, SourceLocation rbLoc) {
  if (base && !base->getType().isNull() &&
      base->getType()->isSpecificPlaceholderType(BuiltinType::OMPArraySection))
    return ActOnOMPArraySectionExpr(base, lbLoc, idx, SourceLocation(),
                                    /*Length=*/nullptr, rbLoc);

  // Since this might be a postfix expression, get rid of ParenListExprs.
  if (isa<ParenListExpr>(base)) {
    ExprResult result = MaybeConvertParenListExprToParenExpr(S, base);
    if (result.isInvalid()) return ExprError();
    base = result.get();
  }

  // Handle any non-overload placeholder types in the base and index
  // expressions. We can't handle overloads here because the other
  // operand might be an overloadable type, in which case the overload
  // resolution for the operator overload should get the first crack
  // at the overload.
  bool IsMSPropertySubscript = false;
  if (base->getType()->isNonOverloadPlaceholderType()) {
    IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base);
    if (!IsMSPropertySubscript) {
      ExprResult result = CheckPlaceholderExpr(base);
      if (result.isInvalid()) return ExprError();
      base = result.get();
    }
  }
  if (idx->getType()->isNonOverloadPlaceholderType()) {
    ExprResult result = CheckPlaceholderExpr(idx);
    if (result.isInvalid()) return ExprError();
    idx = result.get();
  }

  // Build an unanalyzed expression if either operand is type-dependent.
if (getLangOpts().CPlusPlus && (base->isTypeDependent() || idx->isTypeDependent())) { return new (Context) ArraySubscriptExpr(base, idx, Context.DependentTy, VK_LValue, OK_Ordinary, rbLoc); } // MSDN, property (C++) // https://msdn.microsoft.com/en-us/library/yhfk0thd(v=vs.120).aspx // This attribute can also be used in the declaration of an empty array in a // class or structure definition. For example: // __declspec(property(get=GetX, put=PutX)) int x[]; // The above statement indicates that x[] can be used with one or more array // indices. In this case, i=p->x[a][b] will be turned into i=p->GetX(a, b), // and p->x[a][b] = i will be turned into p->PutX(a, b, i); if (IsMSPropertySubscript) { // Build MS property subscript expression if base is MS property reference // or MS property subscript. return new (Context) MSPropertySubscriptExpr( base, idx, Context.PseudoObjectTy, VK_LValue, OK_Ordinary, rbLoc); } // Use C++ overloaded-operator rules if either operand has record // type. The spec says to do this if either type is *overloadable*, // but enum types can't declare subscript operators or conversion // operators, so there's nothing interesting for overload resolution // to do if there aren't any record types involved. // // ObjC pointers have their own subscripting logic that is not tied // to overload resolution and so should not take this path. if (getLangOpts().CPlusPlus && (base->getType()->isRecordType() || (!base->getType()->isObjCObjectPointerType() && idx->getType()->isRecordType()))) { return CreateOverloadedArraySubscriptExpr(lbLoc, rbLoc, base, idx); } ExprResult Res = CreateBuiltinArraySubscriptExpr(base, lbLoc, idx, rbLoc); if (!Res.isInvalid() && isa(Res.get())) CheckSubscriptAccessOfNoDeref(cast(Res.get())); return Res; } void Sema::CheckAddressOfNoDeref(const Expr *E) { ExpressionEvaluationContextRecord &LastRecord = ExprEvalContexts.back(); const Expr *StrippedExpr = E->IgnoreParenImpCasts(); // For expressions like `&(*s).b`, the base is recorded and what should be // checked. const MemberExpr *Member = nullptr; while ((Member = dyn_cast(StrippedExpr)) && !Member->isArrow()) StrippedExpr = Member->getBase()->IgnoreParenImpCasts(); LastRecord.PossibleDerefs.erase(StrippedExpr); } void Sema::CheckSubscriptAccessOfNoDeref(const ArraySubscriptExpr *E) { QualType ResultTy = E->getType(); ExpressionEvaluationContextRecord &LastRecord = ExprEvalContexts.back(); // Bail if the element is an array since it is not memory access. if (isa(ResultTy)) return; if (ResultTy->hasAttr(attr::NoDeref)) { LastRecord.PossibleDerefs.insert(E); return; } // Check if the base type is a pointer to a member access of a struct // marked with noderef. 
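  //
  // A sketch of the attribute usage involved (illustrative, not from the
  // original source):
  //
  //   int __attribute__((noderef)) *p;
  //   int x = p[3];   // subscripting is a dereference, so the access is
  //                   // recorded here as a possible noderef violation.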
const Expr *Base = E->getBase(); QualType BaseTy = Base->getType(); if (!(isa(BaseTy) || isa(BaseTy))) // Not a pointer access return; const MemberExpr *Member = nullptr; while ((Member = dyn_cast(Base->IgnoreParenCasts())) && Member->isArrow()) Base = Member->getBase(); if (const auto *Ptr = dyn_cast(Base->getType())) { if (Ptr->getPointeeType()->hasAttr(attr::NoDeref)) LastRecord.PossibleDerefs.insert(E); } } ExprResult Sema::ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc, Expr *LowerBound, SourceLocation ColonLoc, Expr *Length, SourceLocation RBLoc) { if (Base->getType()->isPlaceholderType() && !Base->getType()->isSpecificPlaceholderType( BuiltinType::OMPArraySection)) { ExprResult Result = CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } if (LowerBound && LowerBound->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(LowerBound); if (Result.isInvalid()) return ExprError(); Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) return ExprError(); LowerBound = Result.get(); } if (Length && Length->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(Length); if (Result.isInvalid()) return ExprError(); Result = DefaultLvalueConversion(Result.get()); if (Result.isInvalid()) return ExprError(); Length = Result.get(); } // Build an unanalyzed expression if either operand is type-dependent. if (Base->isTypeDependent() || (LowerBound && (LowerBound->isTypeDependent() || LowerBound->isValueDependent())) || (Length && (Length->isTypeDependent() || Length->isValueDependent()))) { return new (Context) OMPArraySectionExpr(Base, LowerBound, Length, Context.DependentTy, VK_LValue, OK_Ordinary, ColonLoc, RBLoc); } // Perform default conversions. QualType OriginalTy = OMPArraySectionExpr::getBaseOriginalType(Base); QualType ResultTy; if (OriginalTy->isAnyPointerType()) { ResultTy = OriginalTy->getPointeeType(); } else if (OriginalTy->isArrayType()) { ResultTy = OriginalTy->getAsArrayTypeUnsafe()->getElementType(); } else { return ExprError( Diag(Base->getExprLoc(), diag::err_omp_typecheck_section_value) << Base->getSourceRange()); } // C99 6.5.2.1p1 if (LowerBound) { auto Res = PerformOpenMPImplicitIntegerConversion(LowerBound->getExprLoc(), LowerBound); if (Res.isInvalid()) return ExprError(Diag(LowerBound->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 0 << LowerBound->getSourceRange()); LowerBound = Res.get(); if (LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(LowerBound->getExprLoc(), diag::warn_omp_section_is_char) << 0 << LowerBound->getSourceRange(); } if (Length) { auto Res = PerformOpenMPImplicitIntegerConversion(Length->getExprLoc(), Length); if (Res.isInvalid()) return ExprError(Diag(Length->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 1 << Length->getSourceRange()); Length = Res.get(); if (Length->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || Length->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(Length->getExprLoc(), diag::warn_omp_section_is_char) << 1 << Length->getSourceRange(); } // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, // C++ [expr.sub]p1: The type "T" shall be a completely-defined object // type. Note that functions are not objects, and that (in C99 parlance) // incomplete types are not object types. 
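  //
  // For reference (illustrative, not from the original source), the sections
  // being validated look like:
  //
  //   #pragma omp target map(a[0:n])   // base 'a', lower bound 0, length n
  //
  // and the base must designate an object type, so a function type or an
  // incomplete element type is rejected just below.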
if (ResultTy->isFunctionType()) { Diag(Base->getExprLoc(), diag::err_omp_section_function_type) << ResultTy << Base->getSourceRange(); return ExprError(); } if (RequireCompleteType(Base->getExprLoc(), ResultTy, diag::err_omp_section_incomplete_type, Base)) return ExprError(); if (LowerBound && !OriginalTy->isAnyPointerType()) { Expr::EvalResult Result; if (LowerBound->EvaluateAsInt(Result, Context)) { // OpenMP 4.5, [2.4 Array Sections] // The array section must be a subset of the original array. llvm::APSInt LowerBoundValue = Result.Val.getInt(); if (LowerBoundValue.isNegative()) { Diag(LowerBound->getExprLoc(), diag::err_omp_section_not_subset_of_array) << LowerBound->getSourceRange(); return ExprError(); } } } if (Length) { Expr::EvalResult Result; if (Length->EvaluateAsInt(Result, Context)) { // OpenMP 4.5, [2.4 Array Sections] // The length must evaluate to non-negative integers. llvm::APSInt LengthValue = Result.Val.getInt(); if (LengthValue.isNegative()) { Diag(Length->getExprLoc(), diag::err_omp_section_length_negative) << LengthValue.toString(/*Radix=*/10, /*Signed=*/true) << Length->getSourceRange(); return ExprError(); } } } else if (ColonLoc.isValid() && (OriginalTy.isNull() || (!OriginalTy->isConstantArrayType() && !OriginalTy->isVariableArrayType()))) { // OpenMP 4.5, [2.4 Array Sections] // When the size of the array dimension is not known, the length must be // specified explicitly. Diag(ColonLoc, diag::err_omp_section_length_undefined) << (!OriginalTy.isNull() && OriginalTy->isArrayType()); return ExprError(); } if (!Base->getType()->isSpecificPlaceholderType( BuiltinType::OMPArraySection)) { ExprResult Result = DefaultFunctionArrayLvalueConversion(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } return new (Context) OMPArraySectionExpr(Base, LowerBound, Length, Context.OMPArraySectionTy, VK_LValue, OK_Ordinary, ColonLoc, RBLoc); } ExprResult Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, Expr *Idx, SourceLocation RLoc) { Expr *LHSExp = Base; Expr *RHSExp = Idx; ExprValueKind VK = VK_LValue; ExprObjectKind OK = OK_Ordinary; // Per C++ core issue 1213, the result is an xvalue if either operand is // a non-lvalue array, and an lvalue otherwise. if (getLangOpts().CPlusPlus11) { for (auto *Op : {LHSExp, RHSExp}) { Op = Op->IgnoreImplicit(); if (Op->getType()->isArrayType() && !Op->isLValue()) VK = VK_XValue; } } // Perform default conversions. if (!LHSExp->getType()->getAs()) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); LHSExp = Result.get(); } ExprResult Result = DefaultFunctionArrayLvalueConversion(RHSExp); if (Result.isInvalid()) return ExprError(); RHSExp = Result.get(); QualType LHSTy = LHSExp->getType(), RHSTy = RHSExp->getType(); // C99 6.5.2.1p2: the expression e1[e2] is by definition precisely equivalent // to the expression *((e1)+(e2)). This means the array "Base" may actually be // in the subscript position. As a result, we need to derive the array base // and index from the expression types. 
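  //
  // Illustrative consequence (not from the original source): both spellings
  //
  //   p[4]   and   4[p]
  //
  // denote *((p)+(4)), so either the left or the right operand may turn out
  // to be the pointer; the checks below handle both orders.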
Expr *BaseExpr, *IndexExpr; QualType ResultType; if (LHSTy->isDependentType() || RHSTy->isDependentType()) { BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = Context.DependentTy; } else if (const PointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = PTy->getPointeeType(); } else if (const ObjCObjectPointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; // Use custom logic if this should be the pseudo-object subscript // expression. if (!LangOpts.isSubscriptPointerArithmetic()) return BuildObjCSubscriptExpression(RLoc, BaseExpr, IndexExpr, nullptr, nullptr); ResultType = PTy->getPointeeType(); } else if (const PointerType *PTy = RHSTy->getAs()) { // Handle the uncommon case of "123[Ptr]". BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = PTy->getPointeeType(); } else if (const ObjCObjectPointerType *PTy = RHSTy->getAs()) { // Handle the uncommon case of "123[Ptr]". BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = PTy->getPointeeType(); if (!LangOpts.isSubscriptPointerArithmetic()) { Diag(LLoc, diag::err_subscript_nonfragile_interface) << ResultType << BaseExpr->getSourceRange(); return ExprError(); } } else if (const VectorType *VTy = LHSTy->getAs()) { BaseExpr = LHSExp; // vectors: V[123] IndexExpr = RHSExp; // We apply C++ DR1213 to vector subscripting too. if (getLangOpts().CPlusPlus11 && LHSExp->getValueKind() == VK_RValue) { ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); if (Materialized.isInvalid()) return ExprError(); LHSExp = Materialized.get(); } VK = LHSExp->getValueKind(); if (VK != VK_RValue) OK = OK_VectorComponent; ResultType = VTy->getElementType(); QualType BaseType = BaseExpr->getType(); Qualifiers BaseQuals = BaseType.getQualifiers(); Qualifiers MemberQuals = ResultType.getQualifiers(); Qualifiers Combined = BaseQuals + MemberQuals; if (Combined != MemberQuals) ResultType = Context.getQualifiedType(ResultType, Combined); } else if (LHSTy->isArrayType()) { // If we see an array that wasn't promoted by // DefaultFunctionArrayLvalueConversion, it must be an array that // wasn't promoted because of the C90 rule that doesn't // allow promoting non-lvalue arrays. Warn, then // force the promotion here. 
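    //
    // E.g. (illustrative, not from the original source), under C90:
    //
    //   struct S { int a[4]; };
    //   struct S f(void);
    //   int x = f().a[0];   // 'f().a' is a non-lvalue array: C90 has no
    //                       // implicit decay for it, so it is diagnosed
    //                       // and then decayed here by hand.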
Diag(LHSExp->getBeginLoc(), diag::ext_subscript_non_lvalue) << LHSExp->getSourceRange(); LHSExp = ImpCastExprToType(LHSExp, Context.getArrayDecayedType(LHSTy), CK_ArrayToPointerDecay).get(); LHSTy = LHSExp->getType(); BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = LHSTy->getAs<PointerType>()->getPointeeType(); } else if (RHSTy->isArrayType()) { // Same as previous, except for the 123[f().a] case Diag(RHSExp->getBeginLoc(), diag::ext_subscript_non_lvalue) << RHSExp->getSourceRange(); RHSExp = ImpCastExprToType(RHSExp, Context.getArrayDecayedType(RHSTy), CK_ArrayToPointerDecay).get(); RHSTy = RHSExp->getType(); BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = RHSTy->getAs<PointerType>()->getPointeeType(); } else { return ExprError(Diag(LLoc, diag::err_typecheck_subscript_value) << LHSExp->getSourceRange() << RHSExp->getSourceRange()); } // C99 6.5.2.1p1 if (!IndexExpr->getType()->isIntegerType() && !IndexExpr->isTypeDependent()) return ExprError(Diag(LLoc, diag::err_typecheck_subscript_not_integer) << IndexExpr->getSourceRange()); if ((IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) && !IndexExpr->isTypeDependent()) Diag(LLoc, diag::warn_subscript_is_char) << IndexExpr->getSourceRange(); // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, // C++ [expr.sub]p1: The type "T" shall be a completely-defined object // type. Note that Functions are not objects, and that (in C99 parlance) // incomplete types are not object types. if (ResultType->isFunctionType()) { Diag(BaseExpr->getBeginLoc(), diag::err_subscript_function_type) << ResultType << BaseExpr->getSourceRange(); return ExprError(); } if (ResultType->isVoidType() && !getLangOpts().CPlusPlus) { // GNU extension: subscripting on pointer to void Diag(LLoc, diag::ext_gnu_subscript_void_type) << BaseExpr->getSourceRange(); // C forbids expressions of unqualified void type from being l-values. // See IsCForbiddenLValueType. if (!ResultType.hasQualifiers()) VK = VK_RValue; } else if (!ResultType->isDependentType() && RequireCompleteType(LLoc, ResultType, diag::err_subscript_incomplete_type, BaseExpr)) return ExprError(); assert(VK == VK_RValue || LangOpts.CPlusPlus || !ResultType.isCForbiddenLValueType()); return new (Context) ArraySubscriptExpr(LHSExp, RHSExp, ResultType, VK, OK, RLoc); } bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { if (Param->hasUnparsedDefaultArg()) { Diag(CallLoc, diag::err_use_of_default_argument_to_function_declared_later) << FD << cast<CXXRecordDecl>(FD->getDeclContext())->getDeclName(); Diag(UnparsedDefaultArgLocs[Param], diag::note_default_argument_declared_here); return true; } if (Param->hasUninstantiatedDefaultArg()) { Expr *UninstExpr = Param->getUninstantiatedDefaultArg(); EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); // Instantiate the expression. // // FIXME: Pass in a correct Pattern argument, otherwise // getTemplateInstantiationArgs uses the lexical context of FD, e.g. // // template <typename T> // struct A { // static int FooImpl(); // // template <typename Tp> // // bug: default argument A<T>::FooImpl() is evaluated with 2-level // // template argument list [[T], [Tp]], should be [[Tp]].
// friend A<Tp> Foo(int a); // }; // // template <typename T> // A<T> Foo(int a = A<T>::FooImpl()); MultiLevelTemplateArgumentList MultiLevelArgList = getTemplateInstantiationArgs(FD, nullptr, /*RelativeToPrimary=*/true); InstantiatingTemplate Inst(*this, CallLoc, Param, MultiLevelArgList.getInnermost()); if (Inst.isInvalid()) return true; if (Inst.isAlreadyInstantiating()) { Diag(Param->getBeginLoc(), diag::err_recursive_default_argument) << FD; Param->setInvalidDecl(); return true; } ExprResult Result; { // C++ [dcl.fct.default]p5: // The names in the [default argument] expression are bound, and // the semantic constraints are checked, at the point where the // default argument expression appears. ContextRAII SavedContext(*this, FD); LocalInstantiationScope Local(*this); Result = SubstInitializer(UninstExpr, MultiLevelArgList, /*DirectInit*/false); } if (Result.isInvalid()) return true; // Check the expression as an initializer for the parameter. InitializedEntity Entity = InitializedEntity::InitializeParameter(Context, Param); InitializationKind Kind = InitializationKind::CreateCopy( Param->getLocation(), /*FIXME:EqualLoc*/ UninstExpr->getBeginLoc()); Expr *ResultE = Result.getAs<Expr>(); InitializationSequence InitSeq(*this, Entity, Kind, ResultE); Result = InitSeq.Perform(*this, Entity, Kind, ResultE); if (Result.isInvalid()) return true; Result = ActOnFinishFullExpr(Result.getAs<Expr>(), Param->getOuterLocStart()); if (Result.isInvalid()) return true; // Remember the instantiated default argument. Param->setDefaultArg(Result.getAs<Expr>()); if (ASTMutationListener *L = getASTMutationListener()) { L->DefaultArgumentInstantiated(Param); } } // If the default argument expression is not set yet, we are building it now. if (!Param->hasInit()) { Diag(Param->getBeginLoc(), diag::err_recursive_default_argument) << FD; Param->setInvalidDecl(); return true; } // If the default expression creates temporaries, we need to // push them to the current stack of expression temporaries so they'll // be properly destroyed. // FIXME: We should really be rebuilding the default argument with new // bound temporaries; see the comment in PR5810. // We don't need to do that with block decls, though, because // blocks in default argument expressions can never capture anything. if (auto Init = dyn_cast<ExprWithCleanups>(Param->getInit())) { // Set the "needs cleanups" bit regardless of whether there are // any explicit objects. Cleanup.setExprNeedsCleanups(Init->cleanupsHaveSideEffects()); // Append all the objects to the cleanup list. Right now, this // should always be a no-op, because blocks in default argument // expressions should never be able to capture anything. assert(!Init->getNumObjects() && "default argument expression has capturing blocks?"); } // We already type-checked the argument, so we know it works. // Just mark all of the declarations in this potentially-evaluated expression // as being "referenced".
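// Sketch of the case handled above (illustrative, standard C++ only): the
// default argument is instantiated and checked at the call site, after which
// its declarations are marked referenced below.
//
//   template <typename T> struct Box { Box(T v = T()) {} };
//   Box<int> b;   // 'T()' becomes 'int()' here, then is checked as an init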
MarkDeclarationsReferencedInExpr(Param->getDefaultArg(), /*SkipLocalVariables=*/true); return false; } ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { if (CheckCXXDefaultArgExpr(CallLoc, FD, Param)) return ExprError(); return CXXDefaultArgExpr::Create(Context, CallLoc, Param); } Sema::VariadicCallType Sema::getVariadicCallType(FunctionDecl *FDecl, const FunctionProtoType *Proto, Expr *Fn) { if (Proto && Proto->isVariadic()) { if (dyn_cast_or_null<CXXConstructorDecl>(FDecl)) return VariadicConstructor; else if (Fn && Fn->getType()->isBlockPointerType()) return VariadicBlock; else if (FDecl) { if (CXXMethodDecl *Method = dyn_cast_or_null<CXXMethodDecl>(FDecl)) if (Method->isInstance()) return VariadicMethod; } else if (Fn && Fn->getType() == Context.BoundMemberTy) return VariadicMethod; return VariadicFunction; } return VariadicDoesNotApply; } namespace { class FunctionCallCCC : public FunctionCallFilterCCC { public: FunctionCallCCC(Sema &SemaRef, const IdentifierInfo *FuncName, unsigned NumArgs, MemberExpr *ME) : FunctionCallFilterCCC(SemaRef, NumArgs, /*HasExplicitTemplateArgs=*/false, ME), FunctionName(FuncName) {} bool ValidateCandidate(const TypoCorrection &candidate) override { if (!candidate.getCorrectionSpecifier() || candidate.getCorrectionAsIdentifierInfo() != FunctionName) { return false; } return FunctionCallFilterCCC::ValidateCandidate(candidate); } private: const IdentifierInfo *const FunctionName; }; } static TypoCorrection TryTypoCorrectionForCall(Sema &S, Expr *Fn, FunctionDecl *FDecl, ArrayRef<Expr *> Args) { MemberExpr *ME = dyn_cast<MemberExpr>(Fn); DeclarationName FuncName = FDecl->getDeclName(); SourceLocation NameLoc = ME ? ME->getMemberLoc() : Fn->getBeginLoc(); if (TypoCorrection Corrected = S.CorrectTypo( DeclarationNameInfo(FuncName, NameLoc), Sema::LookupOrdinaryName, S.getScopeForContext(S.CurContext), nullptr, llvm::make_unique<FunctionCallCCC>(S, FuncName.getAsIdentifierInfo(), Args.size(), ME), Sema::CTK_ErrorRecovery)) { if (NamedDecl *ND = Corrected.getFoundDecl()) { if (Corrected.isOverloaded()) { OverloadCandidateSet OCS(NameLoc, OverloadCandidateSet::CSK_Normal); OverloadCandidateSet::iterator Best; for (NamedDecl *CD : Corrected) { if (FunctionDecl *FD = dyn_cast<FunctionDecl>(CD)) S.AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args, OCS); } switch (OCS.BestViableFunction(S, NameLoc, Best)) { case OR_Success: ND = Best->FoundDecl; Corrected.setCorrectionDecl(ND); break; default: break; } } ND = ND->getUnderlyingDecl(); if (isa<FunctionDecl>(ND) || isa<FunctionTemplateDecl>(ND)) return Corrected; } } return TypoCorrection(); } /// ConvertArgumentsForCall - Converts the arguments specified in /// Args/NumArgs to the parameter types of the function FDecl with /// function prototype Proto. Call is the call expression itself, and /// Fn is the function expression. For a C++ member function, this /// routine does not attempt to convert the object argument. Returns /// true if the call is ill-formed. bool Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn, FunctionDecl *FDecl, const FunctionProtoType *Proto, ArrayRef<Expr *> Args, SourceLocation RParenLoc, bool IsExecConfig) { // Bail out early if calling a builtin with custom typechecking. if (FDecl) if (unsigned ID = FDecl->getBuiltinID()) if (Context.BuiltinInfo.hasCustomTypechecking(ID)) return false; // C99 6.5.2.2p7 - the arguments are implicitly converted, as if by // assignment, to the types of the corresponding parameter, ... unsigned NumParams = Proto->getNumParams(); bool Invalid = false; unsigned MinArgs = FDecl ?
FDecl->getMinRequiredArguments() : NumParams; unsigned FnKind = Fn->getType()->isBlockPointerType() ? 1 /* block */ : (IsExecConfig ? 3 /* kernel function (exec config) */ : 0 /* function */); // If too few arguments are available (and we don't have default // arguments for the remaining parameters), don't make the call. if (Args.size() < NumParams) { if (Args.size() < MinArgs) { TypoCorrection TC; if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) { unsigned diag_id = MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args_suggest : diag::err_typecheck_call_too_few_args_at_least_suggest; diagnoseTypo(TC, PDiag(diag_id) << FnKind << MinArgs << static_cast<unsigned>(Args.size()) << TC.getCorrectionRange()); } else if (MinArgs == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName()) Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args_one : diag::err_typecheck_call_too_few_args_at_least_one) << FnKind << FDecl->getParamDecl(0) << Fn->getSourceRange(); else Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args : diag::err_typecheck_call_too_few_args_at_least) << FnKind << MinArgs << static_cast<unsigned>(Args.size()) << Fn->getSourceRange(); // Emit the location of the prototype. if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig) Diag(FDecl->getBeginLoc(), diag::note_callee_decl) << FDecl; return true; } // We reserve space for the default arguments when we create // the call expression, before calling ConvertArgumentsForCall. assert((Call->getNumArgs() == NumParams) && "We should have reserved space for the default arguments before!"); } // If too many are passed and not variadic, error on the extras and drop // them. if (Args.size() > NumParams) { if (!Proto->isVariadic()) { TypoCorrection TC; if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) { unsigned diag_id = MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_many_args_suggest : diag::err_typecheck_call_too_many_args_at_most_suggest; diagnoseTypo(TC, PDiag(diag_id) << FnKind << NumParams << static_cast<unsigned>(Args.size()) << TC.getCorrectionRange()); } else if (NumParams == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName()) Diag(Args[NumParams]->getBeginLoc(), MinArgs == NumParams ? diag::err_typecheck_call_too_many_args_one : diag::err_typecheck_call_too_many_args_at_most_one) << FnKind << FDecl->getParamDecl(0) << static_cast<unsigned>(Args.size()) << Fn->getSourceRange() << SourceRange(Args[NumParams]->getBeginLoc(), Args.back()->getEndLoc()); else Diag(Args[NumParams]->getBeginLoc(), MinArgs == NumParams ? diag::err_typecheck_call_too_many_args : diag::err_typecheck_call_too_many_args_at_most) << FnKind << NumParams << static_cast<unsigned>(Args.size()) << Fn->getSourceRange() << SourceRange(Args[NumParams]->getBeginLoc(), Args.back()->getEndLoc()); // Emit the location of the prototype. if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig) Diag(FDecl->getBeginLoc(), diag::note_callee_decl) << FDecl; // This deletes the extra arguments.
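// Illustrative C input for the recovery below (hypothetical declarations):
//
//   void f(int);
//   void g(void) { f(1, 2, 3); }   /* error emitted above; the two extra
//                                     arguments are then dropped */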
Call->shrinkNumArgs(NumParams); return true; } } SmallVector AllArgs; VariadicCallType CallType = getVariadicCallType(FDecl, Proto, Fn); Invalid = GatherArgumentsForCall(Call->getBeginLoc(), FDecl, Proto, 0, Args, AllArgs, CallType); if (Invalid) return true; unsigned TotalNumArgs = AllArgs.size(); for (unsigned i = 0; i < TotalNumArgs; ++i) Call->setArg(i, AllArgs[i]); return false; } bool Sema::GatherArgumentsForCall(SourceLocation CallLoc, FunctionDecl *FDecl, const FunctionProtoType *Proto, unsigned FirstParam, ArrayRef Args, SmallVectorImpl &AllArgs, VariadicCallType CallType, bool AllowExplicit, bool IsListInitialization) { unsigned NumParams = Proto->getNumParams(); bool Invalid = false; size_t ArgIx = 0; // Continue to check argument types (even if we have too few/many args). for (unsigned i = FirstParam; i < NumParams; i++) { QualType ProtoArgType = Proto->getParamType(i); Expr *Arg; ParmVarDecl *Param = FDecl ? FDecl->getParamDecl(i) : nullptr; if (ArgIx < Args.size()) { Arg = Args[ArgIx++]; if (RequireCompleteType(Arg->getBeginLoc(), ProtoArgType, diag::err_call_incomplete_argument, Arg)) return true; // Strip the unbridged-cast placeholder expression off, if applicable. bool CFAudited = false; if (Arg->getType() == Context.ARCUnbridgedCastTy && FDecl && FDecl->hasAttr() && (!Param || !Param->hasAttr())) Arg = stripARCUnbridgedCast(Arg); else if (getLangOpts().ObjCAutoRefCount && FDecl && FDecl->hasAttr() && (!Param || !Param->hasAttr())) CFAudited = true; if (Proto->getExtParameterInfo(i).isNoEscape()) if (auto *BE = dyn_cast(Arg->IgnoreParenNoopCasts(Context))) BE->getBlockDecl()->setDoesNotEscape(); InitializedEntity Entity = Param ? InitializedEntity::InitializeParameter(Context, Param, ProtoArgType) : InitializedEntity::InitializeParameter( Context, ProtoArgType, Proto->isParamConsumed(i)); // Remember that parameter belongs to a CF audited API. if (CFAudited) Entity.setParameterCFAudited(); ExprResult ArgE = PerformCopyInitialization( Entity, SourceLocation(), Arg, IsListInitialization, AllowExplicit); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs(); } else { assert(Param && "can't use default arguments without a known callee"); ExprResult ArgExpr = BuildCXXDefaultArgExpr(CallLoc, FDecl, Param); if (ArgExpr.isInvalid()) return true; Arg = ArgExpr.getAs(); } // Check for array bounds violations for each argument to the call. This // check only triggers warnings when the argument isn't a more complex Expr // with its own checking, such as a BinaryOperator. CheckArrayAccess(Arg); // Check for violations of C99 static array rules (C99 6.7.5.3p7). CheckStaticArrayArgument(CallLoc, Param, Arg); AllArgs.push_back(Arg); } // If this is a variadic call, handle args passed through "...". if (CallType != VariadicDoesNotApply) { // Assume that extern "C" functions with variadic arguments that // return __unknown_anytype aren't *really* variadic. if (Proto->getReturnType() == Context.UnknownAnyTy && FDecl && FDecl->isExternC()) { for (Expr *A : Args.slice(ArgIx)) { QualType paramType; // ignored ExprResult arg = checkUnknownAnyArg(CallLoc, A, paramType); Invalid |= arg.isInvalid(); AllArgs.push_back(arg.get()); } // Otherwise do argument promotion, (C99 6.5.2.2p7). } else { for (Expr *A : Args.slice(ArgIx)) { ExprResult Arg = DefaultVariadicArgumentPromotion(A, CallType, FDecl); Invalid |= Arg.isInvalid(); AllArgs.push_back(Arg.get()); } } // Check for array bounds violations. 
for (Expr *A : Args.slice(ArgIx)) CheckArrayAccess(A); } return Invalid; } static void DiagnoseCalleeStaticArrayParam(Sema &S, ParmVarDecl *PVD) { TypeLoc TL = PVD->getTypeSourceInfo()->getTypeLoc(); if (DecayedTypeLoc DTL = TL.getAs()) TL = DTL.getOriginalLoc(); if (ArrayTypeLoc ATL = TL.getAs()) S.Diag(PVD->getLocation(), diag::note_callee_static_array) << ATL.getLocalSourceRange(); } /// CheckStaticArrayArgument - If the given argument corresponds to a static /// array parameter, check that it is non-null, and that if it is formed by /// array-to-pointer decay, the underlying array is sufficiently large. /// /// C99 6.7.5.3p7: If the keyword static also appears within the [ and ] of the /// array type derivation, then for each call to the function, the value of the /// corresponding actual argument shall provide access to the first element of /// an array with at least as many elements as specified by the size expression. void Sema::CheckStaticArrayArgument(SourceLocation CallLoc, ParmVarDecl *Param, const Expr *ArgExpr) { // Static array parameters are not supported in C++. if (!Param || getLangOpts().CPlusPlus) return; QualType OrigTy = Param->getOriginalType(); const ArrayType *AT = Context.getAsArrayType(OrigTy); if (!AT || AT->getSizeModifier() != ArrayType::Static) return; if (ArgExpr->isNullPointerConstant(Context, Expr::NPC_NeverValueDependent)) { Diag(CallLoc, diag::warn_null_arg) << ArgExpr->getSourceRange(); DiagnoseCalleeStaticArrayParam(*this, Param); return; } const ConstantArrayType *CAT = dyn_cast(AT); if (!CAT) return; const ConstantArrayType *ArgCAT = Context.getAsConstantArrayType(ArgExpr->IgnoreParenImpCasts()->getType()); if (!ArgCAT) return; if (ArgCAT->getSize().ult(CAT->getSize())) { Diag(CallLoc, diag::warn_static_array_too_small) << ArgExpr->getSourceRange() << (unsigned) ArgCAT->getSize().getZExtValue() << (unsigned) CAT->getSize().getZExtValue(); DiagnoseCalleeStaticArrayParam(*this, Param); } } /// Given a function expression of unknown-any type, try to rebuild it /// to have a function type. static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *fn); /// Is the given type a placeholder that we need to lower out /// immediately during argument processing? static bool isPlaceholderToRemoveAsArg(QualType type) { // Placeholders are never sugared. const BuiltinType *placeholder = dyn_cast(type); if (!placeholder) return false; switch (placeholder->getKind()) { // Ignore all the non-placeholder types. #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ case BuiltinType::Id: #include "clang/Basic/OpenCLExtensionTypes.def" #define PLACEHOLDER_TYPE(ID, SINGLETON_ID) #define BUILTIN_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: #include "clang/AST/BuiltinTypes.def" return false; // We cannot lower out overload sets; they might validly be resolved // by the call machinery. case BuiltinType::Overload: return false; // Unbridged casts in ARC can be handled in some call positions and // should be left in place. case BuiltinType::ARCUnbridgedCast: return false; // Pseudo-objects should be converted as soon as possible. case BuiltinType::PseudoObject: return true; // The debugger mode could theoretically but currently does not try // to resolve unknown-typed arguments based on known parameter types. case BuiltinType::UnknownAny: return true; // These are always invalid as call arguments and should be reported. 
case BuiltinType::BoundMember: case BuiltinType::BuiltinFn: case BuiltinType::OMPArraySection: return true; } llvm_unreachable("bad builtin type kind"); } /// Check an argument list for placeholders that we won't try to /// handle later. static bool checkArgsForPlaceholders(Sema &S, MultiExprArg args) { // Apply this processing to all the arguments at once instead of // dying at the first failure. bool hasInvalid = false; for (size_t i = 0, e = args.size(); i != e; i++) { if (isPlaceholderToRemoveAsArg(args[i]->getType())) { ExprResult result = S.CheckPlaceholderExpr(args[i]); if (result.isInvalid()) hasInvalid = true; else args[i] = result.get(); } else if (hasInvalid) { (void)S.CorrectDelayedTyposInExpr(args[i]); } } return hasInvalid; } /// If a builtin function has a pointer argument with no explicit address /// space, then it should be able to accept a pointer to any address /// space as input. In order to do this, we need to replace the /// standard builtin declaration with one that uses the same address space /// as the call. /// /// \returns nullptr if this builtin is not a candidate for a rewrite, /// i.e. it does not contain any pointer arguments without /// an address space qualifier. Otherwise the rewritten /// FunctionDecl is returned. /// TODO: Handle pointer return types. static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, const FunctionDecl *FDecl, MultiExprArg ArgExprs) { QualType DeclType = FDecl->getType(); const FunctionProtoType *FT = dyn_cast<FunctionProtoType>(DeclType); if (!Context.BuiltinInfo.hasPtrArgsOrResult(FDecl->getBuiltinID()) || !FT || FT->isVariadic() || ArgExprs.size() != FT->getNumParams()) return nullptr; bool NeedsNewDecl = false; unsigned i = 0; SmallVector<QualType, 4> OverloadParams; for (QualType ParamType : FT->param_types()) { // Convert array arguments to pointer to simplify type lookup.
ExprResult ArgRes = Sema->DefaultFunctionArrayLvalueConversion(ArgExprs[i++]); if (ArgRes.isInvalid()) return nullptr; Expr *Arg = ArgRes.get(); QualType ArgType = Arg->getType(); if (!ParamType->isPointerType() || ParamType.getQualifiers().hasAddressSpace() || !ArgType->isPointerType() || !ArgType->getPointeeType().getQualifiers().hasAddressSpace()) { OverloadParams.push_back(ParamType); continue; } QualType PointeeType = ParamType->getPointeeType(); if (PointeeType.getQualifiers().hasAddressSpace()) continue; NeedsNewDecl = true; LangAS AS = ArgType->getPointeeType().getAddressSpace(); PointeeType = Context.getAddrSpaceQualType(PointeeType, AS); OverloadParams.push_back(Context.getPointerType(PointeeType)); } if (!NeedsNewDecl) return nullptr; FunctionProtoType::ExtProtoInfo EPI; QualType OverloadTy = Context.getFunctionType(FT->getReturnType(), OverloadParams, EPI); DeclContext *Parent = Context.getTranslationUnitDecl(); FunctionDecl *OverloadDecl = FunctionDecl::Create(Context, Parent, FDecl->getLocation(), FDecl->getLocation(), FDecl->getIdentifier(), OverloadTy, /*TInfo=*/nullptr, SC_Extern, false, /*hasPrototype=*/true); SmallVector<ParmVarDecl *, 16> Params; FT = cast<FunctionProtoType>(OverloadTy); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { QualType ParamType = FT->getParamType(i); ParmVarDecl *Parm = ParmVarDecl::Create(Context, OverloadDecl, SourceLocation(), SourceLocation(), nullptr, ParamType, /*TInfo=*/nullptr, SC_None, nullptr); Parm->setScopeInfo(0, i); Params.push_back(Parm); } OverloadDecl->setParams(Params); return OverloadDecl; } static void checkDirectCallValidity(Sema &S, const Expr *Fn, FunctionDecl *Callee, MultiExprArg ArgExprs) { // `Callee` (when called with ArgExprs) may be ill-formed. enable_if (and // similar attributes) really don't like it when functions are called with an // invalid number of args. if (S.TooManyArguments(Callee->getNumParams(), ArgExprs.size(), /*PartialOverloading=*/false) && !Callee->isVariadic()) return; if (Callee->getMinRequiredArguments() > ArgExprs.size()) return; if (const EnableIfAttr *Attr = S.CheckEnableIf(Callee, ArgExprs, /*MissingImplicitThis=*/true)) { S.Diag(Fn->getBeginLoc(), isa<CXXMethodDecl>(Callee) ? diag::err_ovl_no_viable_member_function_in_call : diag::err_ovl_no_viable_function_in_call) << Callee << Callee->getSourceRange(); S.Diag(Callee->getLocation(), diag::note_ovl_candidate_disabled_by_function_cond_attr) << Attr->getCond()->getSourceRange() << Attr->getMessage(); return; } } static bool enclosingClassIsRelatedToClassInWhichMembersWereFound( const UnresolvedMemberExpr *const UME, Sema &S) { const auto GetFunctionLevelDCIfCXXClass = [](Sema &S) -> const CXXRecordDecl * { const DeclContext *const DC = S.getFunctionLevelDeclContext(); if (!DC || !DC->getParent()) return nullptr; // If the call to some member function was made from within a member // function body 'M', return 'M's parent. if (const auto *MD = dyn_cast<CXXMethodDecl>(DC)) return MD->getParent()->getCanonicalDecl(); // Otherwise the call was made from within a default member initializer of // a class, so return the class. if (const auto *RD = dyn_cast<CXXRecordDecl>(DC)) return RD->getCanonicalDecl(); return nullptr; }; // If our DeclContext is neither a member function nor a class (in the // case of a lambda in a default member initializer), we can't have an // enclosing 'this'. const CXXRecordDecl *const CurParentClass = GetFunctionLevelDCIfCXXClass(S); if (!CurParentClass) return false; // The naming class for implicit member function calls is the class in which // name lookup starts.
const CXXRecordDecl *const NamingClass = UME->getNamingClass()->getCanonicalDecl(); assert(NamingClass && "Must have naming class even for implicit access"); // Return true if the naming class is the same as, or a base of, the class // containing the member function from which the implicit member access was // made. return CurParentClass == NamingClass || CurParentClass->isDerivedFrom(NamingClass); } static void tryImplicitlyCaptureThisIfImplicitMemberFunctionAccessWithDependentArgs( Sema &S, const UnresolvedMemberExpr *const UME, SourceLocation CallLoc) { if (!UME) return; LambdaScopeInfo *const CurLSI = S.getCurLambda(); // Only try to implicitly capture 'this' within a C++ lambda if it hasn't // already been captured, or if this is an implicit member function call (if // it isn't, an attempt to capture 'this' should already have been made). if (!CurLSI || CurLSI->ImpCaptureStyle == CurLSI->ImpCap_None || !UME->isImplicitAccess() || CurLSI->isCXXThisCaptured()) return; // Check if the naming class in which the unresolved members were found is // related to (the same as or a base of) the enclosing class. if (!enclosingClassIsRelatedToClassInWhichMembersWereFound(UME, S)) return; DeclContext *EnclosingFunctionCtx = S.CurContext->getParent()->getParent(); // If the enclosing function is not dependent, then this lambda is // capture ready, so if we can capture this, do so. if (!EnclosingFunctionCtx->isDependentContext()) { // If the current lambda and all enclosing lambdas can capture 'this', // then go ahead and capture 'this' (since our unresolved overload set // contains at least one non-static member function). if (!S.CheckCXXThisCapture(CallLoc, /*Explicit*/ false, /*Diagnose*/ false)) S.CheckCXXThisCapture(CallLoc); } else if (S.CurContext->isDependentContext()) { // ... since this is an implicit member reference that might potentially // involve a 'this' capture, mark 'this' for potential capture in // enclosing lambdas. if (CurLSI->ImpCaptureStyle != CurLSI->ImpCap_None) CurLSI->addPotentialThisCapture(CallLoc); } } /// ActOnCallExpr - Handle a call to Fn with the specified array of arguments. /// This provides the location of the left/right parens and a list of comma /// locations. ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, MultiExprArg ArgExprs, SourceLocation RParenLoc, Expr *ExecConfig, bool IsExecConfig) { // Since this might be a postfix expression, get rid of ParenListExprs. ExprResult Result = MaybeConvertParenListExprToParenExpr(Scope, Fn); if (Result.isInvalid()) return ExprError(); Fn = Result.get(); if (checkArgsForPlaceholders(*this, ArgExprs)) return ExprError(); if (getLangOpts().CPlusPlus) { // If this is a pseudo-destructor expression, build the call immediately. if (isa<CXXPseudoDestructorExpr>(Fn)) { if (!ArgExprs.empty()) { // Pseudo-destructor calls should not have any arguments. Diag(Fn->getBeginLoc(), diag::err_pseudo_dtor_call_with_args) << FixItHint::CreateRemoval( SourceRange(ArgExprs.front()->getBeginLoc(), ArgExprs.back()->getEndLoc())); } return CallExpr::Create(Context, Fn, /*Args=*/{}, Context.VoidTy, VK_RValue, RParenLoc); } if (Fn->getType() == Context.PseudoObjectTy) { ExprResult result = CheckPlaceholderExpr(Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } // Determine whether this is a dependent call inside a C++ template, // in which case we won't do any semantic analysis now.
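// Hypothetical example of such a dependent call (illustrative only):
//
//   template <typename T> void g(T t) { h(t); }   // 'h(t)' stays dependent;
//                                                  // checked at instantiation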
if (Fn->isTypeDependent() || Expr::hasAnyTypeDependentArguments(ArgExprs)) { if (ExecConfig) { return CUDAKernelCallExpr::Create( Context, Fn, cast(ExecConfig), ArgExprs, Context.DependentTy, VK_RValue, RParenLoc); } else { tryImplicitlyCaptureThisIfImplicitMemberFunctionAccessWithDependentArgs( *this, dyn_cast(Fn->IgnoreParens()), Fn->getBeginLoc()); return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy, VK_RValue, RParenLoc); } } // Determine whether this is a call to an object (C++ [over.call.object]). if (Fn->getType()->isRecordType()) return BuildCallToObjectOfClassType(Scope, Fn, LParenLoc, ArgExprs, RParenLoc); if (Fn->getType() == Context.UnknownAnyTy) { ExprResult result = rebuildUnknownAnyFunction(*this, Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } if (Fn->getType() == Context.BoundMemberTy) { return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs, RParenLoc); } } // Check for overloaded calls. This can happen even in C due to extensions. if (Fn->getType() == Context.OverloadTy) { OverloadExpr::FindResult find = OverloadExpr::find(Fn); // We aren't supposed to apply this logic if there's an '&' involved. if (!find.HasFormOfMemberPointer) { if (Expr::hasAnyTypeDependentArguments(ArgExprs)) return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy, VK_RValue, RParenLoc); OverloadExpr *ovl = find.Expression; if (UnresolvedLookupExpr *ULE = dyn_cast(ovl)) return BuildOverloadedCallExpr( Scope, Fn, ULE, LParenLoc, ArgExprs, RParenLoc, ExecConfig, /*AllowTypoCorrection=*/true, find.IsAddressOfOperand); return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs, RParenLoc); } } // If we're directly calling a function, get the appropriate declaration. if (Fn->getType() == Context.UnknownAnyTy) { ExprResult result = rebuildUnknownAnyFunction(*this, Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } Expr *NakedFn = Fn->IgnoreParens(); bool CallingNDeclIndirectly = false; NamedDecl *NDecl = nullptr; if (UnaryOperator *UnOp = dyn_cast(NakedFn)) { if (UnOp->getOpcode() == UO_AddrOf) { CallingNDeclIndirectly = true; NakedFn = UnOp->getSubExpr()->IgnoreParens(); } } if (isa(NakedFn)) { NDecl = cast(NakedFn)->getDecl(); FunctionDecl *FDecl = dyn_cast(NDecl); if (FDecl && FDecl->getBuiltinID()) { // Rewrite the function decl for this builtin by replacing parameters // with no explicit address space with the address space of the arguments // in ArgExprs. if ((FDecl = rewriteBuiltinFunctionDecl(this, Context, FDecl, ArgExprs))) { NDecl = FDecl; Fn = DeclRefExpr::Create( Context, FDecl->getQualifierLoc(), SourceLocation(), FDecl, false, SourceLocation(), FDecl->getType(), Fn->getValueKind(), FDecl); } } } else if (isa(NakedFn)) NDecl = cast(NakedFn)->getMemberDecl(); if (FunctionDecl *FD = dyn_cast_or_null(NDecl)) { if (CallingNDeclIndirectly && !checkAddressOfFunctionIsAvailable( FD, /*Complain=*/true, Fn->getBeginLoc())) return ExprError(); if (getLangOpts().OpenCL && checkOpenCLDisabledDecl(*FD, *Fn)) return ExprError(); checkDirectCallValidity(*this, Fn, FD, ArgExprs); } return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc, ExecConfig, IsExecConfig); } /// ActOnAsTypeExpr - create a new asType (bitcast) from the arguments. 
/// /// __builtin_astype( value, dst type ) /// ExprResult Sema::ActOnAsTypeExpr(Expr *E, ParsedType ParsedDestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; QualType DstTy = GetTypeFromParser(ParsedDestTy); QualType SrcTy = E->getType(); if (Context.getTypeSize(DstTy) != Context.getTypeSize(SrcTy)) return ExprError(Diag(BuiltinLoc, diag::err_invalid_astype_of_different_size) << DstTy << SrcTy << E->getSourceRange()); return new (Context) AsTypeExpr(E, DstTy, VK, OK, BuiltinLoc, RParenLoc); } /// ActOnConvertVectorExpr - create a new convert-vector expression from the /// provided arguments. /// /// __builtin_convertvector( value, dst type ) /// ExprResult Sema::ActOnConvertVectorExpr(Expr *E, ParsedType ParsedDestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { TypeSourceInfo *TInfo; GetTypeFromParser(ParsedDestTy, &TInfo); return SemaConvertVectorExpr(E, TInfo, BuiltinLoc, RParenLoc); } /// BuildResolvedCallExpr - Build a call to a resolved expression, /// i.e. an expression not of \p OverloadTy. The expression should /// unary-convert to an expression of function-pointer or /// block-pointer type. /// /// \param NDecl the declaration being called, if available ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, SourceLocation LParenLoc, ArrayRef Args, SourceLocation RParenLoc, Expr *Config, bool IsExecConfig, ADLCallKind UsesADL) { FunctionDecl *FDecl = dyn_cast_or_null(NDecl); unsigned BuiltinID = (FDecl ? FDecl->getBuiltinID() : 0); // Functions with 'interrupt' attribute cannot be called directly. if (FDecl && FDecl->hasAttr()) { Diag(Fn->getExprLoc(), diag::err_anyx86_interrupt_called); return ExprError(); } // Interrupt handlers don't save off the VFP regs automatically on ARM, // so there's some risk when calling out to non-interrupt handler functions // that the callee might not preserve them. This is easy to diagnose here, // but can be very challenging to debug. if (auto *Caller = getCurFunctionDecl()) if (Caller->hasAttr()) { bool VFP = Context.getTargetInfo().hasFeature("vfp"); if (VFP && (!FDecl || !FDecl->hasAttr())) Diag(Fn->getExprLoc(), diag::warn_arm_interrupt_calling_convention); } // Promote the function operand. // We special-case function promotion here because we only allow promoting // builtin functions to function pointers in the callee of a call. ExprResult Result; QualType ResultTy; if (BuiltinID && Fn->getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)) { // Extract the return type from the (builtin) function pointer type. // FIXME Several builtins still have setType in // Sema::CheckBuiltinFunctionCall. One should review their definitions in // Builtins.def to ensure they are correct before removing setType calls. QualType FnPtrTy = Context.getPointerType(FDecl->getType()); Result = ImpCastExprToType(Fn, FnPtrTy, CK_BuiltinFnToFnPtr).get(); ResultTy = FDecl->getCallResultType(); } else { Result = CallExprUnaryConversions(Fn); ResultTy = Context.BoolTy; } if (Result.isInvalid()) return ExprError(); Fn = Result.get(); // Check for a valid function type, but only if it is not a builtin which // requires custom type checking. These will be handled by // CheckBuiltinFunctionCall below just after creation of the call expression. 
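// Illustrative callee shapes classified below (hypothetical declarations):
//
//   int f(int);
//   int (*fp)(int) = f;
//   int a = fp(1);   // pointer-to-function case
//   int b = f(2);    // decays to pointer-to-function first
// and, with -fblocks, an 'int (^bp)(int)' callee takes the block-pointer branch.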
const FunctionType *FuncT = nullptr; if (!BuiltinID || !Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) { retry: if (const PointerType *PT = Fn->getType()->getAs<PointerType>()) { // C99 6.5.2.2p1 - "The expression that denotes the called function shall // have type pointer to function". FuncT = PT->getPointeeType()->getAs<FunctionType>(); if (!FuncT) return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function) << Fn->getType() << Fn->getSourceRange()); } else if (const BlockPointerType *BPT = Fn->getType()->getAs<BlockPointerType>()) { FuncT = BPT->getPointeeType()->castAs<FunctionType>(); } else { // Handle calls to expressions of unknown-any type. if (Fn->getType() == Context.UnknownAnyTy) { ExprResult rewrite = rebuildUnknownAnyFunction(*this, Fn); if (rewrite.isInvalid()) return ExprError(); Fn = rewrite.get(); goto retry; } return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function) << Fn->getType() << Fn->getSourceRange()); } } // Get the number of parameters in the function prototype, if any. // We will allocate space for max(Args.size(), NumParams) arguments // in the call expression. const auto *Proto = dyn_cast_or_null<FunctionProtoType>(FuncT); unsigned NumParams = Proto ? Proto->getNumParams() : 0; CallExpr *TheCall; if (Config) { assert(UsesADL == ADLCallKind::NotADL && "CUDAKernelCallExpr should not use ADL"); TheCall = CUDAKernelCallExpr::Create(Context, Fn, cast<CallExpr>(Config), Args, ResultTy, VK_RValue, RParenLoc, NumParams); } else { TheCall = CallExpr::Create(Context, Fn, Args, ResultTy, VK_RValue, RParenLoc, NumParams, UsesADL); } if (!getLangOpts().CPlusPlus) { + // Forget about the nulled arguments since typo correction + // does not handle them well. + TheCall->shrinkNumArgs(Args.size()); // C cannot always handle TypoExpr nodes in builtin calls and direct // function calls as their argument checking doesn't necessarily handle // dependent types properly, so make sure any TypoExprs have been // dealt with. ExprResult Result = CorrectDelayedTyposInExpr(TheCall); if (!Result.isUsable()) return ExprError(); + CallExpr *TheOldCall = TheCall; TheCall = dyn_cast<CallExpr>(Result.get()); + bool CorrectedTypos = TheCall != TheOldCall; if (!TheCall) return Result; - // TheCall at this point has max(Args.size(), NumParams) arguments, - // with extra arguments nulled. We don't want to introduce nulled - // arguments in Args and so we only take the first Args.size() arguments. - Args = llvm::makeArrayRef(TheCall->getArgs(), Args.size()); + Args = llvm::makeArrayRef(TheCall->getArgs(), TheCall->getNumArgs()); + + // A new call expression node was created if some typos were corrected. + // However it may not have been constructed with enough storage. In this + // case, rebuild the node with enough storage. The waste of space is + // immaterial since this only happens when some typos were corrected. + if (CorrectedTypos && Args.size() < NumParams) { + if (Config) + TheCall = CUDAKernelCallExpr::Create( + Context, Fn, cast<CallExpr>(Config), Args, ResultTy, VK_RValue, + RParenLoc, NumParams); + else + TheCall = CallExpr::Create(Context, Fn, Args, ResultTy, VK_RValue, + RParenLoc, NumParams, UsesADL); + } + // We can now handle the nulled arguments for the default arguments. + TheCall->setNumArgsUnsafe(std::max<unsigned>(Args.size(), NumParams)); } // Bail out early if calling a builtin with custom type checking.
if (BuiltinID && Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); if (getLangOpts().CUDA) { if (Config) { // CUDA: Kernel calls must be to global functions if (FDecl && !FDecl->hasAttr<CUDAGlobalAttr>()) return ExprError(Diag(LParenLoc,diag::err_kern_call_not_global_function) << FDecl << Fn->getSourceRange()); // CUDA: Kernel function must have 'void' return type if (!FuncT->getReturnType()->isVoidType()) return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return) << Fn->getType() << Fn->getSourceRange()); } else { // CUDA: Calls to global functions must be configured if (FDecl && FDecl->hasAttr<CUDAGlobalAttr>()) return ExprError(Diag(LParenLoc, diag::err_global_call_not_config) << FDecl << Fn->getSourceRange()); } } // Check for a valid return type if (CheckCallReturnType(FuncT->getReturnType(), Fn->getBeginLoc(), TheCall, FDecl)) return ExprError(); // We know the result type of the call, set it. TheCall->setType(FuncT->getCallResultType(Context)); TheCall->setValueKind(Expr::getValueKindForType(FuncT->getReturnType())); if (Proto) { if (ConvertArgumentsForCall(TheCall, Fn, FDecl, Proto, Args, RParenLoc, IsExecConfig)) return ExprError(); } else { assert(isa<FunctionNoProtoType>(FuncT) && "Unknown FunctionType!"); if (FDecl) { // Check if we have too few/too many arguments, based // on our knowledge of the function definition. const FunctionDecl *Def = nullptr; if (FDecl->hasBody(Def) && Args.size() != Def->param_size()) { Proto = Def->getType()->getAs<FunctionProtoType>(); if (!Proto || !(Proto->isVariadic() && Args.size() >= Def->param_size())) Diag(RParenLoc, diag::warn_call_wrong_number_of_arguments) << (Args.size() > Def->param_size()) << FDecl << Fn->getSourceRange(); } // If the function we're calling doesn't have a prototype, but we have // a function prototype from a prior declaration, use that prototype. if (!FDecl->hasPrototype()) Proto = FDecl->getType()->getAs<FunctionProtoType>(); } // Promote the arguments (C99 6.5.2.2p6). for (unsigned i = 0, e = Args.size(); i != e; i++) { Expr *Arg = Args[i]; if (Proto && i < Proto->getNumParams()) { InitializedEntity Entity = InitializedEntity::InitializeParameter( Context, Proto->getParamType(i), Proto->isParamConsumed(i)); ExprResult ArgE = PerformCopyInitialization(Entity, SourceLocation(), Arg); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs<Expr>(); } else { ExprResult ArgE = DefaultArgumentPromotion(Arg); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs<Expr>(); } if (RequireCompleteType(Arg->getBeginLoc(), Arg->getType(), diag::err_call_incomplete_argument, Arg)) return ExprError(); TheCall->setArg(i, Arg); } } if (CXXMethodDecl *Method = dyn_cast_or_null<CXXMethodDecl>(FDecl)) if (!Method->isStatic()) return ExprError(Diag(LParenLoc, diag::err_member_call_without_object) << Fn->getSourceRange()); // Check for sentinels if (NDecl) DiagnoseSentinelCalls(NDecl, LParenLoc, Args); // Do special checking on direct calls to functions.
if (FDecl) { if (CheckFunctionCall(FDecl, TheCall, Proto)) return ExprError(); if (BuiltinID) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); } else if (NDecl) { if (CheckPointerCall(NDecl, TheCall, Proto)) return ExprError(); } else { if (CheckOtherCall(TheCall, Proto)) return ExprError(); } return MaybeBindToTemporary(TheCall); } ExprResult Sema::ActOnCompoundLiteral(SourceLocation LParenLoc, ParsedType Ty, SourceLocation RParenLoc, Expr *InitExpr) { assert(Ty && "ActOnCompoundLiteral(): missing type"); assert(InitExpr && "ActOnCompoundLiteral(): missing expression"); TypeSourceInfo *TInfo; QualType literalType = GetTypeFromParser(Ty, &TInfo); if (!TInfo) TInfo = Context.getTrivialTypeSourceInfo(literalType); return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, InitExpr); } ExprResult Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo, SourceLocation RParenLoc, Expr *LiteralExpr) { QualType literalType = TInfo->getType(); if (literalType->isArrayType()) { if (RequireCompleteType(LParenLoc, Context.getBaseElementType(literalType), diag::err_illegal_decl_array_incomplete_type, SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()))) return ExprError(); if (literalType->isVariableArrayType()) return ExprError(Diag(LParenLoc, diag::err_variable_object_no_init) << SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd())); } else if (!literalType->isDependentType() && RequireCompleteType(LParenLoc, literalType, diag::err_typecheck_decl_incomplete_type, SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()))) return ExprError(); InitializedEntity Entity = InitializedEntity::InitializeCompoundLiteralInit(TInfo); InitializationKind Kind = InitializationKind::CreateCStyleCast(LParenLoc, SourceRange(LParenLoc, RParenLoc), /*InitList=*/true); InitializationSequence InitSeq(*this, Entity, Kind, LiteralExpr); ExprResult Result = InitSeq.Perform(*this, Entity, Kind, LiteralExpr, &literalType); if (Result.isInvalid()) return ExprError(); LiteralExpr = Result.get(); bool isFileScope = !CurContext->isFunctionOrMethod(); // In C, compound literals are l-values for some reason. // For GCC compatibility, in C++, file-scope array compound literals with // constant initializers are also l-values, and compound literals are // otherwise prvalues. // // (GCC also treats C++ list-initialized file-scope array prvalues with // constant initializers as l-values, but that's non-conforming, so we don't // follow it there.) // // FIXME: It would be better to handle the lvalue cases as materializing and // lifetime-extending a temporary object, but our materialized temporaries // representation only supports lifetime extension from a variable, not "out // of thin air". // FIXME: For C++, we might want to instead lifetime-extend only if a pointer // is bound to the result of applying array-to-pointer decay to the compound // literal. // FIXME: GCC supports compound literals of reference type, which should // obviously have a value kind derived from the kind of reference involved. ExprValueKind VK = (getLangOpts().CPlusPlus && !(isFileScope && literalType->isArrayType())) ? 
VK_RValue : VK_LValue; if (isFileScope) if (auto ILE = dyn_cast(LiteralExpr)) for (unsigned i = 0, j = ILE->getNumInits(); i != j; i++) { Expr *Init = ILE->getInit(i); ILE->setInit(i, ConstantExpr::Create(Context, Init)); } Expr *E = new (Context) CompoundLiteralExpr(LParenLoc, TInfo, literalType, VK, LiteralExpr, isFileScope); if (isFileScope) { if (!LiteralExpr->isTypeDependent() && !LiteralExpr->isValueDependent() && !literalType->isDependentType()) // C99 6.5.2.5p3 if (CheckForConstantInitializer(LiteralExpr, literalType)) return ExprError(); } else if (literalType.getAddressSpace() != LangAS::opencl_private && literalType.getAddressSpace() != LangAS::Default) { // Embedded-C extensions to C99 6.5.2.5: // "If the compound literal occurs inside the body of a function, the // type name shall not be qualified by an address-space qualifier." Diag(LParenLoc, diag::err_compound_literal_with_address_space) << SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()); return ExprError(); } return MaybeBindToTemporary(E); } ExprResult Sema::ActOnInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList, SourceLocation RBraceLoc) { // Immediately handle non-overload placeholders. Overloads can be // resolved contextually, but everything else here can't. for (unsigned I = 0, E = InitArgList.size(); I != E; ++I) { if (InitArgList[I]->getType()->isNonOverloadPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(InitArgList[I]); // Ignore failures; dropping the entire initializer list because // of one failure would be terrible for indexing/etc. if (result.isInvalid()) continue; InitArgList[I] = result.get(); } } // Semantic analysis for initializers is done by ActOnDeclarator() and // CheckInitializer() - it requires knowledge of the object being initialized. InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc); E->setType(Context.VoidTy); // FIXME: just a place holder for now. return E; } /// Do an explicit extend of the given block pointer if we're in ARC. void Sema::maybeExtendBlockObject(ExprResult &E) { assert(E.get()->getType()->isBlockPointerType()); assert(E.get()->isRValue()); // Only do this in an r-value context. if (!getLangOpts().ObjCAutoRefCount) return; E = ImplicitCastExpr::Create(Context, E.get()->getType(), CK_ARCExtendBlockObject, E.get(), /*base path*/ nullptr, VK_RValue); Cleanup.setExprNeedsCleanups(true); } /// Prepare a conversion of the given expression to an ObjC object /// pointer type. CastKind Sema::PrepareCastToObjCObjectPointer(ExprResult &E) { QualType type = E.get()->getType(); if (type->isObjCObjectPointerType()) { return CK_BitCast; } else if (type->isBlockPointerType()) { maybeExtendBlockObject(E); return CK_BlockPointerToObjCPointerCast; } else { assert(type->isPointerType()); return CK_CPointerToObjCPointerCast; } } /// Prepares for a scalar cast, performing all the necessary stages /// except the final cast and returning the kind required. CastKind Sema::PrepareScalarCast(ExprResult &Src, QualType DestTy) { // Both Src and Dest are scalar types, i.e. arithmetic or pointer. // Also, callers should have filtered out the invalid cases with // pointers. Everything else should be possible. 
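// A few illustrative conversions prepared here (hypothetical values):
//
//   int i = 0; float fl = 0; int *p = 0;
//   (double)i;           // CK_IntegralToFloating
//   (_Bool)p;            // CK_PointerToBoolean
//   (float _Complex)fl;  // element cast, then CK_FloatingRealToComplex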
QualType SrcTy = Src.get()->getType(); if (Context.hasSameUnqualifiedType(SrcTy, DestTy)) return CK_NoOp; switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) { case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_CPointer: case Type::STK_BlockPointer: case Type::STK_ObjCObjectPointer: switch (DestTy->getScalarTypeKind()) { case Type::STK_CPointer: { LangAS SrcAS = SrcTy->getPointeeType().getAddressSpace(); LangAS DestAS = DestTy->getPointeeType().getAddressSpace(); if (SrcAS != DestAS) return CK_AddressSpaceConversion; if (Context.hasCvrSimilarType(SrcTy, DestTy)) return CK_NoOp; return CK_BitCast; } case Type::STK_BlockPointer: return (SrcKind == Type::STK_BlockPointer ? CK_BitCast : CK_AnyPointerToBlockPointerCast); case Type::STK_ObjCObjectPointer: if (SrcKind == Type::STK_ObjCObjectPointer) return CK_BitCast; if (SrcKind == Type::STK_CPointer) return CK_CPointerToObjCPointerCast; maybeExtendBlockObject(Src); return CK_BlockPointerToObjCPointerCast; case Type::STK_Bool: return CK_PointerToBoolean; case Type::STK_Integral: return CK_PointerToIntegral; case Type::STK_Floating: case Type::STK_FloatingComplex: case Type::STK_IntegralComplex: case Type::STK_MemberPointer: case Type::STK_FixedPoint: llvm_unreachable("illegal cast from pointer"); } llvm_unreachable("Should have returned before this"); case Type::STK_FixedPoint: switch (DestTy->getScalarTypeKind()) { case Type::STK_FixedPoint: return CK_FixedPointCast; case Type::STK_Bool: return CK_FixedPointToBoolean; case Type::STK_Integral: case Type::STK_Floating: case Type::STK_IntegralComplex: case Type::STK_FloatingComplex: Diag(Src.get()->getExprLoc(), diag::err_unimplemented_conversion_with_fixed_point_type) << DestTy; return CK_IntegralCast; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: case Type::STK_MemberPointer: llvm_unreachable("illegal cast to pointer type"); } llvm_unreachable("Should have returned before this"); case Type::STK_Bool: // casting from bool is like casting from an integer case Type::STK_Integral: switch (DestTy->getScalarTypeKind()) { case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: if (Src.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) return CK_NullToPointer; return CK_IntegralToPointer; case Type::STK_Bool: return CK_IntegralToBoolean; case Type::STK_Integral: return CK_IntegralCast; case Type::STK_Floating: return CK_IntegralToFloating; case Type::STK_IntegralComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_IntegralCast); return CK_IntegralRealToComplex; case Type::STK_FloatingComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_IntegralToFloating); return CK_FloatingRealToComplex; case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_FixedPoint: Diag(Src.get()->getExprLoc(), diag::err_unimplemented_conversion_with_fixed_point_type) << SrcTy; return CK_IntegralCast; } llvm_unreachable("Should have returned before this"); case Type::STK_Floating: switch (DestTy->getScalarTypeKind()) { case Type::STK_Floating: return CK_FloatingCast; case Type::STK_Bool: return CK_FloatingToBoolean; case Type::STK_Integral: return CK_FloatingToIntegral; case Type::STK_FloatingComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_FloatingCast); return CK_FloatingRealToComplex; case Type::STK_IntegralComplex: Src = ImpCastExprToType(Src.get(), 
DestTy->castAs()->getElementType(), CK_FloatingToIntegral); return CK_IntegralRealToComplex; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid float->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_FixedPoint: Diag(Src.get()->getExprLoc(), diag::err_unimplemented_conversion_with_fixed_point_type) << SrcTy; return CK_IntegralCast; } llvm_unreachable("Should have returned before this"); case Type::STK_FloatingComplex: switch (DestTy->getScalarTypeKind()) { case Type::STK_FloatingComplex: return CK_FloatingComplexCast; case Type::STK_IntegralComplex: return CK_FloatingComplexToIntegralComplex; case Type::STK_Floating: { QualType ET = SrcTy->castAs()->getElementType(); if (Context.hasSameType(ET, DestTy)) return CK_FloatingComplexToReal; Src = ImpCastExprToType(Src.get(), ET, CK_FloatingComplexToReal); return CK_FloatingCast; } case Type::STK_Bool: return CK_FloatingComplexToBoolean; case Type::STK_Integral: Src = ImpCastExprToType(Src.get(), SrcTy->castAs()->getElementType(), CK_FloatingComplexToReal); return CK_FloatingToIntegral; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid complex float->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_FixedPoint: Diag(Src.get()->getExprLoc(), diag::err_unimplemented_conversion_with_fixed_point_type) << SrcTy; return CK_IntegralCast; } llvm_unreachable("Should have returned before this"); case Type::STK_IntegralComplex: switch (DestTy->getScalarTypeKind()) { case Type::STK_FloatingComplex: return CK_IntegralComplexToFloatingComplex; case Type::STK_IntegralComplex: return CK_IntegralComplexCast; case Type::STK_Integral: { QualType ET = SrcTy->castAs()->getElementType(); if (Context.hasSameType(ET, DestTy)) return CK_IntegralComplexToReal; Src = ImpCastExprToType(Src.get(), ET, CK_IntegralComplexToReal); return CK_IntegralCast; } case Type::STK_Bool: return CK_IntegralComplexToBoolean; case Type::STK_Floating: Src = ImpCastExprToType(Src.get(), SrcTy->castAs()->getElementType(), CK_IntegralComplexToReal); return CK_IntegralToFloating; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid complex int->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_FixedPoint: Diag(Src.get()->getExprLoc(), diag::err_unimplemented_conversion_with_fixed_point_type) << SrcTy; return CK_IntegralCast; } llvm_unreachable("Should have returned before this"); } llvm_unreachable("Unhandled scalar cast"); } static bool breakDownVectorType(QualType type, uint64_t &len, QualType &eltType) { // Vectors are simple. if (const VectorType *vecType = type->getAs()) { len = vecType->getNumElements(); eltType = vecType->getElementType(); assert(eltType->isScalarType()); return true; } // We allow lax conversion to and from non-vector types, but only if // they're real types (i.e. non-complex, non-pointer scalar types). if (!type->isRealType()) return false; len = 1; eltType = type; return true; } /// Are the two types lax-compatible vector types? That is, given /// that one of them is a vector, do they have equal storage sizes, /// where the storage size is the number of elements times the element /// size? /// /// This will also return false if either of the types is neither a /// vector nor a real type. 
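/// For illustration (hypothetical types): 'float __attribute__((vector_size(16)))'
/// and 'int __attribute__((vector_size(16)))' are lax-compatible (both store
/// 4 x 32 bits), while an 8-byte and a 16-byte vector are not.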
bool Sema::areLaxCompatibleVectorTypes(QualType srcTy, QualType destTy) { assert(destTy->isVectorType() || srcTy->isVectorType()); // Disallow lax conversions between scalars and ExtVectors (these // conversions are allowed for other vector types because common headers // depend on them). Most scalar OP ExtVector cases are handled by the // splat path anyway, which does what we want (convert, not bitcast). // What this rules out for ExtVectors is crazy things like char4*float. if (srcTy->isScalarType() && destTy->isExtVectorType()) return false; if (destTy->isScalarType() && srcTy->isExtVectorType()) return false; uint64_t srcLen, destLen; QualType srcEltTy, destEltTy; if (!breakDownVectorType(srcTy, srcLen, srcEltTy)) return false; if (!breakDownVectorType(destTy, destLen, destEltTy)) return false; // ASTContext::getTypeSize will return the size rounded up to a // power of 2, so instead of using that, we need to use the raw // element size multiplied by the element count. uint64_t srcEltSize = Context.getTypeSize(srcEltTy); uint64_t destEltSize = Context.getTypeSize(destEltTy); return (srcLen * srcEltSize == destLen * destEltSize); } /// Is this a legal conversion between two types, one of which is /// known to be a vector type? bool Sema::isLaxVectorConversion(QualType srcTy, QualType destTy) { assert(destTy->isVectorType() || srcTy->isVectorType()); if (!Context.getLangOpts().LaxVectorConversions) return false; return areLaxCompatibleVectorTypes(srcTy, destTy); } bool Sema::CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty, CastKind &Kind) { assert(VectorTy->isVectorType() && "Not a vector type!"); if (Ty->isVectorType() || Ty->isIntegralType(Context)) { if (!areLaxCompatibleVectorTypes(Ty, VectorTy)) return Diag(R.getBegin(), Ty->isVectorType() ? diag::err_invalid_conversion_between_vectors : diag::err_invalid_conversion_between_vector_and_integer) << VectorTy << Ty << R; } else return Diag(R.getBegin(), diag::err_invalid_conversion_between_vector_and_scalar) << VectorTy << Ty << R; Kind = CK_BitCast; return false; } ExprResult Sema::prepareVectorSplat(QualType VectorTy, Expr *SplattedExpr) { QualType DestElemTy = VectorTy->castAs()->getElementType(); if (DestElemTy == SplattedExpr->getType()) return SplattedExpr; assert(DestElemTy->isFloatingType() || DestElemTy->isIntegralOrEnumerationType()); CastKind CK; if (VectorTy->isExtVectorType() && SplattedExpr->getType()->isBooleanType()) { // OpenCL requires that we convert `true` boolean expressions to -1, but // only when splatting vectors. if (DestElemTy->isFloatingType()) { // To avoid having to have a CK_BooleanToSignedFloating cast kind, we cast // in two steps: boolean to signed integral, then to floating. ExprResult CastExprRes = ImpCastExprToType(SplattedExpr, Context.IntTy, CK_BooleanToSignedIntegral); SplattedExpr = CastExprRes.get(); CK = CK_IntegralToFloating; } else { CK = CK_BooleanToSignedIntegral; } } else { ExprResult CastExprRes = SplattedExpr; CK = PrepareScalarCast(CastExprRes, DestElemTy); if (CastExprRes.isInvalid()) return ExprError(); SplattedExpr = CastExprRes.get(); } return ImpCastExprToType(SplattedExpr, DestElemTy, CK); } ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy, Expr *CastExpr, CastKind &Kind) { assert(DestTy->isExtVectorType() && "Not an extended vector type!"); QualType SrcTy = CastExpr->getType(); // If SrcTy is a VectorType, the total size must match to explicitly cast to // an ExtVectorType. // In OpenCL, casts between vectors of different types are not allowed. 
ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy,
                                    Expr *CastExpr, CastKind &Kind) {
  assert(DestTy->isExtVectorType() && "Not an extended vector type!");

  QualType SrcTy = CastExpr->getType();

  // If SrcTy is a VectorType, the total size must match to explicitly cast to
  // an ExtVectorType.
  // In OpenCL, casts between vectors of different types are not allowed.
  // (See OpenCL 6.2).
  if (SrcTy->isVectorType()) {
    if (!areLaxCompatibleVectorTypes(SrcTy, DestTy) ||
        (getLangOpts().OpenCL &&
         !Context.hasSameUnqualifiedType(DestTy, SrcTy))) {
      Diag(R.getBegin(), diag::err_invalid_conversion_between_ext_vectors)
        << DestTy << SrcTy << R;
      return ExprError();
    }
    Kind = CK_BitCast;
    return CastExpr;
  }

  // All non-pointer scalars can be cast to ExtVector type.  The appropriate
  // conversion will take place first from scalar to elt type, and then
  // splat from elt type to vector.
  if (SrcTy->isPointerType())
    return Diag(R.getBegin(),
                diag::err_invalid_conversion_between_vector_and_scalar)
      << DestTy << SrcTy << R;

  Kind = CK_VectorSplat;
  return prepareVectorSplat(DestTy, CastExpr);
}

ExprResult
Sema::ActOnCastExpr(Scope *S, SourceLocation LParenLoc,
                    Declarator &D, ParsedType &Ty,
                    SourceLocation RParenLoc, Expr *CastExpr) {
  assert(!D.isInvalidType() && (CastExpr != nullptr) &&
         "ActOnCastExpr(): missing type or expr");

  TypeSourceInfo *castTInfo = GetTypeForDeclaratorCast(D, CastExpr->getType());
  if (D.isInvalidType())
    return ExprError();

  if (getLangOpts().CPlusPlus) {
    // Check that there are no default arguments (C++ only).
    CheckExtraCXXDefaultArguments(D);
  } else {
    // Make sure any TypoExprs have been dealt with.
    ExprResult Res = CorrectDelayedTyposInExpr(CastExpr);
    if (!Res.isUsable())
      return ExprError();
    CastExpr = Res.get();
  }

  checkUnusedDeclAttributes(D);

  QualType castType = castTInfo->getType();
  Ty = CreateParsedType(castType, castTInfo);

  bool isVectorLiteral = false;

  // Check for an altivec or OpenCL literal,
  // i.e. all the elements are integer constants.
  ParenExpr *PE = dyn_cast<ParenExpr>(CastExpr);
  ParenListExpr *PLE = dyn_cast<ParenListExpr>(CastExpr);
  if ((getLangOpts().AltiVec || getLangOpts().ZVector || getLangOpts().OpenCL)
       && castType->isVectorType() && (PE || PLE)) {
    if (PLE && PLE->getNumExprs() == 0) {
      Diag(PLE->getExprLoc(), diag::err_altivec_empty_initializer);
      return ExprError();
    }
    if (PE || PLE->getNumExprs() == 1) {
      Expr *E = (PE ? PE->getSubExpr() : PLE->getExpr(0));
      if (!E->getType()->isVectorType())
        isVectorLiteral = true;
    }
    else
      isVectorLiteral = true;
  }

  // If this is a vector initializer, '(' type ')' '(' init, ..., init ')'
  // then handle it as such.
  if (isVectorLiteral)
    return BuildVectorLiteral(LParenLoc, RParenLoc, CastExpr, castTInfo);

  // If the Expr being casted is a ParenListExpr, handle it specially.
  // This is not an AltiVec-style cast, so turn the ParenListExpr into a
  // sequence of BinOp comma operators.
  if (isa<ParenListExpr>(CastExpr)) {
    ExprResult Result = MaybeConvertParenListExprToParenExpr(S, CastExpr);
    if (Result.isInvalid()) return ExprError();
    CastExpr = Result.get();
  }

  if (getLangOpts().CPlusPlus && !castType->isVoidType() &&
      !getSourceManager().isInSystemMacro(LParenLoc))
    Diag(LParenLoc, diag::warn_old_style_cast) << CastExpr->getSourceRange();

  CheckTollFreeBridgeCast(castType, CastExpr);

  CheckObjCBridgeRelatedCast(castType, CastExpr);

  DiscardMisalignedMemberAddress(castType.getTypePtr(), CastExpr);

  return BuildCStyleCastExpr(LParenLoc, castTInfo, RParenLoc, CastExpr);
}

ExprResult Sema::BuildVectorLiteral(SourceLocation LParenLoc,
                                    SourceLocation RParenLoc, Expr *E,
                                    TypeSourceInfo *TInfo) {
  assert((isa<ParenListExpr>(E) || isa<ParenExpr>(E)) &&
         "Expected paren or paren list expression");

  Expr **exprs;
  unsigned numExprs;
  Expr *subExpr;
  SourceLocation LiteralLParenLoc, LiteralRParenLoc;
  if (ParenListExpr *PE = dyn_cast<ParenListExpr>(E)) {
    LiteralLParenLoc = PE->getLParenLoc();
    LiteralRParenLoc = PE->getRParenLoc();
    exprs = PE->getExprs();
    numExprs = PE->getNumExprs();
  } else { // isa<ParenExpr> by assertion at function entrance
    LiteralLParenLoc = cast<ParenExpr>(E)->getLParen();
    LiteralRParenLoc = cast<ParenExpr>(E)->getRParen();
    subExpr = cast<ParenExpr>(E)->getSubExpr();
    exprs = &subExpr;
    numExprs = 1;
  }

  QualType Ty = TInfo->getType();
  assert(Ty->isVectorType() && "Expected vector type");

  SmallVector<Expr *, 8> initExprs;
  const VectorType *VTy = Ty->getAs<VectorType>();
  unsigned numElems = Ty->getAs<VectorType>()->getNumElements();

  // '(...)' form of vector initialization in AltiVec: the number of
  // initializers must be one or must match the size of the vector.
  // If a single value is specified in the initializer then it will be
  // replicated to all the components of the vector
  if (VTy->getVectorKind() == VectorType::AltiVecVector) {
    // The number of initializers must be one or must match the size of the
    // vector. If a single value is specified in the initializer then it will
    // be replicated to all the components of the vector
    if (numExprs == 1) {
      QualType ElemTy = Ty->getAs<VectorType>()->getElementType();
      ExprResult Literal = DefaultLvalueConversion(exprs[0]);
      if (Literal.isInvalid())
        return ExprError();
      Literal = ImpCastExprToType(Literal.get(), ElemTy,
                                  PrepareScalarCast(Literal, ElemTy));
      return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get());
    }
    else if (numExprs < numElems) {
      Diag(E->getExprLoc(),
           diag::err_incorrect_number_of_vector_initializers);
      return ExprError();
    }
    else
      initExprs.append(exprs, exprs + numExprs);
  }
  else {
    // For OpenCL, when the number of initializers is a single value,
    // it will be replicated to all components of the vector.
    if (getLangOpts().OpenCL &&
        VTy->getVectorKind() == VectorType::GenericVector &&
        numExprs == 1) {
      QualType ElemTy = Ty->getAs<VectorType>()->getElementType();
      ExprResult Literal = DefaultLvalueConversion(exprs[0]);
      if (Literal.isInvalid())
        return ExprError();
      Literal = ImpCastExprToType(Literal.get(), ElemTy,
                                  PrepareScalarCast(Literal, ElemTy));
      return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get());
    }

    initExprs.append(exprs, exprs + numExprs);
  }
  // FIXME: This means that pretty-printing the final AST will produce curly
  // braces instead of the original commas.
  InitListExpr *initE = new (Context) InitListExpr(Context, LiteralLParenLoc,
                                                   initExprs, LiteralRParenLoc);
  initE->setType(Ty);
  return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, initE);
}
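// Illustrative sketch (AltiVec syntax, not part of the original file): the
// paren-list initializer forms BuildVectorLiteral() accepts:
//
//   vector int a = (vector int)(1);           // one value: splat to all lanes
//   vector int b = (vector int)(1, 2, 3, 4);  // one initializer per element
//   vector int c = (vector int)(1, 2);        // error: fewer initializers
//                                             // than elements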
/// This is not an AltiVec-style cast or C++ direct-initialization, so turn
/// the ParenListExpr into a sequence of comma binary operators.
ExprResult
Sema::MaybeConvertParenListExprToParenExpr(Scope *S, Expr *OrigExpr) {
  ParenListExpr *E = dyn_cast<ParenListExpr>(OrigExpr);
  if (!E)
    return OrigExpr;

  ExprResult Result(E->getExpr(0));

  for (unsigned i = 1, e = E->getNumExprs(); i != e && !Result.isInvalid(); ++i)
    Result = ActOnBinOp(S, E->getExprLoc(), tok::comma, Result.get(),
                        E->getExpr(i));

  if (Result.isInvalid()) return ExprError();

  return ActOnParenExpr(E->getLParenLoc(), E->getRParenLoc(), Result.get());
}

ExprResult Sema::ActOnParenListExpr(SourceLocation L,
                                    SourceLocation R,
                                    MultiExprArg Val) {
  return ParenListExpr::Create(Context, L, Val, R);
}

/// Emit a specialized diagnostic when one expression is a null pointer
/// constant and the other is not a pointer.  Returns true if a diagnostic is
/// emitted.
bool Sema::DiagnoseConditionalForNull(Expr *LHSExpr, Expr *RHSExpr,
                                      SourceLocation QuestionLoc) {
  Expr *NullExpr = LHSExpr;
  Expr *NonPointerExpr = RHSExpr;
  Expr::NullPointerConstantKind NullKind =
      NullExpr->isNullPointerConstant(Context,
                                      Expr::NPC_ValueDependentIsNotNull);

  if (NullKind == Expr::NPCK_NotNull) {
    NullExpr = RHSExpr;
    NonPointerExpr = LHSExpr;
    NullKind =
        NullExpr->isNullPointerConstant(Context,
                                        Expr::NPC_ValueDependentIsNotNull);
  }

  if (NullKind == Expr::NPCK_NotNull)
    return false;

  if (NullKind == Expr::NPCK_ZeroExpression)
    return false;

  if (NullKind == Expr::NPCK_ZeroLiteral) {
    // In this case, check to make sure that we got here from a "NULL"
    // string in the source code.
    NullExpr = NullExpr->IgnoreParenImpCasts();
    SourceLocation loc = NullExpr->getExprLoc();
    if (!findMacroSpelling(loc, "NULL"))
      return false;
  }

  int DiagType = (NullKind == Expr::NPCK_CXX11_nullptr);
  Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands_null)
      << NonPointerExpr->getType() << DiagType
      << NonPointerExpr->getSourceRange();
  return true;
}

/// Return false if the condition expression is valid, true otherwise.
static bool checkCondition(Sema &S, Expr *Cond, SourceLocation QuestionLoc) {
  QualType CondTy = Cond->getType();

  // OpenCL v1.1 s6.3.i says the condition cannot be a floating point type.
  if (S.getLangOpts().OpenCL && CondTy->isFloatingType()) {
    S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat)
        << CondTy << Cond->getSourceRange();
    return true;
  }

  // C99 6.5.15p2
  if (CondTy->isScalarType()) return false;

  S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_scalar)
      << CondTy << Cond->getSourceRange();
  return true;
}

/// Handle when one or both operands are void type.
static QualType checkConditionalVoidType(Sema &S, ExprResult &LHS,
                                         ExprResult &RHS) {
  Expr *LHSExpr = LHS.get();
  Expr *RHSExpr = RHS.get();

  if (!LHSExpr->getType()->isVoidType())
    S.Diag(RHSExpr->getBeginLoc(), diag::ext_typecheck_cond_one_void)
        << RHSExpr->getSourceRange();
  if (!RHSExpr->getType()->isVoidType())
    S.Diag(LHSExpr->getBeginLoc(), diag::ext_typecheck_cond_one_void)
        << LHSExpr->getSourceRange();
  LHS = S.ImpCastExprToType(LHS.get(), S.Context.VoidTy, CK_ToVoid);
  RHS = S.ImpCastExprToType(RHS.get(), S.Context.VoidTy, CK_ToVoid);
  return S.Context.VoidTy;
}
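// Illustrative sketch (hypothetical C, not part of the original file): the
// pattern DiagnoseConditionalForNull() catches -- one arm spelled NULL, the
// other arm not a pointer, suggesting a forgotten address-of:
//
//   int x = 0;
//   ... = flag ? x : NULL;   // err_typecheck_cond_incompatible_operands_null;
//                            // the user probably meant: flag ? &x : NULL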
/// Return false if the NullExpr can be promoted to PointerTy,
/// true otherwise.
static bool checkConditionalNullPointer(Sema &S, ExprResult &NullExpr,
                                        QualType PointerTy) {
  if ((!PointerTy->isAnyPointerType() && !PointerTy->isBlockPointerType()) ||
      !NullExpr.get()->isNullPointerConstant(S.Context,
                                             Expr::NPC_ValueDependentIsNull))
    return true;

  NullExpr = S.ImpCastExprToType(NullExpr.get(), PointerTy, CK_NullToPointer);
  return false;
}

/// Checks compatibility between two pointers and return the resulting
/// type.
static QualType checkConditionalPointerCompatibility(Sema &S, ExprResult &LHS,
                                                     ExprResult &RHS,
                                                     SourceLocation Loc) {
  QualType LHSTy = LHS.get()->getType();
  QualType RHSTy = RHS.get()->getType();

  if (S.Context.hasSameType(LHSTy, RHSTy)) {
    // Two identical pointers types are always compatible.
    return LHSTy;
  }

  QualType lhptee, rhptee;

  // Get the pointee types.
  bool IsBlockPointer = false;
  if (const BlockPointerType *LHSBTy = LHSTy->getAs<BlockPointerType>()) {
    lhptee = LHSBTy->getPointeeType();
    rhptee = RHSTy->castAs<BlockPointerType>()->getPointeeType();
    IsBlockPointer = true;
  } else {
    lhptee = LHSTy->castAs<PointerType>()->getPointeeType();
    rhptee = RHSTy->castAs<PointerType>()->getPointeeType();
  }

  // C99 6.5.15p6: If both operands are pointers to compatible types or to
  // differently qualified versions of compatible types, the result type is
  // a pointer to an appropriately qualified version of the composite
  // type.

  // Only CVR-qualifiers exist in the standard, and the differently-qualified
  // clause doesn't make sense for our extensions. E.g. address space 2 should
  // be incompatible with address space 3: they may live on different devices or
  // anything.
  Qualifiers lhQual = lhptee.getQualifiers();
  Qualifiers rhQual = rhptee.getQualifiers();

  LangAS ResultAddrSpace = LangAS::Default;
  LangAS LAddrSpace = lhQual.getAddressSpace();
  LangAS RAddrSpace = rhQual.getAddressSpace();

  // OpenCL v1.1 s6.5 - Conversion between pointers to distinct address
  // spaces is disallowed.
  if (lhQual.isAddressSpaceSupersetOf(rhQual))
    ResultAddrSpace = LAddrSpace;
  else if (rhQual.isAddressSpaceSupersetOf(lhQual))
    ResultAddrSpace = RAddrSpace;
  else {
    S.Diag(Loc,
           diag::err_typecheck_op_on_nonoverlapping_address_space_pointers)
        << LHSTy << RHSTy << 2 << LHS.get()->getSourceRange()
        << RHS.get()->getSourceRange();
    return QualType();
  }

  unsigned MergedCVRQual = lhQual.getCVRQualifiers() | rhQual.getCVRQualifiers();
  auto LHSCastKind = CK_BitCast, RHSCastKind = CK_BitCast;
  lhQual.removeCVRQualifiers();
  rhQual.removeCVRQualifiers();

  // OpenCL v2.0 specification doesn't extend compatibility of type qualifiers
  // (C99 6.7.3) for address spaces. We assume that the check should behave in
  // the same manner as it's defined for CVR qualifiers, so for OpenCL two
  // qual types are compatible iff
  //  * corresponded types are compatible
  //  * CVR qualifiers are equal
  //  * address spaces are equal
  // Thus for conditional operator we merge CVR and address space unqualified
  // pointees and if there is a composite type we return a pointer to it with
  // merged qualifiers.
  LHSCastKind =
      LAddrSpace == ResultAddrSpace ? CK_BitCast : CK_AddressSpaceConversion;
  RHSCastKind =
      RAddrSpace == ResultAddrSpace ? CK_BitCast : CK_AddressSpaceConversion;
  lhQual.removeAddressSpace();
  rhQual.removeAddressSpace();

  lhptee = S.Context.getQualifiedType(lhptee.getUnqualifiedType(), lhQual);
  rhptee = S.Context.getQualifiedType(rhptee.getUnqualifiedType(), rhQual);

  QualType CompositeTy = S.Context.mergeTypes(lhptee, rhptee);
  if (CompositeTy.isNull()) {
    // In this situation, we assume void* type. No especially good
    // reason, but this is what gcc does, and we do have to pick
    // to get a consistent AST.
    QualType incompatTy;
    incompatTy = S.Context.getPointerType(
        S.Context.getAddrSpaceQualType(S.Context.VoidTy, ResultAddrSpace));
    LHS = S.ImpCastExprToType(LHS.get(), incompatTy, LHSCastKind);
    RHS = S.ImpCastExprToType(RHS.get(), incompatTy, RHSCastKind);
    // FIXME: For OpenCL the warning emission and cast to void* leaves a room
    // for casts between types with incompatible address space qualifiers.
    // For the following code the compiler produces casts between global and
    // local address spaces of the corresponded innermost pointees:
    // local int *global *a;
    // global int *global *b;
    // a = (0 ? a : b); // see C99 6.5.16.1.p1.
    S.Diag(Loc, diag::ext_typecheck_cond_incompatible_pointers)
        << LHSTy << RHSTy << LHS.get()->getSourceRange()
        << RHS.get()->getSourceRange();
    return incompatTy;
  }

  // The pointer types are compatible.
  // In case of OpenCL ResultTy should have the address space qualifier
  // which is a superset of address spaces of both the 2nd and the 3rd
  // operands of the conditional operator.
  QualType ResultTy = [&, ResultAddrSpace]() {
    if (S.getLangOpts().OpenCL) {
      Qualifiers CompositeQuals = CompositeTy.getQualifiers();
      CompositeQuals.setAddressSpace(ResultAddrSpace);
      return S.Context
          .getQualifiedType(CompositeTy.getUnqualifiedType(), CompositeQuals)
          .withCVRQualifiers(MergedCVRQual);
    }
    return CompositeTy.withCVRQualifiers(MergedCVRQual);
  }();
  if (IsBlockPointer)
    ResultTy = S.Context.getBlockPointerType(ResultTy);
  else
    ResultTy = S.Context.getPointerType(ResultTy);

  LHS = S.ImpCastExprToType(LHS.get(), ResultTy, LHSCastKind);
  RHS = S.ImpCastExprToType(RHS.get(), ResultTy, RHSCastKind);
  return ResultTy;
}

/// Return the resulting type when the operands are both block pointers.
static QualType checkConditionalBlockPointerCompatibility(Sema &S,
                                                          ExprResult &LHS,
                                                          ExprResult &RHS,
                                                          SourceLocation Loc) {
  QualType LHSTy = LHS.get()->getType();
  QualType RHSTy = RHS.get()->getType();

  if (!LHSTy->isBlockPointerType() || !RHSTy->isBlockPointerType()) {
    if (LHSTy->isVoidPointerType() || RHSTy->isVoidPointerType()) {
      QualType destType = S.Context.getPointerType(S.Context.VoidTy);
      LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast);
      RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast);
      return destType;
    }
    S.Diag(Loc, diag::err_typecheck_cond_incompatible_operands)
        << LHSTy << RHSTy << LHS.get()->getSourceRange()
        << RHS.get()->getSourceRange();
    return QualType();
  }

  // We have 2 block pointer types.
  return checkConditionalPointerCompatibility(S, LHS, RHS, Loc);
}
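// Illustrative sketch (hypothetical C, not part of the original file): the
// C99 6.5.15p6 qualifier merging performed by
// checkConditionalObjectPointersCompatibility() below when one arm is void*:
//
//   const int *ci; void *v;
//   ... = cond ? ci : v;   // composite type is 'const void *': the void* arm
//                          // picks up the qualifiers of the other pointee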
/// Return the resulting type when the operands are both pointers.
static QualType
checkConditionalObjectPointersCompatibility(Sema &S, ExprResult &LHS,
                                            ExprResult &RHS,
                                            SourceLocation Loc) {
  // get the pointer types
  QualType LHSTy = LHS.get()->getType();
  QualType RHSTy = RHS.get()->getType();

  // get the "pointed to" types
  QualType lhptee = LHSTy->getAs<PointerType>()->getPointeeType();
  QualType rhptee = RHSTy->getAs<PointerType>()->getPointeeType();

  // ignore qualifiers on void (C99 6.5.15p3, clause 6)
  if (lhptee->isVoidType() && rhptee->isIncompleteOrObjectType()) {
    // Figure out necessary qualifiers (C99 6.5.15p6)
    QualType destPointee =
        S.Context.getQualifiedType(lhptee, rhptee.getQualifiers());
    QualType destType = S.Context.getPointerType(destPointee);
    // Add qualifiers if necessary.
    LHS = S.ImpCastExprToType(LHS.get(), destType, CK_NoOp);
    // Promote to void*.
    RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast);
    return destType;
  }
  if (rhptee->isVoidType() && lhptee->isIncompleteOrObjectType()) {
    QualType destPointee =
        S.Context.getQualifiedType(rhptee, lhptee.getQualifiers());
    QualType destType = S.Context.getPointerType(destPointee);
    // Add qualifiers if necessary.
    RHS = S.ImpCastExprToType(RHS.get(), destType, CK_NoOp);
    // Promote to void*.
    LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast);
    return destType;
  }

  return checkConditionalPointerCompatibility(S, LHS, RHS, Loc);
}

/// Return false if the first expression is not an integer and the second
/// expression is not a pointer, true otherwise.
static bool checkPointerIntegerMismatch(Sema &S, ExprResult &Int,
                                        Expr* PointerExpr, SourceLocation Loc,
                                        bool IsIntFirstExpr) {
  if (!PointerExpr->getType()->isPointerType() ||
      !Int.get()->getType()->isIntegerType())
    return false;

  Expr *Expr1 = IsIntFirstExpr ? Int.get() : PointerExpr;
  Expr *Expr2 = IsIntFirstExpr ? PointerExpr : Int.get();

  S.Diag(Loc, diag::ext_typecheck_cond_pointer_integer_mismatch)
      << Expr1->getType() << Expr2->getType()
      << Expr1->getSourceRange() << Expr2->getSourceRange();
  Int = S.ImpCastExprToType(Int.get(), PointerExpr->getType(),
                            CK_IntegralToPointer);
  return true;
}

/// Simple conversion between integer and floating point types.
///
/// Used when handling the OpenCL conditional operator where the
/// condition is a vector while the other operands are scalar.
///
/// OpenCL v1.1 s6.3.i and s6.11.6 together require that the scalar
/// types are either integer or floating type. Between the two
/// operands, the type with the higher rank is defined as the "result
/// type". The other operand needs to be promoted to the same type. No
/// other type promotion is allowed. We cannot use
/// UsualArithmeticConversions() for this purpose, since it always
/// promotes promotable types.
static QualType OpenCLArithmeticConversions(Sema &S, ExprResult &LHS,
                                            ExprResult &RHS,
                                            SourceLocation QuestionLoc) {
  LHS = S.DefaultFunctionArrayLvalueConversion(LHS.get());
  if (LHS.isInvalid())
    return QualType();
  RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get());
  if (RHS.isInvalid())
    return QualType();

  // For conversion purposes, we ignore any qualifiers.
  // For example, "const float" and "float" are equivalent.
  QualType LHSType =
      S.Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType();
  QualType RHSType =
      S.Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType();

  if (!LHSType->isIntegerType() && !LHSType->isRealFloatingType()) {
    S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float)
        << LHSType << LHS.get()->getSourceRange();
    return QualType();
  }

  if (!RHSType->isIntegerType() && !RHSType->isRealFloatingType()) {
    S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float)
        << RHSType << RHS.get()->getSourceRange();
    return QualType();
  }

  // If both types are identical, no conversion is needed.
  if (LHSType == RHSType)
    return LHSType;

  // Now handle "real" floating types (i.e. float, double, long double).
  if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType())
    return handleFloatConversion(S, LHS, RHS, LHSType, RHSType,
                                 /*IsCompAssign = */ false);

  // Finally, we have two differing integer types.
  return handleIntegerConversion<doIntegralCast, doIntegralCast>
      (S, LHS, RHS, LHSType, RHSType, /*IsCompAssign = */ false);
}

/// Convert scalar operands to a vector that matches the
/// condition in length.
///
/// Used when handling the OpenCL conditional operator where the
/// condition is a vector while the other operands are scalar.
///
/// We first compute the "result type" for the scalar operands
/// according to OpenCL v1.1 s6.3.i. Both operands are then converted
/// into a vector of that type where the length matches the condition
/// vector type. s6.11.6 requires that the element types of the result
/// and the condition must have the same number of bits.
static QualType
OpenCLConvertScalarsToVectors(Sema &S, ExprResult &LHS, ExprResult &RHS,
                              QualType CondTy, SourceLocation QuestionLoc) {
  QualType ResTy = OpenCLArithmeticConversions(S, LHS, RHS, QuestionLoc);
  if (ResTy.isNull()) return QualType();

  const VectorType *CV = CondTy->getAs<VectorType>();
  assert(CV);

  // Determine the vector result type
  unsigned NumElements = CV->getNumElements();
  QualType VectorTy = S.Context.getExtVectorType(ResTy, NumElements);

  // Ensure that all types have the same number of bits
  if (S.Context.getTypeSize(CV->getElementType())
      != S.Context.getTypeSize(ResTy)) {
    // Since VectorTy is created internally, it does not pretty print
    // with an OpenCL name. Instead, we just print a description.
    std::string EleTyName = ResTy.getUnqualifiedType().getAsString();
    SmallString<64> Str;
    llvm::raw_svector_ostream OS(Str);
    OS << "(vector of " << NumElements << " '" << EleTyName << "' values)";
    S.Diag(QuestionLoc, diag::err_conditional_vector_element_size)
        << CondTy << OS.str();
    return QualType();
  }

  // Convert operands to the vector result type
  LHS = S.ImpCastExprToType(LHS.get(), VectorTy, CK_VectorSplat);
  RHS = S.ImpCastExprToType(RHS.get(), VectorTy, CK_VectorSplat);

  return VectorTy;
}

/// Return false if this is a valid OpenCL condition vector
static bool checkOpenCLConditionVector(Sema &S, Expr *Cond,
                                       SourceLocation QuestionLoc) {
  // OpenCL v1.1 s6.11.6 says the elements of the vector must be of
  // integral type.
  const VectorType *CondTy = Cond->getType()->getAs<VectorType>();
  assert(CondTy);
  QualType EleTy = CondTy->getElementType();
  if (EleTy->isIntegerType()) return false;

  S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat)
      << Cond->getType() << Cond->getSourceRange();
  return true;
}

/// Return false if the vector condition type and the vector
/// result type are compatible.
///
/// OpenCL v1.1 s6.11.6 requires that both vector types have the same
/// number of elements, and their element types have the same number
/// of bits.
static bool checkVectorResult(Sema &S, QualType CondTy, QualType VecResTy,
                              SourceLocation QuestionLoc) {
  const VectorType *CV = CondTy->getAs<VectorType>();
  const VectorType *RV = VecResTy->getAs<VectorType>();
  assert(CV && RV);

  if (CV->getNumElements() != RV->getNumElements()) {
    S.Diag(QuestionLoc, diag::err_conditional_vector_size)
        << CondTy << VecResTy;
    return true;
  }

  QualType CVE = CV->getElementType();
  QualType RVE = RV->getElementType();

  if (S.Context.getTypeSize(CVE) != S.Context.getTypeSize(RVE)) {
    S.Diag(QuestionLoc, diag::err_conditional_vector_element_size)
        << CondTy << VecResTy;
    return true;
  }

  return false;
}
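// Illustrative sketch (OpenCL, not part of the original file): the vector
// selection these helpers validate. With an int4 condition, scalar arms are
// converted to a common type and splat to a matching 4-element vector:
//
//   int4 c; float f; int i;
//   float4 r = c ? f : i;   // i -> float, both splat to float4; float and the
//                           // condition's int element are both 32 bits: OK
//   // A 'double' arm would be rejected: 64-bit elements vs. int's 32.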
/// Return the resulting type for the conditional operator in
/// OpenCL (aka "ternary selection operator", OpenCL v1.1
/// s6.3.i) when the condition is a vector type.
static QualType
OpenCLCheckVectorConditional(Sema &S, ExprResult &Cond,
                             ExprResult &LHS, ExprResult &RHS,
                             SourceLocation QuestionLoc) {
  Cond = S.DefaultFunctionArrayLvalueConversion(Cond.get());
  if (Cond.isInvalid())
    return QualType();
  QualType CondTy = Cond.get()->getType();

  if (checkOpenCLConditionVector(S, Cond.get(), QuestionLoc))
    return QualType();

  // If either operand is a vector then find the vector type of the
  // result as specified in OpenCL v1.1 s6.3.i.
  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType()) {
    QualType VecResTy = S.CheckVectorOperands(LHS, RHS, QuestionLoc,
                                              /*isCompAssign*/false,
                                              /*AllowBothBool*/true,
                                              /*AllowBoolConversions*/false);
    if (VecResTy.isNull()) return QualType();
    // The result type must match the condition type as specified in
    // OpenCL v1.1 s6.11.6.
    if (checkVectorResult(S, CondTy, VecResTy, QuestionLoc))
      return QualType();
    return VecResTy;
  }

  // Both operands are scalar.
  return OpenCLConvertScalarsToVectors(S, LHS, RHS, CondTy, QuestionLoc);
}

/// Return true if the Expr is block type
static bool checkBlockType(Sema &S, const Expr *E) {
  if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
    QualType Ty = CE->getCallee()->getType();
    if (Ty->isBlockPointerType()) {
      S.Diag(E->getExprLoc(), diag::err_opencl_ternary_with_block);
      return true;
    }
  }
  return false;
}

/// Note that LHS is not null here, even if this is the gnu "x ?: y" extension.
/// In that case, LHS = cond.
/// C99 6.5.15
QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
                                        ExprResult &RHS, ExprValueKind &VK,
                                        ExprObjectKind &OK,
                                        SourceLocation QuestionLoc) {
  ExprResult LHSResult = CheckPlaceholderExpr(LHS.get());
  if (!LHSResult.isUsable()) return QualType();
  LHS = LHSResult;

  ExprResult RHSResult = CheckPlaceholderExpr(RHS.get());
  if (!RHSResult.isUsable()) return QualType();
  RHS = RHSResult;

  // C++ is sufficiently different to merit its own checker.
  if (getLangOpts().CPlusPlus)
    return CXXCheckConditionalOperands(Cond, LHS, RHS, VK, OK, QuestionLoc);

  VK = VK_RValue;
  OK = OK_Ordinary;

  // The OpenCL operator with a vector condition is sufficiently
  // different to merit its own checker.
  if (getLangOpts().OpenCL && Cond.get()->getType()->isVectorType())
    return OpenCLCheckVectorConditional(*this, Cond, LHS, RHS, QuestionLoc);

  // First, check the condition.
  Cond = UsualUnaryConversions(Cond.get());
  if (Cond.isInvalid())
    return QualType();
  if (checkCondition(*this, Cond.get(), QuestionLoc))
    return QualType();

  // Now check the two expressions.
  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType())
    return CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/false,
                               /*AllowBothBool*/true,
                               /*AllowBoolConversions*/false);

  QualType ResTy = UsualArithmeticConversions(LHS, RHS);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  QualType LHSTy = LHS.get()->getType();
  QualType RHSTy = RHS.get()->getType();

  // Diagnose attempts to convert between __float128 and long double where
  // such conversions currently can't be handled.
  if (unsupportedTypeConversion(*this, LHSTy, RHSTy)) {
    Diag(QuestionLoc,
         diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy
        << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
    return QualType();
  }

  // OpenCL v2.0 s6.12.5 - Blocks cannot be used as expressions of the ternary
  // selection operator (?:).
  if (getLangOpts().OpenCL &&
      (checkBlockType(*this, LHS.get()) | checkBlockType(*this, RHS.get()))) {
    return QualType();
  }

  // If both operands have arithmetic type, do the usual arithmetic conversions
  // to find a common type: C99 6.5.15p3,5.
  if (LHSTy->isArithmeticType() && RHSTy->isArithmeticType()) {
    LHS = ImpCastExprToType(LHS.get(), ResTy, PrepareScalarCast(LHS, ResTy));
    RHS = ImpCastExprToType(RHS.get(), ResTy, PrepareScalarCast(RHS, ResTy));

    return ResTy;
  }

  // If both operands are the same structure or union type, the result is that
  // type.
  if (const RecordType *LHSRT = LHSTy->getAs<RecordType>()) {  // C99 6.5.15p3
    if (const RecordType *RHSRT = RHSTy->getAs<RecordType>())
      if (LHSRT->getDecl() == RHSRT->getDecl())
        // "If both the operands have structure or union type, the result has
        // that type."  This implies that CV qualifiers are dropped.
        return LHSTy.getUnqualifiedType();
    // FIXME: Type of conditional expression must be complete in C mode.
  }

  // C99 6.5.15p5: "If both operands have void type, the result has void type."
  // The following || allows only one side to be void (a GCC-ism).
  if (LHSTy->isVoidType() || RHSTy->isVoidType()) {
    return checkConditionalVoidType(*this, LHS, RHS);
  }

  // C99 6.5.15p6 - "if one operand is a null pointer constant, the result has
  // the type of the other operand."
  if (!checkConditionalNullPointer(*this, RHS, LHSTy)) return LHSTy;
  if (!checkConditionalNullPointer(*this, LHS, RHSTy)) return RHSTy;

  // All objective-c pointer type analysis is done here.
  QualType compositeType = FindCompositeObjCPointerType(LHS, RHS,
                                                        QuestionLoc);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();
  if (!compositeType.isNull())
    return compositeType;

  // Handle block pointer types.
  if (LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType())
    return checkConditionalBlockPointerCompatibility(*this, LHS, RHS,
                                                     QuestionLoc);

  // Check constraints for C object pointers types (C99 6.5.15p3,6).
  if (LHSTy->isPointerType() && RHSTy->isPointerType())
    return checkConditionalObjectPointersCompatibility(*this, LHS, RHS,
                                                       QuestionLoc);

  // GCC compatibility: soften pointer/integer mismatch.  Note that
  // null pointers have been filtered out by this point.
  if (checkPointerIntegerMismatch(*this, LHS, RHS.get(), QuestionLoc,
                                  /*isIntFirstExpr=*/true))
    return RHSTy;
  if (checkPointerIntegerMismatch(*this, RHS, LHS.get(), QuestionLoc,
                                  /*isIntFirstExpr=*/false))
    return LHSTy;

  // Emit a better diagnostic if one of the expressions is a null pointer
  // constant and the other is not a pointer type. In this case, the user most
  // likely forgot to take the address of the other expression.
  if (DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc))
    return QualType();

  // Otherwise, the operands are not compatible.
  Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands)
      << LHSTy << RHSTy << LHS.get()->getSourceRange()
      << RHS.get()->getSourceRange();
  return QualType();
}
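// Illustrative sketch (hypothetical C, not part of the original file): the
// GCC-compatibility fallback near the end of CheckConditionalOperands():
//
//   int *p; int n;        // n would only be filtered out above if it were a
//   ... = cond ? p : n;   // null pointer constant; otherwise this just warns
//                         // (ext_typecheck_cond_pointer_integer_mismatch)
//                         // and casts n to 'int *'.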
/// FindCompositeObjCPointerType - Helper method to find composite type of
/// two objective-c pointer types of the two input expressions.
QualType Sema::FindCompositeObjCPointerType(ExprResult &LHS, ExprResult &RHS,
                                            SourceLocation QuestionLoc) {
  QualType LHSTy = LHS.get()->getType();
  QualType RHSTy = RHS.get()->getType();

  // Handle things like Class and struct objc_class*.  Here we cast the result
  // to the pseudo-builtin, because that will be implicitly cast back to the
  // redefinition type if an attempt is made to access its fields.
  if (LHSTy->isObjCClassType() &&
      (Context.hasSameType(RHSTy, Context.getObjCClassRedefinitionType()))) {
    RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_CPointerToObjCPointerCast);
    return LHSTy;
  }
  if (RHSTy->isObjCClassType() &&
      (Context.hasSameType(LHSTy, Context.getObjCClassRedefinitionType()))) {
    LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_CPointerToObjCPointerCast);
    return RHSTy;
  }
  // And the same for struct objc_object* / id
  if (LHSTy->isObjCIdType() &&
      (Context.hasSameType(RHSTy, Context.getObjCIdRedefinitionType()))) {
    RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_CPointerToObjCPointerCast);
    return LHSTy;
  }
  if (RHSTy->isObjCIdType() &&
      (Context.hasSameType(LHSTy, Context.getObjCIdRedefinitionType()))) {
    LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_CPointerToObjCPointerCast);
    return RHSTy;
  }
  // And the same for struct objc_selector* / SEL
  if (Context.isObjCSelType(LHSTy) &&
      (Context.hasSameType(RHSTy, Context.getObjCSelRedefinitionType()))) {
    RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_BitCast);
    return LHSTy;
  }
  if (Context.isObjCSelType(RHSTy) &&
      (Context.hasSameType(LHSTy, Context.getObjCSelRedefinitionType()))) {
    LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_BitCast);
    return RHSTy;
  }
  // Check constraints for Objective-C object pointers types.
  if (LHSTy->isObjCObjectPointerType() && RHSTy->isObjCObjectPointerType()) {

    if (Context.getCanonicalType(LHSTy) == Context.getCanonicalType(RHSTy)) {
      // Two identical object pointer types are always compatible.
      return LHSTy;
    }
    const ObjCObjectPointerType *LHSOPT =
        LHSTy->castAs<ObjCObjectPointerType>();
    const ObjCObjectPointerType *RHSOPT =
        RHSTy->castAs<ObjCObjectPointerType>();
    QualType compositeType = LHSTy;

    // If both operands are interfaces and either operand can be
    // assigned to the other, use that type as the composite
    // type. This allows
    //   xxx ? (A*) a : (B*) b
    // where B is a subclass of A.
    //
    // Additionally, as for assignment, if either type is 'id'
    // allow silent coercion. Finally, if the types are
    // incompatible then make sure to use 'id' as the composite
    // type so the result is acceptable for sending messages to.

    // FIXME: Consider unifying with 'areComparableObjCPointerTypes'.
    // It could return the composite type.
    if (!(compositeType =
          Context.areCommonBaseCompatible(LHSOPT, RHSOPT)).isNull()) {
      // Nothing more to do.
    } else if (Context.canAssignObjCInterfaces(LHSOPT, RHSOPT)) {
      compositeType = RHSOPT->isObjCBuiltinType() ? RHSTy : LHSTy;
    } else if (Context.canAssignObjCInterfaces(RHSOPT, LHSOPT)) {
      compositeType = LHSOPT->isObjCBuiltinType() ? LHSTy : RHSTy;
    } else if ((LHSTy->isObjCQualifiedIdType() ||
                RHSTy->isObjCQualifiedIdType()) &&
               Context.ObjCQualifiedIdTypesAreCompatible(LHSTy, RHSTy, true)) {
      // Need to handle "id<xx>" explicitly.
      // GCC allows qualified id and any Objective-C type to devolve to
      // id. Currently localizing to here until clear this should be
      // part of ObjCQualifiedIdTypesAreCompatible.
      compositeType = Context.getObjCIdType();
    } else if (LHSTy->isObjCIdType() || RHSTy->isObjCIdType()) {
      compositeType = Context.getObjCIdType();
    } else {
      Diag(QuestionLoc, diag::ext_typecheck_cond_incompatible_operands)
          << LHSTy << RHSTy
          << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
      QualType incompatTy = Context.getObjCIdType();
      LHS = ImpCastExprToType(LHS.get(), incompatTy, CK_BitCast);
      RHS = ImpCastExprToType(RHS.get(), incompatTy, CK_BitCast);
      return incompatTy;
    }
    // The object pointer types are compatible.
    LHS = ImpCastExprToType(LHS.get(), compositeType, CK_BitCast);
    RHS = ImpCastExprToType(RHS.get(), compositeType, CK_BitCast);
    return compositeType;
  }
  // Check Objective-C object pointer types and 'void *'
  if (LHSTy->isVoidPointerType() && RHSTy->isObjCObjectPointerType()) {
    if (getLangOpts().ObjCAutoRefCount) {
      // ARC forbids the implicit conversion of object pointers to 'void *',
      // so these types are not compatible.
      Diag(QuestionLoc, diag::err_cond_voidptr_arc) << LHSTy << RHSTy
          << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
      LHS = RHS = true;
      return QualType();
    }
    QualType lhptee = LHSTy->getAs<PointerType>()->getPointeeType();
    QualType rhptee = RHSTy->getAs<ObjCObjectPointerType>()->getPointeeType();
    QualType destPointee
      = Context.getQualifiedType(lhptee, rhptee.getQualifiers());
    QualType destType = Context.getPointerType(destPointee);
    // Add qualifiers if necessary.
    LHS = ImpCastExprToType(LHS.get(), destType, CK_NoOp);
    // Promote to void*.
    RHS = ImpCastExprToType(RHS.get(), destType, CK_BitCast);
    return destType;
  }
  if (LHSTy->isObjCObjectPointerType() && RHSTy->isVoidPointerType()) {
    if (getLangOpts().ObjCAutoRefCount) {
      // ARC forbids the implicit conversion of object pointers to 'void *',
      // so these types are not compatible.
      Diag(QuestionLoc, diag::err_cond_voidptr_arc) << LHSTy << RHSTy
          << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
      LHS = RHS = true;
      return QualType();
    }
    QualType lhptee = LHSTy->getAs<ObjCObjectPointerType>()->getPointeeType();
    QualType rhptee = RHSTy->getAs<PointerType>()->getPointeeType();
    QualType destPointee
      = Context.getQualifiedType(rhptee, lhptee.getQualifiers());
    QualType destType = Context.getPointerType(destPointee);
    // Add qualifiers if necessary.
    RHS = ImpCastExprToType(RHS.get(), destType, CK_NoOp);
    // Promote to void*.
    LHS = ImpCastExprToType(LHS.get(), destType, CK_BitCast);
    return destType;
  }
  return QualType();
}

/// SuggestParentheses - Emit a note with a fixit hint that wraps
/// ParenRange in parentheses.
static void SuggestParentheses(Sema &Self, SourceLocation Loc,
                               const PartialDiagnostic &Note,
                               SourceRange ParenRange) {
  SourceLocation EndLoc = Self.getLocForEndOfToken(ParenRange.getEnd());
  if (ParenRange.getBegin().isFileID() && ParenRange.getEnd().isFileID() &&
      EndLoc.isValid()) {
    Self.Diag(Loc, Note)
        << FixItHint::CreateInsertion(ParenRange.getBegin(), "(")
        << FixItHint::CreateInsertion(EndLoc, ")");
  } else {
    // We can't display the parentheses, so just show the bare note.
    Self.Diag(Loc, Note) << ParenRange;
  }
}

static bool IsArithmeticOp(BinaryOperatorKind Opc) {
  return BinaryOperator::isAdditiveOp(Opc) ||
         BinaryOperator::isMultiplicativeOp(Opc) ||
         BinaryOperator::isShiftOp(Opc);
}
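// Illustrative sketch (Objective-C, hypothetical classes, not part of the
// original file): composite types FindCompositeObjCPointerType() can produce:
//
//   @interface A @end
//   @interface B : A @end
//   A *a; B *b; NSString *s;
//   ... = cond ? a : b;   // composite type 'A *' (common base)
//   ... = cond ? b : s;   // no common base: devolves to 'id' with a warning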
/// IsArithmeticBinaryExpr - Returns true if E is an arithmetic binary
/// expression, either using a built-in or overloaded operator,
/// and sets *OpCode to the opcode and *RHSExprs to the right-hand side
/// expression.
static bool IsArithmeticBinaryExpr(Expr *E, BinaryOperatorKind *Opcode,
                                   Expr **RHSExprs) {
  // Don't strip parenthesis: we should not warn if E is in parenthesis.
  E = E->IgnoreImpCasts();
  E = E->IgnoreConversionOperator();
  E = E->IgnoreImpCasts();
  if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E)) {
    E = MTE->GetTemporaryExpr();
    E = E->IgnoreImpCasts();
  }

  // Built-in binary operator.
  if (BinaryOperator *OP = dyn_cast<BinaryOperator>(E)) {
    if (IsArithmeticOp(OP->getOpcode())) {
      *Opcode = OP->getOpcode();
      *RHSExprs = OP->getRHS();
      return true;
    }
  }

  // Overloaded operator.
  if (CXXOperatorCallExpr *Call = dyn_cast<CXXOperatorCallExpr>(E)) {
    if (Call->getNumArgs() != 2)
      return false;

    // Make sure this is really a binary operator that is safe to pass into
    // BinaryOperator::getOverloadedOpcode(), e.g. it's not a subscript op.
    OverloadedOperatorKind OO = Call->getOperator();
    if (OO < OO_Plus || OO > OO_Arrow ||
        OO == OO_PlusPlus || OO == OO_MinusMinus)
      return false;

    BinaryOperatorKind OpKind = BinaryOperator::getOverloadedOpcode(OO);
    if (IsArithmeticOp(OpKind)) {
      *Opcode = OpKind;
      *RHSExprs = Call->getArg(1);
      return true;
    }
  }

  return false;
}

/// ExprLooksBoolean - Returns true if E looks boolean, i.e. it has boolean type
/// or is a logical expression such as (x==y) which has int type, but is
/// commonly interpreted as boolean.
static bool ExprLooksBoolean(Expr *E) {
  E = E->IgnoreParenImpCasts();

  if (E->getType()->isBooleanType())
    return true;
  if (BinaryOperator *OP = dyn_cast<BinaryOperator>(E))
    return OP->isComparisonOp() || OP->isLogicalOp();
  if (UnaryOperator *OP = dyn_cast<UnaryOperator>(E))
    return OP->getOpcode() == UO_LNot;
  if (E->getType()->isPointerType())
    return true;
  // FIXME: What about overloaded operator calls returning "unspecified boolean
  // type"s (commonly pointer-to-members)?

  return false;
}

/// DiagnoseConditionalPrecedence - Emit a warning when a conditional operator
/// and binary operator are mixed in a way that suggests the programmer assumed
/// the conditional operator has higher precedence, for example:
/// "int x = a + someBinaryCondition ? 1 : 2".
static void DiagnoseConditionalPrecedence(Sema &Self,
                                          SourceLocation OpLoc,
                                          Expr *Condition,
                                          Expr *LHSExpr,
                                          Expr *RHSExpr) {
  BinaryOperatorKind CondOpcode;
  Expr *CondRHS;

  if (!IsArithmeticBinaryExpr(Condition, &CondOpcode, &CondRHS))
    return;
  if (!ExprLooksBoolean(CondRHS))
    return;

  // The condition is an arithmetic binary expression, with a right-
  // hand side that looks boolean, so warn.

  Self.Diag(OpLoc, diag::warn_precedence_conditional)
      << Condition->getSourceRange()
      << BinaryOperator::getOpcodeStr(CondOpcode);

  SuggestParentheses(
      Self, OpLoc,
      Self.PDiag(diag::note_precedence_silence)
          << BinaryOperator::getOpcodeStr(CondOpcode),
      SourceRange(Condition->getBeginLoc(), Condition->getEndLoc()));

  SuggestParentheses(Self, OpLoc,
                     Self.PDiag(diag::note_precedence_conditional_first),
                     SourceRange(CondRHS->getBeginLoc(), RHSExpr->getEndLoc()));
}
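// Illustrative sketch (hypothetical C, not part of the original file): the
// precedence confusion DiagnoseConditionalPrecedence() warns about:
//
//   int y = n + (m == k) ? 1 : 2;  // parses as '(n + (m == k)) ? 1 : 2';
//                                  // warn_precedence_conditional fires because
//                                  // the condition's RHS looks boolean, and
//                                  // n + ((m == k) ? 1 : 2) is the likely intent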
/// Compute the nullability of a conditional expression.
static QualType computeConditionalNullability(QualType ResTy, bool IsBin,
                                              QualType LHSTy, QualType RHSTy,
                                              ASTContext &Ctx) {
  if (!ResTy->isAnyPointerType())
    return ResTy;

  auto GetNullability = [&Ctx](QualType Ty) {
    Optional<NullabilityKind> Kind = Ty->getNullability(Ctx);
    if (Kind)
      return *Kind;
    return NullabilityKind::Unspecified;
  };

  auto LHSKind = GetNullability(LHSTy), RHSKind = GetNullability(RHSTy);
  NullabilityKind MergedKind;

  // Compute nullability of a binary conditional expression.
  if (IsBin) {
    if (LHSKind == NullabilityKind::NonNull)
      MergedKind = NullabilityKind::NonNull;
    else
      MergedKind = RHSKind;
  // Compute nullability of a normal conditional expression.
  } else {
    if (LHSKind == NullabilityKind::Nullable ||
        RHSKind == NullabilityKind::Nullable)
      MergedKind = NullabilityKind::Nullable;
    else if (LHSKind == NullabilityKind::NonNull)
      MergedKind = RHSKind;
    else if (RHSKind == NullabilityKind::NonNull)
      MergedKind = LHSKind;
    else
      MergedKind = NullabilityKind::Unspecified;
  }

  // Return if ResTy already has the correct nullability.
  if (GetNullability(ResTy) == MergedKind)
    return ResTy;

  // Strip all nullability from ResTy.
  while (ResTy->getNullability(Ctx))
    ResTy = ResTy.getSingleStepDesugaredType(Ctx);

  // Create a new AttributedType with the new nullability kind.
  auto NewAttr = AttributedType::getNullabilityAttrKind(MergedKind);
  return Ctx.getAttributedType(NewAttr, ResTy, ResTy);
}

/// ActOnConditionalOp - Parse a ?: operation.  Note that 'LHS' may be null
/// in the case of the GNU conditional expr extension.
ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc,
                                    SourceLocation ColonLoc,
                                    Expr *CondExpr, Expr *LHSExpr,
                                    Expr *RHSExpr) {
  if (!getLangOpts().CPlusPlus) {
    // C cannot handle TypoExpr nodes in the condition because it
    // doesn't handle dependent types properly, so make sure any TypoExprs have
    // been dealt with before checking the operands.
    ExprResult CondResult = CorrectDelayedTyposInExpr(CondExpr);
    ExprResult LHSResult = CorrectDelayedTyposInExpr(LHSExpr);
    ExprResult RHSResult = CorrectDelayedTyposInExpr(RHSExpr);

    if (!CondResult.isUsable())
      return ExprError();

    if (LHSExpr) {
      if (!LHSResult.isUsable())
        return ExprError();
    }

    if (!RHSResult.isUsable())
      return ExprError();

    CondExpr = CondResult.get();
    LHSExpr = LHSResult.get();
    RHSExpr = RHSResult.get();
  }

  // If this is the gnu "x ?: y" extension, analyze the types as though the LHS
  // was the condition.
  OpaqueValueExpr *opaqueValue = nullptr;
  Expr *commonExpr = nullptr;
  if (!LHSExpr) {
    commonExpr = CondExpr;
    // Lower out placeholder types first.  This is important so that we don't
    // try to capture a placeholder. This happens in few cases in C++; such
    // as Objective-C++'s dictionary subscripting syntax.
    if (commonExpr->hasPlaceholderType()) {
      ExprResult result = CheckPlaceholderExpr(commonExpr);
      if (!result.isUsable()) return ExprError();
      commonExpr = result.get();
    }
    // We usually want to apply unary conversions *before* saving, except
    // in the special case of a C++ l-value conditional.
    if (!(getLangOpts().CPlusPlus
          && !commonExpr->isTypeDependent()
          && commonExpr->getValueKind() == RHSExpr->getValueKind()
          && commonExpr->isGLValue()
          && commonExpr->isOrdinaryOrBitFieldObject()
          && RHSExpr->isOrdinaryOrBitFieldObject()
          && Context.hasSameType(commonExpr->getType(), RHSExpr->getType()))) {
      ExprResult commonRes = UsualUnaryConversions(commonExpr);
      if (commonRes.isInvalid())
        return ExprError();
      commonExpr = commonRes.get();
    }

    // If the common expression is a class or array prvalue, materialize it
    // so that we can safely refer to it multiple times.
    if (commonExpr->isRValue() && (commonExpr->getType()->isRecordType() ||
                                   commonExpr->getType()->isArrayType())) {
      ExprResult MatExpr = TemporaryMaterializationConversion(commonExpr);
      if (MatExpr.isInvalid())
        return ExprError();
      commonExpr = MatExpr.get();
    }

    opaqueValue = new (Context) OpaqueValueExpr(commonExpr->getExprLoc(),
                                                commonExpr->getType(),
                                                commonExpr->getValueKind(),
                                                commonExpr->getObjectKind(),
                                                commonExpr);
    LHSExpr = CondExpr = opaqueValue;
  }

  QualType LHSTy = LHSExpr->getType(), RHSTy = RHSExpr->getType();
  ExprValueKind VK = VK_RValue;
  ExprObjectKind OK = OK_Ordinary;
  ExprResult Cond = CondExpr, LHS = LHSExpr, RHS = RHSExpr;
  QualType result = CheckConditionalOperands(Cond, LHS, RHS,
                                             VK, OK, QuestionLoc);
  if (result.isNull() || Cond.isInvalid() || LHS.isInvalid() ||
      RHS.isInvalid())
    return ExprError();

  DiagnoseConditionalPrecedence(*this, QuestionLoc, Cond.get(), LHS.get(),
                                RHS.get());

  CheckBoolLikeConversion(Cond.get(), QuestionLoc);

  result = computeConditionalNullability(result, commonExpr, LHSTy, RHSTy,
                                         Context);

  if (!commonExpr)
    return new (Context)
        ConditionalOperator(Cond.get(), QuestionLoc, LHS.get(), ColonLoc,
                            RHS.get(), result, VK, OK);

  return new (Context) BinaryConditionalOperator(
      commonExpr, opaqueValue, Cond.get(), LHS.get(), RHS.get(), QuestionLoc,
      ColonLoc, result, VK, OK);
}
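// Illustrative sketch (hypothetical C, not part of the original file): the GNU
// "x ?: y" extension handled above. The shared operand is wrapped in an
// OpaqueValueExpr so it can serve as both condition and result while being
// evaluated only once:
//
//   int r = f() ?: -1;   // behaves like 'tmp = f(); tmp ? tmp : -1'
//                        // with f() called exactly once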
// checkPointerTypesForAssignment - This is a very tricky routine (despite
// being closely modeled after the C99 spec:-). The odd characteristic of this
// routine is it effectively ignores the qualifiers on the top level pointee.
// This circumvents the usual type rules specified in 6.2.7p1 & 6.7.5.[1-3].
// FIXME: add a couple examples in this comment.
static Sema::AssignConvertType
checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) {
  assert(LHSType.isCanonical() && "LHS not canonicalized!");
  assert(RHSType.isCanonical() && "RHS not canonicalized!");

  // get the "pointed to" type (ignoring qualifiers at the top level)
  const Type *lhptee, *rhptee;
  Qualifiers lhq, rhq;
  std::tie(lhptee, lhq) =
      cast<PointerType>(LHSType)->getPointeeType().split().asPair();
  std::tie(rhptee, rhq) =
      cast<PointerType>(RHSType)->getPointeeType().split().asPair();

  Sema::AssignConvertType ConvTy = Sema::Compatible;

  // C99 6.5.16.1p1: This following citation is common to constraints
  // 3 & 4 (below). ...and the type *pointed to* by the left has all the
  // qualifiers of the type *pointed to* by the right;

  // As a special case, 'non-__weak A *' -> 'non-__weak const *' is okay.
  if (lhq.getObjCLifetime() != rhq.getObjCLifetime() &&
      lhq.compatiblyIncludesObjCLifetime(rhq)) {
    // Ignore lifetime for further calculation.
    lhq.removeObjCLifetime();
    rhq.removeObjCLifetime();
  }

  if (!lhq.compatiblyIncludes(rhq)) {
    // Treat address-space mismatches as fatal.  TODO: address subspaces
    if (!lhq.isAddressSpaceSupersetOf(rhq))
      ConvTy = Sema::IncompatiblePointerDiscardsQualifiers;

    // It's okay to add or remove GC or lifetime qualifiers when converting to
    // and from void*.
    else if (lhq.withoutObjCGCAttr().withoutObjCLifetime()
                 .compatiblyIncludes(
                     rhq.withoutObjCGCAttr().withoutObjCLifetime())
             && (lhptee->isVoidType() || rhptee->isVoidType()))
      ; // keep old

    // Treat lifetime mismatches as fatal.
    else if (lhq.getObjCLifetime() != rhq.getObjCLifetime())
      ConvTy = Sema::IncompatiblePointerDiscardsQualifiers;

    // For GCC/MS compatibility, other qualifier mismatches are treated
    // as still compatible in C.
    else ConvTy = Sema::CompatiblePointerDiscardsQualifiers;
  }

  // C99 6.5.16.1p1 (constraint 4): If one operand is a pointer to an object or
  // incomplete type and the other is a pointer to a qualified or unqualified
  // version of void...
  if (lhptee->isVoidType()) {
    if (rhptee->isIncompleteOrObjectType())
      return ConvTy;

    // As an extension, we allow cast to/from void* to function pointer.
    assert(rhptee->isFunctionType());
    return Sema::FunctionVoidPointer;
  }

  if (rhptee->isVoidType()) {
    if (lhptee->isIncompleteOrObjectType())
      return ConvTy;

    // As an extension, we allow cast to/from void* to function pointer.
    assert(lhptee->isFunctionType());
    return Sema::FunctionVoidPointer;
  }

  // C99 6.5.16.1p1 (constraint 3): both operands are pointers to qualified or
  // unqualified versions of compatible types, ...
  QualType ltrans = QualType(lhptee, 0), rtrans = QualType(rhptee, 0);
  if (!S.Context.typesAreCompatible(ltrans, rtrans)) {
    // Check if the pointee types are compatible ignoring the sign.
    // We explicitly check for char so that we catch "char" vs
    // "unsigned char" on systems where "char" is unsigned.
    if (lhptee->isCharType())
      ltrans = S.Context.UnsignedCharTy;
    else if (lhptee->hasSignedIntegerRepresentation())
      ltrans = S.Context.getCorrespondingUnsignedType(ltrans);

    if (rhptee->isCharType())
      rtrans = S.Context.UnsignedCharTy;
    else if (rhptee->hasSignedIntegerRepresentation())
      rtrans = S.Context.getCorrespondingUnsignedType(rtrans);

    if (ltrans == rtrans) {
      // Types are compatible ignoring the sign. Qualifier incompatibility
      // takes priority over sign incompatibility because the sign
      // warning can be disabled.
      if (ConvTy != Sema::Compatible)
        return ConvTy;

      return Sema::IncompatiblePointerSign;
    }

    // If we are a multi-level pointer, it's possible that our issue is simply
    // one of qualification - e.g. char ** -> const char ** is not allowed. If
    // the eventual target type is the same and the pointers have the same
    // level of indirection, this must be the issue.
    if (isa<PointerType>(lhptee) && isa<PointerType>(rhptee)) {
      do {
        lhptee = cast<PointerType>(lhptee)->getPointeeType().getTypePtr();
        rhptee = cast<PointerType>(rhptee)->getPointeeType().getTypePtr();
      } while (isa<PointerType>(lhptee) && isa<PointerType>(rhptee));

      if (lhptee == rhptee)
        return Sema::IncompatibleNestedPointerQualifiers;
    }

    // General pointer incompatibility takes priority over qualifiers.
    return Sema::IncompatiblePointer;
  }
  if (!S.getLangOpts().CPlusPlus &&
      S.IsFunctionConversion(ltrans, rtrans, ltrans))
    return Sema::IncompatiblePointer;
  return ConvTy;
}
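// Illustrative sketch (hypothetical C, not part of the original file): typical
// results of checkPointerTypesForAssignment():
//
//   char *p; const char *cp; unsigned char *up; int **pp; const int **cpp;
//   cp = p;    // Compatible
//   p = cp;    // CompatiblePointerDiscardsQualifiers (loses 'const')
//   up = p;    // IncompatiblePointerSign ('char' vs 'unsigned char')
//   cpp = pp;  // IncompatibleNestedPointerQualifiers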
/// checkBlockPointerTypesForAssignment - This routine determines whether two
/// block pointer types are compatible or whether a block and normal pointer
/// are compatible. It is more restrictive than comparing two function pointer
/// types.
static Sema::AssignConvertType
checkBlockPointerTypesForAssignment(Sema &S, QualType LHSType,
                                    QualType RHSType) {
  assert(LHSType.isCanonical() && "LHS not canonicalized!");
  assert(RHSType.isCanonical() && "RHS not canonicalized!");

  QualType lhptee, rhptee;

  // get the "pointed to" type (ignoring qualifiers at the top level)
  lhptee = cast<BlockPointerType>(LHSType)->getPointeeType();
  rhptee = cast<BlockPointerType>(RHSType)->getPointeeType();

  // In C++, the types have to match exactly.
  if (S.getLangOpts().CPlusPlus)
    return Sema::IncompatibleBlockPointer;

  Sema::AssignConvertType ConvTy = Sema::Compatible;

  // For blocks we enforce that qualifiers are identical.
  Qualifiers LQuals = lhptee.getLocalQualifiers();
  Qualifiers RQuals = rhptee.getLocalQualifiers();
  if (S.getLangOpts().OpenCL) {
    LQuals.removeAddressSpace();
    RQuals.removeAddressSpace();
  }
  if (LQuals != RQuals)
    ConvTy = Sema::CompatiblePointerDiscardsQualifiers;

  // FIXME: OpenCL doesn't define the exact compile time semantics for a block
  // assignment.
  // The current behavior is similar to C++ lambdas. A block might be
  // assigned to a variable iff its return type and parameters are compatible
  // (C99 6.2.7) with the corresponding return type and parameters of the LHS of
  // an assignment. Presumably it should behave in way that a function pointer
  // assignment does in C, so for each parameter and return type:
  //  * CVR and address space of LHS should be a superset of CVR and address
  //    space of RHS.
  //  * unqualified types should be compatible.
  if (S.getLangOpts().OpenCL) {
    if (!S.Context.typesAreBlockPointerCompatible(
            S.Context.getQualifiedType(LHSType.getUnqualifiedType(), LQuals),
            S.Context.getQualifiedType(RHSType.getUnqualifiedType(), RQuals)))
      return Sema::IncompatibleBlockPointer;
  } else if (!S.Context.typesAreBlockPointerCompatible(LHSType, RHSType))
    return Sema::IncompatibleBlockPointer;

  return ConvTy;
}

/// checkObjCPointerTypesForAssignment - Compares two objective-c pointer types
/// for assignment compatibility.
static Sema::AssignConvertType
checkObjCPointerTypesForAssignment(Sema &S, QualType LHSType,
                                   QualType RHSType) {
  assert(LHSType.isCanonical() && "LHS was not canonicalized!");
  assert(RHSType.isCanonical() && "RHS was not canonicalized!");

  if (LHSType->isObjCBuiltinType()) {
    // Class is not compatible with ObjC object pointers.
    if (LHSType->isObjCClassType() && !RHSType->isObjCBuiltinType() &&
        !RHSType->isObjCQualifiedClassType())
      return Sema::IncompatiblePointer;
    return Sema::Compatible;
  }
  if (RHSType->isObjCBuiltinType()) {
    if (RHSType->isObjCClassType() && !LHSType->isObjCBuiltinType() &&
        !LHSType->isObjCQualifiedClassType())
      return Sema::IncompatiblePointer;
    return Sema::Compatible;
  }
  QualType lhptee = LHSType->getAs<ObjCObjectPointerType>()->getPointeeType();
  QualType rhptee = RHSType->getAs<ObjCObjectPointerType>()->getPointeeType();

  if (!lhptee.isAtLeastAsQualifiedAs(rhptee) &&
      // make an exception for id<P>
      !LHSType->isObjCQualifiedIdType())
    return Sema::CompatiblePointerDiscardsQualifiers;

  if (S.Context.typesAreCompatible(LHSType, RHSType))
    return Sema::Compatible;
  if (LHSType->isObjCQualifiedIdType() || RHSType->isObjCQualifiedIdType())
    return Sema::IncompatibleObjCQualifiedId;
  return Sema::IncompatiblePointer;
}

Sema::AssignConvertType
Sema::CheckAssignmentConstraints(SourceLocation Loc,
                                 QualType LHSType, QualType RHSType) {
  // Fake up an opaque expression.  We don't actually care about what
  // cast operations are required, so if CheckAssignmentConstraints
  // adds casts to this they'll be wasted, but fortunately that doesn't
  // usually happen on valid code.
  OpaqueValueExpr RHSExpr(Loc, RHSType, VK_RValue);
  ExprResult RHSPtr = &RHSExpr;
  CastKind K;

  return CheckAssignmentConstraints(LHSType, RHSPtr, K, /*ConvertRHS=*/false);
}

/// This helper function returns true if QT is a vector type that has element
/// type ElementType.
static bool isVector(QualType QT, QualType ElementType) {
  if (const VectorType *VT = QT->getAs<VectorType>())
    return VT->getElementType() == ElementType;
  return false;
}

/// CheckAssignmentConstraints (C99 6.5.16) - This routine currently
/// has code to accommodate several GCC extensions when type checking
/// pointers. Here are some objectionable examples that GCC considers warnings:
///
///  int a, *pint;
///  short *pshort;
///  struct foo *pfoo;
///
///  pint = pshort; // warning: assignment from incompatible pointer type
///  a = pint; // warning: assignment makes integer from pointer without a cast
///  pint = a; // warning: assignment makes pointer from integer without a cast
///  pint = pfoo; // warning: assignment from incompatible pointer type
///
/// As a result, the code for dealing with pointers is more complex than the
/// C99 spec dictates.
///
/// Sets 'Kind' for any result kind except Incompatible.
Sema::AssignConvertType
Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS,
                                 CastKind &Kind, bool ConvertRHS) {
  QualType RHSType = RHS.get()->getType();
  QualType OrigLHSType = LHSType;

  // Get canonical types.  We're not formatting these types, just comparing
  // them.
  LHSType = Context.getCanonicalType(LHSType).getUnqualifiedType();
  RHSType = Context.getCanonicalType(RHSType).getUnqualifiedType();

  // Common case: no conversion required.
  if (LHSType == RHSType) {
    Kind = CK_NoOp;
    return Compatible;
  }

  // If we have an atomic type, try a non-atomic assignment, then just add an
  // atomic qualification step.
  if (const AtomicType *AtomicTy = dyn_cast<AtomicType>(LHSType)) {
    Sema::AssignConvertType result =
        CheckAssignmentConstraints(AtomicTy->getValueType(), RHS, Kind);
    if (result != Compatible)
      return result;
    if (Kind != CK_NoOp && ConvertRHS)
      RHS = ImpCastExprToType(RHS.get(), AtomicTy->getValueType(), Kind);
    Kind = CK_NonAtomicToAtomic;
    return Compatible;
  }

  // If the left-hand side is a reference type, then we are in a
  // (rare!) case where we've allowed the use of references in C,
  // e.g., as a parameter type in a built-in function. In this case,
  // just make sure that the type referenced is compatible with the
  // right-hand side type. The caller is responsible for adjusting
  // LHSType so that the resulting expression does not have reference
  // type.
  if (const ReferenceType *LHSTypeRef = LHSType->getAs<ReferenceType>()) {
    if (Context.typesAreCompatible(LHSTypeRef->getPointeeType(), RHSType)) {
      Kind = CK_LValueBitCast;
      return Compatible;
    }
    return Incompatible;
  }
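  // Illustrative sketch (hypothetical C, not part of the original file): the
  // atomic path above. Assignment into an _Atomic lvalue is checked against
  // the underlying value type, then an atomic qualification step is layered
  // on top:
  //
  //   _Atomic int ai; short s;
  //   ai = s;   // checked as int = short: CK_IntegralCast, then the result
  //             // kind becomes CK_NonAtomicToAtomic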
  // Allow scalar to ExtVector assignments, and assignments of an ExtVector type
  // to the same ExtVector type.
  if (LHSType->isExtVectorType()) {
    if (RHSType->isExtVectorType())
      return Incompatible;
    if (RHSType->isArithmeticType()) {
      // CK_VectorSplat does T -> vector T, so first cast to the element type.
      if (ConvertRHS)
        RHS = prepareVectorSplat(LHSType, RHS.get());
      Kind = CK_VectorSplat;
      return Compatible;
    }
  }

  // Conversions to or from vector type.
  if (LHSType->isVectorType() || RHSType->isVectorType()) {
    if (LHSType->isVectorType() && RHSType->isVectorType()) {
      // Allow assignments of an AltiVec vector type to an equivalent GCC
      // vector type and vice versa
      if (Context.areCompatibleVectorTypes(LHSType, RHSType)) {
        Kind = CK_BitCast;
        return Compatible;
      }

      // If we are allowing lax vector conversions, and LHS and RHS are both
      // vectors, the total size only needs to be the same. This is a bitcast;
      // no bits are changed but the result type is different.
      if (isLaxVectorConversion(RHSType, LHSType)) {
        Kind = CK_BitCast;
        return IncompatibleVectors;
      }
    }

    // When the RHS comes from another lax conversion (e.g. binops between
    // scalars and vectors) the result is canonicalized as a vector. When the
    // LHS is also a vector, the lax is allowed by the condition above. Handle
    // the case where LHS is a scalar.
    if (LHSType->isScalarType()) {
      const VectorType *VecType = RHSType->getAs<VectorType>();
      if (VecType && VecType->getNumElements() == 1 &&
          isLaxVectorConversion(RHSType, LHSType)) {
        ExprResult *VecExpr = &RHS;
        *VecExpr = ImpCastExprToType(VecExpr->get(), LHSType, CK_BitCast);
        Kind = CK_BitCast;
        return Compatible;
      }
    }

    return Incompatible;
  }

  // Diagnose attempts to convert between __float128 and long double where
  // such conversions currently can't be handled.
  if (unsupportedTypeConversion(*this, LHSType, RHSType))
    return Incompatible;

  // Disallow assigning a _Complex to a real type in C++ mode since it simply
  // discards the imaginary part.
  if (getLangOpts().CPlusPlus && RHSType->getAs<ComplexType>() &&
      !LHSType->getAs<ComplexType>())
    return Incompatible;

  // Arithmetic conversions.
  if (LHSType->isArithmeticType() && RHSType->isArithmeticType() &&
      !(getLangOpts().CPlusPlus && LHSType->isEnumeralType())) {
    if (ConvertRHS)
      Kind = PrepareScalarCast(RHS, LHSType);
    return Compatible;
  }

  // Conversions to normal pointers.
  if (const PointerType *LHSPointer = dyn_cast<PointerType>(LHSType)) {
    // U* -> T*
    if (isa<PointerType>(RHSType)) {
      LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace();
      LangAS AddrSpaceR = RHSType->getPointeeType().getAddressSpace();
      if (AddrSpaceL != AddrSpaceR)
        Kind = CK_AddressSpaceConversion;
      else if (Context.hasCvrSimilarType(RHSType, LHSType))
        Kind = CK_NoOp;
      else
        Kind = CK_BitCast;
      return checkPointerTypesForAssignment(*this, LHSType, RHSType);
    }

    // int -> T*
    if (RHSType->isIntegerType()) {
      Kind = CK_IntegralToPointer; // FIXME: null?
      return IntToPointer;
    }

    // C pointers are not compatible with ObjC object pointers,
    // with two exceptions:
    if (isa<ObjCObjectPointerType>(RHSType)) {
      //  - conversions to void*
      if (LHSPointer->getPointeeType()->isVoidType()) {
        Kind = CK_BitCast;
        return Compatible;
      }

      //  - conversions from 'Class' to the redefinition type
      if (RHSType->isObjCClassType() &&
          Context.hasSameType(LHSType,
                              Context.getObjCClassRedefinitionType())) {
        Kind = CK_BitCast;
        return Compatible;
      }

      Kind = CK_BitCast;
      return IncompatiblePointer;
    }

    // U^ -> void*
    if (RHSType->getAs<BlockPointerType>()) {
      if (LHSPointer->getPointeeType()->isVoidType()) {
        LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace();
        LangAS AddrSpaceR = RHSType->getAs<BlockPointerType>()
                                ->getPointeeType()
                                .getAddressSpace();
        Kind =
            AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast;
  // Conversions to block pointers.
  if (isa<BlockPointerType>(LHSType)) {
    // U^ -> T^
    if (RHSType->isBlockPointerType()) {
      LangAS AddrSpaceL = LHSType->getAs<BlockPointerType>()
                              ->getPointeeType()
                              .getAddressSpace();
      LangAS AddrSpaceR = RHSType->getAs<BlockPointerType>()
                              ->getPointeeType()
                              .getAddressSpace();
      Kind =
          AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast;
      return checkBlockPointerTypesForAssignment(*this, LHSType, RHSType);
    }

    // int or null -> T^
    if (RHSType->isIntegerType()) {
      Kind = CK_IntegralToPointer; // FIXME: null
      return IntToBlockPointer;
    }

    // id -> T^
    if (getLangOpts().ObjC && RHSType->isObjCIdType()) {
      Kind = CK_AnyPointerToBlockPointerCast;
      return Compatible;
    }

    // void* -> T^
    if (const PointerType *RHSPT = RHSType->getAs<PointerType>())
      if (RHSPT->getPointeeType()->isVoidType()) {
        Kind = CK_AnyPointerToBlockPointerCast;
        return Compatible;
      }

    return Incompatible;
  }

  // Conversions to Objective-C pointers.
  if (isa<ObjCObjectPointerType>(LHSType)) {
    // A* -> B*
    if (RHSType->isObjCObjectPointerType()) {
      Kind = CK_BitCast;
      Sema::AssignConvertType result =
          checkObjCPointerTypesForAssignment(*this, LHSType, RHSType);
      if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() &&
          result == Compatible &&
          !CheckObjCARCUnavailableWeakConversion(OrigLHSType, RHSType))
        result = IncompatibleObjCWeakRef;
      return result;
    }

    // int or null -> A*
    if (RHSType->isIntegerType()) {
      Kind = CK_IntegralToPointer; // FIXME: null
      return IntToPointer;
    }

    // In general, C pointers are not compatible with ObjC object pointers,
    // with two exceptions:
    if (isa<PointerType>(RHSType)) {
      Kind = CK_CPointerToObjCPointerCast;

      //  - conversions from 'void*'
      if (RHSType->isVoidPointerType()) {
        return Compatible;
      }

      //  - conversions to 'Class' from its redefinition type
      if (LHSType->isObjCClassType() &&
          Context.hasSameType(RHSType,
                              Context.getObjCClassRedefinitionType())) {
        return Compatible;
      }

      return IncompatiblePointer;
    }

    // Only under strict condition T^ is compatible with an Objective-C pointer.
    if (RHSType->isBlockPointerType() &&
        LHSType->isBlockCompatibleObjCPointerType(Context)) {
      if (ConvertRHS)
        maybeExtendBlockObject(RHS);
      Kind = CK_BlockPointerToObjCPointerCast;
      return Compatible;
    }

    return Incompatible;
  }

  // Conversions from pointers that are not covered by the above.
  if (isa<PointerType>(RHSType)) {
    // T* -> _Bool
    if (LHSType == Context.BoolTy) {
      Kind = CK_PointerToBoolean;
      return Compatible;
    }

    // T* -> int
    if (LHSType->isIntegerType()) {
      Kind = CK_PointerToIntegral;
      return PointerToInt;
    }

    return Incompatible;
  }

  // Conversions from Objective-C pointers that are not covered by the above.
  if (isa<ObjCObjectPointerType>(RHSType)) {
    // T* -> _Bool
    if (LHSType == Context.BoolTy) {
      Kind = CK_PointerToBoolean;
      return Compatible;
    }

    // T* -> int
    if (LHSType->isIntegerType()) {
      Kind = CK_PointerToIntegral;
      return PointerToInt;
    }

    return Incompatible;
  }

  // struct A -> struct B
  if (isa<TagType>(LHSType) && isa<TagType>(RHSType)) {
    if (Context.typesAreCompatible(LHSType, RHSType)) {
      Kind = CK_NoOp;
      return Compatible;
    }
  }

  if (LHSType->isSamplerT() && RHSType->isIntegerType()) {
    Kind = CK_IntToOCLSampler;
    return Compatible;
  }

  return Incompatible;
}
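// NOTE (editor's illustration, not from the original source): two of the
// Objective-C cases above in action:
//
//   void (^blk)(void);  id obj;
//   blk = obj;   // id -> T^: Compatible via CK_AnyPointerToBlockPointerCast
//   obj = blk;   // T^ -> ObjC pointer: Compatible via
//                // CK_BlockPointerToObjCPointerCast (block may be extended)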
/// Constructs a transparent union from an expression that is
/// used to initialize the transparent union.
static void ConstructTransparentUnion(Sema &S, ASTContext &C,
                                      ExprResult &EResult, QualType UnionType,
                                      FieldDecl *Field) {
  // Build an initializer list that designates the appropriate member
  // of the transparent union.
  Expr *E = EResult.get();
  InitListExpr *Initializer = new (C) InitListExpr(C, SourceLocation(),
                                                   E, SourceLocation());
  Initializer->setType(UnionType);
  Initializer->setInitializedFieldInUnion(Field);

  // Build a compound literal constructing a value of the transparent
  // union type from this initializer list.
  TypeSourceInfo *unionTInfo = C.getTrivialTypeSourceInfo(UnionType);
  EResult = new (C) CompoundLiteralExpr(SourceLocation(), unionTInfo,
                                        UnionType, VK_RValue, Initializer,
                                        false);
}

Sema::AssignConvertType
Sema::CheckTransparentUnionArgumentConstraints(QualType ArgType,
                                               ExprResult &RHS) {
  QualType RHSType = RHS.get()->getType();

  // If the ArgType is a Union type, we want to handle a potential
  // transparent_union GCC extension.
  const RecordType *UT = ArgType->getAsUnionType();
  if (!UT || !UT->getDecl()->hasAttr<TransparentUnionAttr>())
    return Incompatible;

  // The field to initialize within the transparent union.
  RecordDecl *UD = UT->getDecl();
  FieldDecl *InitField = nullptr;
  // It's compatible if the expression matches any of the fields.
  for (auto *it : UD->fields()) {
    if (it->getType()->isPointerType()) {
      // If the transparent union contains a pointer type, we allow:
      // 1) void pointer
      // 2) null pointer constant
      if (RHSType->isPointerType())
        if (RHSType->castAs<PointerType>()->getPointeeType()->isVoidType()) {
          RHS = ImpCastExprToType(RHS.get(), it->getType(), CK_BitCast);
          InitField = it;
          break;
        }

      if (RHS.get()->isNullPointerConstant(Context,
                                           Expr::NPC_ValueDependentIsNull)) {
        RHS = ImpCastExprToType(RHS.get(), it->getType(),
                                CK_NullToPointer);
        InitField = it;
        break;
      }
    }

    CastKind Kind;
    if (CheckAssignmentConstraints(it->getType(), RHS, Kind) == Compatible) {
      RHS = ImpCastExprToType(RHS.get(), it->getType(), Kind);
      InitField = it;
      break;
    }
  }

  if (!InitField)
    return Incompatible;

  ConstructTransparentUnion(*this, Context, RHS, ArgType, InitField);
  return Compatible;
}
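// NOTE (editor's illustration, not from the original source): this implements
// GCC's transparent_union extension:
//
//   typedef union {
//     int *ip;
//     float *fp;
//   } int_or_float_ptr __attribute__((transparent_union));
//   void f(int_or_float_ptr);
//
//   int i;   f(&i);   // &i initializes the 'ip' member; the argument is
//                     // wrapped in a compound literal of the union type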
Sema::AssignConvertType
Sema::CheckSingleAssignmentConstraints(QualType LHSType, ExprResult &CallerRHS,
                                       bool Diagnose,
                                       bool DiagnoseCFAudited,
                                       bool ConvertRHS) {
  // We need to be able to tell the caller whether we diagnosed a problem, if
  // they ask us to issue diagnostics.
  assert((ConvertRHS || !Diagnose) && "can't indicate whether we diagnosed");

  // If ConvertRHS is false, we want to leave the caller's RHS untouched. Sadly,
  // we can't avoid *all* modifications at the moment, so we need somewhere
  // to put the updated value.
  ExprResult LocalRHS = CallerRHS;
  ExprResult &RHS = ConvertRHS ? CallerRHS : LocalRHS;

  if (const auto *LHSPtrType = LHSType->getAs<PointerType>()) {
    if (const auto *RHSPtrType = RHS.get()->getType()->getAs<PointerType>()) {
      if (RHSPtrType->getPointeeType()->hasAttr(attr::NoDeref) &&
          !LHSPtrType->getPointeeType()->hasAttr(attr::NoDeref)) {
        Diag(RHS.get()->getExprLoc(),
             diag::warn_noderef_to_dereferenceable_pointer)
            << RHS.get()->getSourceRange();
      }
    }
  }

  if (getLangOpts().CPlusPlus) {
    if (!LHSType->isRecordType() && !LHSType->isAtomicType()) {
      // C++ 5.17p3: If the left operand is not of class type, the
      // expression is implicitly converted (C++ 4) to the
      // cv-unqualified type of the left operand.
      QualType RHSType = RHS.get()->getType();
      if (Diagnose) {
        RHS = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(),
                                        AA_Assigning);
      } else {
        ImplicitConversionSequence ICS =
            TryImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(),
                                  /*SuppressUserConversions=*/false,
                                  /*AllowExplicit=*/false,
                                  /*InOverloadResolution=*/false,
                                  /*CStyle=*/false,
                                  /*AllowObjCWritebackConversion=*/false);
        if (ICS.isFailure())
          return Incompatible;
        RHS = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(),
                                        ICS, AA_Assigning);
      }
      if (RHS.isInvalid())
        return Incompatible;
      Sema::AssignConvertType result = Compatible;
      if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() &&
          !CheckObjCARCUnavailableWeakConversion(LHSType, RHSType))
        result = IncompatibleObjCWeakRef;
      return result;
    }

    // FIXME: Currently, we fall through and treat C++ classes like C
    // structures.
    // FIXME: We also fall through for atomics; not sure what should
    // happen there, though.
  } else if (RHS.get()->getType() == Context.OverloadTy) {
    // As a set of extensions to C, we support overloading on functions. These
    // functions need to be resolved here.
    DeclAccessPair DAP;
    if (FunctionDecl *FD = ResolveAddressOfOverloadedFunction(
            RHS.get(), LHSType, /*Complain=*/false, DAP))
      RHS = FixOverloadedFunctionReference(RHS.get(), DAP, FD);
    else
      return Incompatible;
  }

  // C99 6.5.16.1p1: the left operand is a pointer and the right is
  // a null pointer constant.
  if ((LHSType->isPointerType() || LHSType->isObjCObjectPointerType() ||
       LHSType->isBlockPointerType()) &&
      RHS.get()->isNullPointerConstant(Context,
                                       Expr::NPC_ValueDependentIsNull)) {
    if (Diagnose || ConvertRHS) {
      CastKind Kind;
      CXXCastPath Path;
      CheckPointerConversion(RHS.get(), LHSType, Kind, Path,
                             /*IgnoreBaseAccess=*/false, Diagnose);
      if (ConvertRHS)
        RHS = ImpCastExprToType(RHS.get(), LHSType, Kind, VK_RValue, &Path);
    }
    return Compatible;
  }

  // OpenCL queue_t type assignment.
  if (LHSType->isQueueT() && RHS.get()->isNullPointerConstant(
                                 Context, Expr::NPC_ValueDependentIsNull)) {
    RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer);
    return Compatible;
  }

  // This check seems unnatural, however it is necessary to ensure the proper
  // conversion of functions/arrays. If the conversion were done for all
  // DeclExpr's (created by ActOnIdExpression), it would mess up the unary
  // expressions that suppress this implicit conversion (&, sizeof).
  //
  // Suppress this for references: C++ 8.5.3p5.
  if (!LHSType->isReferenceType()) {
    // FIXME: We potentially allocate here even if ConvertRHS is false.
    RHS = DefaultFunctionArrayLvalueConversion(RHS.get(), Diagnose);
    if (RHS.isInvalid())
      return Incompatible;
  }

  CastKind Kind;
  Sema::AssignConvertType result =
      CheckAssignmentConstraints(LHSType, RHS, Kind, ConvertRHS);

  // C99 6.5.16.1p2: The value of the right operand is converted to the
  // type of the assignment expression.
  // CheckAssignmentConstraints allows the left-hand side to be a reference,
  // so that we can use references in built-in functions even in C.
  // The getNonReferenceType() call makes sure that the resulting expression
  // does not have reference type.
  if (result != Incompatible && RHS.get()->getType() != LHSType) {
    QualType Ty = LHSType.getNonLValueExprType(Context);
    Expr *E = RHS.get();

    // Check for various Objective-C errors. If we are not reporting
    // diagnostics and just checking for errors, e.g., during overload
    // resolution, return Incompatible to indicate the failure.
    if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() &&
        CheckObjCConversion(SourceRange(), Ty, E, CCK_ImplicitConversion,
                            Diagnose, DiagnoseCFAudited) != ACR_okay) {
      if (!Diagnose)
        return Incompatible;
    }
    if (getLangOpts().ObjC &&
        (CheckObjCBridgeRelatedConversions(E->getBeginLoc(), LHSType,
                                           E->getType(), E, Diagnose) ||
         ConversionToObjCStringLiteralCheck(LHSType, E, Diagnose))) {
      if (!Diagnose)
        return Incompatible;
      // Replace the expression with a corrected version and continue so we
      // can find further errors.
      RHS = E;
      return Compatible;
    }

    if (ConvertRHS)
      RHS = ImpCastExprToType(E, Ty, Kind);
  }
  return result;
}
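// NOTE (editor's illustration, not from the original source): the null
// pointer constant path above is why
//
//   char *p;
//   p = 0;   // Compatible; lowered via CheckPointerConversion to
//            // CK_NullToPointer
//
// never reaches the integer-to-pointer (IntToPointer) warning path.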
namespace {
/// The original operand to an operator, prior to the application of the usual
/// arithmetic conversions and converting the arguments of a builtin operator
/// candidate.
struct OriginalOperand {
  explicit OriginalOperand(Expr *Op) : Orig(Op), Conversion(nullptr) {
    if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(Op))
      Op = MTE->GetTemporaryExpr();
    if (auto *BTE = dyn_cast<CXXBindTemporaryExpr>(Op))
      Op = BTE->getSubExpr();
    if (auto *ICE = dyn_cast<ImplicitCastExpr>(Op)) {
      Orig = ICE->getSubExprAsWritten();
      Conversion = ICE->getConversionFunction();
    }
  }

  QualType getType() const { return Orig->getType(); }

  Expr *Orig;
  NamedDecl *Conversion;
};
}

QualType Sema::InvalidOperands(SourceLocation Loc, ExprResult &LHS,
                               ExprResult &RHS) {
  OriginalOperand OrigLHS(LHS.get()), OrigRHS(RHS.get());

  Diag(Loc, diag::err_typecheck_invalid_operands)
      << OrigLHS.getType() << OrigRHS.getType()
      << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();

  // If a user-defined conversion was applied to either of the operands prior
  // to applying the built-in operator rules, tell the user about it.
  if (OrigLHS.Conversion) {
    Diag(OrigLHS.Conversion->getLocation(),
         diag::note_typecheck_invalid_operands_converted)
        << 0 << LHS.get()->getType();
  }
  if (OrigRHS.Conversion) {
    Diag(OrigRHS.Conversion->getLocation(),
         diag::note_typecheck_invalid_operands_converted)
        << 1 << RHS.get()->getType();
  }

  return QualType();
}

// Diagnose cases where a scalar was implicitly converted to a vector and
// diagnose the underlying types. Otherwise, diagnose the error
// as invalid vector logical operands for non-C++ cases.
QualType Sema::InvalidLogicalVectorOperands(SourceLocation Loc, ExprResult &LHS,
                                            ExprResult &RHS) {
  QualType LHSType = LHS.get()->IgnoreImpCasts()->getType();
  QualType RHSType = RHS.get()->IgnoreImpCasts()->getType();

  bool LHSNatVec = LHSType->isVectorType();
  bool RHSNatVec = RHSType->isVectorType();

  if (!(LHSNatVec && RHSNatVec)) {
    Expr *Vector = LHSNatVec ? LHS.get() : RHS.get();
    Expr *NonVector = !LHSNatVec ? LHS.get() : RHS.get();
    Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict)
        << 0 << Vector->getType() << NonVector->IgnoreImpCasts()->getType()
        << Vector->getSourceRange();
    return QualType();
  }

  Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict)
      << 1 << LHSType << RHSType
      << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();

  return QualType();
}

/// Try to convert a value of non-vector type to a vector type by converting
/// the type to the element type of the vector and then performing a splat.
/// If the language is OpenCL, we only use conversions that promote scalar
/// rank; for C, Obj-C, and C++ we allow any real scalar conversion except
/// for float->int.
///
/// OpenCL V2.0 6.2.6.p2:
/// An error shall occur if any scalar operand type has greater rank
/// than the type of the vector element.
///
/// \param scalar - if non-null, actually perform the conversions
/// \return true if the operation fails (but without diagnosing the failure)
static bool tryVectorConvertAndSplat(Sema &S, ExprResult *scalar,
                                     QualType scalarTy,
                                     QualType vectorEltTy,
                                     QualType vectorTy,
                                     unsigned &DiagID) {
  // The conversion to apply to the scalar before splatting it,
  // if necessary.
  CastKind scalarCast = CK_NoOp;

  if (vectorEltTy->isIntegralType(S.Context)) {
    if (S.getLangOpts().OpenCL && (scalarTy->isRealFloatingType() ||
        (scalarTy->isIntegerType() &&
         S.Context.getIntegerTypeOrder(vectorEltTy, scalarTy) < 0))) {
      DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type;
      return true;
    }
    if (!scalarTy->isIntegralType(S.Context))
      return true;
    scalarCast = CK_IntegralCast;
  } else if (vectorEltTy->isRealFloatingType()) {
    if (scalarTy->isRealFloatingType()) {
      if (S.getLangOpts().OpenCL &&
          S.Context.getFloatingTypeOrder(vectorEltTy, scalarTy) < 0) {
        DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type;
        return true;
      }
      scalarCast = CK_FloatingCast;
    }
    else if (scalarTy->isIntegralType(S.Context))
      scalarCast = CK_IntegralToFloating;
    else
      return true;
  } else {
    return true;
  }

  // Adjust scalar if desired.
  if (scalar) {
    if (scalarCast != CK_NoOp)
      *scalar = S.ImpCastExprToType(scalar->get(), vectorEltTy, scalarCast);
    *scalar = S.ImpCastExprToType(scalar->get(), vectorTy, CK_VectorSplat);
  }
  return false;
}
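// NOTE (editor's illustration, not from the original source): for an
// ext_vector LHS this is the splat path taken by
//
//   typedef float float4 __attribute__((ext_vector_type(4)));
//   float4 v;
//   v = v + 2;   // 2: CK_IntegralToFloating, then CK_VectorSplat
//
// In OpenCL, a scalar of higher rank than the element type (e.g. a double
// literal against float4) is instead rejected with
// err_opencl_scalar_type_rank_greater_than_vector_type.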
/// Convert vector E to a vector with the same number of elements but different
/// element type.
static ExprResult convertVector(Expr *E, QualType ElementType, Sema &S) {
  const auto *VecTy = E->getType()->getAs<VectorType>();
  assert(VecTy && "Expression E must be a vector");
  QualType NewVecTy = S.Context.getVectorType(ElementType,
                                              VecTy->getNumElements(),
                                              VecTy->getVectorKind());

  // Look through the implicit cast. Return the subexpression if its type is
  // NewVecTy.
  if (auto *ICE = dyn_cast<ImplicitCastExpr>(E))
    if (ICE->getSubExpr()->getType() == NewVecTy)
      return ICE->getSubExpr();

  auto Cast = ElementType->isIntegerType() ? CK_IntegralCast : CK_FloatingCast;
  return S.ImpCastExprToType(E, NewVecTy, Cast);
}

/// Test if a (constant) integer Int can be cast to another integer type
/// IntTy without losing precision.
static bool canConvertIntToOtherIntTy(Sema &S, ExprResult *Int,
                                      QualType OtherIntTy) {
  QualType IntTy = Int->get()->getType().getUnqualifiedType();

  // Reject cases where the value of the Int is unknown as that would
  // possibly cause truncation, but accept cases where the scalar can be
  // demoted without loss of precision.
  Expr::EvalResult EVResult;
  bool CstInt = Int->get()->EvaluateAsInt(EVResult, S.Context);
  int Order = S.Context.getIntegerTypeOrder(OtherIntTy, IntTy);
  bool IntSigned = IntTy->hasSignedIntegerRepresentation();
  bool OtherIntSigned = OtherIntTy->hasSignedIntegerRepresentation();

  if (CstInt) {
    // If the scalar is constant and is of a higher order and has more active
    // bits than the vector element type, reject it.
    llvm::APSInt Result = EVResult.Val.getInt();
    unsigned NumBits = IntSigned
                           ? (Result.isNegative() ? Result.getMinSignedBits()
                                                  : Result.getActiveBits())
                           : Result.getActiveBits();
    if (Order < 0 && S.Context.getIntWidth(OtherIntTy) < NumBits)
      return true;

    // If the signedness of the scalar type and the vector element type
    // differs and the number of bits is greater than that of the vector
    // element reject it.
    return (IntSigned != OtherIntSigned &&
            NumBits > S.Context.getIntWidth(OtherIntTy));
  }

  // Reject cases where the value of the scalar is not constant and its
  // order is greater than that of the vector element type.
  return (Order < 0);
}

/// Test if a (constant) integer Int can be cast to floating point type
/// FloatTy without losing precision.
static bool canConvertIntTyToFloatTy(Sema &S, ExprResult *Int,
                                     QualType FloatTy) {
  QualType IntTy = Int->get()->getType().getUnqualifiedType();

  // Determine if the integer constant can be expressed as a floating point
  // number of the appropriate type.
  Expr::EvalResult EVResult;
  bool CstInt = Int->get()->EvaluateAsInt(EVResult, S.Context);

  uint64_t Bits = 0;
  if (CstInt) {
    // Reject constants that would be truncated if they were converted to
    // the floating point type. Test by simple to/from conversion.
    // FIXME: Ideally the conversion to an APFloat and from an APFloat
    //        could be avoided if there was a convertFromAPInt method
    //        which could signal back if implicit truncation occurred.
    llvm::APSInt Result = EVResult.Val.getInt();
    llvm::APFloat Float(S.Context.getFloatTypeSemantics(FloatTy));
    Float.convertFromAPInt(Result, IntTy->hasSignedIntegerRepresentation(),
                           llvm::APFloat::rmTowardZero);
    llvm::APSInt ConvertBack(S.Context.getIntWidth(IntTy),
                             !IntTy->hasSignedIntegerRepresentation());
    bool Ignored = false;
    Float.convertToInteger(ConvertBack, llvm::APFloat::rmNearestTiesToEven,
                           &Ignored);
    if (Result != ConvertBack)
      return true;
  } else {
    // Reject types that cannot be fully encoded into the mantissa of
    // the float.
    Bits = S.Context.getTypeSize(IntTy);
    unsigned FloatPrec = llvm::APFloat::semanticsPrecision(
        S.Context.getFloatTypeSemantics(FloatTy));
    if (Bits > FloatPrec)
      return true;
  }

  return false;
}
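// NOTE (editor's illustration, not from the original source): the to/from
// round-trip above is what rejects
//
//   typedef float v4f __attribute__((vector_size(16)));
//   v4f v;
//   v = v + 16777217;   // 2^24 + 1 has no exact float representation;
//                       // the convert-back comparison fails
//
// while 'v + 16777216' still converts and splats cleanly.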
/// Attempt to convert and splat Scalar into a vector whose type matches
/// Vector following GCC conversion rules. The rule is that implicit
/// conversion can occur when Scalar can be cast to match Vector's element
/// type without causing truncation of Scalar.
static bool tryGCCVectorConvertAndSplat(Sema &S, ExprResult *Scalar,
                                        ExprResult *Vector) {
  QualType ScalarTy = Scalar->get()->getType().getUnqualifiedType();
  QualType VectorTy = Vector->get()->getType().getUnqualifiedType();
  const VectorType *VT = VectorTy->getAs<VectorType>();

  assert(!isa<ExtVectorType>(VT) &&
         "ExtVectorTypes should not be handled here!");

  QualType VectorEltTy = VT->getElementType();

  // Reject cases where the vector element type or the scalar element type are
  // not integral or floating point types.
  if (!VectorEltTy->isArithmeticType() || !ScalarTy->isArithmeticType())
    return true;

  // The conversion to apply to the scalar before splatting it,
  // if necessary.
  CastKind ScalarCast = CK_NoOp;

  // Accept cases where the vector elements are integers and the scalar is
  // an integer.
  // FIXME: Notionally if the scalar was a floating point value with a precise
  //        integral representation, we could cast it to an appropriate integer
  //        type and then perform the rest of the checks here. GCC will perform
  //        this conversion in some cases as determined by the input language.
  //        We should accept it on a language independent basis.
  if (VectorEltTy->isIntegralType(S.Context) &&
      ScalarTy->isIntegralType(S.Context) &&
      S.Context.getIntegerTypeOrder(VectorEltTy, ScalarTy)) {

    if (canConvertIntToOtherIntTy(S, Scalar, VectorEltTy))
      return true;

    ScalarCast = CK_IntegralCast;
  } else if (VectorEltTy->isRealFloatingType()) {
    if (ScalarTy->isRealFloatingType()) {

      // Reject cases where the scalar type is not a constant and has a higher
      // Order than the vector element type.
      llvm::APFloat Result(0.0);
      bool CstScalar = Scalar->get()->EvaluateAsFloat(Result, S.Context);
      int Order = S.Context.getFloatingTypeOrder(VectorEltTy, ScalarTy);
      if (!CstScalar && Order < 0)
        return true;

      // If the scalar cannot be safely cast to the vector element type,
      // reject it.
      if (CstScalar) {
        bool Truncated = false;
        Result.convert(S.Context.getFloatTypeSemantics(VectorEltTy),
                       llvm::APFloat::rmNearestTiesToEven, &Truncated);
        if (Truncated)
          return true;
      }

      ScalarCast = CK_FloatingCast;
    } else if (ScalarTy->isIntegralType(S.Context)) {
      if (canConvertIntTyToFloatTy(S, Scalar, VectorEltTy))
        return true;

      ScalarCast = CK_IntegralToFloating;
    } else
      return true;
  }

  // Adjust scalar if desired.
  if (Scalar) {
    if (ScalarCast != CK_NoOp)
      *Scalar = S.ImpCastExprToType(Scalar->get(), VectorEltTy, ScalarCast);
    *Scalar = S.ImpCastExprToType(Scalar->get(), VectorTy, CK_VectorSplat);
  }
  return false;
}
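// NOTE (editor's illustration, not from the original source): for GCC-style
// vectors the constant check above distinguishes
//
//   typedef char v8c __attribute__((vector_size(8)));
//   v8c v;
//   v = v + 1;     // OK: 1 fits in 'char'; CK_IntegralCast + CK_VectorSplat
//   v = v + 999;   // rejected: would truncate, so CheckVectorOperands emits
//                  // err_typecheck_vector_not_convertable_implict_truncation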
QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
                                   SourceLocation Loc, bool IsCompAssign,
                                   bool AllowBothBool,
                                   bool AllowBoolConversions) {
  if (!IsCompAssign) {
    LHS = DefaultFunctionArrayLvalueConversion(LHS.get());
    if (LHS.isInvalid())
      return QualType();
  }
  RHS = DefaultFunctionArrayLvalueConversion(RHS.get());
  if (RHS.isInvalid())
    return QualType();

  // For conversion purposes, we ignore any qualifiers.
  // For example, "const float" and "float" are equivalent.
  QualType LHSType = LHS.get()->getType().getUnqualifiedType();
  QualType RHSType = RHS.get()->getType().getUnqualifiedType();

  const VectorType *LHSVecType = LHSType->getAs<VectorType>();
  const VectorType *RHSVecType = RHSType->getAs<VectorType>();
  assert(LHSVecType || RHSVecType);

  // AltiVec-style "vector bool op vector bool" combinations are allowed
  // for some operators but not others.
  if (!AllowBothBool &&
      LHSVecType && LHSVecType->getVectorKind() == VectorType::AltiVecBool &&
      RHSVecType && RHSVecType->getVectorKind() == VectorType::AltiVecBool)
    return InvalidOperands(Loc, LHS, RHS);

  // If the vector types are identical, return.
  if (Context.hasSameType(LHSType, RHSType))
    return LHSType;

  // If we have compatible AltiVec and GCC vector types, use the AltiVec type.
  if (LHSVecType && RHSVecType &&
      Context.areCompatibleVectorTypes(LHSType, RHSType)) {
    if (isa<ExtVectorType>(LHSVecType)) {
      RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast);
      return LHSType;
    }

    if (!IsCompAssign)
      LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast);
    return RHSType;
  }

  // AllowBoolConversions says that bool and non-bool AltiVec vectors
  // can be mixed, with the result being the non-bool type. The non-bool
  // operand must have integer element type.
  if (AllowBoolConversions && LHSVecType && RHSVecType &&
      LHSVecType->getNumElements() == RHSVecType->getNumElements() &&
      (Context.getTypeSize(LHSVecType->getElementType()) ==
       Context.getTypeSize(RHSVecType->getElementType()))) {
    if (LHSVecType->getVectorKind() == VectorType::AltiVecVector &&
        LHSVecType->getElementType()->isIntegerType() &&
        RHSVecType->getVectorKind() == VectorType::AltiVecBool) {
      RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast);
      return LHSType;
    }
    if (!IsCompAssign &&
        LHSVecType->getVectorKind() == VectorType::AltiVecBool &&
        RHSVecType->getVectorKind() == VectorType::AltiVecVector &&
        RHSVecType->getElementType()->isIntegerType()) {
      LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast);
      return RHSType;
    }
  }

  // If there's a vector type and a scalar, try to convert the scalar to
  // the vector element type and splat.
  unsigned DiagID = diag::err_typecheck_vector_not_convertable;
  if (!RHSVecType) {
    if (isa<ExtVectorType>(LHSVecType)) {
      if (!tryVectorConvertAndSplat(*this, &RHS, RHSType,
                                    LHSVecType->getElementType(),
                                    LHSType, DiagID))
        return LHSType;
    } else {
      if (!tryGCCVectorConvertAndSplat(*this, &RHS, &LHS))
        return LHSType;
    }
  }
  if (!LHSVecType) {
    if (isa<ExtVectorType>(RHSVecType)) {
      if (!tryVectorConvertAndSplat(*this, (IsCompAssign ? nullptr : &LHS),
                                    LHSType, RHSVecType->getElementType(),
                                    RHSType, DiagID))
        return RHSType;
    } else {
      if (LHS.get()->getValueKind() == VK_LValue ||
          !tryGCCVectorConvertAndSplat(*this, &LHS, &RHS))
        return RHSType;
    }
  }

  // FIXME: The code below also handles conversion between vectors and
  // non-scalars, we should break this down into fine grained specific checks
  // and emit proper diagnostics.
  QualType VecType = LHSVecType ? LHSType : RHSType;
  const VectorType *VT = LHSVecType ? LHSVecType : RHSVecType;
  QualType OtherType = LHSVecType ? RHSType : LHSType;
  ExprResult *OtherExpr = LHSVecType ? &RHS : &LHS;
  if (isLaxVectorConversion(OtherType, VecType)) {
    // If we're allowing lax vector conversions, only the total (data) size
    // needs to be the same. For non compound assignment, if one of the types is
    // scalar, the result is always the vector type.
    if (!IsCompAssign) {
      *OtherExpr = ImpCastExprToType(OtherExpr->get(), VecType, CK_BitCast);
      return VecType;
    // In a compound assignment, lhs += rhs, 'lhs' is an lvalue src, forbidding
    // any implicit cast. Here, the 'rhs' should be implicitly cast to 'lhs'
    // type. Note that this is already done by non-compound assignments in
    // CheckAssignmentConstraints. If it's a scalar type, only bitcast for
    // <1 x T> -> T. The result is also a vector type.
    } else if (OtherType->isExtVectorType() || OtherType->isVectorType() ||
               (OtherType->isScalarType() && VT->getNumElements() == 1)) {
      ExprResult *RHSExpr = &RHS;
      *RHSExpr = ImpCastExprToType(RHSExpr->get(), LHSType, CK_BitCast);
      return VecType;
    }
  }

  // Okay, the expression is invalid.

  // If there's a non-vector, non-real operand, diagnose that.
  if ((!RHSVecType && !RHSType->isRealType()) ||
      (!LHSVecType && !LHSType->isRealType())) {
    Diag(Loc, diag::err_typecheck_vector_not_convertable_non_scalar)
        << LHSType << RHSType
        << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
    return QualType();
  }

  // OpenCL V1.1 6.2.6.p1:
  // If the operands are of more than one vector type, then an error shall
  // occur. Implicit conversions between vector types are not permitted, per
  // section 6.2.1.
  if (getLangOpts().OpenCL &&
      RHSVecType && isa<ExtVectorType>(RHSVecType) &&
      LHSVecType && isa<ExtVectorType>(LHSVecType)) {
    Diag(Loc, diag::err_opencl_implicit_vector_conversion) << LHSType
                                                           << RHSType;
    return QualType();
  }

  // If there is a vector type that is not a ExtVector and a scalar, we reach
  // this point if scalar could not be converted to the vector's element type
  // without truncation.
  if ((RHSVecType && !isa<ExtVectorType>(RHSVecType)) ||
      (LHSVecType && !isa<ExtVectorType>(LHSVecType))) {
    QualType Scalar = LHSVecType ? RHSType : LHSType;
    QualType Vector = LHSVecType ? LHSType : RHSType;
    unsigned ScalarOrVector = LHSVecType && RHSVecType ? 1 : 0;
    Diag(Loc, diag::err_typecheck_vector_not_convertable_implict_truncation)
        << ScalarOrVector << Scalar << Vector;

    return QualType();
  }

  // Otherwise, use the generic diagnostic.
  Diag(Loc, DiagID)
      << LHSType << RHSType
      << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
  return QualType();
}

// checkArithmeticNull - Detect when a NULL constant is used improperly in an
// expression.  These are mainly cases where the null pointer is used as an
// integer instead of a pointer.
static void checkArithmeticNull(Sema &S, ExprResult &LHS, ExprResult &RHS,
                                SourceLocation Loc, bool IsCompare) {
  // The canonical way to check for a GNU null is with isNullPointerConstant,
  // but we use a bit of a hack here for speed; this is a relatively
  // hot path, and isNullPointerConstant is slow.
  bool LHSNull = isa<GNUNullExpr>(LHS.get()->IgnoreParenImpCasts());
  bool RHSNull = isa<GNUNullExpr>(RHS.get()->IgnoreParenImpCasts());

  QualType NonNullType = LHSNull ? RHS.get()->getType() : LHS.get()->getType();

  // Avoid analyzing cases where the result will either be invalid (and
  // diagnosed as such) or entirely valid and not something to warn about.
  if ((!LHSNull && !RHSNull) || NonNullType->isBlockPointerType() ||
      NonNullType->isMemberPointerType() || NonNullType->isFunctionType())
    return;

  // Comparison operations would not make sense with a null pointer no matter
  // what the other expression is.
  if (!IsCompare) {
    S.Diag(Loc, diag::warn_null_in_arithmetic_operation)
        << (LHSNull ? LHS.get()->getSourceRange() : SourceRange())
        << (RHSNull ? RHS.get()->getSourceRange() : SourceRange());
    return;
  }

  // The rest of the operations only make sense with a null pointer
  // if the other expression is a pointer.
  if (LHSNull == RHSNull || NonNullType->isAnyPointerType() ||
      NonNullType->canDecayToPointerType())
    return;

  S.Diag(Loc, diag::warn_null_in_comparison_operation)
      << LHSNull /* LHS is NULL */ << NonNullType
      << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
}

static void DiagnoseDivisionSizeofPointer(Sema &S, Expr *LHS, Expr *RHS,
                                          SourceLocation Loc) {
  const auto *LUE = dyn_cast<UnaryExprOrTypeTraitExpr>(LHS);
  const auto *RUE = dyn_cast<UnaryExprOrTypeTraitExpr>(RHS);
  if (!LUE || !RUE)
    return;
  if (LUE->getKind() != UETT_SizeOf || LUE->isArgumentType() ||
      RUE->getKind() != UETT_SizeOf)
    return;

  QualType LHSTy = LUE->getArgumentExpr()->IgnoreParens()->getType();
  QualType RHSTy;
  if (RUE->isArgumentType())
    RHSTy = RUE->getArgumentType();
  else
    RHSTy = RUE->getArgumentExpr()->IgnoreParens()->getType();

  if (!LHSTy->isPointerType() || RHSTy->isPointerType())
    return;
  if (LHSTy->getPointeeType() != RHSTy)
    return;

  S.Diag(Loc, diag::warn_division_sizeof_ptr) << LHS << LHS->getSourceRange();
}
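// NOTE (editor's illustration, not from the original source): the sizeof
// check above catches the array-length idiom misapplied to a pointer:
//
//   int *p;
//   size_t n = sizeof(p) / sizeof(*p);   // warn_division_sizeof_ptr:
//                                        // divides pointer size, not length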
static void DiagnoseBadDivideOrRemainderValues(Sema& S, ExprResult &LHS,
                                               ExprResult &RHS,
                                               SourceLocation Loc, bool IsDiv) {
  // Check for division/remainder by zero.
  Expr::EvalResult RHSValue;
  if (!RHS.get()->isValueDependent() &&
      RHS.get()->EvaluateAsInt(RHSValue, S.Context) &&
      RHSValue.Val.getInt() == 0)
    S.DiagRuntimeBehavior(Loc, RHS.get(),
                          S.PDiag(diag::warn_remainder_division_by_zero)
                              << IsDiv << RHS.get()->getSourceRange());
}

QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS,
                                           SourceLocation Loc,
                                           bool IsCompAssign, bool IsDiv) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType())
    return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
                               /*AllowBothBool*/getLangOpts().AltiVec,
                               /*AllowBoolConversions*/false);

  QualType compType = UsualArithmeticConversions(LHS, RHS, IsCompAssign);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  if (compType.isNull() || !compType->isArithmeticType())
    return InvalidOperands(Loc, LHS, RHS);

  if (IsDiv) {
    DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, IsDiv);
    DiagnoseDivisionSizeofPointer(*this, LHS.get(), RHS.get(), Loc);
  }
  return compType;
}

QualType Sema::CheckRemainderOperands(
  ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType()) {
    if (LHS.get()->getType()->hasIntegerRepresentation() &&
        RHS.get()->getType()->hasIntegerRepresentation())
      return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
                                 /*AllowBothBool*/getLangOpts().AltiVec,
                                 /*AllowBoolConversions*/false);
    return InvalidOperands(Loc, LHS, RHS);
  }

  QualType compType = UsualArithmeticConversions(LHS, RHS, IsCompAssign);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  if (compType.isNull() || !compType->isIntegerType())
    return InvalidOperands(Loc, LHS, RHS);
  DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, false /* IsDiv */);
  return compType;
}
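// NOTE (editor's illustration, not from the original source):
//
//   int a = 10 / 0;   // warn_remainder_division_by_zero, IsDiv = true
//   int b = 10 % 0;   // warn_remainder_division_by_zero, IsDiv = false
//
// Both go through DiagRuntimeBehavior, so the diagnostic can be suppressed
// for code that is never actually executed.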
/// Diagnose invalid arithmetic on two void pointers.
static void diagnoseArithmeticOnTwoVoidPointers(Sema &S, SourceLocation Loc,
                                                Expr *LHSExpr, Expr *RHSExpr) {
  S.Diag(Loc, S.getLangOpts().CPlusPlus
                  ? diag::err_typecheck_pointer_arith_void_type
                  : diag::ext_gnu_void_ptr)
      << 1 /* two pointers */ << LHSExpr->getSourceRange()
      << RHSExpr->getSourceRange();
}

/// Diagnose invalid arithmetic on a void pointer.
static void diagnoseArithmeticOnVoidPointer(Sema &S, SourceLocation Loc,
                                            Expr *Pointer) {
  S.Diag(Loc, S.getLangOpts().CPlusPlus
                  ? diag::err_typecheck_pointer_arith_void_type
                  : diag::ext_gnu_void_ptr)
      << 0 /* one pointer */ << Pointer->getSourceRange();
}

/// Diagnose invalid arithmetic on a null pointer.
///
/// If \p IsGNUIdiom is true, the operation is using the 'p = (i8*)nullptr + n'
/// idiom, which we recognize as a GNU extension.
///
static void diagnoseArithmeticOnNullPointer(Sema &S, SourceLocation Loc,
                                            Expr *Pointer, bool IsGNUIdiom) {
  if (IsGNUIdiom)
    S.Diag(Loc, diag::warn_gnu_null_ptr_arith)
        << Pointer->getSourceRange();
  else
    S.Diag(Loc, diag::warn_pointer_arith_null_ptr)
        << S.getLangOpts().CPlusPlus << Pointer->getSourceRange();
}

/// Diagnose invalid arithmetic on two function pointers.
static void diagnoseArithmeticOnTwoFunctionPointers(Sema &S, SourceLocation Loc,
                                                    Expr *LHS, Expr *RHS) {
  assert(LHS->getType()->isAnyPointerType());
  assert(RHS->getType()->isAnyPointerType());
  S.Diag(Loc, S.getLangOpts().CPlusPlus
                  ? diag::err_typecheck_pointer_arith_function_type
                  : diag::ext_gnu_ptr_func_arith)
      << 1 /* two pointers */ << LHS->getType()->getPointeeType()
      // We only show the second type if it differs from the first.
      << (unsigned)!S.Context.hasSameUnqualifiedType(LHS->getType(),
                                                     RHS->getType())
      << RHS->getType()->getPointeeType()
      << LHS->getSourceRange() << RHS->getSourceRange();
}

/// Diagnose invalid arithmetic on a function pointer.
static void diagnoseArithmeticOnFunctionPointer(Sema &S, SourceLocation Loc,
                                                Expr *Pointer) {
  assert(Pointer->getType()->isAnyPointerType());
  S.Diag(Loc, S.getLangOpts().CPlusPlus
                  ? diag::err_typecheck_pointer_arith_function_type
                  : diag::ext_gnu_ptr_func_arith)
      << 0 /* one pointer */ << Pointer->getType()->getPointeeType()
      << 0 /* one pointer, so only one type */
      << Pointer->getSourceRange();
}

/// Emit error if Operand is incomplete pointer type
///
/// \returns True if pointer has incomplete type
static bool checkArithmeticIncompletePointerType(Sema &S, SourceLocation Loc,
                                                 Expr *Operand) {
  QualType ResType = Operand->getType();
  if (const AtomicType *ResAtomicType = ResType->getAs<AtomicType>())
    ResType = ResAtomicType->getValueType();

  assert(ResType->isAnyPointerType() && !ResType->isDependentType());
  QualType PointeeTy = ResType->getPointeeType();
  return S.RequireCompleteType(Loc, PointeeTy,
                               diag::err_typecheck_arithmetic_incomplete_type,
                               PointeeTy, Operand->getSourceRange());
}

/// Check the validity of an arithmetic pointer operand.
///
/// If the operand has pointer type, this code will check for pointer types
/// which are invalid in arithmetic operations. These will be diagnosed
/// appropriately, including whether or not the use is supported as an
/// extension.
///
/// \returns True when the operand is valid to use (even if as an extension).
static bool checkArithmeticOpPointerOperand(Sema &S, SourceLocation Loc,
                                            Expr *Operand) {
  QualType ResType = Operand->getType();
  if (const AtomicType *ResAtomicType = ResType->getAs<AtomicType>())
    ResType = ResAtomicType->getValueType();

  if (!ResType->isAnyPointerType())
    return true;

  QualType PointeeTy = ResType->getPointeeType();
  if (PointeeTy->isVoidType()) {
    diagnoseArithmeticOnVoidPointer(S, Loc, Operand);
    return !S.getLangOpts().CPlusPlus;
  }
  if (PointeeTy->isFunctionType()) {
    diagnoseArithmeticOnFunctionPointer(S, Loc, Operand);
    return !S.getLangOpts().CPlusPlus;
  }

  if (checkArithmeticIncompletePointerType(S, Loc, Operand))
    return false;

  return true;
}
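// NOTE (editor's illustration, not from the original source): these helpers
// implement the C-only GNU extensions for ill-typed pointer arithmetic:
//
//   void *v;  void (*f)(void);
//   v + 1;   // C: ext_gnu_void_ptr warning; C++: hard error
//   f + 1;   // C: ext_gnu_ptr_func_arith warning; C++: hard error
//
// checkArithmeticOpPointerOperand returns true ("usable") exactly in the
// C-extension cases, and false once the pointee is genuinely incomplete.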
/// Check the validity of a binary arithmetic operation w.r.t. pointer
/// operands.
///
/// This routine will diagnose any invalid arithmetic on pointer operands much
/// like \see checkArithmeticOpPointerOperand. However, it has special logic
/// for emitting a single diagnostic even for operations where both LHS and RHS
/// are (potentially problematic) pointers.
///
/// \returns True when the operand is valid to use (even if as an extension).
static bool checkArithmeticBinOpPointerOperands(Sema &S, SourceLocation Loc,
                                                Expr *LHSExpr, Expr *RHSExpr) {
  bool isLHSPointer = LHSExpr->getType()->isAnyPointerType();
  bool isRHSPointer = RHSExpr->getType()->isAnyPointerType();
  if (!isLHSPointer && !isRHSPointer)
    return true;

  QualType LHSPointeeTy, RHSPointeeTy;
  if (isLHSPointer)
    LHSPointeeTy = LHSExpr->getType()->getPointeeType();
  if (isRHSPointer)
    RHSPointeeTy = RHSExpr->getType()->getPointeeType();

  // if both are pointers check if operation is valid wrt address spaces
  if (S.getLangOpts().OpenCL && isLHSPointer && isRHSPointer) {
    const PointerType *lhsPtr = LHSExpr->getType()->getAs<PointerType>();
    const PointerType *rhsPtr = RHSExpr->getType()->getAs<PointerType>();
    if (!lhsPtr->isAddressSpaceOverlapping(*rhsPtr)) {
      S.Diag(Loc,
             diag::err_typecheck_op_on_nonoverlapping_address_space_pointers)
          << LHSExpr->getType() << RHSExpr->getType() << 1 /*arithmetic op*/
          << LHSExpr->getSourceRange() << RHSExpr->getSourceRange();
      return false;
    }
  }

  // Check for arithmetic on pointers to incomplete types.
  bool isLHSVoidPtr = isLHSPointer && LHSPointeeTy->isVoidType();
  bool isRHSVoidPtr = isRHSPointer && RHSPointeeTy->isVoidType();
  if (isLHSVoidPtr || isRHSVoidPtr) {
    if (!isRHSVoidPtr)
      diagnoseArithmeticOnVoidPointer(S, Loc, LHSExpr);
    else if (!isLHSVoidPtr)
      diagnoseArithmeticOnVoidPointer(S, Loc, RHSExpr);
    else
      diagnoseArithmeticOnTwoVoidPointers(S, Loc, LHSExpr, RHSExpr);

    return !S.getLangOpts().CPlusPlus;
  }

  bool isLHSFuncPtr = isLHSPointer && LHSPointeeTy->isFunctionType();
  bool isRHSFuncPtr = isRHSPointer && RHSPointeeTy->isFunctionType();
  if (isLHSFuncPtr || isRHSFuncPtr) {
    if (!isRHSFuncPtr)
      diagnoseArithmeticOnFunctionPointer(S, Loc, LHSExpr);
    else if (!isLHSFuncPtr)
      diagnoseArithmeticOnFunctionPointer(S, Loc, RHSExpr);
    else
      diagnoseArithmeticOnTwoFunctionPointers(S, Loc, LHSExpr, RHSExpr);

    return !S.getLangOpts().CPlusPlus;
  }

  if (isLHSPointer && checkArithmeticIncompletePointerType(S, Loc, LHSExpr))
    return false;
  if (isRHSPointer && checkArithmeticIncompletePointerType(S, Loc, RHSExpr))
    return false;

  return true;
}

/// diagnoseStringPlusInt - Emit a warning when adding an integer to a string
/// literal.
static void diagnoseStringPlusInt(Sema &Self, SourceLocation OpLoc,
                                  Expr *LHSExpr, Expr *RHSExpr) {
  StringLiteral* StrExpr = dyn_cast<StringLiteral>(LHSExpr->IgnoreImpCasts());
  Expr* IndexExpr = RHSExpr;
  if (!StrExpr) {
    StrExpr = dyn_cast<StringLiteral>(RHSExpr->IgnoreImpCasts());
    IndexExpr = LHSExpr;
  }

  bool IsStringPlusInt = StrExpr &&
      IndexExpr->getType()->isIntegralOrUnscopedEnumerationType();
  if (!IsStringPlusInt || IndexExpr->isValueDependent())
    return;

  SourceRange DiagRange(LHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
  Self.Diag(OpLoc, diag::warn_string_plus_int)
      << DiagRange << IndexExpr->IgnoreImpCasts()->getType();

  // Only print a fixit for "str" + int, not for int + "str".
  if (IndexExpr == RHSExpr) {
    SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getEndLoc());
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence)
        << FixItHint::CreateInsertion(LHSExpr->getBeginLoc(), "&")
        << FixItHint::CreateReplacement(SourceRange(OpLoc), "[")
        << FixItHint::CreateInsertion(EndLoc, "]");
  } else
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence);
}
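// NOTE (editor's illustration, not from the original source):
//
//   const char *p = "clang" + 2;   // warn_string_plus_int; the note's fixit
//                                  // rewrites it to &"clang"[2]
//
// The reversed '2 + "clang"' form is warned about too, but gets no fixit.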
/// Emit a warning when adding a char literal to a string.
static void diagnoseStringPlusChar(Sema &Self, SourceLocation OpLoc,
                                   Expr *LHSExpr, Expr *RHSExpr) {
  const Expr *StringRefExpr = LHSExpr;
  const CharacterLiteral *CharExpr =
      dyn_cast<CharacterLiteral>(RHSExpr->IgnoreImpCasts());

  if (!CharExpr) {
    CharExpr = dyn_cast<CharacterLiteral>(LHSExpr->IgnoreImpCasts());
    StringRefExpr = RHSExpr;
  }

  if (!CharExpr || !StringRefExpr)
    return;

  const QualType StringType = StringRefExpr->getType();

  // Return if not a PointerType.
  if (!StringType->isAnyPointerType())
    return;

  // Return if not a CharacterType.
  if (!StringType->getPointeeType()->isAnyCharacterType())
    return;

  ASTContext &Ctx = Self.getASTContext();
  SourceRange DiagRange(LHSExpr->getBeginLoc(), RHSExpr->getEndLoc());

  const QualType CharType = CharExpr->getType();
  if (!CharType->isAnyCharacterType() &&
      CharType->isIntegerType() &&
      llvm::isUIntN(Ctx.getCharWidth(), CharExpr->getValue())) {
    Self.Diag(OpLoc, diag::warn_string_plus_char)
        << DiagRange << Ctx.CharTy;
  } else {
    Self.Diag(OpLoc, diag::warn_string_plus_char)
        << DiagRange << CharExpr->getType();
  }

  // Only print a fixit for str + char, not for char + str.
  if (isa<CharacterLiteral>(RHSExpr->IgnoreImpCasts())) {
    SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getEndLoc());
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence)
        << FixItHint::CreateInsertion(LHSExpr->getBeginLoc(), "&")
        << FixItHint::CreateReplacement(SourceRange(OpLoc), "[")
        << FixItHint::CreateInsertion(EndLoc, "]");
  } else {
    Self.Diag(OpLoc, diag::note_string_plus_scalar_silence);
  }
}

/// Emit error when two pointers are incompatible.
static void diagnosePointerIncompatibility(Sema &S, SourceLocation Loc,
                                           Expr *LHSExpr, Expr *RHSExpr) {
  assert(LHSExpr->getType()->isAnyPointerType());
  assert(RHSExpr->getType()->isAnyPointerType());
  S.Diag(Loc, diag::err_typecheck_sub_ptr_compatible)
      << LHSExpr->getType() << RHSExpr->getType()
      << LHSExpr->getSourceRange() << RHSExpr->getSourceRange();
}
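// NOTE (editor's illustration, not from the original source):
//
//   const char *q = "four" + '!';   // warn_string_plus_char
//
// This usually indicates attempted concatenation; the note's fixit offers
// the array-index rewrite only for the str + char operand order.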
// C99 6.5.6
QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS,
                                     SourceLocation Loc, BinaryOperatorKind Opc,
                                     QualType* CompLHSTy) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType()) {
    QualType compType = CheckVectorOperands(
        LHS, RHS, Loc, CompLHSTy,
        /*AllowBothBool*/getLangOpts().AltiVec,
        /*AllowBoolConversions*/getLangOpts().ZVector);
    if (CompLHSTy) *CompLHSTy = compType;
    return compType;
  }

  QualType compType = UsualArithmeticConversions(LHS, RHS, CompLHSTy);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  // Diagnose "string literal" '+' int and string '+' "char literal".
  if (Opc == BO_Add) {
    diagnoseStringPlusInt(*this, Loc, LHS.get(), RHS.get());
    diagnoseStringPlusChar(*this, Loc, LHS.get(), RHS.get());
  }

  // handle the common case first (both operands are arithmetic).
  if (!compType.isNull() && compType->isArithmeticType()) {
    if (CompLHSTy) *CompLHSTy = compType;
    return compType;
  }

  // Type-checking.  Ultimately the pointer's going to be in PExp;
  // note that we bias towards the LHS being the pointer.
  Expr *PExp = LHS.get(), *IExp = RHS.get();

  bool isObjCPointer;
  if (PExp->getType()->isPointerType()) {
    isObjCPointer = false;
  } else if (PExp->getType()->isObjCObjectPointerType()) {
    isObjCPointer = true;
  } else {
    std::swap(PExp, IExp);
    if (PExp->getType()->isPointerType()) {
      isObjCPointer = false;
    } else if (PExp->getType()->isObjCObjectPointerType()) {
      isObjCPointer = true;
    } else {
      return InvalidOperands(Loc, LHS, RHS);
    }
  }
  assert(PExp->getType()->isAnyPointerType());

  if (!IExp->getType()->isIntegerType())
    return InvalidOperands(Loc, LHS, RHS);

  // Adding to a null pointer results in undefined behavior.
  if (PExp->IgnoreParenCasts()->isNullPointerConstant(
          Context, Expr::NPC_ValueDependentIsNotNull)) {
    // In C++ adding zero to a null pointer is defined.
    Expr::EvalResult KnownVal;
    if (!getLangOpts().CPlusPlus ||
        (!IExp->isValueDependent() &&
         (!IExp->EvaluateAsInt(KnownVal, Context) ||
          KnownVal.Val.getInt() != 0))) {
      // Check the conditions to see if this is the 'p = nullptr + n' idiom.
      bool IsGNUIdiom = BinaryOperator::isNullPointerArithmeticExtension(
          Context, BO_Add, PExp, IExp);
      diagnoseArithmeticOnNullPointer(*this, Loc, PExp, IsGNUIdiom);
    }
  }

  if (!checkArithmeticOpPointerOperand(*this, Loc, PExp))
    return QualType();

  if (isObjCPointer && checkArithmeticOnObjCPointer(*this, Loc, PExp))
    return QualType();

  // Check array bounds for pointer arithmetic
  CheckArrayAccess(PExp, IExp);

  if (CompLHSTy) {
    QualType LHSTy = Context.isPromotableBitField(LHS.get());
    if (LHSTy.isNull()) {
      LHSTy = LHS.get()->getType();
      if (LHSTy->isPromotableIntegerType())
        LHSTy = Context.getPromotedIntegerType(LHSTy);
    }
    *CompLHSTy = LHSTy;
  }

  return PExp->getType();
}
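// NOTE (editor's illustration, not from the original source):
//
//   char *p = (char *)0 + off;   // recognized GNU idiom:
//                                // milder warn_gnu_null_ptr_arith
//   char *q = 0;  q + 1;         // warn_pointer_arith_null_ptr (UB)
//
// In C++, 'q + 0' stays silent because adding zero to a null pointer is
// well defined there; the EvaluateAsInt check above encodes exactly that.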
// C99 6.5.6
QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS,
                                        SourceLocation Loc,
                                        QualType* CompLHSTy) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType()) {
    QualType compType = CheckVectorOperands(
        LHS, RHS, Loc, CompLHSTy,
        /*AllowBothBool*/getLangOpts().AltiVec,
        /*AllowBoolConversions*/getLangOpts().ZVector);
    if (CompLHSTy) *CompLHSTy = compType;
    return compType;
  }

  QualType compType = UsualArithmeticConversions(LHS, RHS, CompLHSTy);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  // Enforce type constraints: C99 6.5.6p3.

  // Handle the common case first (both operands are arithmetic).
  if (!compType.isNull() && compType->isArithmeticType()) {
    if (CompLHSTy) *CompLHSTy = compType;
    return compType;
  }

  // Either ptr - int   or   ptr - ptr.
  if (LHS.get()->getType()->isAnyPointerType()) {
    QualType lpointee = LHS.get()->getType()->getPointeeType();

    // Diagnose bad cases where we step over interface counts.
    if (LHS.get()->getType()->isObjCObjectPointerType() &&
        checkArithmeticOnObjCPointer(*this, Loc, LHS.get()))
      return QualType();

    // The result type of a pointer-int computation is the pointer type.
    if (RHS.get()->getType()->isIntegerType()) {
      // Subtracting from a null pointer should produce a warning.
      // The last argument to the diagnose call says this doesn't match the
      // GNU int-to-pointer idiom.
      if (LHS.get()->IgnoreParenCasts()->isNullPointerConstant(
              Context, Expr::NPC_ValueDependentIsNotNull)) {
        // In C++ adding zero to a null pointer is defined.
        Expr::EvalResult KnownVal;
        if (!getLangOpts().CPlusPlus ||
            (!RHS.get()->isValueDependent() &&
             (!RHS.get()->EvaluateAsInt(KnownVal, Context) ||
              KnownVal.Val.getInt() != 0))) {
          diagnoseArithmeticOnNullPointer(*this, Loc, LHS.get(), false);
        }
      }

      if (!checkArithmeticOpPointerOperand(*this, Loc, LHS.get()))
        return QualType();

      // Check array bounds for pointer arithmetic
      CheckArrayAccess(LHS.get(), RHS.get(), /*ArraySubscriptExpr*/nullptr,
                       /*AllowOnePastEnd*/true, /*IndexNegated*/true);

      if (CompLHSTy) *CompLHSTy = LHS.get()->getType();
      return LHS.get()->getType();
    }

    // Handle pointer-pointer subtractions.
    if (const PointerType *RHSPTy =
            RHS.get()->getType()->getAs<PointerType>()) {
      QualType rpointee = RHSPTy->getPointeeType();

      if (getLangOpts().CPlusPlus) {
        // Pointee types must be the same: C++ [expr.add]
        if (!Context.hasSameUnqualifiedType(lpointee, rpointee)) {
          diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get());
        }
      } else {
        // Pointee types must be compatible C99 6.5.6p3
        if (!Context.typesAreCompatible(
                Context.getCanonicalType(lpointee).getUnqualifiedType(),
                Context.getCanonicalType(rpointee).getUnqualifiedType())) {
          diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get());
          return QualType();
        }
      }

      if (!checkArithmeticBinOpPointerOperands(*this, Loc,
                                               LHS.get(), RHS.get()))
        return QualType();

      // FIXME: Add warnings for nullptr - ptr.

      // The pointee type may have zero size.  As an extension, a structure or
      // union may have zero size or an array may have zero length.  In this
      // case subtraction does not make sense.
      if (!rpointee->isVoidType() && !rpointee->isFunctionType()) {
        CharUnits ElementSize = Context.getTypeSizeInChars(rpointee);
        if (ElementSize.isZero()) {
          Diag(Loc, diag::warn_sub_ptr_zero_size_types)
              << rpointee.getUnqualifiedType()
              << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
        }
      }

      if (CompLHSTy) *CompLHSTy = LHS.get()->getType();
      return Context.getPointerDiffType();
    }
  }

  return InvalidOperands(Loc, LHS, RHS);
}
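// NOTE (editor's illustration, not from the original source):
//
//   int *a, *b;   ptrdiff_t d = a - b;   // OK: pointer-diff result type
//   int *a; long *c;          a - c;     // err_typecheck_sub_ptr_compatible
//
// With a zero-sized pointee (a GNU zero-length-array or empty-struct
// extension), warn_sub_ptr_zero_size_types fires instead, since an element
// size of zero makes the difference meaningless.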
static bool isScopedEnumerationType(QualType T) {
  if (const EnumType *ET = T->getAs<EnumType>())
    return ET->getDecl()->isScoped();
  return false;
}

static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS,
                                   SourceLocation Loc, BinaryOperatorKind Opc,
                                   QualType LHSType) {
  // OpenCL 6.3j: shift values are effectively % word size of LHS (more defined),
  // so skip remaining warnings as we don't want to modify values within Sema.
  if (S.getLangOpts().OpenCL)
    return;

  // Check right/shifter operand
  Expr::EvalResult RHSResult;
  if (RHS.get()->isValueDependent() ||
      !RHS.get()->EvaluateAsInt(RHSResult, S.Context))
    return;
  llvm::APSInt Right = RHSResult.Val.getInt();

  if (Right.isNegative()) {
    S.DiagRuntimeBehavior(Loc, RHS.get(),
                          S.PDiag(diag::warn_shift_negative)
                              << RHS.get()->getSourceRange());
    return;
  }
  llvm::APInt LeftBits(Right.getBitWidth(),
                       S.Context.getTypeSize(LHS.get()->getType()));
  if (Right.uge(LeftBits)) {
    S.DiagRuntimeBehavior(Loc, RHS.get(),
                          S.PDiag(diag::warn_shift_gt_typewidth)
                              << RHS.get()->getSourceRange());
    return;
  }
  if (Opc != BO_Shl)
    return;

  // When left shifting an ICE which is signed, we can check for overflow which
  // according to C++ has undefined behavior ([expr.shift] 5.8/2). Unsigned
  // integers have defined behavior modulo one more than the maximum value
  // representable in the result type, so never warn for those.
  Expr::EvalResult LHSResult;
  if (LHS.get()->isValueDependent() ||
      LHSType->hasUnsignedIntegerRepresentation() ||
      !LHS.get()->EvaluateAsInt(LHSResult, S.Context))
    return;
  llvm::APSInt Left = LHSResult.Val.getInt();

  // If the LHS is signed and its value is negative, the behavior of the
  // shift is undefined.  Warn about it.
  if (Left.isNegative() && !S.getLangOpts().isSignedOverflowDefined()) {
    S.DiagRuntimeBehavior(Loc, LHS.get(),
                          S.PDiag(diag::warn_shift_lhs_negative)
                              << LHS.get()->getSourceRange());
    return;
  }

  llvm::APInt ResultBits =
      static_cast<llvm::APInt &>(Right) + Left.getMinSignedBits();
  if (LeftBits.uge(ResultBits))
    return;
  llvm::APSInt Result = Left.extend(ResultBits.getLimitedValue());
  Result = Result.shl(Right);

  // Print the bit representation of the signed integer as an unsigned
  // hexadecimal number.
  SmallString<40> HexResult;
  Result.toString(HexResult, 16, /*Signed =*/false, /*Literal =*/true);

  // If we are only missing a sign bit, this is less likely to result in actual
  // bugs -- if the result is cast back to an unsigned type, it will have the
  // expected value. Thus we place this behind a different warning that can be
  // turned off separately if needed.
  if (LeftBits == ResultBits - 1) {
    S.Diag(Loc, diag::warn_shift_result_sets_sign_bit)
        << HexResult << LHSType
        << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
    return;
  }

  S.Diag(Loc, diag::warn_shift_result_gt_typewidth)
      << HexResult.str() << Result.getMinSignedBits() << LHSType
      << Left.getBitWidth() << LHS.get()->getSourceRange()
      << RHS.get()->getSourceRange();
}
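// NOTE (editor's illustration, not from the original source; assumes a
// 32-bit 'int'):
//
//   n << -1;   // warn_shift_negative
//   n << 32;   // warn_shift_gt_typewidth
//   1 << 31;   // warn_shift_result_sets_sign_bit (only the sign bit is set)
//   3 << 31;   // warn_shift_result_gt_typewidth (value bits are lost)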
/// Return the resulting type when a vector is shifted
/// by a scalar or vector shift amount.
static QualType checkVectorShift(Sema &S, ExprResult &LHS, ExprResult &RHS,
                                 SourceLocation Loc, bool IsCompAssign) {
  // OpenCL v1.1 s6.3.j says RHS can be a vector only if LHS is a vector.
  if ((S.LangOpts.OpenCL || S.LangOpts.ZVector) &&
      !LHS.get()->getType()->isVectorType()) {
    S.Diag(Loc, diag::err_shift_rhs_only_vector) << RHS.get()->getType()
        << LHS.get()->getType() << LHS.get()->getSourceRange()
        << RHS.get()->getSourceRange();
    return QualType();
  }

  if (!IsCompAssign) {
    LHS = S.UsualUnaryConversions(LHS.get());
    if (LHS.isInvalid()) return QualType();
  }

  RHS = S.UsualUnaryConversions(RHS.get());
  if (RHS.isInvalid()) return QualType();

  QualType LHSType = LHS.get()->getType();
  // Note that LHS might be a scalar, because this routine is called not only
  // in the OpenCL case.
  const VectorType *LHSVecTy = LHSType->getAs<VectorType>();
  QualType LHSEleType = LHSVecTy ? LHSVecTy->getElementType() : LHSType;

  // Note that RHS might not be a vector.
  QualType RHSType = RHS.get()->getType();
  const VectorType *RHSVecTy = RHSType->getAs<VectorType>();
  QualType RHSEleType = RHSVecTy ? RHSVecTy->getElementType() : RHSType;

  // The operands need to be integers.
  if (!LHSEleType->isIntegerType()) {
    S.Diag(Loc, diag::err_typecheck_expect_int)
        << LHS.get()->getType() << LHS.get()->getSourceRange();
    return QualType();
  }

  if (!RHSEleType->isIntegerType()) {
    S.Diag(Loc, diag::err_typecheck_expect_int)
        << RHS.get()->getType() << RHS.get()->getSourceRange();
    return QualType();
  }

  if (!LHSVecTy) {
    assert(RHSVecTy);
    if (IsCompAssign)
      return RHSType;
    if (LHSEleType != RHSEleType) {
      LHS = S.ImpCastExprToType(LHS.get(), RHSEleType, CK_IntegralCast);
      LHSEleType = RHSEleType;
    }
    QualType VecTy =
        S.Context.getExtVectorType(LHSEleType, RHSVecTy->getNumElements());
    LHS = S.ImpCastExprToType(LHS.get(), VecTy, CK_VectorSplat);
    LHSType = VecTy;
  } else if (RHSVecTy) {
    // OpenCL v1.1 s6.3.j says that for vector types, the operators
    // are applied component-wise. So if RHS is a vector, then ensure
    // that the number of elements is the same as LHS...
    if (RHSVecTy->getNumElements() != LHSVecTy->getNumElements()) {
      S.Diag(Loc, diag::err_typecheck_vector_lengths_not_equal)
          << LHS.get()->getType() << RHS.get()->getType()
          << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
      return QualType();
    }
    if (!S.LangOpts.OpenCL && !S.LangOpts.ZVector) {
      const BuiltinType *LHSBT = LHSEleType->getAs<BuiltinType>();
      const BuiltinType *RHSBT = RHSEleType->getAs<BuiltinType>();
      if (LHSBT != RHSBT &&
          S.Context.getTypeSize(LHSBT) != S.Context.getTypeSize(RHSBT)) {
        S.Diag(Loc, diag::warn_typecheck_vector_element_sizes_not_equal)
            << LHS.get()->getType() << RHS.get()->getType()
            << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
      }
    }
  } else {
    // ...else expand RHS to match the number of elements in LHS.
    QualType VecTy =
        S.Context.getExtVectorType(RHSEleType, LHSVecTy->getNumElements());
    RHS = S.ImpCastExprToType(RHS.get(), VecTy, CK_VectorSplat);
  }

  return LHSType;
}
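// NOTE (editor's illustration, not from the original source): for ext
// vectors in plain C mode:
//
//   typedef int int4 __attribute__((ext_vector_type(4)));
//   int4 v;  int s;
//   v << s;   // scalar RHS is splat to int4; the shift is component-wise
//   s << v;   // scalar LHS is splat first, so the result type is int4
//
// Under OpenCL or ZVector, 's << v' is instead rejected up front by the
// err_shift_rhs_only_vector check above.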
// C99 6.5.7
QualType Sema::CheckShiftOperands(ExprResult &LHS, ExprResult &RHS,
                                  SourceLocation Loc, BinaryOperatorKind Opc,
                                  bool IsCompAssign) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  // Vector shifts promote their scalar inputs to vector type.
  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType()) {
    if (LangOpts.ZVector) {
      // The shift operators for the z vector extensions work basically
      // like general shifts, except that neither the LHS nor the RHS is
      // allowed to be a "vector bool".
      if (auto LHSVecType = LHS.get()->getType()->getAs<VectorType>())
        if (LHSVecType->getVectorKind() == VectorType::AltiVecBool)
          return InvalidOperands(Loc, LHS, RHS);
      if (auto RHSVecType = RHS.get()->getType()->getAs<VectorType>())
        if (RHSVecType->getVectorKind() == VectorType::AltiVecBool)
          return InvalidOperands(Loc, LHS, RHS);
    }
    return checkVectorShift(*this, LHS, RHS, Loc, IsCompAssign);
  }

  // Shifts don't perform usual arithmetic conversions, they just do integer
  // promotions on each operand. C99 6.5.7p3

  // For the LHS, do usual unary conversions, but then reset them away
  // if this is a compound assignment.
  ExprResult OldLHS = LHS;
  LHS = UsualUnaryConversions(LHS.get());
  if (LHS.isInvalid())
    return QualType();
  QualType LHSType = LHS.get()->getType();
  if (IsCompAssign) LHS = OldLHS;

  // The RHS is simpler.
  RHS = UsualUnaryConversions(RHS.get());
  if (RHS.isInvalid())
    return QualType();
  QualType RHSType = RHS.get()->getType();

  // C99 6.5.7p2: Each of the operands shall have integer type.
  if (!LHSType->hasIntegerRepresentation() ||
      !RHSType->hasIntegerRepresentation())
    return InvalidOperands(Loc, LHS, RHS);

  // C++0x: Don't allow scoped enums. FIXME: Use something better than
  // hasIntegerRepresentation() above instead of this.
  if (isScopedEnumerationType(LHSType) ||
      isScopedEnumerationType(RHSType)) {
    return InvalidOperands(Loc, LHS, RHS);
  }
  // Sanity-check shift operands
  DiagnoseBadShiftValues(*this, LHS, RHS, Loc, Opc, LHSType);

  // "The type of the result is that of the promoted left operand."
  return LHSType;
}

/// If two different enums are compared, raise a warning.
static void checkEnumComparison(Sema &S, SourceLocation Loc, Expr *LHS,
                                Expr *RHS) {
  QualType LHSStrippedType = LHS->IgnoreParenImpCasts()->getType();
  QualType RHSStrippedType = RHS->IgnoreParenImpCasts()->getType();

  const EnumType *LHSEnumType = LHSStrippedType->getAs<EnumType>();
  if (!LHSEnumType)
    return;
  const EnumType *RHSEnumType = RHSStrippedType->getAs<EnumType>();
  if (!RHSEnumType)
    return;

  // Ignore anonymous enums.
  if (!LHSEnumType->getDecl()->getIdentifier() &&
      !LHSEnumType->getDecl()->getTypedefNameForAnonDecl())
    return;
  if (!RHSEnumType->getDecl()->getIdentifier() &&
      !RHSEnumType->getDecl()->getTypedefNameForAnonDecl())
    return;

  if (S.Context.hasSameUnqualifiedType(LHSStrippedType, RHSStrippedType))
    return;

  S.Diag(Loc, diag::warn_comparison_of_mixed_enum_types)
      << LHSStrippedType << RHSStrippedType
      << LHS->getSourceRange() << RHS->getSourceRange();
}
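// NOTE (editor's illustration, not from the original source):
//
//   enum Fruit { Apple };   enum Tool { Hammer };
//   if (Apple == Hammer)    // warn_comparison_of_mixed_enum_types
//     ...
//
// Anonymous enums (no tag and no typedef name) are deliberately exempted.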
return false; } } static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) { const ObjCObjectPointerType *Type = LHS->getType()->getAs(); // If this is not actually an Objective-C object, bail out. if (!Type) return false; // Get the LHS object's interface type. QualType InterfaceType = Type->getPointeeType(); // If the RHS isn't an Objective-C object, bail out. if (!RHS->getType()->isObjCObjectPointerType()) return false; // Try to find the -isEqual: method. Selector IsEqualSel = S.NSAPIObj->getIsEqualSelector(); ObjCMethodDecl *Method = S.LookupMethodInObjectType(IsEqualSel, InterfaceType, /*instance=*/true); if (!Method) { if (Type->isObjCIdType()) { // For 'id', just check the global pool. Method = S.LookupInstanceMethodInGlobalPool(IsEqualSel, SourceRange(), /*receiverId=*/true); } else { // Check protocols. Method = S.LookupMethodInQualifiedType(IsEqualSel, Type, /*instance=*/true); } } if (!Method) return false; QualType T = Method->parameters()[0]->getType(); if (!T->isObjCObjectPointerType()) return false; QualType R = Method->getReturnType(); if (!R->isScalarType()) return false; return true; } Sema::ObjCLiteralKind Sema::CheckLiteralKind(Expr *FromE) { FromE = FromE->IgnoreParenImpCasts(); switch (FromE->getStmtClass()) { default: break; case Stmt::ObjCStringLiteralClass: // "string literal" return LK_String; case Stmt::ObjCArrayLiteralClass: // "array literal" return LK_Array; case Stmt::ObjCDictionaryLiteralClass: // "dictionary literal" return LK_Dictionary; case Stmt::BlockExprClass: return LK_Block; case Stmt::ObjCBoxedExprClass: { Expr *Inner = cast(FromE)->getSubExpr()->IgnoreParens(); switch (Inner->getStmtClass()) { case Stmt::IntegerLiteralClass: case Stmt::FloatingLiteralClass: case Stmt::CharacterLiteralClass: case Stmt::ObjCBoolLiteralExprClass: case Stmt::CXXBoolLiteralExprClass: // "numeric literal" return LK_Numeric; case Stmt::ImplicitCastExprClass: { CastKind CK = cast(Inner)->getCastKind(); // Boolean literals can be represented by implicit casts. if (CK == CK_IntegralToBoolean || CK == CK_IntegralCast) return LK_Numeric; break; } default: break; } return LK_Boxed; } } return LK_None; } static void diagnoseObjCLiteralComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, BinaryOperator::Opcode Opc){ Expr *Literal; Expr *Other; if (isObjCObjectLiteral(LHS)) { Literal = LHS.get(); Other = RHS.get(); } else { Literal = RHS.get(); Other = LHS.get(); } // Don't warn on comparisons against nil. Other = Other->IgnoreParenCasts(); if (Other->isNullPointerConstant(S.getASTContext(), Expr::NPC_ValueDependentIsNotNull)) return; // This should be kept in sync with warn_objc_literal_comparison. // LK_String should always be after the other literals, since it has its own // warning flag. 
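// Illustrative example (not in the original source) of the diagnostic
// emitted below:
//
//   if (title == @"Done") { ... }   // warn_objc_string_literal_comparison
//
// with a fix-it rewriting the equality to a message send:
//
//   if ([title isEqual:@"Done"]) { ... }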
Sema::ObjCLiteralKind LiteralKind = S.CheckLiteralKind(Literal); assert(LiteralKind != Sema::LK_Block); if (LiteralKind == Sema::LK_None) { llvm_unreachable("Unknown Objective-C object literal kind"); } if (LiteralKind == Sema::LK_String) S.Diag(Loc, diag::warn_objc_string_literal_comparison) << Literal->getSourceRange(); else S.Diag(Loc, diag::warn_objc_literal_comparison) << LiteralKind << Literal->getSourceRange(); if (BinaryOperator::isEqualityOp(Opc) && hasIsEqualMethod(S, LHS.get(), RHS.get())) { SourceLocation Start = LHS.get()->getBeginLoc(); SourceLocation End = S.getLocForEndOfToken(RHS.get()->getEndLoc()); CharSourceRange OpRange = CharSourceRange::getCharRange(Loc, S.getLocForEndOfToken(Loc)); S.Diag(Loc, diag::note_objc_literal_comparison_isequal) << FixItHint::CreateInsertion(Start, Opc == BO_EQ ? "[" : "![") << FixItHint::CreateReplacement(OpRange, " isEqual:") << FixItHint::CreateInsertion(End, "]"); } } /// Warns on !x < y, !x & y where !(x < y), !(x & y) was probably intended. static void diagnoseLogicalNotOnLHSofCheck(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { // Check that left hand side is !something. UnaryOperator *UO = dyn_cast(LHS.get()->IgnoreImpCasts()); if (!UO || UO->getOpcode() != UO_LNot) return; // Only check if the right hand side is non-bool arithmetic type. if (RHS.get()->isKnownToHaveBooleanValue()) return; // Make sure that the something in !something is not bool. Expr *SubExpr = UO->getSubExpr()->IgnoreImpCasts(); if (SubExpr->isKnownToHaveBooleanValue()) return; // Emit warning. bool IsBitwiseOp = Opc == BO_And || Opc == BO_Or || Opc == BO_Xor; S.Diag(UO->getOperatorLoc(), diag::warn_logical_not_on_lhs_of_check) << Loc << IsBitwiseOp; // First note suggest !(x < y) SourceLocation FirstOpen = SubExpr->getBeginLoc(); SourceLocation FirstClose = RHS.get()->getEndLoc(); FirstClose = S.getLocForEndOfToken(FirstClose); if (FirstClose.isInvalid()) FirstOpen = SourceLocation(); S.Diag(UO->getOperatorLoc(), diag::note_logical_not_fix) << IsBitwiseOp << FixItHint::CreateInsertion(FirstOpen, "(") << FixItHint::CreateInsertion(FirstClose, ")"); // Second note suggests (!x) < y SourceLocation SecondOpen = LHS.get()->getBeginLoc(); SourceLocation SecondClose = LHS.get()->getEndLoc(); SecondClose = S.getLocForEndOfToken(SecondClose); if (SecondClose.isInvalid()) SecondOpen = SourceLocation(); S.Diag(UO->getOperatorLoc(), diag::note_logical_not_silence_with_parens) << FixItHint::CreateInsertion(SecondOpen, "(") << FixItHint::CreateInsertion(SecondClose, ")"); } // Get the decl for a simple expression: a reference to a variable, // an implicit C++ field reference, or an implicit ObjC ivar reference. static ValueDecl *getCompareDecl(Expr *E) { if (DeclRefExpr *DR = dyn_cast(E)) return DR->getDecl(); if (ObjCIvarRefExpr *Ivar = dyn_cast(E)) { if (Ivar->isFreeIvar()) return Ivar->getDecl(); } if (MemberExpr *Mem = dyn_cast(E)) { if (Mem->isImplicitAccess()) return Mem->getMemberDecl(); } return nullptr; } /// Diagnose some forms of syntactically-obvious tautological comparison. 
static void diagnoseTautologicalComparison(Sema &S, SourceLocation Loc, Expr *LHS, Expr *RHS, BinaryOperatorKind Opc) { Expr *LHSStripped = LHS->IgnoreParenImpCasts(); Expr *RHSStripped = RHS->IgnoreParenImpCasts(); QualType LHSType = LHS->getType(); QualType RHSType = RHS->getType(); if (LHSType->hasFloatingRepresentation() || (LHSType->isBlockPointerType() && !BinaryOperator::isEqualityOp(Opc)) || LHS->getBeginLoc().isMacroID() || RHS->getBeginLoc().isMacroID() || S.inTemplateInstantiation()) return; // Comparisons between two array types are ill-formed for operator<=>, so // we shouldn't emit any additional warnings about it. if (Opc == BO_Cmp && LHSType->isArrayType() && RHSType->isArrayType()) return; // For non-floating point types, check for self-comparisons of the form // x == x, x != x, x < x, etc. These always evaluate to a constant, and // often indicate logic errors in the program. // // NOTE: Don't warn about comparison expressions resulting from macro // expansion. Also don't warn about comparisons which are only self // comparisons within a template instantiation. The warnings should catch // obvious cases in the definition of the template anyways. The idea is to // warn when the typed comparison operator will always evaluate to the same // result. ValueDecl *DL = getCompareDecl(LHSStripped); ValueDecl *DR = getCompareDecl(RHSStripped); if (DL && DR && declaresSameEntity(DL, DR)) { StringRef Result; switch (Opc) { case BO_EQ: case BO_LE: case BO_GE: Result = "true"; break; case BO_NE: case BO_LT: case BO_GT: Result = "false"; break; case BO_Cmp: Result = "'std::strong_ordering::equal'"; break; default: break; } S.DiagRuntimeBehavior(Loc, nullptr, S.PDiag(diag::warn_comparison_always) << 0 /*self-comparison*/ << !Result.empty() << Result); } else if (DL && DR && DL->getType()->isArrayType() && DR->getType()->isArrayType() && !DL->isWeak() && !DR->isWeak()) { // What is it always going to evaluate to? StringRef Result; switch(Opc) { case BO_EQ: // e.g. array1 == array2 Result = "false"; break; case BO_NE: // e.g. array1 != array2 Result = "true"; break; default: // e.g. array1 <= array2 // The best we can say is 'a constant' break; } S.DiagRuntimeBehavior(Loc, nullptr, S.PDiag(diag::warn_comparison_always) << 1 /*array comparison*/ << !Result.empty() << Result); } if (isa(LHSStripped)) LHSStripped = LHSStripped->IgnoreParenCasts(); if (isa(RHSStripped)) RHSStripped = RHSStripped->IgnoreParenCasts(); // Warn about comparisons against a string constant (unless the other // operand is null); the user probably wants strcmp. 
Expr *LiteralString = nullptr; Expr *LiteralStringStripped = nullptr; if ((isa(LHSStripped) || isa(LHSStripped)) && !RHSStripped->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNull)) { LiteralString = LHS; LiteralStringStripped = LHSStripped; } else if ((isa(RHSStripped) || isa(RHSStripped)) && !LHSStripped->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNull)) { LiteralString = RHS; LiteralStringStripped = RHSStripped; } if (LiteralString) { S.DiagRuntimeBehavior(Loc, nullptr, S.PDiag(diag::warn_stringcompare) << isa(LiteralStringStripped) << LiteralString->getSourceRange()); } } static ImplicitConversionKind castKindToImplicitConversionKind(CastKind CK) { switch (CK) { default: { #ifndef NDEBUG llvm::errs() << "unhandled cast kind: " << CastExpr::getCastKindName(CK) << "\n"; #endif llvm_unreachable("unhandled cast kind"); } case CK_UserDefinedConversion: return ICK_Identity; case CK_LValueToRValue: return ICK_Lvalue_To_Rvalue; case CK_ArrayToPointerDecay: return ICK_Array_To_Pointer; case CK_FunctionToPointerDecay: return ICK_Function_To_Pointer; case CK_IntegralCast: return ICK_Integral_Conversion; case CK_FloatingCast: return ICK_Floating_Conversion; case CK_IntegralToFloating: case CK_FloatingToIntegral: return ICK_Floating_Integral; case CK_IntegralComplexCast: case CK_FloatingComplexCast: case CK_FloatingComplexToIntegralComplex: case CK_IntegralComplexToFloatingComplex: return ICK_Complex_Conversion; case CK_FloatingComplexToReal: case CK_FloatingRealToComplex: case CK_IntegralComplexToReal: case CK_IntegralRealToComplex: return ICK_Complex_Real; } } static bool checkThreeWayNarrowingConversion(Sema &S, QualType ToType, Expr *E, QualType FromType, SourceLocation Loc) { // Check for a narrowing implicit conversion. StandardConversionSequence SCS; SCS.setAsIdentityConversion(); SCS.setToType(0, FromType); SCS.setToType(1, ToType); if (const auto *ICE = dyn_cast(E)) SCS.Second = castKindToImplicitConversionKind(ICE->getCastKind()); APValue PreNarrowingValue; QualType PreNarrowingType; switch (SCS.getNarrowingKind(S.Context, E, PreNarrowingValue, PreNarrowingType, /*IgnoreFloatToIntegralConversion*/ true)) { case NK_Dependent_Narrowing: // Implicit conversion to a narrower type, but the expression is // value-dependent so we can't tell whether it's actually narrowing. case NK_Not_Narrowing: return false; case NK_Constant_Narrowing: // Implicit conversion to a narrower type, and the value is not a constant // expression. S.Diag(E->getBeginLoc(), diag::err_spaceship_argument_narrowing) << /*Constant*/ 1 << PreNarrowingValue.getAsString(S.Context, PreNarrowingType) << ToType; return true; case NK_Variable_Narrowing: // Implicit conversion to a narrower type, and the value is not a constant // expression. case NK_Type_Narrowing: S.Diag(E->getBeginLoc(), diag::err_spaceship_argument_narrowing) << /*Constant*/ 0 << FromType << ToType; // TODO: It's not a constant expression, but what if the user intended it // to be? Can we produce notes to help them figure out why it isn't? return true; } llvm_unreachable("unhandled case in switch"); } static QualType checkArithmeticOrEnumeralThreeWayCompare(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { using CCT = ComparisonCategoryType; QualType LHSType = LHS.get()->getType(); QualType RHSType = RHS.get()->getType(); // Dig out the original argument type and expression before implicit casts // were applied. These are the types/expressions we need to check the // [expr.spaceship] requirements against. 
ExprResult LHSStripped = LHS.get()->IgnoreParenImpCasts();
  ExprResult RHSStripped = RHS.get()->IgnoreParenImpCasts();

  QualType LHSStrippedType = LHSStripped.get()->getType();
  QualType RHSStrippedType = RHSStripped.get()->getType();

  // C++2a [expr.spaceship]p3: If one of the operands is of type bool and the
  // other is not, the program is ill-formed.
  if (LHSStrippedType->isBooleanType() != RHSStrippedType->isBooleanType()) {
    S.InvalidOperands(Loc, LHSStripped, RHSStripped);
    return QualType();
  }

  int NumEnumArgs = (int)LHSStrippedType->isEnumeralType() +
                    RHSStrippedType->isEnumeralType();
  if (NumEnumArgs == 1) {
    bool LHSIsEnum = LHSStrippedType->isEnumeralType();
    QualType OtherTy = LHSIsEnum ? RHSStrippedType : LHSStrippedType;
    if (OtherTy->hasFloatingRepresentation()) {
      S.InvalidOperands(Loc, LHSStripped, RHSStripped);
      return QualType();
    }
  }
  if (NumEnumArgs == 2) {
    // C++2a [expr.spaceship]p5: If both operands have the same enumeration
    // type E, the operator yields the result of converting the operands
    // to the underlying type of E and applying <=> to the converted operands.
    if (!S.Context.hasSameUnqualifiedType(LHSStrippedType, RHSStrippedType)) {
      S.InvalidOperands(Loc, LHS, RHS);
      return QualType();
    }
    QualType IntType =
        LHSStrippedType->getAs<EnumType>()->getDecl()->getIntegerType();
    assert(IntType->isArithmeticType());

    // We can't use `CK_IntegralCast` when the underlying type is 'bool', so we
    // promote the boolean type, and all other promotable integer types, to
    // avoid this.
    if (IntType->isPromotableIntegerType())
      IntType = S.Context.getPromotedIntegerType(IntType);

    LHS = S.ImpCastExprToType(LHS.get(), IntType, CK_IntegralCast);
    RHS = S.ImpCastExprToType(RHS.get(), IntType, CK_IntegralCast);
    LHSType = RHSType = IntType;
  }

  // C++2a [expr.spaceship]p4: If both operands have arithmetic types, the
  // usual arithmetic conversions are applied to the operands.
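// For example (not in the original source): in '1 <=> 2.0' the int operand
// is converted to double here, before the narrowing check and the
// comparison category type computation below run.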
QualType Type = S.UsualArithmeticConversions(LHS, RHS); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (Type.isNull()) return S.InvalidOperands(Loc, LHS, RHS); assert(Type->isArithmeticType() || Type->isEnumeralType()); bool HasNarrowing = checkThreeWayNarrowingConversion( S, Type, LHS.get(), LHSType, LHS.get()->getBeginLoc()); HasNarrowing |= checkThreeWayNarrowingConversion(S, Type, RHS.get(), RHSType, RHS.get()->getBeginLoc()); if (HasNarrowing) return QualType(); assert(!Type.isNull() && "composite type for <=> has not been set"); auto TypeKind = [&]() { if (const ComplexType *CT = Type->getAs()) { if (CT->getElementType()->hasFloatingRepresentation()) return CCT::WeakEquality; return CCT::StrongEquality; } if (Type->isIntegralOrEnumerationType()) return CCT::StrongOrdering; if (Type->hasFloatingRepresentation()) return CCT::PartialOrdering; llvm_unreachable("other types are unimplemented"); }(); return S.CheckComparisonCategoryType(TypeKind, Loc); } static QualType checkArithmeticOrEnumeralCompare(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { if (Opc == BO_Cmp) return checkArithmeticOrEnumeralThreeWayCompare(S, LHS, RHS, Loc); // C99 6.5.8p3 / C99 6.5.9p4 QualType Type = S.UsualArithmeticConversions(LHS, RHS); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (Type.isNull()) return S.InvalidOperands(Loc, LHS, RHS); assert(Type->isArithmeticType() || Type->isEnumeralType()); checkEnumComparison(S, Loc, LHS.get(), RHS.get()); if (Type->isAnyComplexType() && BinaryOperator::isRelationalOp(Opc)) return S.InvalidOperands(Loc, LHS, RHS); // Check for comparisons of floating point operands using != and ==. if (Type->hasFloatingRepresentation() && BinaryOperator::isEqualityOp(Opc)) S.CheckFloatComparison(Loc, LHS.get(), RHS.get()); // The result of comparisons is 'bool' in C++, 'int' in C. return S.Context.getLogicalOperationType(); } // C99 6.5.8, C++ [expr.rel] QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { bool IsRelational = BinaryOperator::isRelationalOp(Opc); bool IsThreeWay = Opc == BO_Cmp; auto IsAnyPointerType = [](ExprResult E) { QualType Ty = E.get()->getType(); return Ty->isPointerType() || Ty->isMemberPointerType(); }; // C++2a [expr.spaceship]p6: If at least one of the operands is of pointer // type, array-to-pointer, ..., conversions are performed on both operands to // bring them to their composite type. // Otherwise, all comparisons expect an rvalue, so convert to rvalue before // any type-related checks. if (!IsThreeWay || IsAnyPointerType(LHS) || IsAnyPointerType(RHS)) { LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); } else { LHS = DefaultLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = DefaultLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); } checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/true); // Handle vector comparisons separately. 
if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorCompareOperands(LHS, RHS, Loc, Opc); diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc); diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc); QualType LHSType = LHS.get()->getType(); QualType RHSType = RHS.get()->getType(); if ((LHSType->isArithmeticType() || LHSType->isEnumeralType()) && (RHSType->isArithmeticType() || RHSType->isEnumeralType())) return checkArithmeticOrEnumeralCompare(*this, LHS, RHS, Loc, Opc); const Expr::NullPointerConstantKind LHSNullKind = LHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); const Expr::NullPointerConstantKind RHSNullKind = RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); bool LHSIsNull = LHSNullKind != Expr::NPCK_NotNull; bool RHSIsNull = RHSNullKind != Expr::NPCK_NotNull; auto computeResultTy = [&]() { if (Opc != BO_Cmp) return Context.getLogicalOperationType(); assert(getLangOpts().CPlusPlus); assert(Context.hasSameType(LHS.get()->getType(), RHS.get()->getType())); QualType CompositeTy = LHS.get()->getType(); assert(!CompositeTy->isReferenceType()); auto buildResultTy = [&](ComparisonCategoryType Kind) { return CheckComparisonCategoryType(Kind, Loc); }; // C++2a [expr.spaceship]p7: If the composite pointer type is a function // pointer type, a pointer-to-member type, or std::nullptr_t, the // result is of type std::strong_equality if (CompositeTy->isFunctionPointerType() || CompositeTy->isMemberPointerType() || CompositeTy->isNullPtrType()) // FIXME: consider making the function pointer case produce // strong_ordering not strong_equality, per P0946R0-Jax18 discussion // and direction polls return buildResultTy(ComparisonCategoryType::StrongEquality); // C++2a [expr.spaceship]p8: If the composite pointer type is an object // pointer type, p <=> q is of type std::strong_ordering. if (CompositeTy->isPointerType()) { // P0946R0: Comparisons between a null pointer constant and an object // pointer result in std::strong_equality if (LHSIsNull != RHSIsNull) return buildResultTy(ComparisonCategoryType::StrongEquality); return buildResultTy(ComparisonCategoryType::StrongOrdering); } // C++2a [expr.spaceship]p9: Otherwise, the program is ill-formed. // TODO: Extend support for operator<=> to ObjC types. return InvalidOperands(Loc, LHS, RHS); }; if (!IsRelational && LHSIsNull != RHSIsNull) { bool IsEquality = Opc == BO_EQ; if (RHSIsNull) DiagnoseAlwaysNonNullPointer(LHS.get(), RHSNullKind, IsEquality, RHS.get()->getSourceRange()); else DiagnoseAlwaysNonNullPointer(RHS.get(), LHSNullKind, IsEquality, LHS.get()->getSourceRange()); } if ((LHSType->isIntegerType() && !LHSIsNull) || (RHSType->isIntegerType() && !RHSIsNull)) { // Skip normal pointer conversion checks in this case; we have better // diagnostics for this below. } else if (getLangOpts().CPlusPlus) { // Equality comparison of a function pointer to a void pointer is invalid, // but we allow it as an extension. // FIXME: If we really want to allow this, should it be part of composite // pointer type computation so it works in conditionals too? if (!IsRelational && ((LHSType->isFunctionPointerType() && RHSType->isVoidPointerType()) || (RHSType->isFunctionPointerType() && LHSType->isVoidPointerType()))) { // This is a gcc extension compatibility comparison. // In a SFINAE context, we treat this as a hard error to maintain // conformance with the C++ standard. 
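// Illustrative example (not in the original source) of the extension:
//
//   void (*fp)(void) = ...;
//   void *vp = ...;
//   fp == vp;   // ext_typecheck_comparison_of_fptr_to_void; a hard error
//               // when it occurs in a SFINAE context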
diagnoseFunctionPointerToVoidComparison( *this, Loc, LHS, RHS, /*isError*/ (bool)isSFINAEContext()); if (isSFINAEContext()) return QualType(); RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return computeResultTy(); } // C++ [expr.eq]p2: // If at least one operand is a pointer [...] bring them to their // composite pointer type. // C++ [expr.spaceship]p6 // If at least one of the operands is of pointer type, [...] bring them // to their composite pointer type. // C++ [expr.rel]p2: // If both operands are pointers, [...] bring them to their composite // pointer type. if ((int)LHSType->isPointerType() + (int)RHSType->isPointerType() >= (IsRelational ? 2 : 1) && (!LangOpts.ObjCAutoRefCount || !(LHSType->isObjCObjectPointerType() || RHSType->isObjCObjectPointerType()))) { if (convertPointersToCompositeType(*this, Loc, LHS, RHS)) return QualType(); return computeResultTy(); } } else if (LHSType->isPointerType() && RHSType->isPointerType()) { // C99 6.5.8p2 // All of the following pointer-related warnings are GCC extensions, except // when handling null pointer constants. QualType LCanPointeeTy = LHSType->castAs()->getPointeeType().getCanonicalType(); QualType RCanPointeeTy = RHSType->castAs()->getPointeeType().getCanonicalType(); // C99 6.5.9p2 and C99 6.5.8p2 if (Context.typesAreCompatible(LCanPointeeTy.getUnqualifiedType(), RCanPointeeTy.getUnqualifiedType())) { // Valid unless a relational comparison of function pointers if (IsRelational && LCanPointeeTy->isFunctionType()) { Diag(Loc, diag::ext_typecheck_ordered_comparison_of_function_pointers) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } else if (!IsRelational && (LCanPointeeTy->isVoidType() || RCanPointeeTy->isVoidType())) { // Valid unless comparison between non-null pointer and function pointer if ((LCanPointeeTy->isFunctionType() || RCanPointeeTy->isFunctionType()) && !LHSIsNull && !RHSIsNull) diagnoseFunctionPointerToVoidComparison(*this, Loc, LHS, RHS, /*isError*/false); } else { // Invalid diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); } if (LCanPointeeTy != RCanPointeeTy) { // Treat NULL constant as a special case in OpenCL. if (getLangOpts().OpenCL && !LHSIsNull && !RHSIsNull) { const PointerType *LHSPtr = LHSType->getAs(); if (!LHSPtr->isAddressSpaceOverlapping(*RHSType->getAs())) { Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSType << RHSType << 0 /* comparison */ << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } LangAS AddrSpaceL = LCanPointeeTy.getAddressSpace(); LangAS AddrSpaceR = RCanPointeeTy.getAddressSpace(); CastKind Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, Kind); else RHS = ImpCastExprToType(RHS.get(), LHSType, Kind); } return computeResultTy(); } if (getLangOpts().CPlusPlus) { // C++ [expr.eq]p4: // Two operands of type std::nullptr_t or one operand of type // std::nullptr_t and the other a null pointer constant compare equal. if (!IsRelational && LHSIsNull && RHSIsNull) { if (LHSType->isNullPtrType()) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return computeResultTy(); } if (RHSType->isNullPtrType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return computeResultTy(); } } // Comparison of Objective-C pointers and block pointers against nullptr_t. // These aren't covered by the composite pointer type rules. 
if (!IsRelational && RHSType->isNullPtrType() && (LHSType->isObjCObjectPointerType() || LHSType->isBlockPointerType())) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return computeResultTy(); } if (!IsRelational && LHSType->isNullPtrType() && (RHSType->isObjCObjectPointerType() || RHSType->isBlockPointerType())) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return computeResultTy(); } if (IsRelational && ((LHSType->isNullPtrType() && RHSType->isPointerType()) || (RHSType->isNullPtrType() && LHSType->isPointerType()))) { // HACK: Relational comparison of nullptr_t against a pointer type is // invalid per DR583, but we allow it within std::less<> and friends, // since otherwise common uses of it break. // FIXME: Consider removing this hack once LWG fixes std::less<> and // friends to have std::nullptr_t overload candidates. DeclContext *DC = CurContext; if (isa(DC)) DC = DC->getParent(); if (auto *CTSD = dyn_cast(DC)) { if (CTSD->isInStdNamespace() && llvm::StringSwitch(CTSD->getName()) .Cases("less", "less_equal", "greater", "greater_equal", true) .Default(false)) { if (RHSType->isNullPtrType()) RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); else LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return computeResultTy(); } } } // C++ [expr.eq]p2: // If at least one operand is a pointer to member, [...] bring them to // their composite pointer type. if (!IsRelational && (LHSType->isMemberPointerType() || RHSType->isMemberPointerType())) { if (convertPointersToCompositeType(*this, Loc, LHS, RHS)) return QualType(); else return computeResultTy(); } } // Handle block pointer types. if (!IsRelational && LHSType->isBlockPointerType() && RHSType->isBlockPointerType()) { QualType lpointee = LHSType->castAs()->getPointeeType(); QualType rpointee = RHSType->castAs()->getPointeeType(); if (!LHSIsNull && !RHSIsNull && !Context.typesAreCompatible(lpointee, rpointee)) { Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return computeResultTy(); } // Allow block pointers to be compared with null pointer constants. if (!IsRelational && ((LHSType->isBlockPointerType() && RHSType->isPointerType()) || (LHSType->isPointerType() && RHSType->isBlockPointerType()))) { if (!LHSIsNull && !RHSIsNull) { if (!((RHSType->isPointerType() && RHSType->castAs() ->getPointeeType()->isVoidType()) || (LHSType->isPointerType() && LHSType->castAs() ->getPointeeType()->isVoidType()))) Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, RHSType->isPointerType() ? CK_BitCast : CK_AnyPointerToBlockPointerCast); else RHS = ImpCastExprToType(RHS.get(), LHSType, LHSType->isPointerType() ? CK_BitCast : CK_AnyPointerToBlockPointerCast); return computeResultTy(); } if (LHSType->isObjCObjectPointerType() || RHSType->isObjCObjectPointerType()) { const PointerType *LPT = LHSType->getAs(); const PointerType *RPT = RHSType->getAs(); if (LPT || RPT) { bool LPtrToVoid = LPT ? LPT->getPointeeType()->isVoidType() : false; bool RPtrToVoid = RPT ? 
RPT->getPointeeType()->isVoidType() : false; if (!LPtrToVoid && !RPtrToVoid && !Context.typesAreCompatible(LHSType, RHSType)) { diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); } if (LHSIsNull && !RHSIsNull) { Expr *E = LHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCConversion(SourceRange(), RHSType, E, CCK_ImplicitConversion); LHS = ImpCastExprToType(E, RHSType, RPT ? CK_BitCast :CK_CPointerToObjCPointerCast); } else { Expr *E = RHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCConversion(SourceRange(), LHSType, E, CCK_ImplicitConversion, /*Diagnose=*/true, /*DiagnoseCFAudited=*/false, Opc); RHS = ImpCastExprToType(E, LHSType, LPT ? CK_BitCast :CK_CPointerToObjCPointerCast); } return computeResultTy(); } if (LHSType->isObjCObjectPointerType() && RHSType->isObjCObjectPointerType()) { if (!Context.areComparableObjCPointerTypes(LHSType, RHSType)) diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); if (isObjCObjectLiteral(LHS) || isObjCObjectLiteral(RHS)) diagnoseObjCLiteralComparison(*this, Loc, LHS, RHS, Opc); if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); else RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return computeResultTy(); } if (!IsRelational && LHSType->isBlockPointerType() && RHSType->isBlockCompatibleObjCPointerType(Context)) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BlockPointerToObjCPointerCast); return computeResultTy(); } else if (!IsRelational && LHSType->isBlockCompatibleObjCPointerType(Context) && RHSType->isBlockPointerType()) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BlockPointerToObjCPointerCast); return computeResultTy(); } } if ((LHSType->isAnyPointerType() && RHSType->isIntegerType()) || (LHSType->isIntegerType() && RHSType->isAnyPointerType())) { unsigned DiagID = 0; bool isError = false; if (LangOpts.DebuggerSupport) { // Under a debugger, allow the comparison of pointers to integers, // since users tend to want to compare addresses. } else if ((LHSIsNull && LHSType->isIntegerType()) || (RHSIsNull && RHSType->isIntegerType())) { if (IsRelational) { isError = getLangOpts().CPlusPlus; DiagID = isError ? diag::err_typecheck_ordered_comparison_of_pointer_and_zero : diag::ext_typecheck_ordered_comparison_of_pointer_and_zero; } } else if (getLangOpts().CPlusPlus) { DiagID = diag::err_typecheck_comparison_of_pointer_integer; isError = true; } else if (IsRelational) DiagID = diag::ext_typecheck_ordered_comparison_of_pointer_integer; else DiagID = diag::ext_typecheck_comparison_of_pointer_integer; if (DiagID) { Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); if (isError) return QualType(); } if (LHSType->isIntegerType()) LHS = ImpCastExprToType(LHS.get(), RHSType, LHSIsNull ? CK_NullToPointer : CK_IntegralToPointer); else RHS = ImpCastExprToType(RHS.get(), LHSType, RHSIsNull ? CK_NullToPointer : CK_IntegralToPointer); return computeResultTy(); } // Handle block pointers. 
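// Illustrative example (not in the original source) of the case handled
// below:
//
//   void (^blk)(void) = ...;
//   if (blk == 0) { ... }   // the null constant is converted to the block
//                           // pointer type with CK_NullToPointer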
if (!IsRelational && RHSIsNull &&
      LHSType->isBlockPointerType() && RHSType->isIntegerType()) {
    RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer);
    return computeResultTy();
  }
  if (!IsRelational && LHSIsNull &&
      LHSType->isIntegerType() && RHSType->isBlockPointerType()) {
    LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer);
    return computeResultTy();
  }

  if (getLangOpts().OpenCLVersion >= 200) {
    if (LHSType->isClkEventT() && RHSType->isClkEventT()) {
      return computeResultTy();
    }

    if (LHSType->isQueueT() && RHSType->isQueueT()) {
      return computeResultTy();
    }

    if (LHSIsNull && RHSType->isQueueT()) {
      LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer);
      return computeResultTy();
    }

    if (LHSType->isQueueT() && RHSIsNull) {
      RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer);
      return computeResultTy();
    }
  }

  return InvalidOperands(Loc, LHS, RHS);
}

// Return a signed ext_vector_type that is of identical size and number of
// elements. For floating point vectors, return an integer type of identical
// size and number of elements. In the non ext_vector_type case, search from
// the largest type to the smallest type to avoid cases where long long == long,
// where long gets picked over long long.
QualType Sema::GetSignedVectorType(QualType V) {
  const VectorType *VTy = V->getAs<VectorType>();
  unsigned TypeSize = Context.getTypeSize(VTy->getElementType());

  if (isa<ExtVectorType>(VTy)) {
    if (TypeSize == Context.getTypeSize(Context.CharTy))
      return Context.getExtVectorType(Context.CharTy, VTy->getNumElements());
    else if (TypeSize == Context.getTypeSize(Context.ShortTy))
      return Context.getExtVectorType(Context.ShortTy, VTy->getNumElements());
    else if (TypeSize == Context.getTypeSize(Context.IntTy))
      return Context.getExtVectorType(Context.IntTy, VTy->getNumElements());
    else if (TypeSize == Context.getTypeSize(Context.LongTy))
      return Context.getExtVectorType(Context.LongTy, VTy->getNumElements());
    assert(TypeSize == Context.getTypeSize(Context.LongLongTy) &&
           "Unhandled vector element size in vector compare");
    return Context.getExtVectorType(Context.LongLongTy, VTy->getNumElements());
  }

  if (TypeSize == Context.getTypeSize(Context.LongLongTy))
    return Context.getVectorType(Context.LongLongTy, VTy->getNumElements(),
                                 VectorType::GenericVector);
  else if (TypeSize == Context.getTypeSize(Context.LongTy))
    return Context.getVectorType(Context.LongTy, VTy->getNumElements(),
                                 VectorType::GenericVector);
  else if (TypeSize == Context.getTypeSize(Context.IntTy))
    return Context.getVectorType(Context.IntTy, VTy->getNumElements(),
                                 VectorType::GenericVector);
  else if (TypeSize == Context.getTypeSize(Context.ShortTy))
    return Context.getVectorType(Context.ShortTy, VTy->getNumElements(),
                                 VectorType::GenericVector);
  assert(TypeSize == Context.getTypeSize(Context.CharTy) &&
         "Unhandled vector element size in vector compare");
  return Context.getVectorType(Context.CharTy, VTy->getNumElements(),
                               VectorType::GenericVector);
}

/// CheckVectorCompareOperands - vector comparisons are a clang extension that
/// operates on extended vector types. Instead of producing an IntTy result,
/// like a scalar comparison, a vector comparison produces a vector of integer
/// types.
QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS,
                                          SourceLocation Loc,
                                          BinaryOperatorKind Opc) {
  // Check to make sure we're operating on vectors of the same type and width,
  // Allowing one side to be a scalar of element type.
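// For illustration (not in the original source): given
//
//   typedef float float4 __attribute__((ext_vector_type(4)));
//   float4 a, b;
//
// the comparison 'a < b' yields a vector of four 32-bit signed integers
// (the type GetSignedVectorType computes above), not a scalar bool.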
QualType vType = CheckVectorOperands(LHS, RHS, Loc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/getLangOpts().ZVector); if (vType.isNull()) return vType; QualType LHSType = LHS.get()->getType(); // If AltiVec, the comparison results in a numeric type, i.e. // bool for C++, int for C if (getLangOpts().AltiVec && vType->getAs()->getVectorKind() == VectorType::AltiVecVector) return Context.getLogicalOperationType(); // For non-floating point types, check for self-comparisons of the form // x == x, x != x, x < x, etc. These always evaluate to a constant, and // often indicate logic errors in the program. diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc); // Check for comparisons of floating point operands using != and ==. if (BinaryOperator::isEqualityOp(Opc) && LHSType->hasFloatingRepresentation()) { assert(RHS.get()->getType()->hasFloatingRepresentation()); CheckFloatComparison(Loc, LHS.get(), RHS.get()); } // Return a signed type for the vector. return GetSignedVectorType(vType); } QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { // Ensure that either both operands are of the same vector type, or // one operand is of a vector type and the other is of its element type. QualType vType = CheckVectorOperands(LHS, RHS, Loc, false, /*AllowBothBool*/true, /*AllowBoolConversions*/false); if (vType.isNull()) return InvalidOperands(Loc, LHS, RHS); if (getLangOpts().OpenCL && getLangOpts().OpenCLVersion < 120 && vType->hasFloatingRepresentation()) return InvalidOperands(Loc, LHS, RHS); // FIXME: The check for C++ here is for GCC compatibility. GCC rejects the // usage of the logical operators && and || with vectors in C. This // check could be notionally dropped. if (!getLangOpts().CPlusPlus && !(isa(vType->getAs()))) return InvalidLogicalVectorOperands(Loc, LHS, RHS); return GetSignedVectorType(LHS.get()->getType()); } inline QualType Sema::CheckBitwiseOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); bool IsCompAssign = Opc == BO_AndAssign || Opc == BO_OrAssign || Opc == BO_XorAssign; if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { if (LHS.get()->getType()->hasIntegerRepresentation() && RHS.get()->getType()->hasIntegerRepresentation()) return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign, /*AllowBothBool*/true, /*AllowBoolConversions*/getLangOpts().ZVector); return InvalidOperands(Loc, LHS, RHS); } if (Opc == BO_And) diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc); ExprResult LHSResult = LHS, RHSResult = RHS; QualType compType = UsualArithmeticConversions(LHSResult, RHSResult, IsCompAssign); if (LHSResult.isInvalid() || RHSResult.isInvalid()) return QualType(); LHS = LHSResult.get(); RHS = RHSResult.get(); if (!compType.isNull() && compType->isIntegralOrUnscopedEnumerationType()) return compType; return InvalidOperands(Loc, LHS, RHS); } // C99 6.5.[13,14] inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { // Check vector operands differently. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorLogicalOperands(LHS, RHS, Loc); // Diagnose cases where the user write a logical and/or but probably meant a // bitwise one. We do this when the LHS is a non-bool integer and the RHS // is a constant. 
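// Illustrative example (not in the original source) of the warning below:
//
//   if (flags && 0x4) { ... }   // warn_logical_instead_of_bitwise
//
// with fix-its suggesting 'flags & 0x4', or removing the constant entirely
// when the operator is '&&'.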
if (LHS.get()->getType()->isIntegerType() && !LHS.get()->getType()->isBooleanType() && RHS.get()->getType()->isIntegerType() && !RHS.get()->isValueDependent() && // Don't warn in macros or template instantiations. !Loc.isMacroID() && !inTemplateInstantiation()) { // If the RHS can be constant folded, and if it constant folds to something // that isn't 0 or 1 (which indicate a potential logical operation that // happened to fold to true/false) then warn. // Parens on the RHS are ignored. Expr::EvalResult EVResult; if (RHS.get()->EvaluateAsInt(EVResult, Context)) { llvm::APSInt Result = EVResult.Val.getInt(); if ((getLangOpts().Bool && !RHS.get()->getType()->isBooleanType() && !RHS.get()->getExprLoc().isMacroID()) || (Result != 0 && Result != 1)) { Diag(Loc, diag::warn_logical_instead_of_bitwise) << RHS.get()->getSourceRange() << (Opc == BO_LAnd ? "&&" : "||"); // Suggest replacing the logical operator with the bitwise version Diag(Loc, diag::note_logical_instead_of_bitwise_change_operator) << (Opc == BO_LAnd ? "&" : "|") << FixItHint::CreateReplacement(SourceRange( Loc, getLocForEndOfToken(Loc)), Opc == BO_LAnd ? "&" : "|"); if (Opc == BO_LAnd) // Suggest replacing "Foo() && kNonZero" with "Foo()" Diag(Loc, diag::note_logical_instead_of_bitwise_remove_constant) << FixItHint::CreateRemoval( SourceRange(getLocForEndOfToken(LHS.get()->getEndLoc()), RHS.get()->getEndLoc())); } } } if (!Context.getLangOpts().CPlusPlus) { // OpenCL v1.1 s6.3.g: The logical operators and (&&), or (||) do // not operate on the built-in scalar and vector float types. if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { if (LHS.get()->getType()->isFloatingType() || RHS.get()->getType()->isFloatingType()) return InvalidOperands(Loc, LHS, RHS); } LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); if (!LHS.get()->getType()->isScalarType() || !RHS.get()->getType()->isScalarType()) return InvalidOperands(Loc, LHS, RHS); return Context.IntTy; } // The following is safe because we only use this method for // non-overloadable operands. // C++ [expr.log.and]p1 // C++ [expr.log.or]p1 // The operands are both contextually converted to type bool. ExprResult LHSRes = PerformContextuallyConvertToBool(LHS.get()); if (LHSRes.isInvalid()) return InvalidOperands(Loc, LHS, RHS); LHS = LHSRes; ExprResult RHSRes = PerformContextuallyConvertToBool(RHS.get()); if (RHSRes.isInvalid()) return InvalidOperands(Loc, LHS, RHS); RHS = RHSRes; // C++ [expr.log.and]p2 // C++ [expr.log.or]p2 // The result is a bool. return Context.BoolTy; } static bool IsReadonlyMessage(Expr *E, Sema &S) { const MemberExpr *ME = dyn_cast(E); if (!ME) return false; if (!isa(ME->getMemberDecl())) return false; ObjCMessageExpr *Base = dyn_cast( ME->getBase()->IgnoreImplicit()->IgnoreParenImpCasts()); if (!Base) return false; return Base->getMethodDecl() != nullptr; } /// Is the given expression (which must be 'const') a reference to a /// variable which was originally non-const, but which has become /// 'const' due to being captured within a block? enum NonConstCaptureKind { NCCK_None, NCCK_Block, NCCK_Lambda }; static NonConstCaptureKind isReferenceToNonConstCapture(Sema &S, Expr *E) { assert(E->isLValue() && E->getType().isConstQualified()); E = E->IgnoreParens(); // Must be a reference to a declaration from an enclosing scope. 
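// Illustrative examples (not in the original source) of captures that have
// become 'const':
//
//   int i = 0;
//   auto l = [i] { i = 1; };        // err_lambda_decl_ref_not_modifiable_lvalue
//   void (^b)(void) = ^{ i = 1; };  // err_block_decl_ref_not_modifiable_lvalue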
DeclRefExpr *DRE = dyn_cast(E); if (!DRE) return NCCK_None; if (!DRE->refersToEnclosingVariableOrCapture()) return NCCK_None; // The declaration must be a variable which is not declared 'const'. VarDecl *var = dyn_cast(DRE->getDecl()); if (!var) return NCCK_None; if (var->getType().isConstQualified()) return NCCK_None; assert(var->hasLocalStorage() && "capture added 'const' to non-local?"); // Decide whether the first capture was for a block or a lambda. DeclContext *DC = S.CurContext, *Prev = nullptr; // Decide whether the first capture was for a block or a lambda. while (DC) { // For init-capture, it is possible that the variable belongs to the // template pattern of the current context. if (auto *FD = dyn_cast(DC)) if (var->isInitCapture() && FD->getTemplateInstantiationPattern() == var->getDeclContext()) break; if (DC == var->getDeclContext()) break; Prev = DC; DC = DC->getParent(); } // Unless we have an init-capture, we've gone one step too far. if (!var->isInitCapture()) DC = Prev; return (isa(DC) ? NCCK_Block : NCCK_Lambda); } static bool IsTypeModifiable(QualType Ty, bool IsDereference) { Ty = Ty.getNonReferenceType(); if (IsDereference && Ty->isPointerType()) Ty = Ty->getPointeeType(); return !Ty.isConstQualified(); } // Update err_typecheck_assign_const and note_typecheck_assign_const // when this enum is changed. enum { ConstFunction, ConstVariable, ConstMember, ConstMethod, NestedConstMember, ConstUnknown, // Keep as last element }; /// Emit the "read-only variable not assignable" error and print notes to give /// more information about why the variable is not assignable, such as pointing /// to the declaration of a const variable, showing that a method is const, or /// that the function is returning a const reference. static void DiagnoseConstAssignment(Sema &S, const Expr *E, SourceLocation Loc) { SourceRange ExprRange = E->getSourceRange(); // Only emit one error on the first const found. All other consts will emit // a note to the error. bool DiagnosticEmitted = false; // Track if the current expression is the result of a dereference, and if the // next checked expression is the result of a dereference. bool IsDereference = false; bool NextIsDereference = false; // Loop to process MemberExpr chains. while (true) { IsDereference = NextIsDereference; E = E->IgnoreImplicit()->IgnoreParenImpCasts(); if (const MemberExpr *ME = dyn_cast(E)) { NextIsDereference = ME->isArrow(); const ValueDecl *VD = ME->getMemberDecl(); if (const FieldDecl *Field = dyn_cast(VD)) { // Mutable fields can be modified even if the class is const. 
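// For instance (not in the original source):
//
//   struct S { mutable int counter; };
//   const S s;
//   s.counter = 1;   // OK: a mutable field is modifiable through a const path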
if (Field->isMutable()) { assert(DiagnosticEmitted && "Expected diagnostic not emitted."); break; } if (!IsTypeModifiable(Field->getType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMember << false /*static*/ << Field << Field->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstMember << false /*static*/ << Field << Field->getType() << Field->getSourceRange(); } E = ME->getBase(); continue; } else if (const VarDecl *VDecl = dyn_cast(VD)) { if (VDecl->getType().isConstQualified()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMember << true /*static*/ << VDecl << VDecl->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstMember << true /*static*/ << VDecl << VDecl->getType() << VDecl->getSourceRange(); } // Static fields do not inherit constness from parents. break; } break; // End MemberExpr } else if (const ArraySubscriptExpr *ASE = dyn_cast(E)) { E = ASE->getBase()->IgnoreParenImpCasts(); continue; } else if (const ExtVectorElementExpr *EVE = dyn_cast(E)) { E = EVE->getBase()->IgnoreParenImpCasts(); continue; } break; } if (const CallExpr *CE = dyn_cast(E)) { // Function calls const FunctionDecl *FD = CE->getDirectCallee(); if (FD && !IsTypeModifiable(FD->getReturnType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstFunction << FD; DiagnosticEmitted = true; } S.Diag(FD->getReturnTypeSourceRange().getBegin(), diag::note_typecheck_assign_const) << ConstFunction << FD << FD->getReturnType() << FD->getReturnTypeSourceRange(); } } else if (const DeclRefExpr *DRE = dyn_cast(E)) { // Point to variable declaration. if (const ValueDecl *VD = DRE->getDecl()) { if (!IsTypeModifiable(VD->getType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstVariable << VD << VD->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstVariable << VD << VD->getType() << VD->getSourceRange(); } } } else if (isa(E)) { if (const DeclContext *DC = S.getFunctionLevelDeclContext()) { if (const CXXMethodDecl *MD = dyn_cast(DC)) { if (MD->isConst()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMethod << MD; DiagnosticEmitted = true; } S.Diag(MD->getLocation(), diag::note_typecheck_assign_const) << ConstMethod << MD << MD->getSourceRange(); } } } } if (DiagnosticEmitted) return; // Can't determine a more specific message, so display the generic error. S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstUnknown; } enum OriginalExprKind { OEK_Variable, OEK_Member, OEK_LValue }; static void DiagnoseRecursiveConstFields(Sema &S, const ValueDecl *VD, const RecordType *Ty, SourceLocation Loc, SourceRange Range, OriginalExprKind OEK, bool &DiagnosticEmitted) { std::vector RecordTypeList; RecordTypeList.push_back(Ty); unsigned NextToCheckIndex = 0; // We walk the record hierarchy breadth-first to ensure that we print // diagnostics in field nesting order. while (RecordTypeList.size() > NextToCheckIndex) { bool IsNested = NextToCheckIndex > 0; for (const FieldDecl *Field : RecordTypeList[NextToCheckIndex]->getDecl()->fields()) { // First, check every field for constness. 
QualType FieldTy = Field->getType(); if (FieldTy.isConstQualified()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << Range << NestedConstMember << OEK << VD << IsNested << Field; DiagnosticEmitted = true; } S.Diag(Field->getLocation(), diag::note_typecheck_assign_const) << NestedConstMember << IsNested << Field << FieldTy << Field->getSourceRange(); } // Then we append it to the list to check next in order. FieldTy = FieldTy.getCanonicalType(); if (const auto *FieldRecTy = FieldTy->getAs()) { if (llvm::find(RecordTypeList, FieldRecTy) == RecordTypeList.end()) RecordTypeList.push_back(FieldRecTy); } } ++NextToCheckIndex; } } /// Emit an error for the case where a record we are trying to assign to has a /// const-qualified field somewhere in its hierarchy. static void DiagnoseRecursiveConstFields(Sema &S, const Expr *E, SourceLocation Loc) { QualType Ty = E->getType(); assert(Ty->isRecordType() && "lvalue was not record?"); SourceRange Range = E->getSourceRange(); const RecordType *RTy = Ty.getCanonicalType()->getAs(); bool DiagEmitted = false; if (const MemberExpr *ME = dyn_cast(E)) DiagnoseRecursiveConstFields(S, ME->getMemberDecl(), RTy, Loc, Range, OEK_Member, DiagEmitted); else if (const DeclRefExpr *DRE = dyn_cast(E)) DiagnoseRecursiveConstFields(S, DRE->getDecl(), RTy, Loc, Range, OEK_Variable, DiagEmitted); else DiagnoseRecursiveConstFields(S, nullptr, RTy, Loc, Range, OEK_LValue, DiagEmitted); if (!DiagEmitted) DiagnoseConstAssignment(S, E, Loc); } /// CheckForModifiableLvalue - Verify that E is a modifiable lvalue. If not, /// emit an error and return true. If so, return false. static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) { assert(!E->hasPlaceholderType(BuiltinType::PseudoObject)); S.CheckShadowingDeclModification(E, Loc); SourceLocation OrigLoc = Loc; Expr::isModifiableLvalueResult IsLV = E->isModifiableLvalue(S.Context, &Loc); if (IsLV == Expr::MLV_ClassTemporary && IsReadonlyMessage(E, S)) IsLV = Expr::MLV_InvalidMessageExpression; if (IsLV == Expr::MLV_Valid) return false; unsigned DiagID = 0; bool NeedType = false; switch (IsLV) { // C99 6.5.16p2 case Expr::MLV_ConstQualified: // Use a specialized diagnostic when we're assigning to an object // from an enclosing function or block. if (NonConstCaptureKind NCCK = isReferenceToNonConstCapture(S, E)) { if (NCCK == NCCK_Block) DiagID = diag::err_block_decl_ref_not_modifiable_lvalue; else DiagID = diag::err_lambda_decl_ref_not_modifiable_lvalue; break; } // In ARC, use some specialized diagnostics for occasions where we // infer 'const'. These are always pseudo-strong variables. if (S.getLangOpts().ObjCAutoRefCount) { DeclRefExpr *declRef = dyn_cast(E->IgnoreParenCasts()); if (declRef && isa(declRef->getDecl())) { VarDecl *var = cast(declRef->getDecl()); // Use the normal diagnostic if it's pseudo-__strong but the // user actually wrote 'const'. if (var->isARCPseudoStrong() && (!var->getTypeSourceInfo() || !var->getTypeSourceInfo()->getType().isConstQualified())) { // There are three pseudo-strong cases: // - self ObjCMethodDecl *method = S.getCurMethodDecl(); if (method && var == method->getSelfDecl()) { DiagID = method->isClassMethod() ? diag::err_typecheck_arc_assign_self_class_method : diag::err_typecheck_arc_assign_self; // - Objective-C externally_retained attribute. 
} else if (var->hasAttr() || isa(var)) { DiagID = diag::err_typecheck_arc_assign_externally_retained; // - fast enumeration variables } else { DiagID = diag::err_typecheck_arr_assign_enumeration; } SourceRange Assign; if (Loc != OrigLoc) Assign = SourceRange(OrigLoc, OrigLoc); S.Diag(Loc, DiagID) << E->getSourceRange() << Assign; // We need to preserve the AST regardless, so migration tool // can do its job. return false; } } } // If none of the special cases above are triggered, then this is a // simple const assignment. if (DiagID == 0) { DiagnoseConstAssignment(S, E, Loc); return true; } break; case Expr::MLV_ConstAddrSpace: DiagnoseConstAssignment(S, E, Loc); return true; case Expr::MLV_ConstQualifiedField: DiagnoseRecursiveConstFields(S, E, Loc); return true; case Expr::MLV_ArrayType: case Expr::MLV_ArrayTemporary: DiagID = diag::err_typecheck_array_not_modifiable_lvalue; NeedType = true; break; case Expr::MLV_NotObjectType: DiagID = diag::err_typecheck_non_object_not_modifiable_lvalue; NeedType = true; break; case Expr::MLV_LValueCast: DiagID = diag::err_typecheck_lvalue_casts_not_supported; break; case Expr::MLV_Valid: llvm_unreachable("did not take early return for MLV_Valid"); case Expr::MLV_InvalidExpression: case Expr::MLV_MemberFunction: case Expr::MLV_ClassTemporary: DiagID = diag::err_typecheck_expression_not_modifiable_lvalue; break; case Expr::MLV_IncompleteType: case Expr::MLV_IncompleteVoidType: return S.RequireCompleteType(Loc, E->getType(), diag::err_typecheck_incomplete_type_not_modifiable_lvalue, E); case Expr::MLV_DuplicateVectorComponents: DiagID = diag::err_typecheck_duplicate_vector_components_not_mlvalue; break; case Expr::MLV_NoSetterProperty: llvm_unreachable("readonly properties should be processed differently"); case Expr::MLV_InvalidMessageExpression: DiagID = diag::err_readonly_message_assignment; break; case Expr::MLV_SubObjCPropertySetting: DiagID = diag::err_no_subobject_property_setting; break; } SourceRange Assign; if (Loc != OrigLoc) Assign = SourceRange(OrigLoc, OrigLoc); if (NeedType) S.Diag(Loc, DiagID) << E->getType() << E->getSourceRange() << Assign; else S.Diag(Loc, DiagID) << E->getSourceRange() << Assign; return true; } static void CheckIdentityFieldAssignment(Expr *LHSExpr, Expr *RHSExpr, SourceLocation Loc, Sema &Sema) { if (Sema.inTemplateInstantiation()) return; if (Sema.isUnevaluatedContext()) return; if (Loc.isInvalid() || Loc.isMacroID()) return; if (LHSExpr->getExprLoc().isMacroID() || RHSExpr->getExprLoc().isMacroID()) return; // C / C++ fields MemberExpr *ML = dyn_cast(LHSExpr); MemberExpr *MR = dyn_cast(RHSExpr); if (ML && MR) { if (!(isa(ML->getBase()) && isa(MR->getBase()))) return; const ValueDecl *LHSDecl = cast(ML->getMemberDecl()->getCanonicalDecl()); const ValueDecl *RHSDecl = cast(MR->getMemberDecl()->getCanonicalDecl()); if (LHSDecl != RHSDecl) return; if (LHSDecl->getType().isVolatileQualified()) return; if (const ReferenceType *RefTy = LHSDecl->getType()->getAs()) if (RefTy->getPointeeType().isVolatileQualified()) return; Sema.Diag(Loc, diag::warn_identity_field_assign) << 0; } // Objective-C instance variables ObjCIvarRefExpr *OL = dyn_cast(LHSExpr); ObjCIvarRefExpr *OR = dyn_cast(RHSExpr); if (OL && OR && OL->getDecl() == OR->getDecl()) { DeclRefExpr *RL = dyn_cast(OL->getBase()->IgnoreImpCasts()); DeclRefExpr *RR = dyn_cast(OR->getBase()->IgnoreImpCasts()); if (RL && RR && RL->getDecl() == RR->getDecl()) Sema.Diag(Loc, diag::warn_identity_field_assign) << 1; } } // C99 6.5.16.1 QualType 
Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS, SourceLocation Loc, QualType CompoundType) { assert(!LHSExpr->hasPlaceholderType(BuiltinType::PseudoObject)); // Verify that LHS is a modifiable lvalue, and emit error if not. if (CheckForModifiableLvalue(LHSExpr, Loc, *this)) return QualType(); QualType LHSType = LHSExpr->getType(); QualType RHSType = CompoundType.isNull() ? RHS.get()->getType() : CompoundType; // OpenCL v1.2 s6.1.1.1 p2: // The half data type can only be used to declare a pointer to a buffer that // contains half values if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp16") && LHSType->isHalfType()) { Diag(Loc, diag::err_opencl_half_load_store) << 1 << LHSType.getUnqualifiedType(); return QualType(); } AssignConvertType ConvTy; if (CompoundType.isNull()) { Expr *RHSCheck = RHS.get(); CheckIdentityFieldAssignment(LHSExpr, RHSCheck, Loc, *this); QualType LHSTy(LHSType); ConvTy = CheckSingleAssignmentConstraints(LHSTy, RHS); if (RHS.isInvalid()) return QualType(); // Special case of NSObject attributes on c-style pointer types. if (ConvTy == IncompatiblePointer && ((Context.isObjCNSObjectType(LHSType) && RHSType->isObjCObjectPointerType()) || (Context.isObjCNSObjectType(RHSType) && LHSType->isObjCObjectPointerType()))) ConvTy = Compatible; if (ConvTy == Compatible && LHSType->isObjCObjectType()) Diag(Loc, diag::err_objc_object_assignment) << LHSType; // If the RHS is a unary plus or minus, check to see if they = and + are // right next to each other. If so, the user may have typo'd "x =+ 4" // instead of "x += 4". if (ImplicitCastExpr *ICE = dyn_cast(RHSCheck)) RHSCheck = ICE->getSubExpr(); if (UnaryOperator *UO = dyn_cast(RHSCheck)) { if ((UO->getOpcode() == UO_Plus || UO->getOpcode() == UO_Minus) && Loc.isFileID() && UO->getOperatorLoc().isFileID() && // Only if the two operators are exactly adjacent. Loc.getLocWithOffset(1) == UO->getOperatorLoc() && // And there is a space or other character before the subexpr of the // unary +/-. We don't want to warn on "x=-1". Loc.getLocWithOffset(2) != UO->getSubExpr()->getBeginLoc() && UO->getSubExpr()->getBeginLoc().isFileID()) { Diag(Loc, diag::warn_not_compound_assign) << (UO->getOpcode() == UO_Plus ? "+" : "-") << SourceRange(UO->getOperatorLoc(), UO->getOperatorLoc()); } } if (ConvTy == Compatible) { if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong) { // Warn about retain cycles where a block captures the LHS, but // not if the LHS is a simple variable into which the block is // being stored...unless that variable can be captured by reference! const Expr *InnerLHS = LHSExpr->IgnoreParenCasts(); const DeclRefExpr *DRE = dyn_cast(InnerLHS); if (!DRE || DRE->getDecl()->hasAttr()) checkRetainCycles(LHSExpr, RHS.get()); } if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong || LHSType.isNonWeakInMRRWithObjCWeak(Context)) { // It is safe to assign a weak reference into a strong variable. // Although this code can still have problems: // id x = self.weakProp; // id y = self.weakProp; // we do not warn to warn spuriously when 'x' and 'y' are on separate // paths through the function. This should be revisited if // -Wrepeated-use-of-weak is made flow-sensitive. // For ObjCWeak only, we do not warn if the assign is to a non-weak // variable, which will be valid for the current autorelease scope. 
// Only ignore explicit casts to void.
static bool IgnoreCommaOperand(const Expr *E) {
  E = E->IgnoreParens();

  if (const CastExpr *CE = dyn_cast<CastExpr>(E)) {
    if (CE->getCastKind() == CK_ToVoid) {
      return true;
    }

    // static_cast<void> on a dependent type will not show up as CK_ToVoid.
    if (CE->getCastKind() == CK_Dependent && E->getType()->isVoidType() &&
        CE->getSubExpr()->getType()->isDependentType()) {
      return true;
    }
  }

  return false;
}

// Look for instances where it is likely the comma operator is confused with
// another operator.  There is a whitelist of acceptable expressions for the
// left hand side of the comma operator, otherwise emit a warning.
void Sema::DiagnoseCommaOperator(const Expr *LHS, SourceLocation Loc) {
  // No warnings in macros
  if (Loc.isMacroID())
    return;

  // Don't warn in template instantiations.
  if (inTemplateInstantiation())
    return;

  // Scope isn't fine-grained enough to whitelist the specific cases, so
  // instead, skip more than needed, then call back into here with the
  // CommaVisitor in SemaStmt.cpp.
  // The whitelisted locations are the initialization and increment portions
  // of a for loop.  The additional checks are on the condition of
  // if statements, do/while loops, and for loops.
  // Differences in scope flags for C89 mode require the extra logic.
  const unsigned ForIncrementFlags =
      getLangOpts().C99 || getLangOpts().CPlusPlus
          ? Scope::ControlScope | Scope::ContinueScope | Scope::BreakScope
          : Scope::ContinueScope | Scope::BreakScope;
  const unsigned ForInitFlags = Scope::ControlScope | Scope::DeclScope;
  const unsigned ScopeFlags = getCurScope()->getFlags();
  if ((ScopeFlags & ForIncrementFlags) == ForIncrementFlags ||
      (ScopeFlags & ForInitFlags) == ForInitFlags)
    return;

  // If there are multiple comma operators used together, get the RHS of the
  // comma operator as the LHS.
  while (const BinaryOperator *BO = dyn_cast<BinaryOperator>(LHS)) {
    if (BO->getOpcode() != BO_Comma)
      break;
    LHS = BO->getRHS();
  }

  // Only allow some expressions on LHS to not warn.
  if (IgnoreCommaOperand(LHS))
    return;

  Diag(Loc, diag::warn_comma_operator);
  Diag(LHS->getBeginLoc(), diag::note_cast_to_void)
      << LHS->getSourceRange()
      << FixItHint::CreateInsertion(LHS->getBeginLoc(),
                                    LangOpts.CPlusPlus ? "static_cast<void>("
                                                       : "(void)(")
      << FixItHint::CreateInsertion(PP.getLocForEndOfToken(LHS->getEndLoc()),
                                    ")");
}
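// Illustrative sketch (not from the original source): a typo this warning is
// meant to catch; the comma operator silently discards the first comparison:
//
//   if (x == 1, y == 2)  // warns: LHS of comma has no effect; the fix-it
//     do_something();    // suggests "(void)(x == 1)" or a static_cast<void>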
"static_cast(" : "(void)(") << FixItHint::CreateInsertion(PP.getLocForEndOfToken(LHS->getEndLoc()), ")"); } // C99 6.5.17 static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { LHS = S.CheckPlaceholderExpr(LHS.get()); RHS = S.CheckPlaceholderExpr(RHS.get()); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); // C's comma performs lvalue conversion (C99 6.3.2.1) on both its // operands, but not unary promotions. // C++'s comma does not do any conversions at all (C++ [expr.comma]p1). // So we treat the LHS as a ignored value, and in C++ we allow the // containing site to determine what should be done with the RHS. LHS = S.IgnoredValueConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); S.DiagnoseUnusedExprResult(LHS.get()); if (!S.getLangOpts().CPlusPlus) { RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); if (!RHS.get()->getType()->isVoidType()) S.RequireCompleteType(Loc, RHS.get()->getType(), diag::err_incomplete_type); } if (!S.getDiagnostics().isIgnored(diag::warn_comma_operator, Loc)) S.DiagnoseCommaOperator(LHS.get(), Loc); return RHS.get()->getType(); } /// CheckIncrementDecrementOperand - unlike most "Check" methods, this routine /// doesn't need to call UsualUnaryConversions or UsualArithmeticConversions. static QualType CheckIncrementDecrementOperand(Sema &S, Expr *Op, ExprValueKind &VK, ExprObjectKind &OK, SourceLocation OpLoc, bool IsInc, bool IsPrefix) { if (Op->isTypeDependent()) return S.Context.DependentTy; QualType ResType = Op->getType(); // Atomic types can be used for increment / decrement where the non-atomic // versions can, so ignore the _Atomic() specifier for the purpose of // checking. if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); assert(!ResType.isNull() && "no type for increment/decrement expression"); if (S.getLangOpts().CPlusPlus && ResType->isBooleanType()) { // Decrement of bool is not allowed. if (!IsInc) { S.Diag(OpLoc, diag::err_decrement_bool) << Op->getSourceRange(); return QualType(); } // Increment of bool sets it to true, but is deprecated. S.Diag(OpLoc, S.getLangOpts().CPlusPlus17 ? diag::ext_increment_bool : diag::warn_increment_bool) << Op->getSourceRange(); } else if (S.getLangOpts().CPlusPlus && ResType->isEnumeralType()) { // Error on enum increments and decrements in C++ mode S.Diag(OpLoc, diag::err_increment_decrement_enum) << IsInc << ResType; return QualType(); } else if (ResType->isRealType()) { // OK! } else if (ResType->isPointerType()) { // C99 6.5.2.4p2, 6.5.6p2 if (!checkArithmeticOpPointerOperand(S, OpLoc, Op)) return QualType(); } else if (ResType->isObjCObjectPointerType()) { // On modern runtimes, ObjC pointer arithmetic is forbidden. // Otherwise, we just need a complete type. if (checkArithmeticIncompletePointerType(S, OpLoc, Op) || checkArithmeticOnObjCPointer(S, OpLoc, Op)) return QualType(); } else if (ResType->isAnyComplexType()) { // C99 does not support ++/-- on complex types, we allow as an extension. S.Diag(OpLoc, diag::ext_integer_increment_complex) << ResType << Op->getSourceRange(); } else if (ResType->isPlaceholderType()) { ExprResult PR = S.CheckPlaceholderExpr(Op); if (PR.isInvalid()) return QualType(); return CheckIncrementDecrementOperand(S, PR.get(), VK, OK, OpLoc, IsInc, IsPrefix); } else if (S.getLangOpts().AltiVec && ResType->isVectorType()) { // OK! 
/// getPrimaryDecl - Helper function for CheckAddressOfOperand().
/// This routine allows us to typecheck complex/recursive expressions
/// where the declaration is needed for type checking. We only need to
/// handle cases when the expression references a function designator
/// or is an lvalue. Here are some examples:
///  - &(x) => x
///  - &*****f => f for f a function designator.
///  - &s.xx => s
///  - &s.zz[1].yy -> s, if zz is an array
///  - *(x + 1) -> x, if x is an array
///  - &"123"[2] -> 0
///  - & __real__ x -> x
static ValueDecl *getPrimaryDecl(Expr *E) {
  switch (E->getStmtClass()) {
  case Stmt::DeclRefExprClass:
    return cast<DeclRefExpr>(E)->getDecl();
  case Stmt::MemberExprClass:
    // If this is an arrow operator, the address is an offset from
    // the base's value, so the object the base refers to is
    // irrelevant.
    if (cast<MemberExpr>(E)->isArrow())
      return nullptr;
    // Otherwise, the expression refers to a part of the base
    return getPrimaryDecl(cast<MemberExpr>(E)->getBase());
  case Stmt::ArraySubscriptExprClass: {
    // FIXME: This code shouldn't be necessary!  We should catch the implicit
    // promotion of register arrays earlier.
    Expr* Base = cast<ArraySubscriptExpr>(E)->getBase();
    if (ImplicitCastExpr* ICE = dyn_cast<ImplicitCastExpr>(Base)) {
      if (ICE->getSubExpr()->getType()->isArrayType())
        return getPrimaryDecl(ICE->getSubExpr());
    }
    return nullptr;
  }
  case Stmt::UnaryOperatorClass: {
    UnaryOperator *UO = cast<UnaryOperator>(E);

    switch(UO->getOpcode()) {
    case UO_Real:
    case UO_Imag:
    case UO_Extension:
      return getPrimaryDecl(UO->getSubExpr());
    default:
      return nullptr;
    }
  }
  case Stmt::ParenExprClass:
    return getPrimaryDecl(cast<ParenExpr>(E)->getSubExpr());
  case Stmt::ImplicitCastExprClass:
    // If the result of an implicit cast is an l-value, we care about
    // the sub-expression; otherwise, the result here doesn't matter.
    return getPrimaryDecl(cast<ImplicitCastExpr>(E)->getSubExpr());
  default:
    return nullptr;
  }
}

namespace {
enum {
  AO_Bit_Field = 0,
  AO_Vector_Element = 1,
  AO_Property_Expansion = 2,
  AO_Register_Variable = 3,
  AO_No_Error = 4
};
}

/// Diagnose invalid operand for address of operations.
///
/// \param Type The type of operand which cannot have its address taken.
static void diagnoseAddressOfInvalidType(Sema &S, SourceLocation Loc,
                                         Expr *E, unsigned Type) {
  S.Diag(Loc, diag::err_typecheck_address_of) << Type << E->getSourceRange();
}
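// Illustrative sketch (not from the original source) of the AO_* error codes
// defined above, using a hypothetical struct S:
//
//   struct S { int bf : 3; } s;
//   int *p = &s.bf;   // error: address of bit-field (AO_Bit_Field)
//
//   register int r;   // C only
//   int *q = &r;      // error: address of register variable
//                     //        (AO_Register_Variable)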
/// CheckAddressOfOperand - The operand of & must be either a function
/// designator or an lvalue designating an object. If it is an lvalue, the
/// object cannot be declared with storage class register or be a bit field.
/// Note: The usual conversions are *not* applied to the operand of the &
/// operator (C99 6.3.2.1p[2-4]), and its result is never an lvalue.
/// In C++, the operand might be an overloaded function name, in which case
/// we allow the '&' but retain the overloaded-function type.
QualType Sema::CheckAddressOfOperand(ExprResult &OrigOp, SourceLocation OpLoc) {
  if (const BuiltinType *PTy = OrigOp.get()->getType()->getAsPlaceholderType()){
    if (PTy->getKind() == BuiltinType::Overload) {
      Expr *E = OrigOp.get()->IgnoreParens();
      if (!isa<OverloadExpr>(E)) {
        assert(cast<UnaryOperator>(E)->getOpcode() == UO_AddrOf);
        Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof_addrof_function)
          << OrigOp.get()->getSourceRange();
        return QualType();
      }

      OverloadExpr *Ovl = cast<OverloadExpr>(E);
      if (isa<UnresolvedMemberExpr>(Ovl))
        if (!ResolveSingleFunctionTemplateSpecialization(Ovl)) {
          Diag(OpLoc, diag::err_invalid_form_pointer_member_function)
            << OrigOp.get()->getSourceRange();
          return QualType();
        }

      return Context.OverloadTy;
    }

    if (PTy->getKind() == BuiltinType::UnknownAny)
      return Context.UnknownAnyTy;

    if (PTy->getKind() == BuiltinType::BoundMember) {
      Diag(OpLoc, diag::err_invalid_form_pointer_member_function)
        << OrigOp.get()->getSourceRange();
      return QualType();
    }

    OrigOp = CheckPlaceholderExpr(OrigOp.get());
    if (OrigOp.isInvalid()) return QualType();
  }

  if (OrigOp.get()->isTypeDependent())
    return Context.DependentTy;

  assert(!OrigOp.get()->getType()->isPlaceholderType());

  // Make sure to ignore parentheses in subsequent checks
  Expr *op = OrigOp.get()->IgnoreParens();

  // In OpenCL, captures for blocks called as lambda functions
  // are located in the private address space. Blocks used in
  // enqueue_kernel can be located in a different address space
  // depending on a vendor implementation. We therefore prevent
  // taking the address of a capture, to avoid invalid AS casts.
  if (LangOpts.OpenCL) {
    auto* VarRef = dyn_cast<DeclRefExpr>(op);
    if (VarRef && VarRef->refersToEnclosingVariableOrCapture()) {
      Diag(op->getExprLoc(), diag::err_opencl_taking_address_capture);
      return QualType();
    }
  }

  if (getLangOpts().C99) {
    // Implement C99-only parts of addressof rules.
    if (UnaryOperator* uOp = dyn_cast<UnaryOperator>(op)) {
      if (uOp->getOpcode() == UO_Deref)
        // Per C99 6.5.3.2, the address of a deref always returns a valid
        // result (assuming the deref expression is valid).
        return uOp->getSubExpr()->getType();
    }
    // Technically, there should be a check for array subscript
    // expressions here, but the result of one is always an lvalue anyway.
  }
  ValueDecl *dcl = getPrimaryDecl(op);

  if (auto *FD = dyn_cast_or_null<FunctionDecl>(dcl))
    if (!checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true,
                                           op->getBeginLoc()))
      return QualType();

  Expr::LValueClassification lval = op->ClassifyLValue(Context);
  unsigned AddressOfError = AO_No_Error;

  if (lval == Expr::LV_ClassTemporary || lval == Expr::LV_ArrayTemporary) {
    bool sfinae = (bool)isSFINAEContext();
    Diag(OpLoc, isSFINAEContext() ? diag::err_typecheck_addrof_temporary
                                  : diag::ext_typecheck_addrof_temporary)
      << op->getType() << op->getSourceRange();
    if (sfinae)
      return QualType();
    // Materialize the temporary as an lvalue so that we can take its address.
    OrigOp = op =
        CreateMaterializeTemporaryExpr(op->getType(), OrigOp.get(), true);
  } else if (isa<ObjCSelectorExpr>(op)) {
    return Context.getPointerType(op->getType());
  } else if (lval == Expr::LV_MemberFunction) {
    // If it's an instance method, make a member pointer.
    // The expression must have exactly the form &A::foo.
    // If the underlying expression isn't a decl ref, give up.
    if (!isa<DeclRefExpr>(op)) {
      Diag(OpLoc, diag::err_invalid_form_pointer_member_function)
        << OrigOp.get()->getSourceRange();
      return QualType();
    }
    DeclRefExpr *DRE = cast<DeclRefExpr>(op);
    CXXMethodDecl *MD = cast<CXXMethodDecl>(DRE->getDecl());

    // The id-expression was parenthesized.
    if (OrigOp.get() != DRE) {
      Diag(OpLoc, diag::err_parens_pointer_member_function)
        << OrigOp.get()->getSourceRange();

    // The method was named without a qualifier.
    } else if (!DRE->getQualifier()) {
      if (MD->getParent()->getName().empty())
        Diag(OpLoc, diag::err_unqualified_pointer_member_function)
          << op->getSourceRange();
      else {
        SmallString<32> Str;
        StringRef Qual = (MD->getParent()->getName() + "::").toStringRef(Str);
        Diag(OpLoc, diag::err_unqualified_pointer_member_function)
          << op->getSourceRange()
          << FixItHint::CreateInsertion(op->getSourceRange().getBegin(), Qual);
      }
    }

    // Taking the address of a dtor is illegal per C++ [class.dtor]p2.
    if (isa<CXXDestructorDecl>(MD))
      Diag(OpLoc, diag::err_typecheck_addrof_dtor) << op->getSourceRange();

    QualType MPTy = Context.getMemberPointerType(
        op->getType(), Context.getTypeDeclType(MD->getParent()).getTypePtr());
    // Under the MS ABI, lock down the inheritance model now.
    if (Context.getTargetInfo().getCXXABI().isMicrosoft())
      (void)isCompleteType(OpLoc, MPTy);
    return MPTy;
  } else if (lval != Expr::LV_Valid && lval != Expr::LV_IncompleteVoidType) {
    // C99 6.5.3.2p1
    // The operand must be either an l-value or a function designator
    if (!op->getType()->isFunctionType()) {
      // Use a special diagnostic for loads from property references.
      if (isa<PseudoObjectExpr>(op)) {
        AddressOfError = AO_Property_Expansion;
      } else {
        Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof)
          << op->getType() << op->getSourceRange();
        return QualType();
      }
    }
  } else if (op->getObjectKind() == OK_BitField) { // C99 6.5.3.2p1
    // The operand cannot be a bit-field
    AddressOfError = AO_Bit_Field;
  } else if (op->getObjectKind() == OK_VectorComponent) {
    // The operand cannot be an element of a vector
    AddressOfError = AO_Vector_Element;
  } else if (dcl) { // C99 6.5.3.2p1
    // We have an lvalue with a decl. Make sure the decl is not declared
    // with the register storage-class specifier.
    if (const VarDecl *vd = dyn_cast<VarDecl>(dcl)) {
      // in C++ it is not an error to take the address of a register
      // variable (c++03 7.1.1P3)
      if (vd->getStorageClass() == SC_Register &&
          !getLangOpts().CPlusPlus) {
        AddressOfError = AO_Register_Variable;
      }
    } else if (isa<MSPropertyDecl>(dcl)) {
      AddressOfError = AO_Property_Expansion;
    } else if (isa<FunctionTemplateDecl>(dcl)) {
      return Context.OverloadTy;
    } else if (isa<FieldDecl>(dcl) || isa<IndirectFieldDecl>(dcl)) {
      // Okay: we can take the address of a field.
      // Could be a pointer to member, though, if there is an explicit
      // scope qualifier for the class.
      if (isa<DeclRefExpr>(op) && cast<DeclRefExpr>(op)->getQualifier()) {
        DeclContext *Ctx = dcl->getDeclContext();
        if (Ctx && Ctx->isRecord()) {
          if (dcl->getType()->isReferenceType()) {
            Diag(OpLoc,
                 diag::err_cannot_form_pointer_to_member_of_reference_type)
              << dcl->getDeclName() << dcl->getType();
            return QualType();
          }

          while (cast<RecordDecl>(Ctx)->isAnonymousStructOrUnion())
            Ctx = Ctx->getParent();

          QualType MPTy = Context.getMemberPointerType(
              op->getType(),
              Context.getTypeDeclType(cast<RecordDecl>(Ctx)).getTypePtr());
          // Under the MS ABI, lock down the inheritance model now.
          if (Context.getTargetInfo().getCXXABI().isMicrosoft())
            (void)isCompleteType(OpLoc, MPTy);
          return MPTy;
        }
      }
    } else if (!isa<FunctionDecl>(dcl) && !isa<NonTypeTemplateParmDecl>(dcl) &&
               !isa<BindingDecl>(dcl))
      llvm_unreachable("Unknown/unexpected decl type");
  }

  if (AddressOfError != AO_No_Error) {
    diagnoseAddressOfInvalidType(*this, OpLoc, op, AddressOfError);
    return QualType();
  }

  if (lval == Expr::LV_IncompleteVoidType) {
    // Taking the address of a void variable is technically illegal, but we
    // allow it in cases which are otherwise valid.
    // Example: "extern void x; void* y = &x;".
    Diag(OpLoc, diag::ext_typecheck_addrof_void) << op->getSourceRange();
  }

  // If the operand has type "type", the result has type "pointer to type".
  if (op->getType()->isObjCObjectType())
    return Context.getObjCObjectPointerType(op->getType());

  CheckAddressOfPackedMember(op);

  return Context.getPointerType(op->getType());
}
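// Illustrative sketch (not from the original source): the member-function
// forms distinguished above; only the exact form &A::foo yields a member
// pointer:
//
//   struct A {
//     void foo();
//     void bar() {
//       void (A::*ok)() = &A::foo;     // OK
//       // void (A::*p1)() = &foo;      // error: unqualified; fix-it adds A::
//       // void (A::*p2)() = &(A::foo); // error: id-expression parenthesized
//     }
//   };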
static void RecordModifiableNonNullParam(Sema &S, const Expr *Exp) {
  const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Exp);
  if (!DRE)
    return;
  const Decl *D = DRE->getDecl();
  if (!D)
    return;
  const ParmVarDecl *Param = dyn_cast<ParmVarDecl>(D);
  if (!Param)
    return;
  if (const FunctionDecl* FD = dyn_cast<FunctionDecl>(Param->getDeclContext()))
    if (!FD->hasAttr<NonNullAttr>() && !Param->hasAttr<NonNullAttr>())
      return;
  if (FunctionScopeInfo *FD = S.getCurFunction())
    if (!FD->ModifiedNonNullParams.count(Param))
      FD->ModifiedNonNullParams.insert(Param);
}

/// CheckIndirectionOperand - Type check unary indirection (prefix '*').
static QualType CheckIndirectionOperand(Sema &S, Expr *Op, ExprValueKind &VK,
                                        SourceLocation OpLoc) {
  if (Op->isTypeDependent())
    return S.Context.DependentTy;

  ExprResult ConvResult = S.UsualUnaryConversions(Op);
  if (ConvResult.isInvalid())
    return QualType();
  Op = ConvResult.get();
  QualType OpTy = Op->getType();
  QualType Result;

  if (isa<CXXReinterpretCastExpr>(Op)) {
    QualType OpOrigType = Op->IgnoreParenCasts()->getType();
    S.CheckCompatibleReinterpretCast(OpOrigType, OpTy, /*IsDereference*/true,
                                     Op->getSourceRange());
  }

  if (const PointerType *PT = OpTy->getAs<PointerType>())
    Result = PT->getPointeeType();
  else if (const ObjCObjectPointerType *OPT =
             OpTy->getAs<ObjCObjectPointerType>())
    Result = OPT->getPointeeType();
  else {
    ExprResult PR = S.CheckPlaceholderExpr(Op);
    if (PR.isInvalid()) return QualType();
    if (PR.get() != Op)
      return CheckIndirectionOperand(S, PR.get(), VK, OpLoc);
  }

  if (Result.isNull()) {
    S.Diag(OpLoc, diag::err_typecheck_indirection_requires_pointer)
      << OpTy << Op->getSourceRange();
    return QualType();
  }

  // Note that per both C89 and C99, indirection is always legal, even if
  // Result is an incomplete type or void.  It would be possible to warn
  // about dereferencing a void pointer, but it's completely well-defined,
  // and such a warning is unlikely to catch any mistakes. In C++,
  // indirection is not valid for pointers to 'void' but is fine for any
  // other pointer type:
  //
  //   C++ [expr.unary.op]p1:
  //     [...] the expression to which [the unary * operator] is applied
  //     shall be a pointer to an object type, or a pointer to a function
  //     type
  if (S.getLangOpts().CPlusPlus && Result->isVoidType())
    S.Diag(OpLoc, diag::ext_typecheck_indirection_through_void_pointer)
      << OpTy << Op->getSourceRange();

  // Dereferences are usually l-values...
  VK = VK_LValue;

  // ...except that certain expressions are never l-values in C.
  if (!S.getLangOpts().CPlusPlus && Result.isCForbiddenLValueType())
    VK = VK_RValue;

  return Result;
}
BinaryOperatorKind Sema::ConvertTokenKindToBinaryOpcode(tok::TokenKind Kind) {
  BinaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown binop!");
  case tok::periodstar:           Opc = BO_PtrMemD; break;
  case tok::arrowstar:            Opc = BO_PtrMemI; break;
  case tok::star:                 Opc = BO_Mul; break;
  case tok::slash:                Opc = BO_Div; break;
  case tok::percent:              Opc = BO_Rem; break;
  case tok::plus:                 Opc = BO_Add; break;
  case tok::minus:                Opc = BO_Sub; break;
  case tok::lessless:             Opc = BO_Shl; break;
  case tok::greatergreater:       Opc = BO_Shr; break;
  case tok::lessequal:            Opc = BO_LE; break;
  case tok::less:                 Opc = BO_LT; break;
  case tok::greaterequal:         Opc = BO_GE; break;
  case tok::greater:              Opc = BO_GT; break;
  case tok::exclaimequal:         Opc = BO_NE; break;
  case tok::equalequal:           Opc = BO_EQ; break;
  case tok::spaceship:            Opc = BO_Cmp; break;
  case tok::amp:                  Opc = BO_And; break;
  case tok::caret:                Opc = BO_Xor; break;
  case tok::pipe:                 Opc = BO_Or; break;
  case tok::ampamp:               Opc = BO_LAnd; break;
  case tok::pipepipe:             Opc = BO_LOr; break;
  case tok::equal:                Opc = BO_Assign; break;
  case tok::starequal:            Opc = BO_MulAssign; break;
  case tok::slashequal:           Opc = BO_DivAssign; break;
  case tok::percentequal:         Opc = BO_RemAssign; break;
  case tok::plusequal:            Opc = BO_AddAssign; break;
  case tok::minusequal:           Opc = BO_SubAssign; break;
  case tok::lesslessequal:        Opc = BO_ShlAssign; break;
  case tok::greatergreaterequal:  Opc = BO_ShrAssign; break;
  case tok::ampequal:             Opc = BO_AndAssign; break;
  case tok::caretequal:           Opc = BO_XorAssign; break;
  case tok::pipeequal:            Opc = BO_OrAssign; break;
  case tok::comma:                Opc = BO_Comma; break;
  }
  return Opc;
}

static inline UnaryOperatorKind ConvertTokenKindToUnaryOpcode(
  tok::TokenKind Kind) {
  UnaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown unary op!");
  case tok::plusplus:     Opc = UO_PreInc; break;
  case tok::minusminus:   Opc = UO_PreDec; break;
  case tok::amp:          Opc = UO_AddrOf; break;
  case tok::star:         Opc = UO_Deref; break;
  case tok::plus:         Opc = UO_Plus; break;
  case tok::minus:        Opc = UO_Minus; break;
  case tok::tilde:        Opc = UO_Not; break;
  case tok::exclaim:      Opc = UO_LNot; break;
  case tok::kw___real:    Opc = UO_Real; break;
  case tok::kw___imag:    Opc = UO_Imag; break;
  case tok::kw___extension__: Opc = UO_Extension; break;
  }
  return Opc;
}

/// DiagnoseSelfAssignment - Emits a warning if a value is assigned to itself.
/// This warning is suppressed in the event of macro expansions.
static void DiagnoseSelfAssignment(Sema &S, Expr *LHSExpr, Expr *RHSExpr,
                                   SourceLocation OpLoc, bool IsBuiltin) {
  if (S.inTemplateInstantiation())
    return;
  if (S.isUnevaluatedContext())
    return;
  if (OpLoc.isInvalid() || OpLoc.isMacroID())
    return;
  LHSExpr = LHSExpr->IgnoreParenImpCasts();
  RHSExpr = RHSExpr->IgnoreParenImpCasts();
  const DeclRefExpr *LHSDeclRef = dyn_cast<DeclRefExpr>(LHSExpr);
  const DeclRefExpr *RHSDeclRef = dyn_cast<DeclRefExpr>(RHSExpr);
  if (!LHSDeclRef || !RHSDeclRef ||
      LHSDeclRef->getLocation().isMacroID() ||
      RHSDeclRef->getLocation().isMacroID())
    return;
  const ValueDecl *LHSDecl =
    cast<ValueDecl>(LHSDeclRef->getDecl()->getCanonicalDecl());
  const ValueDecl *RHSDecl =
    cast<ValueDecl>(RHSDeclRef->getDecl()->getCanonicalDecl());
  if (LHSDecl != RHSDecl)
    return;
  if (LHSDecl->getType().isVolatileQualified())
    return;
  if (const ReferenceType *RefTy = LHSDecl->getType()->getAs<ReferenceType>())
    if (RefTy->getPointeeType().isVolatileQualified())
      return;

  S.Diag(OpLoc, IsBuiltin ? diag::warn_self_assignment_builtin
                          : diag::warn_self_assignment_overloaded)
      << LHSDeclRef->getType() << LHSExpr->getSourceRange()
      << RHSExpr->getSourceRange();
}
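// Illustrative sketch (not from the original source): what the
// self-assignment check above does and does not flag:
//
//   int x = 0;
//   x = x;               // warns: explicit self-assignment
//   volatile int v = 0;
//   v = v;               // no warning: volatile access has side effects
//   #define ID(a) a
//   x = ID(x);           // no warning: suppressed inside macro expansions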
/// Check if a bitwise-& is performed on an Objective-C pointer.  This
/// is usually indicative of introspection within the Objective-C pointer.
static void checkObjCPointerIntrospection(Sema &S, ExprResult &L, ExprResult &R,
                                          SourceLocation OpLoc) {
  if (!S.getLangOpts().ObjC)
    return;

  const Expr *ObjCPointerExpr = nullptr, *OtherExpr = nullptr;
  const Expr *LHS = L.get();
  const Expr *RHS = R.get();

  if (LHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) {
    ObjCPointerExpr = LHS;
    OtherExpr = RHS;
  }
  else if (RHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) {
    ObjCPointerExpr = RHS;
    OtherExpr = LHS;
  }

  // This warning is deliberately made very specific to reduce false
  // positives with logic that uses '&' for hashing.  This logic mainly
  // looks for code trying to introspect into tagged pointers, which
  // code should generally never do.
  if (ObjCPointerExpr && isa<IntegerLiteral>(OtherExpr->IgnoreParenCasts())) {
    unsigned Diag = diag::warn_objc_pointer_masking;
    // Determine if we are introspecting the result of performSelectorXXX.
    const Expr *Ex = ObjCPointerExpr->IgnoreParenCasts();
    // Special case messages to -performSelector and friends, which
    // can return non-pointer values boxed in a pointer value.
    // Some clients may wish to silence warnings in this subcase.
    if (const ObjCMessageExpr *ME = dyn_cast<ObjCMessageExpr>(Ex)) {
      Selector S = ME->getSelector();
      StringRef SelArg0 = S.getNameForSlot(0);
      if (SelArg0.startswith("performSelector"))
        Diag = diag::warn_objc_pointer_masking_performSelector;
    }

    S.Diag(OpLoc, Diag)
      << ObjCPointerExpr->getSourceRange();
  }
}
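// Illustrative sketch (not from the original source): Objective-C code the
// masking warning above targets; it pokes at tagged-pointer bits directly
// rather than asking the runtime:
//
//   id obj = ...;
//   if ((uintptr_t)obj & 0x1) { ... }  // warns: masking an ObjC pointer
//                                      // with an integer literal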
static NamedDecl *getDeclFromExpr(Expr *E) {
  if (!E)
    return nullptr;
  if (auto *DRE = dyn_cast<DeclRefExpr>(E))
    return DRE->getDecl();
  if (auto *ME = dyn_cast<MemberExpr>(E))
    return ME->getMemberDecl();
  if (auto *IRE = dyn_cast<ObjCIvarRefExpr>(E))
    return IRE->getDecl();
  return nullptr;
}

// This helper function promotes a binary operator's operands (which are of a
// half vector type) to a vector of floats and then truncates the result to
// a vector of either half or short.
static ExprResult convertHalfVecBinOp(Sema &S, ExprResult LHS, ExprResult RHS,
                                      BinaryOperatorKind Opc, QualType ResultTy,
                                      ExprValueKind VK, ExprObjectKind OK,
                                      bool IsCompAssign, SourceLocation OpLoc,
                                      FPOptions FPFeatures) {
  auto &Context = S.getASTContext();
  assert((isVector(ResultTy, Context.HalfTy) ||
          isVector(ResultTy, Context.ShortTy)) &&
         "Result must be a vector of half or short");
  assert(isVector(LHS.get()->getType(), Context.HalfTy) &&
         isVector(RHS.get()->getType(), Context.HalfTy) &&
         "both operands expected to be a half vector");

  RHS = convertVector(RHS.get(), Context.FloatTy, S);
  QualType BinOpResTy = RHS.get()->getType();

  // If Opc is a comparison, ResultType is a vector of shorts. In that case,
  // change BinOpResTy to a vector of ints.
  if (isVector(ResultTy, Context.ShortTy))
    BinOpResTy = S.GetSignedVectorType(BinOpResTy);

  if (IsCompAssign)
    return new (Context) CompoundAssignOperator(
        LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, BinOpResTy, BinOpResTy,
        OpLoc, FPFeatures);

  LHS = convertVector(LHS.get(), Context.FloatTy, S);
  auto *BO = new (Context) BinaryOperator(LHS.get(), RHS.get(), Opc,
                                          BinOpResTy, VK, OK, OpLoc,
                                          FPFeatures);
  return convertVector(BO, ResultTy->getAs<VectorType>()->getElementType(), S);
}

static std::pair<ExprResult, ExprResult>
CorrectDelayedTyposInBinOp(Sema &S, BinaryOperatorKind Opc, Expr *LHSExpr,
                           Expr *RHSExpr) {
  ExprResult LHS = LHSExpr, RHS = RHSExpr;
  if (!S.getLangOpts().CPlusPlus) {
    // C cannot handle TypoExpr nodes on either side of a binop because it
    // doesn't handle dependent types properly, so make sure any TypoExprs
    // have been dealt with before checking the operands.
    LHS = S.CorrectDelayedTyposInExpr(LHS);
    RHS = S.CorrectDelayedTyposInExpr(RHS, [Opc, LHS](Expr *E) {
      if (Opc != BO_Assign)
        return ExprResult(E);
      // Avoid correcting the RHS to the same Expr as the LHS.
      Decl *D = getDeclFromExpr(E);
      return (D && D == getDeclFromExpr(LHS.get())) ? ExprError() : E;
    });
  }
  return std::make_pair(LHS, RHS);
}

/// Returns true if conversion between vectors of halfs and vectors of floats
/// is needed.
static bool needsConversionOfHalfVec(bool OpRequiresConversion,
                                     ASTContext &Ctx, QualType SrcType) {
  return OpRequiresConversion && !Ctx.getLangOpts().NativeHalfType &&
         !Ctx.getTargetInfo().useFP16ConversionIntrinsics() &&
         isVector(SrcType, Ctx.HalfTy);
}

/// CreateBuiltinBinOp - Creates a new built-in binary operation with
/// operator @p Opc at location @c TokLoc. This routine only supports
/// built-in operations; ActOnBinOp handles overloaded operators.
ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
                                    BinaryOperatorKind Opc,
                                    Expr *LHSExpr, Expr *RHSExpr) {
  if (getLangOpts().CPlusPlus11 && isa<InitListExpr>(RHSExpr)) {
    // The syntax only allows initializer lists on the RHS of assignment,
    // so we don't need to worry about accepting invalid code for
    // non-assignment operators.
    // C++11 5.17p9:
    //   The meaning of x = {v} [...] is that of x = T(v) [...]. The meaning
    //   of x = {} is x = T().
    InitializationKind Kind = InitializationKind::CreateDirectList(
        RHSExpr->getBeginLoc(), RHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
    InitializedEntity Entity =
        InitializedEntity::InitializeTemporary(LHSExpr->getType());
    InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr);
    ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr);
    if (Init.isInvalid())
      return Init;
    RHSExpr = Init.get();
  }

  ExprResult LHS = LHSExpr, RHS = RHSExpr;
  QualType ResultTy;     // Result type of the binary operator.
  // The following two variables are used for compound assignment operators
  QualType CompLHSTy;    // Type of LHS after promotions for computation
  QualType CompResultTy; // Type of computation result
  ExprValueKind VK = VK_RValue;
  ExprObjectKind OK = OK_Ordinary;
  bool ConvertHalfVec = false;

  std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr,
                                                  RHSExpr);
  if (!LHS.isUsable() || !RHS.isUsable())
    return ExprError();

  if (getLangOpts().OpenCL) {
    QualType LHSTy = LHSExpr->getType();
    QualType RHSTy = RHSExpr->getType();
    // OpenCL C v2.0 s6.13.11.1 allows atomic variables to be initialized by
    // the ATOMIC_VAR_INIT macro.
    if (LHSTy->isAtomicType() || RHSTy->isAtomicType()) {
      SourceRange SR(LHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
      if (BO_Assign == Opc)
        Diag(OpLoc, diag::err_opencl_atomic_init) << 0 << SR;
      else
        ResultTy = InvalidOperands(OpLoc, LHS, RHS);
      return ExprError();
    }

    // OpenCL special types - image, sampler, pipe, and blocks are to be used
    // only with a builtin functions and therefore should be disallowed here.
    if (LHSTy->isImageType() || RHSTy->isImageType() ||
        LHSTy->isSamplerT() || RHSTy->isSamplerT() ||
        LHSTy->isPipeType() || RHSTy->isPipeType() ||
        LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType()) {
      ResultTy = InvalidOperands(OpLoc, LHS, RHS);
      return ExprError();
    }
  }

  switch (Opc) {
  case BO_Assign:
    ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, QualType());
    if (getLangOpts().CPlusPlus &&
        LHS.get()->getObjectKind() != OK_ObjCProperty) {
      VK = LHS.get()->getValueKind();
      OK = LHS.get()->getObjectKind();
    }
    if (!ResultTy.isNull()) {
      DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc, true);
      DiagnoseSelfMove(LHS.get(), RHS.get(), OpLoc);
    }
    RecordModifiableNonNullParam(*this, LHS.get());
    break;
  case BO_PtrMemD:
  case BO_PtrMemI:
    ResultTy = CheckPointerToMemberOperands(LHS, RHS, VK, OpLoc,
                                            Opc == BO_PtrMemI);
    break;
  case BO_Mul:
  case BO_Div:
    ConvertHalfVec = true;
    ResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, false,
                                           Opc == BO_Div);
    break;
  case BO_Rem:
    ResultTy = CheckRemainderOperands(LHS, RHS, OpLoc);
    break;
  case BO_Add:
    ConvertHalfVec = true;
    ResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc);
    break;
  case BO_Sub:
    ConvertHalfVec = true;
    ResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc);
    break;
  case BO_Shl:
  case BO_Shr:
    ResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc);
    break;
  case BO_LE:
  case BO_LT:
  case BO_GE:
  case BO_GT:
    ConvertHalfVec = true;
    ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc);
    break;
  case BO_EQ:
  case BO_NE:
    ConvertHalfVec = true;
    ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc);
    break;
  case BO_Cmp:
    ConvertHalfVec = true;
    ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc);
    assert(ResultTy.isNull() || ResultTy->getAsCXXRecordDecl());
    break;
  case BO_And:
    checkObjCPointerIntrospection(*this, LHS, RHS, OpLoc);
    LLVM_FALLTHROUGH;
  case BO_Xor:
  case BO_Or:
    ResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc);
    break;
  case BO_LAnd:
  case BO_LOr:
    ConvertHalfVec = true;
    ResultTy = CheckLogicalOperands(LHS, RHS, OpLoc, Opc);
    break;
  case BO_MulAssign:
  case BO_DivAssign:
    ConvertHalfVec = true;
    CompResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, true,
                                               Opc == BO_DivAssign);
    CompLHSTy = CompResultTy;
    if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
      ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy);
    break;
  case BO_RemAssign:
    CompResultTy = CheckRemainderOperands(LHS, RHS, OpLoc, true);
    CompLHSTy = CompResultTy;
    if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
      ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy);
    break;
  case BO_AddAssign:
    ConvertHalfVec = true;
    CompResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc, &CompLHSTy);
    if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
      ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy);
    break;
  case BO_SubAssign:
    ConvertHalfVec = true;
    CompResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc, &CompLHSTy);
    if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
      ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy);
    break;
  case BO_ShlAssign:
  case BO_ShrAssign:
    CompResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc, true);
    CompLHSTy = CompResultTy;
    if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
      ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy);
    break;
  case BO_AndAssign:
  case BO_OrAssign: // fallthrough
    DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc, true);
    LLVM_FALLTHROUGH;
  case BO_XorAssign:
    CompResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc);
    CompLHSTy = CompResultTy;
    if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
      ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy);
    break;
  case BO_Comma:
    ResultTy = CheckCommaOperands(*this, LHS, RHS, OpLoc);
    if (getLangOpts().CPlusPlus && !RHS.isInvalid()) {
      VK = RHS.get()->getValueKind();
      OK = RHS.get()->getObjectKind();
    }
    break;
  }
  if (ResultTy.isNull() || LHS.isInvalid() || RHS.isInvalid())
    return ExprError();

  // Some of the binary operations require promoting operands of half vector
  // to float vectors and truncating the result back to half vector. For now,
  // we do this only when HalfArgsAndReturns is set (that is, when the target
  // is arm or arm64).
  assert(isVector(RHS.get()->getType(), Context.HalfTy) ==
         isVector(LHS.get()->getType(), Context.HalfTy) &&
         "both sides are half vectors or neither sides are");
  ConvertHalfVec = needsConversionOfHalfVec(ConvertHalfVec, Context,
                                            LHS.get()->getType());

  // Check for array bounds violations for both sides of the BinaryOperator
  CheckArrayAccess(LHS.get());
  CheckArrayAccess(RHS.get());

  if (const ObjCIsaExpr *OISA =
        dyn_cast<ObjCIsaExpr>(LHS.get()->IgnoreParenCasts())) {
    NamedDecl *ObjectSetClass =
        LookupSingleName(TUScope, &Context.Idents.get("object_setClass"),
                         SourceLocation(), LookupOrdinaryName);
    if (ObjectSetClass && isa<ObjCIsaExpr>(LHS.get())) {
      SourceLocation RHSLocEnd = getLocForEndOfToken(RHS.get()->getEndLoc());
      Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign)
          << FixItHint::CreateInsertion(LHS.get()->getBeginLoc(),
                                        "object_setClass(")
          << FixItHint::CreateReplacement(SourceRange(OISA->getOpLoc(),
                                                      OpLoc),
                                          ",")
          << FixItHint::CreateInsertion(RHSLocEnd, ")");
    }
    else
      Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign);
  } else if (const ObjCIvarRefExpr *OIRE =
             dyn_cast<ObjCIvarRefExpr>(LHS.get()->IgnoreParenCasts()))
    DiagnoseDirectIsaAccess(*this, OIRE, OpLoc, RHS.get());

  // Opc is not a compound assignment if CompResultTy is null.
  if (CompResultTy.isNull()) {
    if (ConvertHalfVec)
      return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK,
                                 false, OpLoc, FPFeatures);
    return new (Context) BinaryOperator(LHS.get(), RHS.get(), Opc, ResultTy,
                                        VK, OK, OpLoc, FPFeatures);
  }

  // Handle compound assignments.
  if (getLangOpts().CPlusPlus &&
      LHS.get()->getObjectKind() != OK_ObjCProperty) {
    VK = VK_LValue;
    OK = LHS.get()->getObjectKind();
  }

  if (ConvertHalfVec)
    return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, true,
                               OpLoc, FPFeatures);

  return new (Context) CompoundAssignOperator(
      LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, CompLHSTy, CompResultTy,
      OpLoc, FPFeatures);
}
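// Illustrative sketch (not from the original source): the C++11 braced-init
// path at the top of CreateBuiltinBinOp; "x = {v}" is treated as "x = T(v)":
//
//   int x;
//   x = {3};   // same as x = int(3)
//   x = {};    // same as x = int(), i.e. zero
//   x = {3.5}; // error: narrowing conversion inside the braced list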
/// DiagnoseBitwisePrecedence - Emit a warning when bitwise and comparison
/// operators are mixed in a way that suggests that the programmer forgot that
/// comparison operators have higher precedence. The most typical example of
/// such code is "flags & 0x0020 != 0", which is equivalent to "flags & 1".
static void DiagnoseBitwisePrecedence(Sema &Self, BinaryOperatorKind Opc,
                                      SourceLocation OpLoc, Expr *LHSExpr,
                                      Expr *RHSExpr) {
  BinaryOperator *LHSBO = dyn_cast<BinaryOperator>(LHSExpr);
  BinaryOperator *RHSBO = dyn_cast<BinaryOperator>(RHSExpr);

  // Check that one of the sides is a comparison operator and the other isn't.
  bool isLeftComp = LHSBO && LHSBO->isComparisonOp();
  bool isRightComp = RHSBO && RHSBO->isComparisonOp();
  if (isLeftComp == isRightComp)
    return;

  // Bitwise operations are sometimes used as eager logical ops.
  // Don't diagnose this.
  bool isLeftBitwise = LHSBO && LHSBO->isBitwiseOp();
  bool isRightBitwise = RHSBO && RHSBO->isBitwiseOp();
  if (isLeftBitwise || isRightBitwise)
    return;

  SourceRange DiagRange = isLeftComp
                              ? SourceRange(LHSExpr->getBeginLoc(), OpLoc)
                              : SourceRange(OpLoc, RHSExpr->getEndLoc());
  StringRef OpStr = isLeftComp ? LHSBO->getOpcodeStr() : RHSBO->getOpcodeStr();
  SourceRange ParensRange =
      isLeftComp
          ? SourceRange(LHSBO->getRHS()->getBeginLoc(), RHSExpr->getEndLoc())
          : SourceRange(LHSExpr->getBeginLoc(), RHSBO->getLHS()->getEndLoc());

  Self.Diag(OpLoc, diag::warn_precedence_bitwise_rel)
    << DiagRange << BinaryOperator::getOpcodeStr(Opc) << OpStr;
  SuggestParentheses(Self, OpLoc,
    Self.PDiag(diag::note_precedence_silence) << OpStr,
    (isLeftComp ? LHSExpr : RHSExpr)->getSourceRange());
  SuggestParentheses(Self, OpLoc,
    Self.PDiag(diag::note_precedence_bitwise_first)
      << BinaryOperator::getOpcodeStr(Opc),
    ParensRange);
}

/// It accepts a '&&' expr that is inside a '||' one.
/// Emit a diagnostic together with a fixit hint that wraps the '&&'
/// expression in parentheses.
static void
EmitDiagnosticForLogicalAndInLogicalOr(Sema &Self, SourceLocation OpLoc,
                                       BinaryOperator *Bop) {
  assert(Bop->getOpcode() == BO_LAnd);
  Self.Diag(Bop->getOperatorLoc(), diag::warn_logical_and_in_logical_or)
      << Bop->getSourceRange() << OpLoc;
  SuggestParentheses(Self, Bop->getOperatorLoc(),
    Self.PDiag(diag::note_precedence_silence)
      << Bop->getOpcodeStr(),
    Bop->getSourceRange());
}

/// Returns true if the given expression can be evaluated as a constant
/// 'true'.
static bool EvaluatesAsTrue(Sema &S, Expr *E) {
  bool Res;
  return !E->isValueDependent() &&
         E->EvaluateAsBooleanCondition(Res, S.getASTContext()) && Res;
}

/// Returns true if the given expression can be evaluated as a constant
/// 'false'.
static bool EvaluatesAsFalse(Sema &S, Expr *E) {
  bool Res;
  return !E->isValueDependent() &&
         E->EvaluateAsBooleanCondition(Res, S.getASTContext()) && !Res;
}

/// Look for '&&' in the left hand of a '||' expr.
static void DiagnoseLogicalAndInLogicalOrLHS(Sema &S, SourceLocation OpLoc,
                                             Expr *LHSExpr, Expr *RHSExpr) {
  if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(LHSExpr)) {
    if (Bop->getOpcode() == BO_LAnd) {
      // If it's "a && b || 0" don't warn since the precedence doesn't matter.
      if (EvaluatesAsFalse(S, RHSExpr))
        return;
      // If it's "1 && a || b" don't warn since the precedence doesn't matter.
      if (!EvaluatesAsTrue(S, Bop->getLHS()))
        return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop);
    } else if (Bop->getOpcode() == BO_LOr) {
      if (BinaryOperator *RBop = dyn_cast<BinaryOperator>(Bop->getRHS())) {
        // If it's "a || b && 1 || c" we didn't warn earlier for
        // "a || b && 1", but warn now.
        if (RBop->getOpcode() == BO_LAnd && EvaluatesAsTrue(S, RBop->getRHS()))
          return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, RBop);
      }
    }
  }
}
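// Illustrative sketch (not from the original source): the precedence traps
// the two diagnostics above catch:
//
//   if (flags & 0x0020 != 0)  // parses as flags & (0x0020 != 0), i.e.
//     step();                 // "flags & 1", rarely what was intended
//
//   if (a && b || c)          // parses as (a && b) || c; warned so the
//     step();                 // intent is made explicit with parentheses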
/// Look for '&&' in the right hand of a '||' expr.
static void DiagnoseLogicalAndInLogicalOrRHS(Sema &S, SourceLocation OpLoc,
                                             Expr *LHSExpr, Expr *RHSExpr) {
  if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(RHSExpr)) {
    if (Bop->getOpcode() == BO_LAnd) {
      // If it's "0 || a && b" don't warn since the precedence doesn't matter.
      if (EvaluatesAsFalse(S, LHSExpr))
        return;
      // If it's "a || b && 1" don't warn since the precedence doesn't matter.
      if (!EvaluatesAsTrue(S, Bop->getRHS()))
        return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop);
    }
  }
}

/// Look for a bitwise op in the left or right hand of a bitwise op with
/// lower precedence and emit a diagnostic together with a fixit hint that
/// wraps the '&' expression in parentheses.
static void DiagnoseBitwiseOpInBitwiseOp(Sema &S, BinaryOperatorKind Opc,
                                         SourceLocation OpLoc, Expr *SubExpr) {
  if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(SubExpr)) {
    if (Bop->isBitwiseOp() && Bop->getOpcode() < Opc) {
      S.Diag(Bop->getOperatorLoc(), diag::warn_bitwise_op_in_bitwise_op)
        << Bop->getOpcodeStr() << BinaryOperator::getOpcodeStr(Opc)
        << Bop->getSourceRange() << OpLoc;
      SuggestParentheses(S, Bop->getOperatorLoc(),
        S.PDiag(diag::note_precedence_silence) << Bop->getOpcodeStr(),
        Bop->getSourceRange());
    }
  }
}

static void DiagnoseAdditionInShift(Sema &S, SourceLocation OpLoc,
                                    Expr *SubExpr, StringRef Shift) {
  if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(SubExpr)) {
    if (Bop->getOpcode() == BO_Add || Bop->getOpcode() == BO_Sub) {
      StringRef Op = Bop->getOpcodeStr();
      S.Diag(Bop->getOperatorLoc(), diag::warn_addition_in_bitshift)
          << Bop->getSourceRange() << OpLoc << Shift << Op;
      SuggestParentheses(S, Bop->getOperatorLoc(),
          S.PDiag(diag::note_precedence_silence) << Op,
          Bop->getSourceRange());
    }
  }
}

static void DiagnoseShiftCompare(Sema &S, SourceLocation OpLoc,
                                 Expr *LHSExpr, Expr *RHSExpr) {
  CXXOperatorCallExpr *OCE = dyn_cast<CXXOperatorCallExpr>(LHSExpr);
  if (!OCE)
    return;

  FunctionDecl *FD = OCE->getDirectCallee();
  if (!FD || !FD->isOverloadedOperator())
    return;

  OverloadedOperatorKind Kind = FD->getOverloadedOperator();
  if (Kind != OO_LessLess && Kind != OO_GreaterGreater)
    return;

  S.Diag(OpLoc, diag::warn_overloaded_shift_in_comparison)
      << LHSExpr->getSourceRange() << RHSExpr->getSourceRange()
      << (Kind == OO_LessLess);
  SuggestParentheses(S, OCE->getOperatorLoc(),
                     S.PDiag(diag::note_precedence_silence)
                         << (Kind == OO_LessLess ? "<<" : ">>"),
                     OCE->getSourceRange());
  SuggestParentheses(
      S, OpLoc, S.PDiag(diag::note_evaluate_comparison_first),
      SourceRange(OCE->getArg(1)->getBeginLoc(), RHSExpr->getEndLoc()));
}
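// Illustrative sketch (not from the original source): what the shift-related
// helpers above flag:
//
//   int y = x << 2 + 1;    // parses as x << (2 + 1); warned, since
//                          // (x << 2) + 1 is a common intent
//
//   std::cout << 5 == 4;   // parses as (std::cout << 5) == 4; warned for
//                          // an overloaded << next to a comparison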
/// DiagnoseBinOpPrecedence - Emit warnings for expressions with tricky
/// precedence.
static void DiagnoseBinOpPrecedence(Sema &Self, BinaryOperatorKind Opc,
                                    SourceLocation OpLoc, Expr *LHSExpr,
                                    Expr *RHSExpr){
  // Diagnose "arg1 'bitwise' arg2 'eq' arg3".
  if (BinaryOperator::isBitwiseOp(Opc))
    DiagnoseBitwisePrecedence(Self, Opc, OpLoc, LHSExpr, RHSExpr);

  // Diagnose "arg1 & arg2 | arg3"
  if ((Opc == BO_Or || Opc == BO_Xor) &&
      !OpLoc.isMacroID()/* Don't warn in macros. */) {
    DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, LHSExpr);
    DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, RHSExpr);
  }

  // Warn about arg1 || arg2 && arg3, as GCC 4.3+ does.
  // We don't warn for 'assert(a || b && "bad")' since this is safe.
  if (Opc == BO_LOr && !OpLoc.isMacroID()/* Don't warn in macros. */) {
    DiagnoseLogicalAndInLogicalOrLHS(Self, OpLoc, LHSExpr, RHSExpr);
    DiagnoseLogicalAndInLogicalOrRHS(Self, OpLoc, LHSExpr, RHSExpr);
  }

  if ((Opc == BO_Shl &&
       LHSExpr->getType()->isIntegralType(Self.getASTContext()))
      || Opc == BO_Shr) {
    StringRef Shift = BinaryOperator::getOpcodeStr(Opc);
    DiagnoseAdditionInShift(Self, OpLoc, LHSExpr, Shift);
    DiagnoseAdditionInShift(Self, OpLoc, RHSExpr, Shift);
  }

  // Warn on overloaded shift operators and comparisons, such as:
  // cout << 5 == 4;
  if (BinaryOperator::isComparisonOp(Opc))
    DiagnoseShiftCompare(Self, OpLoc, LHSExpr, RHSExpr);
}

// Binary Operators.  'Tok' is the token for the operator.
ExprResult Sema::ActOnBinOp(Scope *S, SourceLocation TokLoc,
                            tok::TokenKind Kind,
                            Expr *LHSExpr, Expr *RHSExpr) {
  BinaryOperatorKind Opc = ConvertTokenKindToBinaryOpcode(Kind);
  assert(LHSExpr && "ActOnBinOp(): missing left expression");
  assert(RHSExpr && "ActOnBinOp(): missing right expression");

  // Emit warnings for tricky precedence issues, e.g. "bitfield & 0x4 == 0"
  DiagnoseBinOpPrecedence(*this, Opc, TokLoc, LHSExpr, RHSExpr);

  return BuildBinOp(S, TokLoc, Opc, LHSExpr, RHSExpr);
}

/// Build an overloaded binary operator expression in the given scope.
static ExprResult BuildOverloadedBinOp(Sema &S, Scope *Sc,
                                       SourceLocation OpLoc,
                                       BinaryOperatorKind Opc,
                                       Expr *LHS, Expr *RHS) {
  switch (Opc) {
  case BO_Assign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
    DiagnoseSelfAssignment(S, LHS, RHS, OpLoc, false);
    CheckIdentityFieldAssignment(LHS, RHS, OpLoc, S);
    break;
  default:
    break;
  }

  // Find all of the overloaded operators visible from this
  // point. We perform both an operator-name lookup from the local
  // scope and an argument-dependent lookup based on the types of
  // the arguments.
  UnresolvedSet<16> Functions;
  OverloadedOperatorKind OverOp = BinaryOperator::getOverloadedOperator(Opc);
  if (Sc && OverOp != OO_None && OverOp != OO_Equal)
    S.LookupOverloadedOperatorName(OverOp, Sc, LHS->getType(),
                                   RHS->getType(), Functions);

  // Build the (potentially-overloaded, potentially-dependent)
  // binary operation.
  return S.CreateOverloadedBinOp(OpLoc, Opc, Functions, LHS, RHS);
}

ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
                            BinaryOperatorKind Opc,
                            Expr *LHSExpr, Expr *RHSExpr) {
  ExprResult LHS, RHS;
  std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr,
                                                  RHSExpr);
  if (!LHS.isUsable() || !RHS.isUsable())
    return ExprError();
  LHSExpr = LHS.get();
  RHSExpr = RHS.get();

  // We want to end up calling one of checkPseudoObjectAssignment
  // (if the LHS is a pseudo-object), BuildOverloadedBinOp (if
  // both expressions are overloadable or either is type-dependent),
  // or CreateBuiltinBinOp (in any other case).  We also want to get
  // any placeholder types out of the way.

  // Handle pseudo-objects in the LHS.
  if (const BuiltinType *pty = LHSExpr->getType()->getAsPlaceholderType()) {
    // Assignments with a pseudo-object l-value need special analysis.
    if (pty->getKind() == BuiltinType::PseudoObject &&
        BinaryOperator::isAssignmentOp(Opc))
      return checkPseudoObjectAssignment(S, OpLoc, Opc, LHSExpr, RHSExpr);

    // Don't resolve overloads if the other type is overloadable.
    if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload) {
      // We can't actually test that if we still have a placeholder,
      // though.  Fortunately, none of the exceptions we see in that
      // code below are valid when the LHS is an overload set.
      // Note that an overload set can be dependently-typed, but it never
      // instantiates to having an overloadable type.
      ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr);
      if (resolvedRHS.isInvalid()) return ExprError();
      RHSExpr = resolvedRHS.get();

      if (RHSExpr->isTypeDependent() ||
          RHSExpr->getType()->isOverloadableType())
        return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
    }

    // If we're instantiating "a.x < b" or "A::x < b" and 'x' names a function
    // template, diagnose the missing 'template' keyword instead of diagnosing
    // an invalid use of a bound member function.
    //
    // Note that "A::x < b" might be valid if 'b' has an overloadable type due
    // to C++1z [over.over]/1.4, but we already checked for that case above.
    if (Opc == BO_LT && inTemplateInstantiation() &&
        (pty->getKind() == BuiltinType::BoundMember ||
         pty->getKind() == BuiltinType::Overload)) {
      auto *OE = dyn_cast<OverloadExpr>(LHSExpr);
      if (OE && !OE->hasTemplateKeyword() && !OE->hasExplicitTemplateArgs() &&
          std::any_of(OE->decls_begin(), OE->decls_end(), [](NamedDecl *ND) {
            return isa<FunctionTemplateDecl>(ND);
          })) {
        Diag(OE->getQualifier() ? OE->getQualifierLoc().getBeginLoc()
                                : OE->getNameLoc(),
             diag::err_template_kw_missing)
          << OE->getName().getAsString() << "";
        return ExprError();
      }
    }

    ExprResult LHS = CheckPlaceholderExpr(LHSExpr);
    if (LHS.isInvalid()) return ExprError();
    LHSExpr = LHS.get();
  }

  // Handle pseudo-objects in the RHS.
  if (const BuiltinType *pty = RHSExpr->getType()->getAsPlaceholderType()) {
    // An overload in the RHS can potentially be resolved by the type
    // being assigned to.
    if (Opc == BO_Assign && pty->getKind() == BuiltinType::Overload) {
      if (getLangOpts().CPlusPlus &&
          (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() ||
           LHSExpr->getType()->isOverloadableType()))
        return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);

      return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr);
    }

    // Don't resolve overloads if the other type is overloadable.
    if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload &&
        LHSExpr->getType()->isOverloadableType())
      return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);

    ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr);
    if (!resolvedRHS.isUsable()) return ExprError();
    RHSExpr = resolvedRHS.get();
  }

  if (getLangOpts().CPlusPlus) {
    // If either expression is type-dependent, always build an
    // overloaded op.
    if (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent())
      return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);

    // Otherwise, build an overloaded op if either expression has an
    // overloadable type.
    if (LHSExpr->getType()->isOverloadableType() ||
        RHSExpr->getType()->isOverloadableType())
      return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
  }

  // Build a built-in binary operation.
  return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr);
}

static bool isOverflowingIntegerType(ASTContext &Ctx, QualType T) {
  if (T.isNull() || T->isDependentType())
    return false;

  if (!T->isPromotableIntegerType())
    return true;

  return Ctx.getIntWidth(T) >= Ctx.getIntWidth(Ctx.IntTy);
}

ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
                                      UnaryOperatorKind Opc,
                                      Expr *InputExpr) {
  ExprResult Input = InputExpr;
  ExprValueKind VK = VK_RValue;
  ExprObjectKind OK = OK_Ordinary;
  QualType resultType;
  bool CanOverflow = false;

  bool ConvertHalfVec = false;
  if (getLangOpts().OpenCL) {
    QualType Ty = InputExpr->getType();
    // The only legal unary operation for atomics is '&'.
    if ((Opc != UO_AddrOf && Ty->isAtomicType()) ||
        // OpenCL special types - image, sampler, pipe, and blocks are to be
        // used only with a builtin functions and therefore should be
        // disallowed here.
        (Ty->isImageType() || Ty->isSamplerT() || Ty->isPipeType()
        || Ty->isBlockPointerType())) {
      return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
                       << InputExpr->getType()
                       << Input.get()->getSourceRange());
    }
  }
  switch (Opc) {
  case UO_PreInc:
  case UO_PreDec:
  case UO_PostInc:
  case UO_PostDec:
    resultType = CheckIncrementDecrementOperand(*this, Input.get(), VK, OK,
                                                OpLoc,
                                                Opc == UO_PreInc ||
                                                Opc == UO_PostInc,
                                                Opc == UO_PreInc ||
                                                Opc == UO_PreDec);
    CanOverflow = isOverflowingIntegerType(Context, resultType);
    break;
  case UO_AddrOf:
    resultType = CheckAddressOfOperand(Input, OpLoc);
    CheckAddressOfNoDeref(InputExpr);
    RecordModifiableNonNullParam(*this, InputExpr);
    break;
  case UO_Deref: {
    Input = DefaultFunctionArrayLvalueConversion(Input.get());
    if (Input.isInvalid()) return ExprError();
    resultType = CheckIndirectionOperand(*this, Input.get(), VK, OpLoc);
    break;
  }
  case UO_Plus:
  case UO_Minus:
    CanOverflow = Opc == UO_Minus &&
                  isOverflowingIntegerType(Context, Input.get()->getType());
    Input = UsualUnaryConversions(Input.get());
    if (Input.isInvalid()) return ExprError();
    // Unary plus and minus require promoting an operand of half vector to a
    // float vector and truncating the result back to a half vector. For now,
    // we do this only when HalfArgsAndReturns is set (that is, when the
    // target is arm or arm64).
    ConvertHalfVec =
        needsConversionOfHalfVec(true, Context, Input.get()->getType());

    // If the operand is a half vector, promote it to a float vector.
    if (ConvertHalfVec)
      Input = convertVector(Input.get(), Context.FloatTy, *this);
    resultType = Input.get()->getType();
    if (resultType->isDependentType())
      break;
    if (resultType->isArithmeticType()) // C99 6.5.3.3p1
      break;
    else if (resultType->isVectorType() &&
             // The z vector extensions don't allow + or - with bool vectors.
             (!Context.getLangOpts().ZVector ||
              resultType->getAs<VectorType>()->getVectorKind() !=
              VectorType::AltiVecBool))
      break;
    else if (getLangOpts().CPlusPlus && // C++ [expr.unary.op]p6
             Opc == UO_Plus &&
             resultType->isPointerType())
      break;

    return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
                     << resultType << Input.get()->getSourceRange());

  case UO_Not: // bitwise complement
    Input = UsualUnaryConversions(Input.get());
    if (Input.isInvalid())
      return ExprError();
    resultType = Input.get()->getType();
    if (resultType->isDependentType())
      break;
    // C99 6.5.3.3p1. We allow complex int and float as a GCC extension.
    if (resultType->isComplexType() || resultType->isComplexIntegerType())
      // C99 does not support '~' for complex conjugation.
      Diag(OpLoc, diag::ext_integer_complement_complex)
          << resultType << Input.get()->getSourceRange();
    else if (resultType->hasIntegerRepresentation())
      break;
    else if (resultType->isExtVectorType() && Context.getLangOpts().OpenCL) {
      // OpenCL v1.1 s6.3.f: The bitwise operator not (~) does not operate
      // on vector float types.
      QualType T = resultType->getAs<ExtVectorType>()->getElementType();
      if (!T->isIntegerType())
        return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
                          << resultType << Input.get()->getSourceRange());
    } else {
      return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
                       << resultType << Input.get()->getSourceRange());
    }
    break;

  case UO_LNot: // logical negation
    // Unlike +/-/~, integer promotions aren't done here (C99 6.5.3.3p5).
    Input = DefaultFunctionArrayLvalueConversion(Input.get());
    if (Input.isInvalid()) return ExprError();
    resultType = Input.get()->getType();

    // Though we still have to promote half FP to float...
    if (resultType->isHalfType() && !Context.getLangOpts().NativeHalfType) {
      Input = ImpCastExprToType(Input.get(), Context.FloatTy,
                                CK_FloatingCast).get();
      resultType = Context.FloatTy;
    }

    if (resultType->isDependentType())
      break;
    if (resultType->isScalarType() && !isScopedEnumerationType(resultType)) {
      // C99 6.5.3.3p1: ok, fallthrough;
      if (Context.getLangOpts().CPlusPlus) {
        // C++03 [expr.unary.op]p8, C++0x [expr.unary.op]p9:
        // operand contextually converted to bool.
        Input = ImpCastExprToType(Input.get(), Context.BoolTy,
                                  ScalarTypeToBooleanCastKind(resultType));
      } else if (Context.getLangOpts().OpenCL &&
                 Context.getLangOpts().OpenCLVersion < 120) {
        // OpenCL v1.1 6.3.h: The logical operator not (!) does not
        // operate on scalar float types.
        if (!resultType->isIntegerType() && !resultType->isPointerType())
          return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
                           << resultType << Input.get()->getSourceRange());
      }
    } else if (resultType->isExtVectorType()) {
      if (Context.getLangOpts().OpenCL &&
          Context.getLangOpts().OpenCLVersion < 120) {
        // OpenCL v1.1 6.3.h: The logical operator not (!) does not
        // operate on vector float types.
        QualType T = resultType->getAs<ExtVectorType>()->getElementType();
        if (!T->isIntegerType())
          return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
                           << resultType << Input.get()->getSourceRange());
      }
      // Vector logical not returns the signed variant of the operand type.
      resultType = GetSignedVectorType(resultType);
      break;
    } else {
      // FIXME: GCC's vector extension permits the usage of '!' with a vector
      //        type in C++. We should allow that here too.
      return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
        << resultType << Input.get()->getSourceRange());
    }

    // LNot always has type int. C99 6.5.3.3p5.
    // In C++, it's bool. C++ 5.3.1p8
    resultType = Context.getLogicalOperationType();
    break;
  case UO_Real:
  case UO_Imag:
    resultType = CheckRealImagOperand(*this, Input, OpLoc, Opc == UO_Real);
    // _Real maps ordinary l-values into ordinary l-values. _Imag maps
    // ordinary complex l-values to ordinary l-values and all other values to
    // r-values.
    if (Input.isInvalid()) return ExprError();
    if (Opc == UO_Real || Input.get()->getType()->isAnyComplexType()) {
      if (Input.get()->getValueKind() != VK_RValue &&
          Input.get()->getObjectKind() == OK_Ordinary)
        VK = Input.get()->getValueKind();
    } else if (!getLangOpts().CPlusPlus) {
      // In C, a volatile scalar is read by __imag. In C++, it is not.
      Input = DefaultLvalueConversion(Input.get());
    }
    break;
  case UO_Extension:
    resultType = Input.get()->getType();
    VK = Input.get()->getValueKind();
    OK = Input.get()->getObjectKind();
    break;
  case UO_Coawait:
    // It's unnecessary to represent the pass-through operator co_await in
    // the AST; just return the input expression instead.
    assert(!Input.get()->getType()->isDependentType() &&
           "the co_await expression must be non-dependent before "
           "building operator co_await");
    return Input;
  }
  if (resultType.isNull() || Input.isInvalid())
    return ExprError();

  // Check for array bounds violations in the operand of the UnaryOperator,
  // except for the '*' and '&' operators that have to be handled specially
  // by CheckArrayAccess (as there are special cases like &array[arraysize]
  // that are explicitly defined as valid by the standard).
  if (Opc != UO_AddrOf && Opc != UO_Deref)
    CheckArrayAccess(Input.get());

  auto *UO = new (Context)
      UnaryOperator(Input.get(), Opc, resultType, VK, OK, OpLoc, CanOverflow);

  if (Opc == UO_Deref && UO->getType()->hasAttr(attr::NoDeref) &&
      !isa<ArrayType>(UO->getType().getDesugaredType(Context)))
    ExprEvalContexts.back().PossibleDerefs.insert(UO);

  // Convert the result back to a half vector.
  if (ConvertHalfVec)
    return convertVector(UO, Context.HalfTy, *this);
  return UO;
}
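// Illustrative sketch (not from the original source): consequences of the
// decisions above; unary minus on a non-promotable integer type is marked
// CanOverflow, and the type of '!' depends on the language:
//
//   int i = INT_MIN;
//   (void)-i;   // may overflow: -INT_MIN is not representable in int
//   (void)!i;   // type 'int' in C (C99 6.5.3.3p5), 'bool' in C++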
  if (Opc != UO_AddrOf && Opc != UO_Deref)
    CheckArrayAccess(Input.get());

  auto *UO = new (Context)
      UnaryOperator(Input.get(), Opc, resultType, VK, OK, OpLoc, CanOverflow);

  if (Opc == UO_Deref && UO->getType()->hasAttr(attr::NoDeref) &&
      !isa<ArrayType>(UO->getType().getDesugaredType(Context)))
    ExprEvalContexts.back().PossibleDerefs.insert(UO);

  // Convert the result back to a half vector.
  if (ConvertHalfVec)
    return convertVector(UO, Context.HalfTy, *this);

  return UO;
}

/// Determine whether the given expression is a qualified member
/// access expression, of a form that could be turned into a pointer to member
/// with the address-of operator.
bool Sema::isQualifiedMemberAccess(Expr *E) {
  if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
    if (!DRE->getQualifier())
      return false;

    ValueDecl *VD = DRE->getDecl();
    if (!VD->isCXXClassMember())
      return false;

    if (isa<FieldDecl>(VD) || isa<IndirectFieldDecl>(VD))
      return true;
    if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(VD))
      return Method->isInstance();

    return false;
  }

  if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(E)) {
    if (!ULE->getQualifier())
      return false;

    for (NamedDecl *D : ULE->decls()) {
      if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
        if (Method->isInstance())
          return true;
      } else {
        // Overload set does not contain methods.
        break;
      }
    }

    return false;
  }

  return false;
}

ExprResult Sema::BuildUnaryOp(Scope *S, SourceLocation OpLoc,
                              UnaryOperatorKind Opc, Expr *Input) {
  // First things first: handle placeholders so that the
  // overloaded-operator check considers the right type.
  if (const BuiltinType *pty = Input->getType()->getAsPlaceholderType()) {
    // Increment and decrement of pseudo-object references.
    if (pty->getKind() == BuiltinType::PseudoObject &&
        UnaryOperator::isIncrementDecrementOp(Opc))
      return checkPseudoObjectIncDec(S, OpLoc, Opc, Input);

    // extension is always a builtin operator.
    if (Opc == UO_Extension)
      return CreateBuiltinUnaryOp(OpLoc, Opc, Input);

    // & gets special logic for several kinds of placeholder.
    // The builtin code knows what to do.
    if (Opc == UO_AddrOf &&
        (pty->getKind() == BuiltinType::Overload ||
         pty->getKind() == BuiltinType::UnknownAny ||
         pty->getKind() == BuiltinType::BoundMember))
      return CreateBuiltinUnaryOp(OpLoc, Opc, Input);

    // Anything else needs to be handled now.
    ExprResult Result = CheckPlaceholderExpr(Input);
    if (Result.isInvalid()) return ExprError();
    Input = Result.get();
  }

  if (getLangOpts().CPlusPlus && Input->getType()->isOverloadableType() &&
      UnaryOperator::getOverloadedOperator(Opc) != OO_None &&
      !(Opc == UO_AddrOf && isQualifiedMemberAccess(Input))) {
    // Find all of the overloaded operators visible from this
    // point. We perform both an operator-name lookup from the local
    // scope and an argument-dependent lookup based on the types of
    // the arguments.
    UnresolvedSet<16> Functions;
    OverloadedOperatorKind OverOp = UnaryOperator::getOverloadedOperator(Opc);
    if (S && OverOp != OO_None)
      LookupOverloadedOperatorName(OverOp, S, Input->getType(), QualType(),
                                   Functions);

    return CreateOverloadedUnaryOp(OpLoc, Opc, Functions, Input);
  }

  return CreateBuiltinUnaryOp(OpLoc, Opc, Input);
}

// Unary Operators.  'Tok' is the token for the operator.
ExprResult Sema::ActOnUnaryOp(Scope *S, SourceLocation OpLoc,
                              tok::TokenKind Op, Expr *Input) {
  return BuildUnaryOp(S, OpLoc, ConvertTokenKindToUnaryOpcode(Op), Input);
}

/// ActOnAddrLabel - Parse the GNU address of label extension: "&&foo".
ExprResult Sema::ActOnAddrLabel(SourceLocation OpLoc, SourceLocation LabLoc,
                                LabelDecl *TheDecl) {
  TheDecl->markUsed(Context);
  // Create the AST node.  The address of a label always has type 'void*'.
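  // For reference (editor's addition): the GNU construct handled here is
  // typically paired with computed goto,
  //
  //   void *ip;
  // again:
  //   ip = &&again;   // '&&label' has type 'void *'
  //   goto *ip;       // computed goto through the stored address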
  return new (Context) AddrLabelExpr(OpLoc, LabLoc, TheDecl,
                                     Context.getPointerType(Context.VoidTy));
}

/// Given the last statement in a statement-expression, check whether
/// the result is a producing expression (like a call to an
/// ns_returns_retained function) and, if so, rebuild it to hoist the
/// release out of the full-expression.  Otherwise, return null.
/// Cannot fail.
static Expr *maybeRebuildARCConsumingStmt(Stmt *Statement) {
  // Should always be wrapped with one of these.
  ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(Statement);
  if (!cleanups) return nullptr;

  ImplicitCastExpr *cast = dyn_cast<ImplicitCastExpr>(cleanups->getSubExpr());
  if (!cast || cast->getCastKind() != CK_ARCConsumeObject)
    return nullptr;

  // Splice out the cast.  This shouldn't modify any interesting
  // features of the statement.
  Expr *producer = cast->getSubExpr();
  assert(producer->getType() == cast->getType());
  assert(producer->getValueKind() == cast->getValueKind());
  cleanups->setSubExpr(producer);
  return cleanups;
}

void Sema::ActOnStartStmtExpr() {
  PushExpressionEvaluationContext(ExprEvalContexts.back().Context);
}

void Sema::ActOnStmtExprError() {
  // Note that this function is also called by TreeTransform when leaving a
  // StmtExpr scope without rebuilding anything.

  DiscardCleanupsInEvaluationContext();
  PopExpressionEvaluationContext();
}

ExprResult
Sema::ActOnStmtExpr(SourceLocation LPLoc, Stmt *SubStmt,
                    SourceLocation RPLoc) { // "({..})"
  assert(SubStmt && isa<CompoundStmt>(SubStmt) && "Invalid action invocation!");
  CompoundStmt *Compound = cast<CompoundStmt>(SubStmt);

  if (hasAnyUnrecoverableErrorsInThisFunction())
    DiscardCleanupsInEvaluationContext();
  assert(!Cleanup.exprNeedsCleanups() &&
         "cleanups within StmtExpr not correctly bound!");
  PopExpressionEvaluationContext();

  // FIXME: there are a variety of strange constraints to enforce here, for
  // example, it is not possible to goto into a stmt expression apparently.
  // More semantic analysis is needed.

  // If there are sub-stmts in the compound stmt, take the type of the last one
  // as the type of the stmtexpr.
  QualType Ty = Context.VoidTy;
  bool StmtExprMayBindToTemp = false;
  if (!Compound->body_empty()) {
    Stmt *LastStmt = Compound->body_back();
    LabelStmt *LastLabelStmt = nullptr;
    // If LastStmt is a label, skip down through into the body.
    while (LabelStmt *Label = dyn_cast<LabelStmt>(LastStmt)) {
      LastLabelStmt = Label;
      LastStmt = Label->getSubStmt();
    }

    if (Expr *LastE = dyn_cast<Expr>(LastStmt)) {
      // Do function/array conversion on the last expression, but not
      // lvalue-to-rvalue.  However, initialize an unqualified type.
      ExprResult LastExpr = DefaultFunctionArrayConversion(LastE);
      if (LastExpr.isInvalid())
        return ExprError();
      Ty = LastExpr.get()->getType().getUnqualifiedType();

      if (!Ty->isDependentType() && !LastExpr.get()->isTypeDependent()) {
        // In ARC, if the final expression ends in a consume, splice
        // the consume out and bind it later.  In the alternate case
        // (when dealing with a retainable type), the result
        // initialization will create a produce.  In both cases the
        // result will be +1, and we'll need to balance that out with
        // a bind.
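        // Illustrative example (editor's addition) of the typing rule
        // implemented just above: the GNU statement expression takes the
        // unqualified type of its final expression-statement,
        //
        //   int n = ({ int t = f(); t + 1; });   // '({...})' has type 'int'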
        if (Expr *rebuiltLastStmt
              = maybeRebuildARCConsumingStmt(LastExpr.get())) {
          LastExpr = rebuiltLastStmt;
        } else {
          LastExpr = PerformCopyInitialization(
              InitializedEntity::InitializeStmtExprResult(LPLoc, Ty),
              SourceLocation(), LastExpr);
        }

        if (LastExpr.isInvalid())
          return ExprError();
        if (LastExpr.get() != nullptr) {
          if (!LastLabelStmt)
            Compound->setLastStmt(LastExpr.get());
          else
            LastLabelStmt->setSubStmt(LastExpr.get());
          StmtExprMayBindToTemp = true;
        }
      }
    }
  }

  // FIXME: Check that expression type is complete/non-abstract; statement
  // expressions are not lvalues.
  Expr *ResStmtExpr = new (Context) StmtExpr(Compound, Ty, LPLoc, RPLoc);
  if (StmtExprMayBindToTemp)
    return MaybeBindToTemporary(ResStmtExpr);
  return ResStmtExpr;
}

ExprResult Sema::BuildBuiltinOffsetOf(SourceLocation BuiltinLoc,
                                      TypeSourceInfo *TInfo,
                                      ArrayRef<OffsetOfComponent> Components,
                                      SourceLocation RParenLoc) {
  QualType ArgTy = TInfo->getType();
  bool Dependent = ArgTy->isDependentType();
  SourceRange TypeRange = TInfo->getTypeLoc().getLocalSourceRange();

  // We must have at least one component that refers to the type, and the first
  // one is known to be a field designator.  Verify that the ArgTy represents
  // a struct/union/class.
  if (!Dependent && !ArgTy->isRecordType())
    return ExprError(Diag(BuiltinLoc, diag::err_offsetof_record_type)
                       << ArgTy << TypeRange);

  // Type must be complete per C99 7.17p3 because declaring a variable
  // with an incomplete type would be ill-formed.
  if (!Dependent
      && RequireCompleteType(BuiltinLoc, ArgTy,
                             diag::err_offsetof_incomplete_type, TypeRange))
    return ExprError();

  bool DidWarnAboutNonPOD = false;
  QualType CurrentType = ArgTy;
  SmallVector<OffsetOfNode, 4> Comps;
  SmallVector<Expr *, 4> Exprs;
  for (const OffsetOfComponent &OC : Components) {
    if (OC.isBrackets) {
      // Offset of an array sub-field.  TODO: Should we allow vector elements?
      if (!CurrentType->isDependentType()) {
        const ArrayType *AT = Context.getAsArrayType(CurrentType);
        if (!AT)
          return ExprError(Diag(OC.LocEnd, diag::err_offsetof_array_type)
                           << CurrentType);
        CurrentType = AT->getElementType();
      } else
        CurrentType = Context.DependentTy;

      ExprResult IdxRval = DefaultLvalueConversion(static_cast<Expr *>(OC.U.E));
      if (IdxRval.isInvalid())
        return ExprError();
      Expr *Idx = IdxRval.get();

      // The expression must be an integral expression.
      // FIXME: An integral constant expression?
      if (!Idx->isTypeDependent() && !Idx->isValueDependent() &&
          !Idx->getType()->isIntegerType())
        return ExprError(
            Diag(Idx->getBeginLoc(), diag::err_typecheck_subscript_not_integer)
            << Idx->getSourceRange());

      // Record this array index.
      Comps.push_back(OffsetOfNode(OC.LocStart, Exprs.size(), OC.LocEnd));
      Exprs.push_back(Idx);
      continue;
    }

    // Offset of a field.
    if (CurrentType->isDependentType()) {
      // We have the offset of a field, but we can't look into the dependent
      // type. Just record the identifier of the field.
      Comps.push_back(OffsetOfNode(OC.LocStart, OC.U.IdentInfo, OC.LocEnd));
      CurrentType = Context.DependentTy;
      continue;
    }

    // We need to have a complete type to look into.
    if (RequireCompleteType(OC.LocStart, CurrentType,
                            diag::err_offsetof_incomplete_type))
      return ExprError();

    // Look for the designated field.
    const RecordType *RC = CurrentType->getAs<RecordType>();
    if (!RC)
      return ExprError(Diag(OC.LocEnd, diag::err_offsetof_record_type)
                       << CurrentType);
    RecordDecl *RD = RC->getDecl();

    // C++ [lib.support.types]p5:
    //   The macro offsetof accepts a restricted set of type arguments in this
    //   International Standard. type shall be a POD structure or a POD union
    //   (clause 9).
// C++11 [support.types]p4: // If type is not a standard-layout class (Clause 9), the results are // undefined. if (CXXRecordDecl *CRD = dyn_cast(RD)) { bool IsSafe = LangOpts.CPlusPlus11? CRD->isStandardLayout() : CRD->isPOD(); unsigned DiagID = LangOpts.CPlusPlus11? diag::ext_offsetof_non_standardlayout_type : diag::ext_offsetof_non_pod_type; if (!IsSafe && !DidWarnAboutNonPOD && DiagRuntimeBehavior(BuiltinLoc, nullptr, PDiag(DiagID) << SourceRange(Components[0].LocStart, OC.LocEnd) << CurrentType)) DidWarnAboutNonPOD = true; } // Look for the field. LookupResult R(*this, OC.U.IdentInfo, OC.LocStart, LookupMemberName); LookupQualifiedName(R, RD); FieldDecl *MemberDecl = R.getAsSingle(); IndirectFieldDecl *IndirectMemberDecl = nullptr; if (!MemberDecl) { if ((IndirectMemberDecl = R.getAsSingle())) MemberDecl = IndirectMemberDecl->getAnonField(); } if (!MemberDecl) return ExprError(Diag(BuiltinLoc, diag::err_no_member) << OC.U.IdentInfo << RD << SourceRange(OC.LocStart, OC.LocEnd)); // C99 7.17p3: // (If the specified member is a bit-field, the behavior is undefined.) // // We diagnose this as an error. if (MemberDecl->isBitField()) { Diag(OC.LocEnd, diag::err_offsetof_bitfield) << MemberDecl->getDeclName() << SourceRange(BuiltinLoc, RParenLoc); Diag(MemberDecl->getLocation(), diag::note_bitfield_decl); return ExprError(); } RecordDecl *Parent = MemberDecl->getParent(); if (IndirectMemberDecl) Parent = cast(IndirectMemberDecl->getDeclContext()); // If the member was found in a base class, introduce OffsetOfNodes for // the base class indirections. CXXBasePaths Paths; if (IsDerivedFrom(OC.LocStart, CurrentType, Context.getTypeDeclType(Parent), Paths)) { if (Paths.getDetectedVirtual()) { Diag(OC.LocEnd, diag::err_offsetof_field_of_virtual_base) << MemberDecl->getDeclName() << SourceRange(BuiltinLoc, RParenLoc); return ExprError(); } CXXBasePath &Path = Paths.front(); for (const CXXBasePathElement &B : Path) Comps.push_back(OffsetOfNode(B.Base)); } if (IndirectMemberDecl) { for (auto *FI : IndirectMemberDecl->chain()) { assert(isa(FI)); Comps.push_back(OffsetOfNode(OC.LocStart, cast(FI), OC.LocEnd)); } } else Comps.push_back(OffsetOfNode(OC.LocStart, MemberDecl, OC.LocEnd)); CurrentType = MemberDecl->getType().getNonReferenceType(); } return OffsetOfExpr::Create(Context, Context.getSizeType(), BuiltinLoc, TInfo, Comps, Exprs, RParenLoc); } ExprResult Sema::ActOnBuiltinOffsetOf(Scope *S, SourceLocation BuiltinLoc, SourceLocation TypeLoc, ParsedType ParsedArgTy, ArrayRef Components, SourceLocation RParenLoc) { TypeSourceInfo *ArgTInfo; QualType ArgTy = GetTypeFromParser(ParsedArgTy, &ArgTInfo); if (ArgTy.isNull()) return ExprError(); if (!ArgTInfo) ArgTInfo = Context.getTrivialTypeSourceInfo(ArgTy, TypeLoc); return BuildBuiltinOffsetOf(BuiltinLoc, ArgTInfo, Components, RParenLoc); } ExprResult Sema::ActOnChooseExpr(SourceLocation BuiltinLoc, Expr *CondExpr, Expr *LHSExpr, Expr *RHSExpr, SourceLocation RPLoc) { assert((CondExpr && LHSExpr && RHSExpr) && "Missing type argument(s)"); ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; QualType resType; bool ValueDependent = false; bool CondIsTrue = false; if (CondExpr->isTypeDependent() || CondExpr->isValueDependent()) { resType = Context.DependentTy; ValueDependent = true; } else { // The conditional expression is required to be a constant expression. 
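    // Illustrative example (editor's addition): the condition must fold to
    // an integer constant, and the chosen arm supplies the type and value
    // of the whole expression,
    //
    //   int r = __builtin_choose_expr(sizeof(long) == 8, 1, 2);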
llvm::APSInt condEval(32); ExprResult CondICE = VerifyIntegerConstantExpression(CondExpr, &condEval, diag::err_typecheck_choose_expr_requires_constant, false); if (CondICE.isInvalid()) return ExprError(); CondExpr = CondICE.get(); CondIsTrue = condEval.getZExtValue(); // If the condition is > zero, then the AST type is the same as the LHSExpr. Expr *ActiveExpr = CondIsTrue ? LHSExpr : RHSExpr; resType = ActiveExpr->getType(); ValueDependent = ActiveExpr->isValueDependent(); VK = ActiveExpr->getValueKind(); OK = ActiveExpr->getObjectKind(); } return new (Context) ChooseExpr(BuiltinLoc, CondExpr, LHSExpr, RHSExpr, resType, VK, OK, RPLoc, CondIsTrue, resType->isDependentType(), ValueDependent); } //===----------------------------------------------------------------------===// // Clang Extensions. //===----------------------------------------------------------------------===// /// ActOnBlockStart - This callback is invoked when a block literal is started. void Sema::ActOnBlockStart(SourceLocation CaretLoc, Scope *CurScope) { BlockDecl *Block = BlockDecl::Create(Context, CurContext, CaretLoc); if (LangOpts.CPlusPlus) { Decl *ManglingContextDecl; if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext(Block->getDeclContext(), ManglingContextDecl)) { unsigned ManglingNumber = MCtx->getManglingNumber(Block); Block->setBlockMangling(ManglingNumber, ManglingContextDecl); } } PushBlockScope(CurScope, Block); CurContext->addDecl(Block); if (CurScope) PushDeclContext(CurScope, Block); else CurContext = Block; getCurBlock()->HasImplicitReturnType = true; // Enter a new evaluation context to insulate the block from any // cleanups from the enclosing full-expression. PushExpressionEvaluationContext( ExpressionEvaluationContext::PotentiallyEvaluated); } void Sema::ActOnBlockArguments(SourceLocation CaretLoc, Declarator &ParamInfo, Scope *CurScope) { assert(ParamInfo.getIdentifier() == nullptr && "block-id should have no identifier!"); assert(ParamInfo.getContext() == DeclaratorContext::BlockLiteralContext); BlockScopeInfo *CurBlock = getCurBlock(); TypeSourceInfo *Sig = GetTypeForDeclarator(ParamInfo, CurScope); QualType T = Sig->getType(); // FIXME: We should allow unexpanded parameter packs here, but that would, // in turn, make the block expression contain unexpanded parameter packs. if (DiagnoseUnexpandedParameterPack(CaretLoc, Sig, UPPC_Block)) { // Drop the parameters. FunctionProtoType::ExtProtoInfo EPI; EPI.HasTrailingReturn = false; EPI.TypeQuals.addConst(); T = Context.getFunctionType(Context.DependentTy, None, EPI); Sig = Context.getTrivialTypeSourceInfo(T); } // GetTypeForDeclarator always produces a function type for a block // literal signature. Furthermore, it is always a FunctionProtoType // unless the function was written with a typedef. assert(T->isFunctionType() && "GetTypeForDeclarator made a non-function block signature"); // Look for an explicit signature in that function type. FunctionProtoTypeLoc ExplicitSignature; if ((ExplicitSignature = Sig->getTypeLoc().getAsAdjusted())) { // Check whether that explicit signature was synthesized by // GetTypeForDeclarator. If so, don't save that as part of the // written signature. if (ExplicitSignature.getLocalRangeBegin() == ExplicitSignature.getLocalRangeEnd()) { // This would be much cheaper if we stored TypeLocs instead of // TypeSourceInfos. 
TypeLoc Result = ExplicitSignature.getReturnLoc(); unsigned Size = Result.getFullDataSize(); Sig = Context.CreateTypeSourceInfo(Result.getType(), Size); Sig->getTypeLoc().initializeFullCopy(Result, Size); ExplicitSignature = FunctionProtoTypeLoc(); } } CurBlock->TheDecl->setSignatureAsWritten(Sig); CurBlock->FunctionType = T; const FunctionType *Fn = T->getAs(); QualType RetTy = Fn->getReturnType(); bool isVariadic = (isa(Fn) && cast(Fn)->isVariadic()); CurBlock->TheDecl->setIsVariadic(isVariadic); // Context.DependentTy is used as a placeholder for a missing block // return type. TODO: what should we do with declarators like: // ^ * { ... } // If the answer is "apply template argument deduction".... if (RetTy != Context.DependentTy) { CurBlock->ReturnType = RetTy; CurBlock->TheDecl->setBlockMissingReturnType(false); CurBlock->HasImplicitReturnType = false; } // Push block parameters from the declarator if we had them. SmallVector Params; if (ExplicitSignature) { for (unsigned I = 0, E = ExplicitSignature.getNumParams(); I != E; ++I) { ParmVarDecl *Param = ExplicitSignature.getParam(I); if (Param->getIdentifier() == nullptr && !Param->isImplicit() && !Param->isInvalidDecl() && !getLangOpts().CPlusPlus) Diag(Param->getLocation(), diag::err_parameter_name_omitted); Params.push_back(Param); } // Fake up parameter variables if we have a typedef, like // ^ fntype { ... } } else if (const FunctionProtoType *Fn = T->getAs()) { for (const auto &I : Fn->param_types()) { ParmVarDecl *Param = BuildParmVarDeclForTypedef( CurBlock->TheDecl, ParamInfo.getBeginLoc(), I); Params.push_back(Param); } } // Set the parameters on the block decl. if (!Params.empty()) { CurBlock->TheDecl->setParams(Params); CheckParmsForFunctionDef(CurBlock->TheDecl->parameters(), /*CheckParameterNames=*/false); } // Finally we can process decl attributes. ProcessDeclAttributes(CurScope, CurBlock->TheDecl, ParamInfo); // Put the parameter variables in scope. for (auto AI : CurBlock->TheDecl->parameters()) { AI->setOwningFunction(CurBlock->TheDecl); // If this has an identifier, add it to the scope stack. if (AI->getIdentifier()) { CheckShadow(CurBlock->TheScope, AI); PushOnScopeChains(AI, CurBlock->TheScope); } } } /// ActOnBlockError - If there is an error parsing a block, this callback /// is invoked to pop the information about the block from the action impl. void Sema::ActOnBlockError(SourceLocation CaretLoc, Scope *CurScope) { // Leave the expression-evaluation context. DiscardCleanupsInEvaluationContext(); PopExpressionEvaluationContext(); // Pop off CurBlock, handle nested blocks. PopDeclContext(); PopFunctionScopeInfo(); } /// ActOnBlockStmtExpr - This is called when the body of a block statement /// literal was successfully completed. ^(int x){...} ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc, Stmt *Body, Scope *CurScope) { // If blocks are disabled, emit an error. if (!LangOpts.Blocks) Diag(CaretLoc, diag::err_blocks_disable) << LangOpts.OpenCL; // Leave the expression-evaluation context. 
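  // For reference (editor's addition): the completed block literals that
  // reach this point look like
  //
  //   int (^add1)(int) = ^(int x) { return x + 1; };   // requires -fblocks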
if (hasAnyUnrecoverableErrorsInThisFunction()) DiscardCleanupsInEvaluationContext(); assert(!Cleanup.exprNeedsCleanups() && "cleanups within block not correctly bound!"); PopExpressionEvaluationContext(); BlockScopeInfo *BSI = cast(FunctionScopes.back()); BlockDecl *BD = BSI->TheDecl; if (BSI->HasImplicitReturnType) deduceClosureReturnType(*BSI); PopDeclContext(); QualType RetTy = Context.VoidTy; if (!BSI->ReturnType.isNull()) RetTy = BSI->ReturnType; bool NoReturn = BD->hasAttr(); QualType BlockTy; // Set the captured variables on the block. // FIXME: Share capture structure between BlockDecl and CapturingScopeInfo! SmallVector Captures; for (Capture &Cap : BSI->Captures) { if (Cap.isThisCapture()) continue; BlockDecl::Capture NewCap(Cap.getVariable(), Cap.isBlockCapture(), Cap.isNested(), Cap.getInitExpr()); Captures.push_back(NewCap); } BD->setCaptures(Context, Captures, BSI->CXXThisCaptureIndex != 0); // If the user wrote a function type in some form, try to use that. if (!BSI->FunctionType.isNull()) { const FunctionType *FTy = BSI->FunctionType->getAs(); FunctionType::ExtInfo Ext = FTy->getExtInfo(); if (NoReturn && !Ext.getNoReturn()) Ext = Ext.withNoReturn(true); // Turn protoless block types into nullary block types. if (isa(FTy)) { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = Ext; BlockTy = Context.getFunctionType(RetTy, None, EPI); // Otherwise, if we don't need to change anything about the function type, // preserve its sugar structure. } else if (FTy->getReturnType() == RetTy && (!NoReturn || FTy->getNoReturnAttr())) { BlockTy = BSI->FunctionType; // Otherwise, make the minimal modifications to the function type. } else { const FunctionProtoType *FPT = cast(FTy); FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo(); EPI.TypeQuals = Qualifiers(); EPI.ExtInfo = Ext; BlockTy = Context.getFunctionType(RetTy, FPT->getParamTypes(), EPI); } // If we don't have a function type, just build one from nothing. } else { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = FunctionType::ExtInfo().withNoReturn(NoReturn); BlockTy = Context.getFunctionType(RetTy, None, EPI); } DiagnoseUnusedParameters(BD->parameters()); BlockTy = Context.getBlockPointerType(BlockTy); // If needed, diagnose invalid gotos and switches in the block. if (getCurFunction()->NeedsScopeChecking() && !PP.isCodeCompletionEnabled()) DiagnoseInvalidJumps(cast(Body)); BD->setBody(cast(Body)); if (Body && getCurFunction()->HasPotentialAvailabilityViolations) DiagnoseUnguardedAvailabilityViolations(BD); // Try to apply the named return value optimization. We have to check again // if we can do this, though, because blocks keep return statements around // to deduce an implicit return type. if (getLangOpts().CPlusPlus && RetTy->isRecordType() && !BD->isDependentContext()) computeNRVO(Body, BSI); BlockExpr *Result = new (Context) BlockExpr(BD, BlockTy); AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy(); PopFunctionScopeInfo(&WP, Result->getBlockDecl(), Result); // If the block isn't obviously global, i.e. it captures anything at // all, then we need to do a few things in the surrounding context: if (Result->getBlockDecl()->hasCaptures()) { // First, this expression has a new cleanup object. ExprCleanupObjects.push_back(Result->getBlockDecl()); Cleanup.setExprNeedsCleanups(true); // It also gets a branch-protected scope if any of the captured // variables needs destruction. 
for (const auto &CI : Result->getBlockDecl()->captures()) { const VarDecl *var = CI.getVariable(); if (var->getType().isDestructedType() != QualType::DK_none) { setFunctionHasBranchProtectedScope(); break; } } } if (getCurFunction()) getCurFunction()->addBlock(BD); return Result; } ExprResult Sema::ActOnVAArg(SourceLocation BuiltinLoc, Expr *E, ParsedType Ty, SourceLocation RPLoc) { TypeSourceInfo *TInfo; GetTypeFromParser(Ty, &TInfo); return BuildVAArgExpr(BuiltinLoc, E, TInfo, RPLoc); } ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc, Expr *E, TypeSourceInfo *TInfo, SourceLocation RPLoc) { Expr *OrigExpr = E; bool IsMS = false; // CUDA device code does not support varargs. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { if (const FunctionDecl *F = dyn_cast(CurContext)) { CUDAFunctionTarget T = IdentifyCUDATarget(F); if (T == CFT_Global || T == CFT_Device || T == CFT_HostDevice) return ExprError(Diag(E->getBeginLoc(), diag::err_va_arg_in_device)); } } // It might be a __builtin_ms_va_list. (But don't ever mark a va_arg() // as Microsoft ABI on an actual Microsoft platform, where // __builtin_ms_va_list and __builtin_va_list are the same.) if (!E->isTypeDependent() && Context.getTargetInfo().hasBuiltinMSVaList() && Context.getTargetInfo().getBuiltinVaListKind() != TargetInfo::CharPtrBuiltinVaList) { QualType MSVaListType = Context.getBuiltinMSVaListType(); if (Context.hasSameType(MSVaListType, E->getType())) { if (CheckForModifiableLvalue(E, BuiltinLoc, *this)) return ExprError(); IsMS = true; } } // Get the va_list type QualType VaListType = Context.getBuiltinVaListType(); if (!IsMS) { if (VaListType->isArrayType()) { // Deal with implicit array decay; for example, on x86-64, // va_list is an array, but it's supposed to decay to // a pointer for va_arg. VaListType = Context.getArrayDecayedType(VaListType); // Make sure the input expression also decays appropriately. ExprResult Result = UsualUnaryConversions(E); if (Result.isInvalid()) return ExprError(); E = Result.get(); } else if (VaListType->isRecordType() && getLangOpts().CPlusPlus) { // If va_list is a record type and we are compiling in C++ mode, // check the argument using reference binding. InitializedEntity Entity = InitializedEntity::InitializeParameter( Context, Context.getLValueReferenceType(VaListType), false); ExprResult Init = PerformCopyInitialization(Entity, SourceLocation(), E); if (Init.isInvalid()) return ExprError(); E = Init.getAs(); } else { // Otherwise, the va_list argument must be an l-value because // it is modified by va_arg. if (!E->isTypeDependent() && CheckForModifiableLvalue(E, BuiltinLoc, *this)) return ExprError(); } } if (!IsMS && !E->isTypeDependent() && !Context.hasSameType(VaListType, E->getType())) return ExprError( Diag(E->getBeginLoc(), diag::err_first_argument_to_va_arg_not_of_type_va_list) << OrigExpr->getType() << E->getSourceRange()); if (!TInfo->getType()->isDependentType()) { if (RequireCompleteType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(), diag::err_second_parameter_to_va_arg_incomplete, TInfo->getTypeLoc())) return ExprError(); if (RequireNonAbstractType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(), diag::err_second_parameter_to_va_arg_abstract, TInfo->getTypeLoc())) return ExprError(); if (!TInfo->getType().isPODType(Context)) { Diag(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType()->isObjCLifetimeType() ? 
diag::warn_second_parameter_to_va_arg_ownership_qualified : diag::warn_second_parameter_to_va_arg_not_pod) << TInfo->getType() << TInfo->getTypeLoc().getSourceRange(); } // Check for va_arg where arguments of the given type will be promoted // (i.e. this va_arg is guaranteed to have undefined behavior). QualType PromoteType; if (TInfo->getType()->isPromotableIntegerType()) { PromoteType = Context.getPromotedIntegerType(TInfo->getType()); if (Context.typesAreCompatible(PromoteType, TInfo->getType())) PromoteType = QualType(); } if (TInfo->getType()->isSpecificBuiltinType(BuiltinType::Float)) PromoteType = Context.DoubleTy; if (!PromoteType.isNull()) DiagRuntimeBehavior(TInfo->getTypeLoc().getBeginLoc(), E, PDiag(diag::warn_second_parameter_to_va_arg_never_compatible) << TInfo->getType() << PromoteType << TInfo->getTypeLoc().getSourceRange()); } QualType T = TInfo->getType().getNonLValueExprType(Context); return new (Context) VAArgExpr(BuiltinLoc, E, TInfo, RPLoc, T, IsMS); } ExprResult Sema::ActOnGNUNullExpr(SourceLocation TokenLoc) { // The type of __null will be int or long, depending on the size of // pointers on the target. QualType Ty; unsigned pw = Context.getTargetInfo().getPointerWidth(0); if (pw == Context.getTargetInfo().getIntWidth()) Ty = Context.IntTy; else if (pw == Context.getTargetInfo().getLongWidth()) Ty = Context.LongTy; else if (pw == Context.getTargetInfo().getLongLongWidth()) Ty = Context.LongLongTy; else { llvm_unreachable("I don't know size of pointer!"); } return new (Context) GNUNullExpr(Ty, TokenLoc); } bool Sema::ConversionToObjCStringLiteralCheck(QualType DstType, Expr *&Exp, bool Diagnose) { if (!getLangOpts().ObjC) return false; const ObjCObjectPointerType *PT = DstType->getAs(); if (!PT) return false; if (!PT->isObjCIdType()) { // Check if the destination is the 'NSString' interface. const ObjCInterfaceDecl *ID = PT->getInterfaceDecl(); if (!ID || !ID->getIdentifier()->isStr("NSString")) return false; } // Ignore any parens, implicit casts (should only be // array-to-pointer decays), and not-so-opaque values. The last is // important for making this trigger for property assignments. Expr *SrcExpr = Exp->IgnoreParenImpCasts(); if (OpaqueValueExpr *OV = dyn_cast(SrcExpr)) if (OV->getSourceExpr()) SrcExpr = OV->getSourceExpr()->IgnoreParenImpCasts(); StringLiteral *SL = dyn_cast(SrcExpr); if (!SL || !SL->isAscii()) return false; if (Diagnose) { Diag(SL->getBeginLoc(), diag::err_missing_atsign_prefix) << FixItHint::CreateInsertion(SL->getBeginLoc(), "@"); Exp = BuildObjCStringLiteral(SL->getBeginLoc(), SL).get(); } return true; } static bool maybeDiagnoseAssignmentToFunction(Sema &S, QualType DstType, const Expr *SrcExpr) { if (!DstType->isFunctionPointerType() || !SrcExpr->getType()->isFunctionType()) return false; auto *DRE = dyn_cast(SrcExpr->IgnoreParenImpCasts()); if (!DRE) return false; auto *FD = dyn_cast(DRE->getDecl()); if (!FD) return false; return !S.checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, SrcExpr->getBeginLoc()); } bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy, SourceLocation Loc, QualType DstType, QualType SrcType, Expr *SrcExpr, AssignmentAction Action, bool *Complained) { if (Complained) *Complained = false; // Decode the result (notice that AST's are still created for extensions). 
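  // Illustrative inputs (editor's addition): C assignments that are accepted
  // with an extension warning rather than rejected, e.g.
  //
  //   int *p = 42;   // IntToPointer -> ext_typecheck_convert_int_pointer
  //   int i = p;     // PointerToInt -> ext_typecheck_convert_pointer_int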
bool CheckInferredResultType = false; bool isInvalid = false; unsigned DiagKind = 0; FixItHint Hint; ConversionFixItGenerator ConvHints; bool MayHaveConvFixit = false; bool MayHaveFunctionDiff = false; const ObjCInterfaceDecl *IFace = nullptr; const ObjCProtocolDecl *PDecl = nullptr; switch (ConvTy) { case Compatible: DiagnoseAssignmentEnum(DstType, SrcType, SrcExpr); return false; case PointerToInt: DiagKind = diag::ext_typecheck_convert_pointer_int; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IntToPointer: DiagKind = diag::ext_typecheck_convert_int_pointer; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IncompatiblePointer: if (Action == AA_Passing_CFAudited) DiagKind = diag::err_arc_typecheck_convert_incompatible_pointer; else if (SrcType->isFunctionPointerType() && DstType->isFunctionPointerType()) DiagKind = diag::ext_typecheck_convert_incompatible_function_pointer; else DiagKind = diag::ext_typecheck_convert_incompatible_pointer; CheckInferredResultType = DstType->isObjCObjectPointerType() && SrcType->isObjCObjectPointerType(); if (Hint.isNull() && !CheckInferredResultType) { ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); } else if (CheckInferredResultType) { SrcType = SrcType.getUnqualifiedType(); DstType = DstType.getUnqualifiedType(); } MayHaveConvFixit = true; break; case IncompatiblePointerSign: DiagKind = diag::ext_typecheck_convert_incompatible_pointer_sign; break; case FunctionVoidPointer: DiagKind = diag::ext_typecheck_convert_pointer_void_func; break; case IncompatiblePointerDiscardsQualifiers: { // Perform array-to-pointer decay if necessary. if (SrcType->isArrayType()) SrcType = Context.getArrayDecayedType(SrcType); Qualifiers lhq = SrcType->getPointeeType().getQualifiers(); Qualifiers rhq = DstType->getPointeeType().getQualifiers(); if (lhq.getAddressSpace() != rhq.getAddressSpace()) { DiagKind = diag::err_typecheck_incompatible_address_space; break; } else if (lhq.getObjCLifetime() != rhq.getObjCLifetime()) { DiagKind = diag::err_typecheck_incompatible_ownership; break; } llvm_unreachable("unknown error case for discarding qualifiers!"); // fallthrough } case CompatiblePointerDiscardsQualifiers: // If the qualifiers lost were because we were applying the // (deprecated) C++ conversion from a string literal to a char* // (or wchar_t*), then there was no error (C++ 4.2p2). FIXME: // Ideally, this check would be performed in // checkPointerTypesForAssignment. However, that would require a // bit of refactoring (so that the second argument is an // expression, rather than a type), which should be done as part // of a larger effort to fix checkPointerTypesForAssignment for // C++ semantics. 
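    // Illustrative case (editor's addition) of the exemption applied below
    // in C++:
    //
    //   char *p = "abc";   // deprecated string-literal conversion,
    //                      // C++03 4.2p2, so no qualifier diagnostic here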
if (getLangOpts().CPlusPlus && IsStringLiteralToNonConstPointerConversion(SrcExpr, DstType)) return false; DiagKind = diag::ext_typecheck_convert_discards_qualifiers; break; case IncompatibleNestedPointerQualifiers: DiagKind = diag::ext_nested_pointer_qualifier_mismatch; break; case IntToBlockPointer: DiagKind = diag::err_int_to_block_pointer; break; case IncompatibleBlockPointer: DiagKind = diag::err_typecheck_convert_incompatible_block_pointer; break; case IncompatibleObjCQualifiedId: { if (SrcType->isObjCQualifiedIdType()) { const ObjCObjectPointerType *srcOPT = SrcType->getAs(); for (auto *srcProto : srcOPT->quals()) { PDecl = srcProto; break; } if (const ObjCInterfaceType *IFaceT = DstType->getAs()->getInterfaceType()) IFace = IFaceT->getDecl(); } else if (DstType->isObjCQualifiedIdType()) { const ObjCObjectPointerType *dstOPT = DstType->getAs(); for (auto *dstProto : dstOPT->quals()) { PDecl = dstProto; break; } if (const ObjCInterfaceType *IFaceT = SrcType->getAs()->getInterfaceType()) IFace = IFaceT->getDecl(); } DiagKind = diag::warn_incompatible_qualified_id; break; } case IncompatibleVectors: DiagKind = diag::warn_incompatible_vectors; break; case IncompatibleObjCWeakRef: DiagKind = diag::err_arc_weak_unavailable_assign; break; case Incompatible: if (maybeDiagnoseAssignmentToFunction(*this, DstType, SrcExpr)) { if (Complained) *Complained = true; return true; } DiagKind = diag::err_typecheck_convert_incompatible; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; isInvalid = true; MayHaveFunctionDiff = true; break; } QualType FirstType, SecondType; switch (Action) { case AA_Assigning: case AA_Initializing: // The destination type comes first. FirstType = DstType; SecondType = SrcType; break; case AA_Returning: case AA_Passing: case AA_Passing_CFAudited: case AA_Converting: case AA_Sending: case AA_Casting: // The source type comes first. FirstType = SrcType; SecondType = DstType; break; } PartialDiagnostic FDiag = PDiag(DiagKind); if (Action == AA_Passing_CFAudited) FDiag << FirstType << SecondType << AA_Passing << SrcExpr->getSourceRange(); else FDiag << FirstType << SecondType << Action << SrcExpr->getSourceRange(); // If we can fix the conversion, suggest the FixIts. 
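  // Hypothetical example (editor's addition; the exact fix-it text may
  // differ):
  //
  //   int *p; int i;
  //   p = i;   // diagnosed, with a fix-it hint suggesting 'p = &i;'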
assert(ConvHints.isNull() || Hint.isNull()); if (!ConvHints.isNull()) { for (FixItHint &H : ConvHints.Hints) FDiag << H; } else { FDiag << Hint; } if (MayHaveConvFixit) { FDiag << (unsigned) (ConvHints.Kind); } if (MayHaveFunctionDiff) HandleFunctionTypeMismatch(FDiag, SecondType, FirstType); Diag(Loc, FDiag); if (DiagKind == diag::warn_incompatible_qualified_id && PDecl && IFace && !IFace->hasDefinition()) Diag(IFace->getLocation(), diag::note_incomplete_class_and_qualified_id) << IFace << PDecl; if (SecondType == Context.OverloadTy) NoteAllOverloadCandidates(OverloadExpr::find(SrcExpr).Expression, FirstType, /*TakingAddress=*/true); if (CheckInferredResultType) EmitRelatedResultTypeNote(SrcExpr); if (Action == AA_Returning && ConvTy == IncompatiblePointer) EmitRelatedResultTypeNoteForReturn(DstType); if (Complained) *Complained = true; return isInvalid; } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result) { class SimpleICEDiagnoser : public VerifyICEDiagnoser { public: void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) override { S.Diag(Loc, diag::err_expr_not_ice) << S.LangOpts.CPlusPlus << SR; } } Diagnoser; return VerifyIntegerConstantExpression(E, Result, Diagnoser); } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, unsigned DiagID, bool AllowFold) { class IDDiagnoser : public VerifyICEDiagnoser { unsigned DiagID; public: IDDiagnoser(unsigned DiagID) : VerifyICEDiagnoser(DiagID == 0), DiagID(DiagID) { } void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) override { S.Diag(Loc, DiagID) << SR; } } Diagnoser(DiagID); return VerifyIntegerConstantExpression(E, Result, Diagnoser, AllowFold); } void Sema::VerifyICEDiagnoser::diagnoseFold(Sema &S, SourceLocation Loc, SourceRange SR) { S.Diag(Loc, diag::ext_expr_not_ice) << SR << S.LangOpts.CPlusPlus; } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, VerifyICEDiagnoser &Diagnoser, bool AllowFold) { SourceLocation DiagLoc = E->getBeginLoc(); if (getLangOpts().CPlusPlus11) { // C++11 [expr.const]p5: // If an expression of literal class type is used in a context where an // integral constant expression is required, then that class type shall // have a single non-explicit conversion function to an integral or // unscoped enumeration type ExprResult Converted; class CXX11ConvertDiagnoser : public ICEConvertDiagnoser { public: CXX11ConvertDiagnoser(bool Silent) : ICEConvertDiagnoser(/*AllowScopedEnumerations*/false, Silent, true) {} SemaDiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_not_integral) << T; } SemaDiagnosticBuilder diagnoseIncomplete( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_incomplete_type) << T; } SemaDiagnosticBuilder diagnoseExplicitConv( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { return S.Diag(Loc, diag::err_ice_explicit_conversion) << T << ConvTy; } SemaDiagnosticBuilder noteExplicitConv( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseAmbiguous( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_ambiguous_conversion) << T; } SemaDiagnosticBuilder noteAmbiguous( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here) << 
ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseConversion( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { llvm_unreachable("conversion functions are permitted"); } } ConvertDiagnoser(Diagnoser.Suppress); Converted = PerformContextualImplicitConversion(DiagLoc, E, ConvertDiagnoser); if (Converted.isInvalid()) return Converted; E = Converted.get(); if (!E->getType()->isIntegralOrUnscopedEnumerationType()) return ExprError(); } else if (!E->getType()->isIntegralOrUnscopedEnumerationType()) { // An ICE must be of integral or unscoped enumeration type. if (!Diagnoser.Suppress) Diagnoser.diagnoseNotICE(*this, DiagLoc, E->getSourceRange()); return ExprError(); } if (!isa(E)) E = ConstantExpr::Create(Context, E); // Circumvent ICE checking in C++11 to avoid evaluating the expression twice // in the non-ICE case. if (!getLangOpts().CPlusPlus11 && E->isIntegerConstantExpr(Context)) { if (Result) *Result = E->EvaluateKnownConstIntCheckOverflow(Context); return E; } Expr::EvalResult EvalResult; SmallVector Notes; EvalResult.Diag = &Notes; // Try to evaluate the expression, and produce diagnostics explaining why it's // not a constant expression as a side-effect. bool Folded = E->EvaluateAsRValue(EvalResult, Context) && EvalResult.Val.isInt() && !EvalResult.HasSideEffects; // In C++11, we can rely on diagnostics being produced for any expression // which is not a constant expression. If no diagnostics were produced, then // this is a constant expression. if (Folded && getLangOpts().CPlusPlus11 && Notes.empty()) { if (Result) *Result = EvalResult.Val.getInt(); return E; } // If our only note is the usual "invalid subexpression" note, just point // the caret at its location rather than producing an essentially // redundant note. if (Notes.size() == 1 && Notes[0].second.getDiagID() == diag::note_invalid_subexpr_in_const_expr) { DiagLoc = Notes[0].first; Notes.clear(); } if (!Folded || !AllowFold) { if (!Diagnoser.Suppress) { Diagnoser.diagnoseNotICE(*this, DiagLoc, E->getSourceRange()); for (const PartialDiagnosticAt &Note : Notes) Diag(Note.first, Note.second); } return ExprError(); } Diagnoser.diagnoseFold(*this, DiagLoc, E->getSourceRange()); for (const PartialDiagnosticAt &Note : Notes) Diag(Note.first, Note.second); if (Result) *Result = EvalResult.Val.getInt(); return E; } namespace { // Handle the case where we conclude a expression which we speculatively // considered to be unevaluated is actually evaluated. class TransformToPE : public TreeTransform { typedef TreeTransform BaseTransform; public: TransformToPE(Sema &SemaRef) : BaseTransform(SemaRef) { } // Make sure we redo semantic analysis bool AlwaysRebuild() { return true; } // Make sure we handle LabelStmts correctly. // FIXME: This does the right thing, but maybe we need a more general // fix to TreeTransform? StmtResult TransformLabelStmt(LabelStmt *S) { S->getDecl()->setStmt(nullptr); return BaseTransform::TransformLabelStmt(S); } // We need to special-case DeclRefExprs referring to FieldDecls which // are not part of a member pointer formation; normal TreeTransforming // doesn't catch this case because of the way we represent them in the AST. // FIXME: This is a bit ugly; is it really the best way to handle this // case? // // Error on DeclRefExprs referring to FieldDecls. 
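  // Illustrative trigger (editor's addition): C++11 permits a non-static
  // data member in an unevaluated operand,
  //
  //   struct S { int m; };
  //   int k = sizeof(S::m);   // OK while unevaluated
  //
  // but if the speculatively-unevaluated expression turns out to be
  // evaluated, a bare reference to the field has no object, which the
  // override below rejects.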
ExprResult TransformDeclRefExpr(DeclRefExpr *E) { if (isa(E->getDecl()) && !SemaRef.isUnevaluatedContext()) return SemaRef.Diag(E->getLocation(), diag::err_invalid_non_static_member_use) << E->getDecl() << E->getSourceRange(); return BaseTransform::TransformDeclRefExpr(E); } // Exception: filter out member pointer formation ExprResult TransformUnaryOperator(UnaryOperator *E) { if (E->getOpcode() == UO_AddrOf && E->getType()->isMemberPointerType()) return E; return BaseTransform::TransformUnaryOperator(E); } ExprResult TransformLambdaExpr(LambdaExpr *E) { // Lambdas never need to be transformed. return E; } }; } ExprResult Sema::TransformToPotentiallyEvaluated(Expr *E) { assert(isUnevaluatedContext() && "Should only transform unevaluated expressions"); ExprEvalContexts.back().Context = ExprEvalContexts[ExprEvalContexts.size()-2].Context; if (isUnevaluatedContext()) return E; return TransformToPE(*this).TransformExpr(E); } void Sema::PushExpressionEvaluationContext( ExpressionEvaluationContext NewContext, Decl *LambdaContextDecl, ExpressionEvaluationContextRecord::ExpressionKind ExprContext) { ExprEvalContexts.emplace_back(NewContext, ExprCleanupObjects.size(), Cleanup, LambdaContextDecl, ExprContext); Cleanup.reset(); if (!MaybeODRUseExprs.empty()) std::swap(MaybeODRUseExprs, ExprEvalContexts.back().SavedMaybeODRUseExprs); } void Sema::PushExpressionEvaluationContext( ExpressionEvaluationContext NewContext, ReuseLambdaContextDecl_t, ExpressionEvaluationContextRecord::ExpressionKind ExprContext) { Decl *ClosureContextDecl = ExprEvalContexts.back().ManglingContextDecl; PushExpressionEvaluationContext(NewContext, ClosureContextDecl, ExprContext); } namespace { const DeclRefExpr *CheckPossibleDeref(Sema &S, const Expr *PossibleDeref) { PossibleDeref = PossibleDeref->IgnoreParenImpCasts(); if (const auto *E = dyn_cast(PossibleDeref)) { if (E->getOpcode() == UO_Deref) return CheckPossibleDeref(S, E->getSubExpr()); } else if (const auto *E = dyn_cast(PossibleDeref)) { return CheckPossibleDeref(S, E->getBase()); } else if (const auto *E = dyn_cast(PossibleDeref)) { return CheckPossibleDeref(S, E->getBase()); } else if (const auto E = dyn_cast(PossibleDeref)) { QualType Inner; QualType Ty = E->getType(); if (const auto *Ptr = Ty->getAs()) Inner = Ptr->getPointeeType(); else if (const auto *Arr = S.Context.getAsArrayType(Ty)) Inner = Arr->getElementType(); else return nullptr; if (Inner->hasAttr(attr::NoDeref)) return E; } return nullptr; } } // namespace void Sema::WarnOnPendingNoDerefs(ExpressionEvaluationContextRecord &Rec) { for (const Expr *E : Rec.PossibleDerefs) { const DeclRefExpr *DeclRef = CheckPossibleDeref(*this, E); if (DeclRef) { const ValueDecl *Decl = DeclRef->getDecl(); Diag(E->getExprLoc(), diag::warn_dereference_of_noderef_type) << Decl->getName() << E->getSourceRange(); Diag(Decl->getLocation(), diag::note_previous_decl) << Decl->getName(); } else { Diag(E->getExprLoc(), diag::warn_dereference_of_noderef_type_no_decl) << E->getSourceRange(); } } Rec.PossibleDerefs.clear(); } void Sema::PopExpressionEvaluationContext() { ExpressionEvaluationContextRecord& Rec = ExprEvalContexts.back(); unsigned NumTypos = Rec.NumTypos; if (!Rec.Lambdas.empty()) { using ExpressionKind = ExpressionEvaluationContextRecord::ExpressionKind; if (Rec.ExprContext == ExpressionKind::EK_TemplateArgument || Rec.isUnevaluated() || (Rec.isConstantEvaluated() && !getLangOpts().CPlusPlus17)) { unsigned D; if (Rec.isUnevaluated()) { // C++11 [expr.prim.lambda]p2: // A lambda-expression shall not appear in an 
unevaluated operand // (Clause 5). D = diag::err_lambda_unevaluated_operand; } else if (Rec.isConstantEvaluated() && !getLangOpts().CPlusPlus17) { // C++1y [expr.const]p2: // A conditional-expression e is a core constant expression unless the // evaluation of e, following the rules of the abstract machine, would // evaluate [...] a lambda-expression. D = diag::err_lambda_in_constant_expression; } else if (Rec.ExprContext == ExpressionKind::EK_TemplateArgument) { // C++17 [expr.prim.lamda]p2: // A lambda-expression shall not appear [...] in a template-argument. D = diag::err_lambda_in_invalid_context; } else llvm_unreachable("Couldn't infer lambda error message."); for (const auto *L : Rec.Lambdas) Diag(L->getBeginLoc(), D); } else { // Mark the capture expressions odr-used. This was deferred // during lambda expression creation. for (auto *Lambda : Rec.Lambdas) { for (auto *C : Lambda->capture_inits()) MarkDeclarationsReferencedInExpr(C); } } } WarnOnPendingNoDerefs(Rec); // When are coming out of an unevaluated context, clear out any // temporaries that we may have created as part of the evaluation of // the expression in that context: they aren't relevant because they // will never be constructed. if (Rec.isUnevaluated() || Rec.isConstantEvaluated()) { ExprCleanupObjects.erase(ExprCleanupObjects.begin() + Rec.NumCleanupObjects, ExprCleanupObjects.end()); Cleanup = Rec.ParentCleanup; CleanupVarDeclMarking(); std::swap(MaybeODRUseExprs, Rec.SavedMaybeODRUseExprs); // Otherwise, merge the contexts together. } else { Cleanup.mergeFrom(Rec.ParentCleanup); MaybeODRUseExprs.insert(Rec.SavedMaybeODRUseExprs.begin(), Rec.SavedMaybeODRUseExprs.end()); } // Pop the current expression evaluation context off the stack. ExprEvalContexts.pop_back(); // The global expression evaluation context record is never popped. ExprEvalContexts.back().NumTypos += NumTypos; } void Sema::DiscardCleanupsInEvaluationContext() { ExprCleanupObjects.erase( ExprCleanupObjects.begin() + ExprEvalContexts.back().NumCleanupObjects, ExprCleanupObjects.end()); Cleanup.reset(); MaybeODRUseExprs.clear(); } ExprResult Sema::HandleExprEvaluationContextForTypeof(Expr *E) { ExprResult Result = CheckPlaceholderExpr(E); if (Result.isInvalid()) return ExprError(); E = Result.get(); if (!E->getType()->isVariablyModifiedType()) return E; return TransformToPotentiallyEvaluated(E); } /// Are we within a context in which some evaluation could be performed (be it /// constant evaluation or runtime evaluation)? Sadly, this notion is not quite /// captured by C++'s idea of an "unevaluated context". static bool isEvaluatableContext(Sema &SemaRef) { switch (SemaRef.ExprEvalContexts.back().Context) { case Sema::ExpressionEvaluationContext::Unevaluated: case Sema::ExpressionEvaluationContext::UnevaluatedAbstract: // Expressions in this context are never evaluated. return false; case Sema::ExpressionEvaluationContext::UnevaluatedList: case Sema::ExpressionEvaluationContext::ConstantEvaluated: case Sema::ExpressionEvaluationContext::PotentiallyEvaluated: case Sema::ExpressionEvaluationContext::DiscardedStatement: // Expressions in this context could be evaluated. return true; case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: // Referenced declarations will only be used if the construct in the // containing expression is used, at which point we'll be given another // turn to mark them. 
return false; } llvm_unreachable("Invalid context"); } /// Are we within a context in which references to resolved functions or to /// variables result in odr-use? static bool isOdrUseContext(Sema &SemaRef, bool SkipDependentUses = true) { // An expression in a template is not really an expression until it's been // instantiated, so it doesn't trigger odr-use. if (SkipDependentUses && SemaRef.CurContext->isDependentContext()) return false; switch (SemaRef.ExprEvalContexts.back().Context) { case Sema::ExpressionEvaluationContext::Unevaluated: case Sema::ExpressionEvaluationContext::UnevaluatedList: case Sema::ExpressionEvaluationContext::UnevaluatedAbstract: case Sema::ExpressionEvaluationContext::DiscardedStatement: return false; case Sema::ExpressionEvaluationContext::ConstantEvaluated: case Sema::ExpressionEvaluationContext::PotentiallyEvaluated: return true; case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: return false; } llvm_unreachable("Invalid context"); } static bool isImplicitlyDefinableConstexprFunction(FunctionDecl *Func) { CXXMethodDecl *MD = dyn_cast(Func); return Func->isConstexpr() && (Func->isImplicitlyInstantiable() || (MD && !MD->isUserProvided())); } /// Mark a function referenced, and check whether it is odr-used /// (C++ [basic.def.odr]p2, C99 6.9p3) void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func, bool MightBeOdrUse) { assert(Func && "No function?"); Func->setReferenced(); // C++11 [basic.def.odr]p3: // A function whose name appears as a potentially-evaluated expression is // odr-used if it is the unique lookup result or the selected member of a // set of overloaded functions [...]. // // We (incorrectly) mark overload resolution as an unevaluated context, so we // can just check that here. bool OdrUse = MightBeOdrUse && isOdrUseContext(*this); // Determine whether we require a function definition to exist, per // C++11 [temp.inst]p3: // Unless a function template specialization has been explicitly // instantiated or explicitly specialized, the function template // specialization is implicitly instantiated when the specialization is // referenced in a context that requires a function definition to exist. // // That is either when this is an odr-use, or when a usage of a constexpr // function occurs within an evaluatable context. bool NeedDefinition = OdrUse || (isEvaluatableContext(*this) && isImplicitlyDefinableConstexprFunction(Func)); // C++14 [temp.expl.spec]p6: // If a template [...] is explicitly specialized then that specialization // shall be declared before the first use of that specialization that would // cause an implicit instantiation to take place, in every translation unit // in which such a use occurs if (NeedDefinition && (Func->getTemplateSpecializationKind() != TSK_Undeclared || Func->getMemberSpecializationInfo())) checkSpecializationVisibility(Loc, Func); // C++14 [except.spec]p17: // An exception-specification is considered to be needed when: // - the function is odr-used or, if it appears in an unevaluated operand, // would be odr-used if the expression were potentially-evaluated; // // Note, we do this even if MightBeOdrUse is false. That indicates that the // function is a pure virtual function we're calling, and in that case the // function was selected by overload resolution and we need to resolve its // exception specification for a different reason. 
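  // Illustrative case (editor's addition) of the 'NeedDefinition' path
  // computed above: a constexpr template used in constant evaluation must
  // be instantiated even without an odr-use,
  //
  //   template <typename T> constexpr T sq(T x) { return x * x; }
  //   int arr[sq(3)];   // forces sq<int> to be instantiated and defined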
const FunctionProtoType *FPT = Func->getType()->getAs(); if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) ResolveExceptionSpec(Loc, FPT); // If we don't need to mark the function as used, and we don't need to // try to provide a definition, there's nothing more to do. if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) && (!NeedDefinition || Func->getBody())) return; // Note that this declaration has been used. if (CXXConstructorDecl *Constructor = dyn_cast(Func)) { Constructor = cast(Constructor->getFirstDecl()); if (Constructor->isDefaulted() && !Constructor->isDeleted()) { if (Constructor->isDefaultConstructor()) { if (Constructor->isTrivial() && !Constructor->hasAttr()) return; DefineImplicitDefaultConstructor(Loc, Constructor); } else if (Constructor->isCopyConstructor()) { DefineImplicitCopyConstructor(Loc, Constructor); } else if (Constructor->isMoveConstructor()) { DefineImplicitMoveConstructor(Loc, Constructor); } } else if (Constructor->getInheritedConstructor()) { DefineInheritingConstructor(Loc, Constructor); } } else if (CXXDestructorDecl *Destructor = dyn_cast(Func)) { Destructor = cast(Destructor->getFirstDecl()); if (Destructor->isDefaulted() && !Destructor->isDeleted()) { if (Destructor->isTrivial() && !Destructor->hasAttr()) return; DefineImplicitDestructor(Loc, Destructor); } if (Destructor->isVirtual() && getLangOpts().AppleKext) MarkVTableUsed(Loc, Destructor->getParent()); } else if (CXXMethodDecl *MethodDecl = dyn_cast(Func)) { if (MethodDecl->isOverloadedOperator() && MethodDecl->getOverloadedOperator() == OO_Equal) { MethodDecl = cast(MethodDecl->getFirstDecl()); if (MethodDecl->isDefaulted() && !MethodDecl->isDeleted()) { if (MethodDecl->isCopyAssignmentOperator()) DefineImplicitCopyAssignment(Loc, MethodDecl); else if (MethodDecl->isMoveAssignmentOperator()) DefineImplicitMoveAssignment(Loc, MethodDecl); } } else if (isa(MethodDecl) && MethodDecl->getParent()->isLambda()) { CXXConversionDecl *Conversion = cast(MethodDecl->getFirstDecl()); if (Conversion->isLambdaToBlockPointerConversion()) DefineImplicitLambdaToBlockPointerConversion(Loc, Conversion); else DefineImplicitLambdaToFunctionPointerConversion(Loc, Conversion); } else if (MethodDecl->isVirtual() && getLangOpts().AppleKext) MarkVTableUsed(Loc, MethodDecl->getParent()); } // Recursive functions should be marked when used from another function. // FIXME: Is this really right? if (CurContext == Func) return; // Implicit instantiation of function templates and member functions of // class templates. if (Func->isImplicitlyInstantiable()) { TemplateSpecializationKind TSK = Func->getTemplateSpecializationKind(); SourceLocation PointOfInstantiation = Func->getPointOfInstantiation(); bool FirstInstantiation = PointOfInstantiation.isInvalid(); if (FirstInstantiation) { PointOfInstantiation = Loc; Func->setTemplateSpecializationKind(TSK, PointOfInstantiation); } else if (TSK != TSK_ImplicitInstantiation) { // Use the point of use as the point of instantiation, instead of the // point of explicit instantiation (which we track as the actual point of // instantiation). This gives better backtraces in diagnostics. 
PointOfInstantiation = Loc; } if (FirstInstantiation || TSK != TSK_ImplicitInstantiation || Func->isConstexpr()) { if (isa(Func->getDeclContext()) && cast(Func->getDeclContext())->isLocalClass() && CodeSynthesisContexts.size()) PendingLocalImplicitInstantiations.push_back( std::make_pair(Func, PointOfInstantiation)); else if (Func->isConstexpr()) // Do not defer instantiations of constexpr functions, to avoid the // expression evaluator needing to call back into Sema if it sees a // call to such a function. InstantiateFunctionDefinition(PointOfInstantiation, Func); else { Func->setInstantiationIsPending(true); PendingInstantiations.push_back(std::make_pair(Func, PointOfInstantiation)); // Notify the consumer that a function was implicitly instantiated. Consumer.HandleCXXImplicitFunctionInstantiation(Func); } } } else { // Walk redefinitions, as some of them may be instantiable. for (auto i : Func->redecls()) { if (!i->isUsed(false) && i->isImplicitlyInstantiable()) MarkFunctionReferenced(Loc, i, OdrUse); } } if (!OdrUse) return; // Keep track of used but undefined functions. if (!Func->isDefined()) { if (mightHaveNonExternalLinkage(Func)) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); else if (Func->getMostRecentDecl()->isInlined() && !LangOpts.GNUInline && !Func->getMostRecentDecl()->hasAttr()) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); else if (isExternalWithNoLinkageType(Func)) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); } Func->markUsed(Context); } static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, ValueDecl *var, DeclContext *DC) { DeclContext *VarDC = var->getDeclContext(); // If the parameter still belongs to the translation unit, then // we're actually just using one parameter in the declaration of // the next. if (isa(var) && isa(VarDC)) return; // For C code, don't diagnose about capture if we're not actually in code // right now; it's impossible to write a non-constant expression outside of // function context, so we'll get other (more useful) diagnostics later. // // For C++, things get a bit more nasty... it would be nice to suppress this // diagnostic for certain cases like using a local variable in an array bound // for a member of a local class, but the correct predicate is not obvious. if (!S.getLangOpts().CPlusPlus && !S.CurContext->isFunctionOrMethod()) return; unsigned ValueKind = isa(var) ? 1 : 0; unsigned ContextKind = 3; // unknown if (isa(VarDC) && cast(VarDC->getParent())->isLambda()) { ContextKind = 2; } else if (isa(VarDC)) { ContextKind = 0; } else if (isa(VarDC)) { ContextKind = 1; } S.Diag(loc, diag::err_reference_to_local_in_enclosing_context) << var << ValueKind << ContextKind << VarDC; S.Diag(var->getLocation(), diag::note_entity_declared_at) << var; // FIXME: Add additional diagnostic info about class etc. which prevents // capture. } static bool isVariableAlreadyCapturedInScopeInfo(CapturingScopeInfo *CSI, VarDecl *Var, bool &SubCapturesAreNested, QualType &CaptureType, QualType &DeclRefType) { // Check whether we've already captured it. if (CSI->CaptureMap.count(Var)) { // If we found a capture, any subcaptures are nested. SubCapturesAreNested = true; // Retrieve the capture type for this variable. CaptureType = CSI->getCapture(Var).getCaptureType(); // Compute the type of an expression that refers to this variable. 
DeclRefType = CaptureType.getNonReferenceType(); // Similarly to mutable captures in lambda, all the OpenMP captures by copy // are mutable in the sense that user can change their value - they are // private instances of the captured declarations. const Capture &Cap = CSI->getCapture(Var); if (Cap.isCopyCapture() && !(isa(CSI) && cast(CSI)->Mutable) && !(isa(CSI) && cast(CSI)->CapRegionKind == CR_OpenMP)) DeclRefType.addConst(); return true; } return false; } // Only block literals, captured statements, and lambda expressions can // capture; other scopes don't work. static DeclContext *getParentOfCapturingContextOrNull(DeclContext *DC, VarDecl *Var, SourceLocation Loc, const bool Diagnose, Sema &S) { if (isa(DC) || isa(DC) || isLambdaCallOperator(DC)) return getLambdaAwareParentOfDeclContext(DC); else if (Var->hasLocalStorage()) { if (Diagnose) diagnoseUncapturableValueReference(S, Loc, Var, DC); } return nullptr; } // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture // certain types of variables (unnamed, variably modified types etc.) // so check for eligibility. static bool isVariableCapturable(CapturingScopeInfo *CSI, VarDecl *Var, SourceLocation Loc, const bool Diagnose, Sema &S) { bool IsBlock = isa(CSI); bool IsLambda = isa(CSI); // Lambdas are not allowed to capture unnamed variables // (e.g. anonymous unions). // FIXME: The C++11 rule don't actually state this explicitly, but I'm // assuming that's the intent. if (IsLambda && !Var->getDeclName()) { if (Diagnose) { S.Diag(Loc, diag::err_lambda_capture_anonymous_var); S.Diag(Var->getLocation(), diag::note_declared_at); } return false; } // Prohibit variably-modified types in blocks; they're difficult to deal with. if (Var->getType()->isVariablyModifiedType() && IsBlock) { if (Diagnose) { S.Diag(Loc, diag::err_ref_vm_type); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Prohibit structs with flexible array members too. // We cannot capture what is in the tail end of the struct. if (const RecordType *VTTy = Var->getType()->getAs()) { if (VTTy->getDecl()->hasFlexibleArrayMember()) { if (Diagnose) { if (IsBlock) S.Diag(Loc, diag::err_ref_flexarray_type); else S.Diag(Loc, diag::err_lambda_capture_flexarray_type) << Var->getDeclName(); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } } const bool HasBlocksAttr = Var->hasAttr(); // Lambdas and captured statements are not allowed to capture __block // variables; they don't support the expected semantics. if (HasBlocksAttr && (IsLambda || isa(CSI))) { if (Diagnose) { S.Diag(Loc, diag::err_capture_block_variable) << Var->getDeclName() << !IsLambda; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // OpenCL v2.0 s6.12.5: Blocks cannot reference/capture other blocks if (S.getLangOpts().OpenCL && IsBlock && Var->getType()->isBlockPointerType()) { if (Diagnose) S.Diag(Loc, diag::err_opencl_block_ref_block); return false; } return true; } // Returns true if the capture by block was successful. static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool Nested, Sema &S) { Expr *CopyExpr = nullptr; bool ByRef = false; // Blocks are not allowed to capture arrays, excepting OpenCL. // OpenCL v2.0 s1.12.5 (revision 40): arrays are captured by reference // (decayed to pointers). 
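// Illustrative sketch (hypothetical user code) of the array restriction
// below, outside OpenCL:
//   int a[8];
//   void (^b)(void) = ^{ (void)a[0]; };  // error: cannot refer to a
//                                        // declaration with an array type
//                                        // inside a block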
if (!S.getLangOpts().OpenCL && CaptureType->isArrayType()) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_ref_array_type); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Forbid the block-capture of autoreleasing variables. if (CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*block*/ 0; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Warn about implicitly autoreleasing indirect parameters captured by blocks. if (const auto *PT = CaptureType->getAs()) { // This function finds out whether there is an AttributedType of kind // attr::ObjCOwnership in Ty. The existence of AttributedType of kind // attr::ObjCOwnership implies __autoreleasing was explicitly specified // rather than being added implicitly by the compiler. auto IsObjCOwnershipAttributedType = [](QualType Ty) { while (const auto *AttrTy = Ty->getAs()) { if (AttrTy->getAttrKind() == attr::ObjCOwnership) return true; // Peel off AttributedTypes that are not of kind ObjCOwnership. Ty = AttrTy->getModifiedType(); } return false; }; QualType PointeeTy = PT->getPointeeType(); if (PointeeTy->getAs() && PointeeTy.getObjCLifetime() == Qualifiers::OCL_Autoreleasing && !IsObjCOwnershipAttributedType(PointeeTy)) { if (BuildAndDiagnose) { SourceLocation VarLoc = Var->getLocation(); S.Diag(Loc, diag::warn_block_capture_autoreleasing); S.Diag(VarLoc, diag::note_declare_parameter_strong); } } } const bool HasBlocksAttr = Var->hasAttr(); if (HasBlocksAttr || CaptureType->isReferenceType() || (S.getLangOpts().OpenMP && S.isOpenMPCapturedDecl(Var))) { // Block capture by reference does not change the capture or // declaration reference types. ByRef = true; } else { // Block capture by copy introduces 'const'. CaptureType = CaptureType.getNonReferenceType().withConst(); DeclRefType = CaptureType; if (S.getLangOpts().CPlusPlus && BuildAndDiagnose) { if (const RecordType *Record = DeclRefType->getAs()) { // The capture logic needs the destructor, so make sure we mark it. // Usually this is unnecessary because most local variables have // their destructors marked at declaration time, but parameters are // an exception because it's technically only the call site that // actually requires the destructor. if (isa(Var)) S.FinalizeVarWithDestructor(Var, Record); // Enter a new evaluation context to insulate the copy // full-expression. EnterExpressionEvaluationContext scope( S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); // According to the blocks spec, the capture of a variable from // the stack requires a const copy constructor. This is not true // of the copy/move done to move a __block variable to the heap. Expr *DeclRef = new (S.Context) DeclRefExpr( S.Context, Var, Nested, DeclRefType.withConst(), VK_LValue, Loc); ExprResult Result = S.PerformCopyInitialization( InitializedEntity::InitializeBlock(Var->getLocation(), CaptureType, false), Loc, DeclRef); // Build a full-expression copy expression if initialization // succeeded and used a non-trivial constructor. Recover from // errors by pretending that the copy isn't necessary. if (!Result.isInvalid() && !cast(Result.get())->getConstructor() ->isTrivial()) { Result = S.MaybeCreateExprWithCleanups(Result); CopyExpr = Result.get(); } } } } // Actually capture the variable. 
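// Illustrative sketch (hypothetical user code): a by-copy block capture in
// C++ is const and odr-uses the copy constructor:
//   struct S { S(const S &); void f() const; };
//   void g(S s) {
//     void (^b)(void) = ^{ s.f(); };  // the copy of 's' is const, and
//   }                                 // S(const S &) must be usable here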
if (BuildAndDiagnose) BSI->addCapture(Var, HasBlocksAttr, ByRef, Nested, Loc, SourceLocation(), CaptureType, CopyExpr); return true; } /// Capture the given variable in the captured region. static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool RefersToCapturedVariable, Sema &S) { // By default, capture variables by reference. bool ByRef = true; // Using an LValue reference type is consistent with Lambdas (see below). if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) { if (S.isOpenMPCapturedDecl(Var)) { bool HasConst = DeclRefType.isConstQualified(); DeclRefType = DeclRefType.getUnqualifiedType(); // Don't lose diagnostics about assignments to const. if (HasConst) DeclRefType.addConst(); } ByRef = S.isOpenMPCapturedByRef(Var, RSI->OpenMPLevel); } if (ByRef) CaptureType = S.Context.getLValueReferenceType(DeclRefType); else CaptureType = DeclRefType; Expr *CopyExpr = nullptr; if (BuildAndDiagnose) { // The current implementation assumes that all variables are captured // by references. Since there is no capture by copy, no expression // evaluation will be needed. RecordDecl *RD = RSI->TheRecordDecl; FieldDecl *Field = FieldDecl::Create(S.Context, RD, Loc, Loc, nullptr, CaptureType, S.Context.getTrivialTypeSourceInfo(CaptureType, Loc), nullptr, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); RD->addDecl(Field); if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) S.setOpenMPCaptureKind(Field, Var, RSI->OpenMPLevel); CopyExpr = new (S.Context) DeclRefExpr( S.Context, Var, RefersToCapturedVariable, DeclRefType, VK_LValue, Loc); Var->setReferenced(true); Var->markUsed(S.Context); } // Actually capture the variable. if (BuildAndDiagnose) RSI->addCapture(Var, /*isBlock*/false, ByRef, RefersToCapturedVariable, Loc, SourceLocation(), CaptureType, CopyExpr); return true; } /// Create a field within the lambda class for the variable /// being captured. static void addAsFieldToClosureType(Sema &S, LambdaScopeInfo *LSI, QualType FieldType, QualType DeclRefType, SourceLocation Loc, bool RefersToCapturedVariable) { CXXRecordDecl *Lambda = LSI->Lambda; // Build the non-static data member. FieldDecl *Field = FieldDecl::Create(S.Context, Lambda, Loc, Loc, nullptr, FieldType, S.Context.getTrivialTypeSourceInfo(FieldType, Loc), nullptr, false, ICIS_NoInit); // If the variable being captured has an invalid type, mark the lambda class // as invalid as well. if (!FieldType->isDependentType()) { if (S.RequireCompleteType(Loc, FieldType, diag::err_field_incomplete)) { Lambda->setInvalidDecl(); Field->setInvalidDecl(); } else { NamedDecl *Def; FieldType->isIncompleteType(&Def); if (Def && Def->isInvalidDecl()) { Lambda->setInvalidDecl(); Field->setInvalidDecl(); } } } Field->setImplicit(true); Field->setAccess(AS_private); Lambda->addDecl(Field); } /// Capture the given variable in the lambda. static bool captureInLambda(LambdaScopeInfo *LSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool RefersToCapturedVariable, const Sema::TryCaptureKind Kind, SourceLocation EllipsisLoc, const bool IsTopScope, Sema &S) { // Determine whether we are capturing by reference or by value. 
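// Illustrative sketch (hypothetical user code) of the capture kinds
// distinguished below:
//   int x = 0;
//   auto a = [x]  { return x; };  // explicit by-copy
//   auto b = [&x] { return x; };  // explicit by-reference
//   auto c = [=]  { return x; };  // implicit; the capture-default decides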
bool ByRef = false; if (IsTopScope && Kind != Sema::TryCapture_Implicit) { ByRef = (Kind == Sema::TryCapture_ExplicitByRef); } else { ByRef = (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByref); } // Compute the type of the field that will capture this variable. if (ByRef) { // C++11 [expr.prim.lambda]p15: // An entity is captured by reference if it is implicitly or // explicitly captured but not captured by copy. It is // unspecified whether additional unnamed non-static data // members are declared in the closure type for entities // captured by reference. // // FIXME: It is not clear whether we want to build an lvalue reference // to the DeclRefType or to CaptureType.getNonReferenceType(). GCC appears // to do the former, while EDG does the latter. Core issue 1249 will // clarify, but for now we follow GCC because it's a more permissive and // easily defensible position. CaptureType = S.Context.getLValueReferenceType(DeclRefType); } else { // C++11 [expr.prim.lambda]p14: // For each entity captured by copy, an unnamed non-static // data member is declared in the closure type. The // declaration order of these members is unspecified. The type // of such a data member is the type of the corresponding // captured entity if the entity is not a reference to an // object, or the referenced type otherwise. [Note: If the // captured entity is a reference to a function, the // corresponding data member is also a reference to a // function. - end note ] if (const ReferenceType *RefType = CaptureType->getAs()){ if (!RefType->getPointeeType()->isFunctionType()) CaptureType = RefType->getPointeeType(); } // Forbid the lambda copy-capture of autoreleasing variables. if (CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*lambda*/ 1; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Make sure that by-copy captures are of a complete and non-abstract type. if (BuildAndDiagnose) { if (!CaptureType->isDependentType() && S.RequireCompleteType(Loc, CaptureType, diag::err_capture_of_incomplete_type, Var->getDeclName())) return false; if (S.RequireNonAbstractType(Loc, CaptureType, diag::err_capture_of_abstract_type)) return false; } } // Capture this variable in the lambda. if (BuildAndDiagnose) addAsFieldToClosureType(S, LSI, CaptureType, DeclRefType, Loc, RefersToCapturedVariable); // Compute the type of a reference to this captured variable. if (ByRef) DeclRefType = CaptureType.getNonReferenceType(); else { // C++ [expr.prim.lambda]p5: // The closure type for a lambda-expression has a public inline // function call operator [...]. This function call operator is // declared const (9.3.1) if and only if the lambda-expression's // parameter-declaration-clause is not followed by mutable. DeclRefType = CaptureType.getNonReferenceType(); if (!LSI->Mutable && !CaptureType->isReferenceType()) DeclRefType.addConst(); } // Add the capture. if (BuildAndDiagnose) LSI->addCapture(Var, /*IsBlock=*/false, ByRef, RefersToCapturedVariable, Loc, EllipsisLoc, CaptureType, /*CopyExpr=*/nullptr); return true; } bool Sema::tryCaptureVariable( VarDecl *Var, SourceLocation ExprLoc, TryCaptureKind Kind, SourceLocation EllipsisLoc, bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const unsigned *const FunctionScopeIndexToStopAt) { // An init-capture is notionally from the context surrounding its // declaration, but its parent DC is the lambda class. 
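// Illustrative sketch (hypothetical user code): in
//   int i = 0;
//   auto f = [j = i + 1] { return j; };
// 'j' is an init-capture: notionally declared in the enclosing scope, but
// its DeclContext is the closure type, hence the getParent() step below.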
DeclContext *VarDC = Var->getDeclContext(); if (Var->isInitCapture()) VarDC = VarDC->getParent(); DeclContext *DC = CurContext; const unsigned MaxFunctionScopesIndex = FunctionScopeIndexToStopAt ? *FunctionScopeIndexToStopAt : FunctionScopes.size() - 1; // We need to sync up the Declaration Context with the // FunctionScopeIndexToStopAt if (FunctionScopeIndexToStopAt) { unsigned FSIndex = FunctionScopes.size() - 1; while (FSIndex != MaxFunctionScopesIndex) { DC = getLambdaAwareParentOfDeclContext(DC); --FSIndex; } } // If the variable is declared in the current context, there is no need to // capture it. if (VarDC == DC) return true; // Capture global variables if it is required to use private copy of this // variable. bool IsGlobal = !Var->hasLocalStorage(); if (IsGlobal && !(LangOpts.OpenMP && isOpenMPCapturedDecl(Var))) return true; Var = Var->getCanonicalDecl(); // Walk up the stack to determine whether we can capture the variable, // performing the "simple" checks that don't depend on type. We stop when // we've either hit the declared scope of the variable or find an existing // capture of that variable. We start from the innermost capturing-entity // (the DC) and ensure that all intervening capturing-entities // (blocks/lambdas etc.) between the innermost capturer and the variable`s // declcontext can either capture the variable or have already captured // the variable. CaptureType = Var->getType(); DeclRefType = CaptureType.getNonReferenceType(); bool Nested = false; bool Explicit = (Kind != TryCapture_Implicit); unsigned FunctionScopesIndex = MaxFunctionScopesIndex; do { // Only block literals, captured statements, and lambda expressions can // capture; other scopes don't work. DeclContext *ParentDC = getParentOfCapturingContextOrNull(DC, Var, ExprLoc, BuildAndDiagnose, *this); // We need to check for the parent *first* because, if we *have* // private-captured a global variable, we need to recursively capture it in // intermediate blocks, lambdas, etc. if (!ParentDC) { if (IsGlobal) { FunctionScopesIndex = MaxFunctionScopesIndex - 1; break; } return true; } FunctionScopeInfo *FSI = FunctionScopes[FunctionScopesIndex]; CapturingScopeInfo *CSI = cast(FSI); // Check whether we've already captured it. if (isVariableAlreadyCapturedInScopeInfo(CSI, Var, Nested, CaptureType, DeclRefType)) { CSI->getCapture(Var).markUsed(BuildAndDiagnose); break; } // If we are instantiating a generic lambda call operator body, // we do not want to capture new variables. What was captured // during either a lambdas transformation or initial parsing // should be used. if (isGenericLambdaCallOperatorSpecialization(DC)) { if (BuildAndDiagnose) { LambdaScopeInfo *LSI = cast(CSI); if (LSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None) { Diag(ExprLoc, diag::err_lambda_impcap) << Var->getDeclName(); Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); Diag(LSI->Lambda->getBeginLoc(), diag::note_lambda_decl); } else diagnoseUncapturableValueReference(*this, ExprLoc, Var, DC); } return true; } // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture // certain types of variables (unnamed, variably modified types etc.) // so check for eligibility. if (!isVariableCapturable(CSI, Var, ExprLoc, BuildAndDiagnose, *this)) return true; // Try to capture variable-length arrays types. if (Var->getType()->isVariablyModifiedType()) { // We're going to walk down into the type and look for VLA // expressions. 
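// Illustrative sketch (hypothetical user code, using the VLA extension):
//   void f(int n) {
//     int vla[n];
//     [&] { return sizeof(vla); }();  // the array bound 'n' is part of the
//   }                                 // variably modified type and must be
//                                     // captured as well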
QualType QTy = Var->getType(); if (ParmVarDecl *PVD = dyn_cast_or_null(Var)) QTy = PVD->getOriginalType(); captureVariablyModifiedType(Context, QTy, CSI); } if (getLangOpts().OpenMP) { if (auto *RSI = dyn_cast(CSI)) { // OpenMP private variables should not be captured in outer scope, so // just break here. Similarly, global variables that are captured in a // target region should not be captured outside the scope of the region. if (RSI->CapRegionKind == CR_OpenMP) { bool IsOpenMPPrivateDecl = isOpenMPPrivateDecl(Var, RSI->OpenMPLevel); auto IsTargetCap = !IsOpenMPPrivateDecl && isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel); // When we detect target captures we are looking from inside the // target region, therefore we need to propagate the capture from the // enclosing region. Therefore, the capture is not initially nested. if (IsTargetCap) adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel); if (IsTargetCap || IsOpenMPPrivateDecl) { Nested = !IsTargetCap; DeclRefType = DeclRefType.getUnqualifiedType(); CaptureType = Context.getLValueReferenceType(DeclRefType); break; } } } } if (CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None && !Explicit) { // No capture-default, and this is not an explicit capture // so cannot capture this variable. if (BuildAndDiagnose) { Diag(ExprLoc, diag::err_lambda_impcap) << Var->getDeclName(); Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); if (cast(CSI)->Lambda) Diag(cast(CSI)->Lambda->getBeginLoc(), diag::note_lambda_decl); // FIXME: If we error out because an outer lambda can not implicitly // capture a variable that an inner lambda explicitly captures, we // should have the inner lambda do the explicit capture - because // it makes for cleaner diagnostics later. This would purely be done // so that the diagnostic does not misleadingly claim that a variable // can not be captured by a lambda implicitly even though it is captured // explicitly. Suggestion: // - create const bool VariableCaptureWasInitiallyExplicit = Explicit // at the function head // - cache the StartingDeclContext - this must be a lambda // - captureInLambda in the innermost lambda the variable. } return true; } FunctionScopesIndex--; DC = ParentDC; Explicit = false; } while (!VarDC->Equals(DC)); // Walk back down the scope stack, (e.g. from outer lambda to inner lambda) // computing the type of the capture at each step, checking type-specific // requirements, and adding captures if requested. // If the variable had already been captured previously, we start capturing // at the lambda nested within that one. 
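// Illustrative sketch (hypothetical user code): with nested lambdas
//   int x = 0;
//   auto f = [&] { return [&] { return x; }(); };
// naming 'x' only in the inner lambda still records a capture in the outer
// lambda first, then in the inner one, as this loop walks back down.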
  for (unsigned I = ++FunctionScopesIndex, N = MaxFunctionScopesIndex + 1;
       I != N; ++I) {
    CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FunctionScopes[I]);
    if (BlockScopeInfo *BSI = dyn_cast<BlockScopeInfo>(CSI)) {
      if (!captureInBlock(BSI, Var, ExprLoc, BuildAndDiagnose, CaptureType,
                          DeclRefType, Nested, *this))
        return true;
      Nested = true;
    } else if (CapturedRegionScopeInfo *RSI =
                   dyn_cast<CapturedRegionScopeInfo>(CSI)) {
      if (!captureInCapturedRegion(RSI, Var, ExprLoc, BuildAndDiagnose,
                                   CaptureType, DeclRefType, Nested, *this))
        return true;
      Nested = true;
    } else {
      LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI);
      if (!captureInLambda(LSI, Var, ExprLoc, BuildAndDiagnose, CaptureType,
                           DeclRefType, Nested, Kind, EllipsisLoc,
                           /*IsTopScope*/ I == N - 1, *this))
        return true;
      Nested = true;
    }
  }
  return false;
}

bool Sema::tryCaptureVariable(VarDecl *Var, SourceLocation Loc,
                              TryCaptureKind Kind,
                              SourceLocation EllipsisLoc) {
  QualType CaptureType;
  QualType DeclRefType;
  return tryCaptureVariable(Var, Loc, Kind, EllipsisLoc,
                            /*BuildAndDiagnose=*/true, CaptureType,
                            DeclRefType, nullptr);
}

bool Sema::NeedToCaptureVariable(VarDecl *Var, SourceLocation Loc) {
  QualType CaptureType;
  QualType DeclRefType;
  return !tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(),
                             /*BuildAndDiagnose=*/false, CaptureType,
                             DeclRefType, nullptr);
}

QualType Sema::getCapturedDeclRefType(VarDecl *Var, SourceLocation Loc) {
  QualType CaptureType;
  QualType DeclRefType;

  // Determine whether we can capture this variable.
  if (tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(),
                         /*BuildAndDiagnose=*/false, CaptureType,
                         DeclRefType, nullptr))
    return QualType();

  return DeclRefType;
}

// If either the type of the variable or the initializer is dependent,
// return false. Otherwise, determine whether the variable is a constant
// expression. Use this if you need to know if a variable that might or
// might not be dependent is truly a constant expression.
static inline bool IsVariableNonDependentAndAConstantExpression(
    VarDecl *Var, ASTContext &Context) {
  if (Var->getType()->isDependentType())
    return false;
  const VarDecl *DefVD = nullptr;
  Var->getAnyInitializer(DefVD);
  if (!DefVD)
    return false;
  EvaluatedStmt *Eval = DefVD->ensureEvaluatedStmt();
  Expr *Init = cast<Expr>(Eval->Value);
  if (Init->isValueDependent())
    return false;
  return IsVariableAConstantExpression(Var, Context);
}

void Sema::UpdateMarkingForLValueToRValue(Expr *E) {
  // Per C++11 [basic.def.odr], a variable is odr-used "unless it is
  // an object that satisfies the requirements for appearing in a
  // constant expression (5.19) and the lvalue-to-rvalue conversion (4.1)
  // is immediately applied." This function handles the lvalue-to-rvalue
  // conversion part.
  MaybeODRUseExprs.erase(E->IgnoreParens());

  // If we are in a lambda, check if this DeclRefExpr or MemberExpr refers
  // to a variable that is a constant expression, and if so, identify it as
  // a reference to a variable that does not involve an odr-use of that
  // variable.
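// Illustrative sketch (hypothetical user code):
//   void f() {
//     const int k = 42;
//     auto g = [] { return k; };  // OK without capturing 'k': the
//   }                             // lvalue-to-rvalue conversion yields a
//                                 // constant, so 'k' is not odr-used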
if (LambdaScopeInfo *LSI = getCurLambda()) { Expr *SansParensExpr = E->IgnoreParens(); VarDecl *Var = nullptr; if (DeclRefExpr *DRE = dyn_cast(SansParensExpr)) Var = dyn_cast(DRE->getFoundDecl()); else if (MemberExpr *ME = dyn_cast(SansParensExpr)) Var = dyn_cast(ME->getMemberDecl()); if (Var && IsVariableNonDependentAndAConstantExpression(Var, Context)) LSI->markVariableExprAsNonODRUsed(SansParensExpr); } } ExprResult Sema::ActOnConstantExpression(ExprResult Res) { Res = CorrectDelayedTyposInExpr(Res); if (!Res.isUsable()) return Res; // If a constant-expression is a reference to a variable where we delay // deciding whether it is an odr-use, just assume we will apply the // lvalue-to-rvalue conversion. In the one case where this doesn't happen // (a non-type template argument), we have special handling anyway. UpdateMarkingForLValueToRValue(Res.get()); return Res; } void Sema::CleanupVarDeclMarking() { for (Expr *E : MaybeODRUseExprs) { VarDecl *Var; SourceLocation Loc; if (DeclRefExpr *DRE = dyn_cast(E)) { Var = cast(DRE->getDecl()); Loc = DRE->getLocation(); } else if (MemberExpr *ME = dyn_cast(E)) { Var = cast(ME->getMemberDecl()); Loc = ME->getMemberLoc(); } else { llvm_unreachable("Unexpected expression"); } MarkVarDeclODRUsed(Var, Loc, *this, /*MaxFunctionScopeIndex Pointer*/ nullptr); } MaybeODRUseExprs.clear(); } static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc, VarDecl *Var, Expr *E) { assert((!E || isa(E) || isa(E)) && "Invalid Expr argument to DoMarkVarDeclReferenced"); Var->setReferenced(); TemplateSpecializationKind TSK = Var->getTemplateSpecializationKind(); bool OdrUseContext = isOdrUseContext(SemaRef); bool UsableInConstantExpr = Var->isUsableInConstantExpressions(SemaRef.Context); bool NeedDefinition = OdrUseContext || (isEvaluatableContext(SemaRef) && UsableInConstantExpr); VarTemplateSpecializationDecl *VarSpec = dyn_cast(Var); assert(!isa(Var) && "Can't instantiate a partial template specialization."); // If this might be a member specialization of a static data member, check // the specialization is visible. We already did the checks for variable // template specializations when we created them. if (NeedDefinition && TSK != TSK_Undeclared && !isa(Var)) SemaRef.checkSpecializationVisibility(Loc, Var); // Perform implicit instantiation of static data members, static data member // templates of class templates, and variable template specializations. Delay // instantiations of variable templates, except for those that could be used // in a constant expression. if (NeedDefinition && isTemplateInstantiation(TSK)) { // Per C++17 [temp.explicit]p10, we may instantiate despite an explicit // instantiation declaration if a variable is usable in a constant // expression (among other cases). bool TryInstantiating = TSK == TSK_ImplicitInstantiation || (TSK == TSK_ExplicitInstantiationDeclaration && UsableInConstantExpr); if (TryInstantiating) { SourceLocation PointOfInstantiation = Var->getPointOfInstantiation(); bool FirstInstantiation = PointOfInstantiation.isInvalid(); if (FirstInstantiation) { PointOfInstantiation = Loc; Var->setTemplateSpecializationKind(TSK, PointOfInstantiation); } bool InstantiationDependent = false; bool IsNonDependent = VarSpec ? !TemplateSpecializationType::anyDependentTemplateArguments( VarSpec->getTemplateArgsInfo(), InstantiationDependent) : true; // Do not instantiate specializations that are still type-dependent. 
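// Illustrative sketch (hypothetical user code):
//   template <typename T> struct A { static const int value = sizeof(T); };
//   int n = A<int>::value;  // 'value' is usable in a constant expression,
//                           // so its definition is instantiated eagerly
//                           // rather than deferred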
if (IsNonDependent) { if (UsableInConstantExpr) { // Do not defer instantiations of variables that could be used in a // constant expression. SemaRef.InstantiateVariableDefinition(PointOfInstantiation, Var); } else if (FirstInstantiation || isa(Var)) { // FIXME: For a specialization of a variable template, we don't // distinguish between "declaration and type implicitly instantiated" // and "implicit instantiation of definition requested", so we have // no direct way to avoid enqueueing the pending instantiation // multiple times. SemaRef.PendingInstantiations .push_back(std::make_pair(Var, PointOfInstantiation)); } } } } // Per C++11 [basic.def.odr], a variable is odr-used "unless it satisfies // the requirements for appearing in a constant expression (5.19) and, if // it is an object, the lvalue-to-rvalue conversion (4.1) // is immediately applied." We check the first part here, and // Sema::UpdateMarkingForLValueToRValue deals with the second part. // Note that we use the C++11 definition everywhere because nothing in // C++03 depends on whether we get the C++03 version correct. The second // part does not apply to references, since they are not objects. if (OdrUseContext && E && IsVariableAConstantExpression(Var, SemaRef.Context)) { // A reference initialized by a constant expression can never be // odr-used, so simply ignore it. if (!Var->getType()->isReferenceType() || (SemaRef.LangOpts.OpenMP && SemaRef.isOpenMPCapturedDecl(Var))) SemaRef.MaybeODRUseExprs.insert(E); } else if (OdrUseContext) { MarkVarDeclODRUsed(Var, Loc, SemaRef, /*MaxFunctionScopeIndex ptr*/ nullptr); } else if (isOdrUseContext(SemaRef, /*SkipDependentUses*/false)) { // If this is a dependent context, we don't need to mark variables as // odr-used, but we may still need to track them for lambda capture. // FIXME: Do we also need to do this inside dependent typeid expressions // (which are modeled as unevaluated at this point)? const bool RefersToEnclosingScope = (SemaRef.CurContext != Var->getDeclContext() && Var->getDeclContext()->isFunctionOrMethod() && Var->hasLocalStorage()); if (RefersToEnclosingScope) { LambdaScopeInfo *const LSI = SemaRef.getCurLambda(/*IgnoreNonLambdaCapturingScope=*/true); if (LSI && (!LSI->CallOperator || !LSI->CallOperator->Encloses(Var->getDeclContext()))) { // If a variable could potentially be odr-used, defer marking it so // until we finish analyzing the full expression for any // lvalue-to-rvalue // or discarded value conversions that would obviate odr-use. // Add it to the list of potential captures that will be analyzed // later (ActOnFinishFullExpr) for eventual capture and odr-use marking // unless the variable is a reference that was initialized by a constant // expression (this will never need to be captured or odr-used). assert(E && "Capture variable should be used in an expression."); if (!Var->getType()->isReferenceType() || !IsVariableNonDependentAndAConstantExpression(Var, SemaRef.Context)) LSI->addPotentialCapture(E->IgnoreParens()); } } } } /// Mark a variable referenced, and check whether it is odr-used /// (C++ [basic.def.odr]p2, C99 6.9p3). Note that this should not be /// used directly for normal expressions referring to VarDecl. 
void Sema::MarkVariableReferenced(SourceLocation Loc, VarDecl *Var) { DoMarkVarDeclReferenced(*this, Loc, Var, nullptr); } static void MarkExprReferenced(Sema &SemaRef, SourceLocation Loc, Decl *D, Expr *E, bool MightBeOdrUse) { if (SemaRef.isInOpenMPDeclareTargetContext()) SemaRef.checkDeclIsAllowedInOpenMPTarget(E, D); if (VarDecl *Var = dyn_cast(D)) { DoMarkVarDeclReferenced(SemaRef, Loc, Var, E); return; } SemaRef.MarkAnyDeclReferenced(Loc, D, MightBeOdrUse); // If this is a call to a method via a cast, also mark the method in the // derived class used in case codegen can devirtualize the call. const MemberExpr *ME = dyn_cast(E); if (!ME) return; CXXMethodDecl *MD = dyn_cast(ME->getMemberDecl()); if (!MD) return; // Only attempt to devirtualize if this is truly a virtual call. bool IsVirtualCall = MD->isVirtual() && ME->performsVirtualDispatch(SemaRef.getLangOpts()); if (!IsVirtualCall) return; // If it's possible to devirtualize the call, mark the called function // referenced. CXXMethodDecl *DM = MD->getDevirtualizedMethod( ME->getBase(), SemaRef.getLangOpts().AppleKext); if (DM) SemaRef.MarkAnyDeclReferenced(Loc, DM, MightBeOdrUse); } /// Perform reference-marking and odr-use handling for a DeclRefExpr. void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) { // TODO: update this with DR# once a defect report is filed. // C++11 defect. The address of a pure member should not be an ODR use, even // if it's a qualified reference. bool OdrUse = true; if (const CXXMethodDecl *Method = dyn_cast(E->getDecl())) if (Method->isVirtual() && !Method->getDevirtualizedMethod(Base, getLangOpts().AppleKext)) OdrUse = false; MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse); } /// Perform reference-marking and odr-use handling for a MemberExpr. void Sema::MarkMemberReferenced(MemberExpr *E) { // C++11 [basic.def.odr]p2: // A non-overloaded function whose name appears as a potentially-evaluated // expression or a member of a set of candidate functions, if selected by // overload resolution when referred to from a potentially-evaluated // expression, is odr-used, unless it is a pure virtual function and its // name is not explicitly qualified. bool MightBeOdrUse = true; if (E->performsVirtualDispatch(getLangOpts())) { if (CXXMethodDecl *Method = dyn_cast(E->getMemberDecl())) if (Method->isPure()) MightBeOdrUse = false; } SourceLocation Loc = E->getMemberLoc().isValid() ? E->getMemberLoc() : E->getBeginLoc(); MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, MightBeOdrUse); } /// Perform marking for a reference to an arbitrary declaration. It /// marks the declaration referenced, and performs odr-use checking for /// functions and variables. This method should not be used when building a /// normal expression which refers to a variable. void Sema::MarkAnyDeclReferenced(SourceLocation Loc, Decl *D, bool MightBeOdrUse) { if (MightBeOdrUse) { if (auto *VD = dyn_cast(D)) { MarkVariableReferenced(Loc, VD); return; } } if (auto *FD = dyn_cast(D)) { MarkFunctionReferenced(Loc, FD, MightBeOdrUse); return; } D->setReferenced(); } namespace { // Mark all of the declarations used by a type as referenced. // FIXME: Not fully implemented yet! We need to have a better understanding // of when we're entering a context we should not recurse into. // FIXME: This is and EvaluatedExprMarker are more-or-less equivalent to // TreeTransforms rebuilding the type in a new context. Rather than // duplicating the TreeTransform logic, we should consider reusing it here. 
// Currently that causes problems when rebuilding LambdaExprs. class MarkReferencedDecls : public RecursiveASTVisitor { Sema &S; SourceLocation Loc; public: typedef RecursiveASTVisitor Inherited; MarkReferencedDecls(Sema &S, SourceLocation Loc) : S(S), Loc(Loc) { } bool TraverseTemplateArgument(const TemplateArgument &Arg); }; } bool MarkReferencedDecls::TraverseTemplateArgument( const TemplateArgument &Arg) { { // A non-type template argument is a constant-evaluated context. EnterExpressionEvaluationContext Evaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); if (Arg.getKind() == TemplateArgument::Declaration) { if (Decl *D = Arg.getAsDecl()) S.MarkAnyDeclReferenced(Loc, D, true); } else if (Arg.getKind() == TemplateArgument::Expression) { S.MarkDeclarationsReferencedInExpr(Arg.getAsExpr(), false); } } return Inherited::TraverseTemplateArgument(Arg); } void Sema::MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T) { MarkReferencedDecls Marker(*this, Loc); Marker.TraverseType(T); } namespace { /// Helper class that marks all of the declarations referenced by /// potentially-evaluated subexpressions as "referenced". class EvaluatedExprMarker : public EvaluatedExprVisitor { Sema &S; bool SkipLocalVariables; public: typedef EvaluatedExprVisitor Inherited; EvaluatedExprMarker(Sema &S, bool SkipLocalVariables) : Inherited(S.Context), S(S), SkipLocalVariables(SkipLocalVariables) { } void VisitDeclRefExpr(DeclRefExpr *E) { // If we were asked not to visit local variables, don't. if (SkipLocalVariables) { if (VarDecl *VD = dyn_cast(E->getDecl())) if (VD->hasLocalStorage()) return; } S.MarkDeclRefReferenced(E); } void VisitMemberExpr(MemberExpr *E) { S.MarkMemberReferenced(E); Inherited::VisitMemberExpr(E); } void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { S.MarkFunctionReferenced( E->getBeginLoc(), const_cast(E->getTemporary()->getDestructor())); Visit(E->getSubExpr()); } void VisitCXXNewExpr(CXXNewExpr *E) { if (E->getOperatorNew()) S.MarkFunctionReferenced(E->getBeginLoc(), E->getOperatorNew()); if (E->getOperatorDelete()) S.MarkFunctionReferenced(E->getBeginLoc(), E->getOperatorDelete()); Inherited::VisitCXXNewExpr(E); } void VisitCXXDeleteExpr(CXXDeleteExpr *E) { if (E->getOperatorDelete()) S.MarkFunctionReferenced(E->getBeginLoc(), E->getOperatorDelete()); QualType Destroyed = S.Context.getBaseElementType(E->getDestroyedType()); if (const RecordType *DestroyedRec = Destroyed->getAs()) { CXXRecordDecl *Record = cast(DestroyedRec->getDecl()); S.MarkFunctionReferenced(E->getBeginLoc(), S.LookupDestructor(Record)); } Inherited::VisitCXXDeleteExpr(E); } void VisitCXXConstructExpr(CXXConstructExpr *E) { S.MarkFunctionReferenced(E->getBeginLoc(), E->getConstructor()); Inherited::VisitCXXConstructExpr(E); } void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { Visit(E->getExpr()); } void VisitImplicitCastExpr(ImplicitCastExpr *E) { Inherited::VisitImplicitCastExpr(E); if (E->getCastKind() == CK_LValueToRValue) S.UpdateMarkingForLValueToRValue(E->getSubExpr()); } }; } /// Mark any declarations that appear within this expression or any /// potentially-evaluated subexpressions as "referenced". /// /// \param SkipLocalVariables If true, don't mark local variables as /// 'referenced'. void Sema::MarkDeclarationsReferencedInExpr(Expr *E, bool SkipLocalVariables) { EvaluatedExprMarker(*this, SkipLocalVariables).Visit(E); } /// Emit a diagnostic that describes an effect on the run-time behavior /// of the program being compiled. 
/// /// This routine emits the given diagnostic when the code currently being /// type-checked is "potentially evaluated", meaning that there is a /// possibility that the code will actually be executable. Code in sizeof() /// expressions, code used only during overload resolution, etc., are not /// potentially evaluated. This routine will suppress such diagnostics or, /// in the absolutely nutty case of potentially potentially evaluated /// expressions (C++ typeid), queue the diagnostic to potentially emit it /// later. /// /// This routine should be used for all diagnostics that describe the run-time /// behavior of a program, such as passing a non-POD value through an ellipsis. /// Failure to do so will likely result in spurious diagnostics or failures /// during overload resolution or within sizeof/alignof/typeof/typeid. bool Sema::DiagRuntimeBehavior(SourceLocation Loc, const Stmt *Statement, const PartialDiagnostic &PD) { switch (ExprEvalContexts.back().Context) { case ExpressionEvaluationContext::Unevaluated: case ExpressionEvaluationContext::UnevaluatedList: case ExpressionEvaluationContext::UnevaluatedAbstract: case ExpressionEvaluationContext::DiscardedStatement: // The argument will never be evaluated, so don't complain. break; case ExpressionEvaluationContext::ConstantEvaluated: // Relevant diagnostics should be produced by constant evaluation. break; case ExpressionEvaluationContext::PotentiallyEvaluated: case ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed: if (Statement && getCurFunctionOrMethodDecl()) { FunctionScopes.back()->PossiblyUnreachableDiags. push_back(sema::PossiblyUnreachableDiag(PD, Loc, Statement)); return true; } // The initializer of a constexpr variable or of the first declaration of a // static data member is not syntactically a constant evaluated constant, // but nonetheless is always required to be a constant expression, so we // can skip diagnosing. // FIXME: Using the mangling context here is a hack. if (auto *VD = dyn_cast_or_null( ExprEvalContexts.back().ManglingContextDecl)) { if (VD->isConstexpr() || (VD->isStaticDataMember() && VD->isFirstDecl() && !VD->isInline())) break; // FIXME: For any other kind of variable, we should build a CFG for its // initializer and check whether the context in question is reachable. } Diag(Loc, PD); return true; } return false; } bool Sema::CheckCallReturnType(QualType ReturnType, SourceLocation Loc, CallExpr *CE, FunctionDecl *FD) { if (ReturnType->isVoidType() || !ReturnType->isIncompleteType()) return false; // If we're inside a decltype's expression, don't check for a valid return // type or construct temporaries until we know whether this is the last call. if (ExprEvalContexts.back().ExprContext == ExpressionEvaluationContextRecord::EK_Decltype) { ExprEvalContexts.back().DelayedDecltypeCalls.push_back(CE); return false; } class CallReturnIncompleteDiagnoser : public TypeDiagnoser { FunctionDecl *FD; CallExpr *CE; public: CallReturnIncompleteDiagnoser(FunctionDecl *FD, CallExpr *CE) : FD(FD), CE(CE) { } void diagnose(Sema &S, SourceLocation Loc, QualType T) override { if (!FD) { S.Diag(Loc, diag::err_call_incomplete_return) << T << CE->getSourceRange(); return; } S.Diag(Loc, diag::err_call_function_incomplete_return) << CE->getSourceRange() << FD->getDeclName() << T; S.Diag(FD->getLocation(), diag::note_entity_declared_at) << FD->getDeclName(); } } Diagnoser(FD, CE); if (RequireCompleteType(Loc, ReturnType, Diagnoser)) return true; return false; } // Diagnose the s/=/==/ and s/\|=/!=/ typos. 
// Note that adding parentheses will prevent this condition from triggering,
// which is what we want.
void Sema::DiagnoseAssignmentAsCondition(Expr *E) {
  SourceLocation Loc;
  unsigned diagnostic = diag::warn_condition_is_assignment;
  bool IsOrAssign = false;

  if (BinaryOperator *Op = dyn_cast<BinaryOperator>(E)) {
    if (Op->getOpcode() != BO_Assign && Op->getOpcode() != BO_OrAssign)
      return;

    IsOrAssign = Op->getOpcode() == BO_OrAssign;

    // Greylist some idioms by putting them into a warning subcategory.
    if (ObjCMessageExpr *ME =
            dyn_cast<ObjCMessageExpr>(Op->getRHS()->IgnoreParenCasts())) {
      Selector Sel = ME->getSelector();

      // self = [<foo> init...]
      if (isSelfExpr(Op->getLHS()) && ME->getMethodFamily() == OMF_init)
        diagnostic = diag::warn_condition_is_idiomatic_assignment;

      // <foo> = [<foo> nextObject]
      else if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "nextObject")
        diagnostic = diag::warn_condition_is_idiomatic_assignment;
    }

    Loc = Op->getOperatorLoc();
  } else if (CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E)) {
    if (Op->getOperator() != OO_Equal && Op->getOperator() != OO_PipeEqual)
      return;

    IsOrAssign = Op->getOperator() == OO_PipeEqual;
    Loc = Op->getOperatorLoc();
  } else if (PseudoObjectExpr *POE = dyn_cast<PseudoObjectExpr>(E))
    return DiagnoseAssignmentAsCondition(POE->getSyntacticForm());
  else {
    // Not an assignment.
    return;
  }

  Diag(Loc, diagnostic) << E->getSourceRange();

  SourceLocation Open = E->getBeginLoc();
  SourceLocation Close = getLocForEndOfToken(E->getSourceRange().getEnd());
  Diag(Loc, diag::note_condition_assign_silence)
      << FixItHint::CreateInsertion(Open, "(")
      << FixItHint::CreateInsertion(Close, ")");

  if (IsOrAssign)
    Diag(Loc, diag::note_condition_or_assign_to_comparison)
        << FixItHint::CreateReplacement(Loc, "!=");
  else
    Diag(Loc, diag::note_condition_assign_to_comparison)
        << FixItHint::CreateReplacement(Loc, "==");
}

/// Redundant parentheses over an equality comparison can indicate
/// that the user intended an assignment used as condition.
void Sema::DiagnoseEqualityWithExtraParens(ParenExpr *ParenE) {
  // Don't warn if the parens came from a macro.
  SourceLocation parenLoc = ParenE->getBeginLoc();
  if (parenLoc.isInvalid() || parenLoc.isMacroID())
    return;

  // Don't warn for dependent expressions.
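// Illustrative sketch (hypothetical user code) for the two diagnostics
// defined here:
//   if (x = 0) {}     // warning: using the result of an assignment as a
//                     // condition; fix-its offer '==' or silencing parens
//   if ((x == 0)) {}  // warning: equality comparison with extra parens,
//                     // which often signals an intended assignment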
if (ParenE->isTypeDependent()) return; Expr *E = ParenE->IgnoreParens(); if (BinaryOperator *opE = dyn_cast(E)) if (opE->getOpcode() == BO_EQ && opE->getLHS()->IgnoreParenImpCasts()->isModifiableLvalue(Context) == Expr::MLV_Valid) { SourceLocation Loc = opE->getOperatorLoc(); Diag(Loc, diag::warn_equality_with_extra_parens) << E->getSourceRange(); SourceRange ParenERange = ParenE->getSourceRange(); Diag(Loc, diag::note_equality_comparison_silence) << FixItHint::CreateRemoval(ParenERange.getBegin()) << FixItHint::CreateRemoval(ParenERange.getEnd()); Diag(Loc, diag::note_equality_comparison_to_assign) << FixItHint::CreateReplacement(Loc, "="); } } ExprResult Sema::CheckBooleanCondition(SourceLocation Loc, Expr *E, bool IsConstexpr) { DiagnoseAssignmentAsCondition(E); if (ParenExpr *parenE = dyn_cast(E)) DiagnoseEqualityWithExtraParens(parenE); ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); if (!E->isTypeDependent()) { if (getLangOpts().CPlusPlus) return CheckCXXBooleanCondition(E, IsConstexpr); // C++ 6.4p4 ExprResult ERes = DefaultFunctionArrayLvalueConversion(E); if (ERes.isInvalid()) return ExprError(); E = ERes.get(); QualType T = E->getType(); if (!T->isScalarType()) { // C99 6.8.4.1p1 Diag(Loc, diag::err_typecheck_statement_requires_scalar) << T << E->getSourceRange(); return ExprError(); } CheckBoolLikeConversion(E, Loc); } return E; } Sema::ConditionResult Sema::ActOnCondition(Scope *S, SourceLocation Loc, Expr *SubExpr, ConditionKind CK) { // Empty conditions are valid in for-statements. if (!SubExpr) return ConditionResult(); ExprResult Cond; switch (CK) { case ConditionKind::Boolean: Cond = CheckBooleanCondition(Loc, SubExpr); break; case ConditionKind::ConstexprIf: Cond = CheckBooleanCondition(Loc, SubExpr, true); break; case ConditionKind::Switch: Cond = CheckSwitchCondition(Loc, SubExpr); break; } if (Cond.isInvalid()) return ConditionError(); // FIXME: FullExprArg doesn't have an invalid bit, so check nullness instead. FullExprArg FullExpr = MakeFullExpr(Cond.get(), Loc); if (!FullExpr.get()) return ConditionError(); return ConditionResult(*this, nullptr, FullExpr, CK == ConditionKind::ConstexprIf); } namespace { /// A visitor for rebuilding a call to an __unknown_any expression /// to have an appropriate type. struct RebuildUnknownAnyFunction : StmtVisitor { Sema &S; RebuildUnknownAnyFunction(Sema &S) : S(S) {} ExprResult VisitStmt(Stmt *S) { llvm_unreachable("unexpected statement!"); } ExprResult VisitExpr(Expr *E) { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_call) << E->getSourceRange(); return ExprError(); } /// Rebuild an expression which simply semantically wraps another /// expression which it shares the type and value kind of. 
template ExprResult rebuildSugarExpr(T *E) { ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); Expr *SubExpr = SubResult.get(); E->setSubExpr(SubExpr); E->setType(SubExpr->getType()); E->setValueKind(SubExpr->getValueKind()); assert(E->getObjectKind() == OK_Ordinary); return E; } ExprResult VisitParenExpr(ParenExpr *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryExtension(UnaryOperator *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryAddrOf(UnaryOperator *E) { ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); Expr *SubExpr = SubResult.get(); E->setSubExpr(SubExpr); E->setType(S.Context.getPointerType(SubExpr->getType())); assert(E->getValueKind() == VK_RValue); assert(E->getObjectKind() == OK_Ordinary); return E; } ExprResult resolveDecl(Expr *E, ValueDecl *VD) { if (!isa(VD)) return VisitExpr(E); E->setType(VD->getType()); assert(E->getValueKind() == VK_RValue); if (S.getLangOpts().CPlusPlus && !(isa(VD) && cast(VD)->isInstance())) E->setValueKind(VK_LValue); return E; } ExprResult VisitMemberExpr(MemberExpr *E) { return resolveDecl(E, E->getMemberDecl()); } ExprResult VisitDeclRefExpr(DeclRefExpr *E) { return resolveDecl(E, E->getDecl()); } }; } /// Given a function expression of unknown-any type, try to rebuild it /// to have a function type. static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *FunctionExpr) { ExprResult Result = RebuildUnknownAnyFunction(S).Visit(FunctionExpr); if (Result.isInvalid()) return ExprError(); return S.DefaultFunctionArrayConversion(Result.get()); } namespace { /// A visitor for rebuilding an expression of type __unknown_anytype /// into one which resolves the type directly on the referring /// expression. Strict preservation of the original source /// structure is not a goal. struct RebuildUnknownAnyExpr : StmtVisitor { Sema &S; /// The current destination type. QualType DestType; RebuildUnknownAnyExpr(Sema &S, QualType CastType) : S(S), DestType(CastType) {} ExprResult VisitStmt(Stmt *S) { llvm_unreachable("unexpected statement!"); } ExprResult VisitExpr(Expr *E) { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr) << E->getSourceRange(); return ExprError(); } ExprResult VisitCallExpr(CallExpr *E); ExprResult VisitObjCMessageExpr(ObjCMessageExpr *E); /// Rebuild an expression which simply semantically wraps another /// expression which it shares the type and value kind of. template ExprResult rebuildSugarExpr(T *E) { ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); Expr *SubExpr = SubResult.get(); E->setSubExpr(SubExpr); E->setType(SubExpr->getType()); E->setValueKind(SubExpr->getValueKind()); assert(E->getObjectKind() == OK_Ordinary); return E; } ExprResult VisitParenExpr(ParenExpr *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryExtension(UnaryOperator *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryAddrOf(UnaryOperator *E) { const PointerType *Ptr = DestType->getAs(); if (!Ptr) { S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof) << E->getSourceRange(); return ExprError(); } if (isa(E->getSubExpr())) { S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof_call) << E->getSourceRange(); return ExprError(); } assert(E->getValueKind() == VK_RValue); assert(E->getObjectKind() == OK_Ordinary); E->setType(DestType); // Build the sub-expression as if it were an object of the pointee type. 
    DestType = Ptr->getPointeeType();
    ExprResult SubResult = Visit(E->getSubExpr());
    if (SubResult.isInvalid()) return ExprError();
    E->setSubExpr(SubResult.get());
    return E;
  }

  ExprResult VisitImplicitCastExpr(ImplicitCastExpr *E);

  ExprResult resolveDecl(Expr *E, ValueDecl *VD);

  ExprResult VisitMemberExpr(MemberExpr *E) {
    return resolveDecl(E, E->getMemberDecl());
  }

  ExprResult VisitDeclRefExpr(DeclRefExpr *E) {
    return resolveDecl(E, E->getDecl());
  }
};

}

/// Rebuilds a call expression which yielded __unknown_anytype.
ExprResult RebuildUnknownAnyExpr::VisitCallExpr(CallExpr *E) {
  Expr *CalleeExpr = E->getCallee();

  enum FnKind {
    FK_MemberFunction,
    FK_FunctionPointer,
    FK_BlockPointer
  };

  FnKind Kind;
  QualType CalleeType = CalleeExpr->getType();
  if (CalleeType == S.Context.BoundMemberTy) {
    assert(isa<CXXMemberCallExpr>(E) || isa<CXXOperatorCallExpr>(E));
    Kind = FK_MemberFunction;
    CalleeType = Expr::findBoundMemberType(CalleeExpr);
  } else if (const PointerType *Ptr = CalleeType->getAs<PointerType>()) {
    CalleeType = Ptr->getPointeeType();
    Kind = FK_FunctionPointer;
  } else {
    CalleeType = CalleeType->castAs<BlockPointerType>()->getPointeeType();
    Kind = FK_BlockPointer;
  }
  const FunctionType *FnType = CalleeType->castAs<FunctionType>();

  // Verify that this is a legal result type of a function.
  if (DestType->isArrayType() || DestType->isFunctionType()) {
    unsigned diagID = diag::err_func_returning_array_function;
    if (Kind == FK_BlockPointer)
      diagID = diag::err_block_returning_array_function;

    S.Diag(E->getExprLoc(), diagID)
        << DestType->isFunctionType() << DestType;
    return ExprError();
  }

  // Otherwise, go ahead and set DestType as the call's result.
  E->setType(DestType.getNonLValueExprType(S.Context));
  E->setValueKind(Expr::getValueKindForType(DestType));
  assert(E->getObjectKind() == OK_Ordinary);

  // Rebuild the function type, replacing the result type with DestType.
  const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FnType);
  if (Proto) {
    // __unknown_anytype(...) is a special case used by the debugger when
    // it has no idea what a function's signature is.
    //
    // We want to build this call essentially under the K&R
    // unprototyped rules, but making a FunctionNoProtoType in C++
    // would foul up all sorts of assumptions. However, we cannot
    // simply pass all arguments as variadic arguments, nor can we
    // portably just call the function under a non-variadic type; see
    // the comment on IR-gen's TargetInfo::isNoProtoCallVariadic.
    // However, it turns out that in practice it is generally safe to
    // call a function declared as "A foo(B,C,D);" under the prototype
    // "A foo(B,C,D,...);". The only known exception is with the
    // Windows ABI, where any variadic function is implicitly cdecl
    // regardless of its normal CC. Therefore we change the parameter
    // types to match the types of the arguments.
    //
    // This is a hack, but it is far superior to moving the
    // corresponding target-specific code from IR-gen to Sema/AST.
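// Illustrative sketch (hypothetical debugger-style code): given
//   extern __unknown_anytype f(...);
//   double d = (double)f(1, 2.0f);
// the call is rebuilt as if 'f' had been declared
// 'double f(int, float, ...)', matching the parameter types to the
// argument types as described above.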
ArrayRef ParamTypes = Proto->getParamTypes(); SmallVector ArgTypes; if (ParamTypes.empty() && Proto->isVariadic()) { // the special case ArgTypes.reserve(E->getNumArgs()); for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { Expr *Arg = E->getArg(i); QualType ArgType = Arg->getType(); if (E->isLValue()) { ArgType = S.Context.getLValueReferenceType(ArgType); } else if (E->isXValue()) { ArgType = S.Context.getRValueReferenceType(ArgType); } ArgTypes.push_back(ArgType); } ParamTypes = ArgTypes; } DestType = S.Context.getFunctionType(DestType, ParamTypes, Proto->getExtProtoInfo()); } else { DestType = S.Context.getFunctionNoProtoType(DestType, FnType->getExtInfo()); } // Rebuild the appropriate pointer-to-function type. switch (Kind) { case FK_MemberFunction: // Nothing to do. break; case FK_FunctionPointer: DestType = S.Context.getPointerType(DestType); break; case FK_BlockPointer: DestType = S.Context.getBlockPointerType(DestType); break; } // Finally, we can recurse. ExprResult CalleeResult = Visit(CalleeExpr); if (!CalleeResult.isUsable()) return ExprError(); E->setCallee(CalleeResult.get()); // Bind a temporary if necessary. return S.MaybeBindToTemporary(E); } ExprResult RebuildUnknownAnyExpr::VisitObjCMessageExpr(ObjCMessageExpr *E) { // Verify that this is a legal result type of a call. if (DestType->isArrayType() || DestType->isFunctionType()) { S.Diag(E->getExprLoc(), diag::err_func_returning_array_function) << DestType->isFunctionType() << DestType; return ExprError(); } // Rewrite the method result type if available. if (ObjCMethodDecl *Method = E->getMethodDecl()) { assert(Method->getReturnType() == S.Context.UnknownAnyTy); Method->setReturnType(DestType); } // Change the type of the message. E->setType(DestType.getNonReferenceType()); E->setValueKind(Expr::getValueKindForType(DestType)); return S.MaybeBindToTemporary(E); } ExprResult RebuildUnknownAnyExpr::VisitImplicitCastExpr(ImplicitCastExpr *E) { // The only case we should ever see here is a function-to-pointer decay. if (E->getCastKind() == CK_FunctionToPointerDecay) { assert(E->getValueKind() == VK_RValue); assert(E->getObjectKind() == OK_Ordinary); E->setType(DestType); // Rebuild the sub-expression as the pointee (function) type. DestType = DestType->castAs()->getPointeeType(); ExprResult Result = Visit(E->getSubExpr()); if (!Result.isUsable()) return ExprError(); E->setSubExpr(Result.get()); return E; } else if (E->getCastKind() == CK_LValueToRValue) { assert(E->getValueKind() == VK_RValue); assert(E->getObjectKind() == OK_Ordinary); assert(isa(E->getType())); E->setType(DestType); // The sub-expression has to be a lvalue reference, so rebuild it as such. 
DestType = S.Context.getLValueReferenceType(DestType); ExprResult Result = Visit(E->getSubExpr()); if (!Result.isUsable()) return ExprError(); E->setSubExpr(Result.get()); return E; } else { llvm_unreachable("Unhandled cast type!"); } } ExprResult RebuildUnknownAnyExpr::resolveDecl(Expr *E, ValueDecl *VD) { ExprValueKind ValueKind = VK_LValue; QualType Type = DestType; // We know how to make this work for certain kinds of decls: // - functions if (FunctionDecl *FD = dyn_cast(VD)) { if (const PointerType *Ptr = Type->getAs()) { DestType = Ptr->getPointeeType(); ExprResult Result = resolveDecl(E, VD); if (Result.isInvalid()) return ExprError(); return S.ImpCastExprToType(Result.get(), Type, CK_FunctionToPointerDecay, VK_RValue); } if (!Type->isFunctionType()) { S.Diag(E->getExprLoc(), diag::err_unknown_any_function) << VD << E->getSourceRange(); return ExprError(); } if (const FunctionProtoType *FT = Type->getAs()) { // We must match the FunctionDecl's type to the hack introduced in // RebuildUnknownAnyExpr::VisitCallExpr to vararg functions of unknown // type. See the lengthy commentary in that routine. QualType FDT = FD->getType(); const FunctionType *FnType = FDT->castAs(); const FunctionProtoType *Proto = dyn_cast_or_null(FnType); DeclRefExpr *DRE = dyn_cast(E); if (DRE && Proto && Proto->getParamTypes().empty() && Proto->isVariadic()) { SourceLocation Loc = FD->getLocation(); FunctionDecl *NewFD = FunctionDecl::Create(S.Context, FD->getDeclContext(), Loc, Loc, FD->getNameInfo().getName(), DestType, FD->getTypeSourceInfo(), SC_None, false/*isInlineSpecified*/, FD->hasPrototype(), false/*isConstexprSpecified*/); if (FD->getQualifier()) NewFD->setQualifierInfo(FD->getQualifierLoc()); SmallVector Params; for (const auto &AI : FT->param_types()) { ParmVarDecl *Param = S.BuildParmVarDeclForTypedef(FD, Loc, AI); Param->setScopeInfo(0, Params.size()); Params.push_back(Param); } NewFD->setParams(Params); DRE->setDecl(NewFD); VD = DRE->getDecl(); } } if (CXXMethodDecl *MD = dyn_cast(FD)) if (MD->isInstance()) { ValueKind = VK_RValue; Type = S.Context.BoundMemberTy; } // Function references aren't l-values in C. if (!S.getLangOpts().CPlusPlus) ValueKind = VK_RValue; // - variables } else if (isa(VD)) { if (const ReferenceType *RefTy = Type->getAs()) { Type = RefTy->getPointeeType(); } else if (Type->isFunctionType()) { S.Diag(E->getExprLoc(), diag::err_unknown_any_var_function_type) << VD << E->getSourceRange(); return ExprError(); } // - nothing else } else { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_decl) << VD << E->getSourceRange(); return ExprError(); } // Modifying the declaration like this is friendly to IR-gen but // also really dangerous. VD->setType(DestType); E->setType(Type); E->setValueKind(ValueKind); return E; } /// Check a cast of an unknown-any type. We intentionally only /// trigger this for C-style casts. ExprResult Sema::checkUnknownAnyCast(SourceRange TypeRange, QualType CastType, Expr *CastExpr, CastKind &CastKind, ExprValueKind &VK, CXXCastPath &Path) { // The type we're casting to must be either void or complete. if (!CastType->isVoidType() && RequireCompleteType(TypeRange.getBegin(), CastType, diag::err_typecheck_cast_to_incomplete)) return ExprError(); // Rewrite the casted expression from scratch. 
  ExprResult result = RebuildUnknownAnyExpr(*this, CastType).Visit(CastExpr);
  if (!result.isUsable()) return ExprError();

  CastExpr = result.get();
  VK = CastExpr->getValueKind();
  CastKind = CK_NoOp;

  return CastExpr;
}

ExprResult Sema::forceUnknownAnyToType(Expr *E, QualType ToType) {
  return RebuildUnknownAnyExpr(*this, ToType).Visit(E);
}

ExprResult Sema::checkUnknownAnyArg(SourceLocation callLoc,
                                    Expr *arg, QualType &paramType) {
  // If the syntactic form of the argument is not an explicit cast of
  // any sort, just do default argument promotion.
  ExplicitCastExpr *castArg = dyn_cast<ExplicitCastExpr>(arg->IgnoreParens());
  if (!castArg) {
    ExprResult result = DefaultArgumentPromotion(arg);
    if (result.isInvalid()) return ExprError();
    paramType = result.get()->getType();
    return result;
  }

  // Otherwise, use the type that was written in the explicit cast.
  assert(!arg->hasPlaceholderType());
  paramType = castArg->getTypeAsWritten();

  // Copy-initialize a parameter of that type.
  InitializedEntity entity =
    InitializedEntity::InitializeParameter(Context, paramType,
                                           /*consumed*/ false);
  return PerformCopyInitialization(entity, callLoc, arg);
}

static ExprResult diagnoseUnknownAnyExpr(Sema &S, Expr *E) {
  Expr *orig = E;
  unsigned diagID = diag::err_uncasted_use_of_unknown_any;
  while (true) {
    E = E->IgnoreParenImpCasts();
    if (CallExpr *call = dyn_cast<CallExpr>(E)) {
      E = call->getCallee();
      diagID = diag::err_uncasted_call_of_unknown_any;
    } else {
      break;
    }
  }

  SourceLocation loc;
  NamedDecl *d;
  if (DeclRefExpr *ref = dyn_cast<DeclRefExpr>(E)) {
    loc = ref->getLocation();
    d = ref->getDecl();
  } else if (MemberExpr *mem = dyn_cast<MemberExpr>(E)) {
    loc = mem->getMemberLoc();
    d = mem->getMemberDecl();
  } else if (ObjCMessageExpr *msg = dyn_cast<ObjCMessageExpr>(E)) {
    diagID = diag::err_uncasted_call_of_unknown_any;
    loc = msg->getSelectorStartLoc();
    d = msg->getMethodDecl();
    if (!d) {
      S.Diag(loc, diag::err_uncasted_send_to_unknown_any_method)
        << static_cast<unsigned>(msg->isClassMessage()) << msg->getSelector()
        << orig->getSourceRange();
      return ExprError();
    }
  } else {
    S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr)
      << E->getSourceRange();
    return ExprError();
  }

  S.Diag(loc, diagID) << d << orig->getSourceRange();

  // Never recoverable.
  return ExprError();
}

/// Check for operands with placeholder types and complain if found.
/// Returns ExprError() if there was an error and no recovery was possible.
ExprResult Sema::CheckPlaceholderExpr(Expr *E) {
  if (!getLangOpts().CPlusPlus) {
    // C cannot handle TypoExpr nodes on either side of a binop because it
    // doesn't handle dependent types properly, so make sure any TypoExprs have
    // been dealt with before checking the operands.
    ExprResult Result = CorrectDelayedTyposInExpr(E);
    if (!Result.isUsable()) return ExprError();
    E = Result.get();
  }

  const BuiltinType *placeholderType = E->getType()->getAsPlaceholderType();
  if (!placeholderType) return E;

  switch (placeholderType->getKind()) {

  // Overloaded expressions.
  case BuiltinType::Overload: {
    // Try to resolve a single function template specialization.
    // This is obligatory.
    ExprResult Result = E;
    if (ResolveAndFixSingleFunctionTemplateSpecialization(Result, false))
      return Result;

    // No guarantees that ResolveAndFixSingleFunctionTemplateSpecialization
    // leaves Result unchanged on failure.
    Result = E;
    if (resolveAndFixAddressOfOnlyViableOverloadCandidate(Result))
      return Result;

    // If that failed, try to recover with a call.
    tryToRecoverWithCall(Result, PDiag(diag::err_ovl_unresolvable),
                         /*complain*/ true);
    return Result;
  }

  // Bound member functions.
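  // (A bound member is a member function named without being called, e.g.
  // "x.f" with no argument list; tryToRecoverWithCall suggests adding one.)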
  case BuiltinType::BoundMember: {
    ExprResult result = E;
    const Expr *BME = E->IgnoreParens();
    PartialDiagnostic PD = PDiag(diag::err_bound_member_function);
    // Try to give a nicer diagnostic if it is a bound member that we recognize.
    if (isa<CXXPseudoDestructorExpr>(BME)) {
      PD = PDiag(diag::err_dtor_expr_without_call) << /*pseudo-destructor*/ 1;
    } else if (const auto *ME = dyn_cast<MemberExpr>(BME)) {
      if (ME->getMemberNameInfo().getName().getNameKind() ==
          DeclarationName::CXXDestructorName)
        PD = PDiag(diag::err_dtor_expr_without_call) << /*destructor*/ 0;
    }
    tryToRecoverWithCall(result, PD,
                         /*complain*/ true);
    return result;
  }

  // ARC unbridged casts.
  case BuiltinType::ARCUnbridgedCast: {
    Expr *realCast = stripARCUnbridgedCast(E);
    diagnoseARCUnbridgedCast(realCast);
    return realCast;
  }

  // Expressions of unknown type.
  case BuiltinType::UnknownAny:
    return diagnoseUnknownAnyExpr(*this, E);

  // Pseudo-objects.
  case BuiltinType::PseudoObject:
    return checkPseudoObjectRValue(E);

  case BuiltinType::BuiltinFn: {
    // Accept __noop without parens by implicitly converting it to a call expr.
    auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts());
    if (DRE) {
      auto *FD = cast<FunctionDecl>(DRE->getDecl());
      if (FD->getBuiltinID() == Builtin::BI__noop) {
        E = ImpCastExprToType(E, Context.getPointerType(FD->getType()),
                              CK_BuiltinFnToFnPtr)
                .get();
        return CallExpr::Create(Context, E, /*Args=*/{}, Context.IntTy,
                                VK_RValue, SourceLocation());
      }
    }

    Diag(E->getBeginLoc(), diag::err_builtin_fn_use);
    return ExprError();
  }

  // Expressions of unknown type.
  case BuiltinType::OMPArraySection:
    Diag(E->getBeginLoc(), diag::err_omp_array_section_use);
    return ExprError();

  // Everything else should be impossible.
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
  case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
  case BuiltinType::Id:
#include "clang/Basic/OpenCLExtensionTypes.def"
#define BUILTIN_TYPE(Id, SingletonId) case BuiltinType::Id:
#define PLACEHOLDER_TYPE(Id, SingletonId)
#include "clang/AST/BuiltinTypes.def"
    break;
  }

  llvm_unreachable("invalid placeholder type!");
}

bool Sema::CheckCaseExpression(Expr *E) {
  if (E->isTypeDependent())
    return true;
  if (E->isValueDependent() || E->isIntegerConstantExpr(Context))
    return E->getType()->isIntegralOrEnumerationType();
  return false;
}

/// ActOnObjCBoolLiteral - Parse {__objc_yes,__objc_no} literals.
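/// In Objective-C source these usually appear as the @YES and @NO literals,
/// which the parser desugars to __objc_yes and __objc_no.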
ExprResult Sema::ActOnObjCBoolLiteral(SourceLocation OpLoc,
                                      tok::TokenKind Kind) {
  assert((Kind == tok::kw___objc_yes || Kind == tok::kw___objc_no) &&
         "Unknown Objective-C Boolean value!");
  QualType BoolT = Context.ObjCBuiltinBoolTy;
  if (!Context.getBOOLDecl()) {
    LookupResult Result(*this, &Context.Idents.get("BOOL"), OpLoc,
                        Sema::LookupOrdinaryName);
    if (LookupName(Result, getCurScope()) && Result.isSingleResult()) {
      NamedDecl *ND = Result.getFoundDecl();
      if (TypedefDecl *TD = dyn_cast<TypedefDecl>(ND))
        Context.setBOOLDecl(TD);
    }
  }
  if (Context.getBOOLDecl())
    BoolT = Context.getBOOLType();
  return new (Context)
      ObjCBoolLiteralExpr(Kind == tok::kw___objc_yes, BoolT, OpLoc);
}

ExprResult Sema::ActOnObjCAvailabilityCheckExpr(
    llvm::ArrayRef<AvailabilitySpec> AvailSpecs, SourceLocation AtLoc,
    SourceLocation RParen) {
  StringRef Platform = getASTContext().getTargetInfo().getPlatformName();

  auto Spec = std::find_if(AvailSpecs.begin(), AvailSpecs.end(),
                           [&](const AvailabilitySpec &Spec) {
                             return Spec.getPlatform() == Platform;
                           });

  VersionTuple Version;
  if (Spec != AvailSpecs.end())
    Version = Spec->getVersion();

  // The use of `@available` in the enclosing function should be analyzed to
  // warn when it's used inappropriately (i.e. not if(@available)).
  if (getCurFunctionOrMethodDecl())
    getEnclosingFunction()->HasPotentialAvailabilityViolations = true;
  else if (getCurBlock() || getCurLambda())
    getCurFunction()->HasPotentialAvailabilityViolations = true;

  return new (Context)
      ObjCAvailabilityCheckExpr(Version, AtLoc, RParen, Context.BoolTy);
}
Index: projects/clang800-import/contrib/llvm/tools/clang
===================================================================
--- projects/clang800-import/contrib/llvm/tools/clang	(revision 344547)
+++ projects/clang800-import/contrib/llvm/tools/clang	(revision 344548)

Property changes on: projects/clang800-import/contrib/llvm/tools/clang
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/clang/dist-release_80:r344177-344546
Index: projects/clang800-import/contrib/llvm/tools/lld/ELF/Options.td
===================================================================
--- projects/clang800-import/contrib/llvm/tools/lld/ELF/Options.td	(revision 344547)
+++ projects/clang800-import/contrib/llvm/tools/lld/ELF/Options.td	(revision 344548)
@@ -1,521 +1,521 @@
include "llvm/Option/OptParser.td"

// For options whose names are multiple letters, either one dash or
// two can precede the option name except those that start with 'o'.
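// (For illustration: --entry=main, --entry main, -entry=main and -entry main
// are all accepted spellings, but 'o'-prefixed options require two dashes so
// that a single-dash spelling such as -omagic still parses as "-o magic".)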
class F<string name>: Flag<["--", "-"], name>;
class J<string name>: Joined<["--", "-"], name>;

multiclass Eq<string name, string help> {
  def NAME: Separate<["--", "-"], name>;
  def NAME # _eq: Joined<["--", "-"], name # "=">,
    Alias<!cast<Separate>(NAME)>, HelpText<help>;
}

multiclass B<string name, string help1, string help2> {
  def NAME: Flag<["--", "-"], name>, HelpText<help1>;
  def no_ # NAME: Flag<["--", "-"], "no-" # name>, HelpText<help2>;
}

defm auxiliary: Eq<"auxiliary", "Set DT_AUXILIARY field to the specified name">;

def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind defined symbols locally">;

def Bsymbolic_functions: F<"Bsymbolic-functions">,
  HelpText<"Bind defined function symbols locally">;

def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries (default)">;

def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">;

def build_id: F<"build-id">, HelpText<"Alias for --build-id=fast">;

def build_id_eq: J<"build-id=">, HelpText<"Generate build ID note">,
-  MetaVarName<"[fast,md5,sha,uuid,0x<hexstring>]">;
+  MetaVarName<"[fast,md5,sha1,uuid,0x<hexstring>]">;

defm check_sections: B<"check-sections",
    "Check section addresses for overlaps (default)",
    "Do not check section addresses for overlaps">;

defm compress_debug_sections:
  Eq<"compress-debug-sections", "Compress DWARF debug sections">,
  MetaVarName<"[none,zlib]">;

defm defsym: Eq<"defsym", "Define a symbol alias">,
  MetaVarName<"<symbol>=<value>">;

defm split_stack_adjust_size
    : Eq<"split-stack-adjust-size",
         "Specify adjustment to stack size when a split-stack function calls a "
         "non-split-stack function">,
      MetaVarName<"<value>">;

defm library_path:
  Eq<"library-path", "Add a directory to the library search path">,
  MetaVarName<"<dir>">;

def O: JoinedOrSeparate<["-"], "O">, HelpText<"Optimize output file size">;

defm Tbss: Eq<"Tbss", "Same as --section-start with .bss as the sectionname">;

defm Tdata: Eq<"Tdata", "Same as --section-start with .data as the sectionname">;

defm Ttext: Eq<"Ttext", "Same as --section-start with .text as the sectionname">;

defm allow_multiple_definition: B<"allow-multiple-definition",
    "Allow multiple definitions",
    "Do not allow multiple definitions (default)">;

defm apply_dynamic_relocs: B<"apply-dynamic-relocs",
    "Apply link-time values for dynamic relocations",
    "Do not apply link-time values for dynamic relocations (default)">;

defm as_needed: B<"as-needed",
    "Only set DT_NEEDED for shared libraries if used",
    "Always set DT_NEEDED for shared libraries (default)">;

defm call_graph_ordering_file: Eq<"call-graph-ordering-file",
    "Layout sections to optimize the given callgraph">;

defm call_graph_profile_sort: B<"call-graph-profile-sort",
    "Reorder sections with call graph profile (default)",
    "Do not reorder sections with call graph profile">;

// -chroot doesn't have a help text because it is an internal option.
def chroot: Separate<["--", "-"], "chroot">;

def color_diagnostics: F<"color-diagnostics">,
  HelpText<"Alias for --color-diagnostics=always">;

def color_diagnostics_eq: J<"color-diagnostics=">,
  HelpText<"Use colors in diagnostics">, MetaVarName<"[auto,always,never]">;

defm cref: B<"cref",
    "Output cross reference table",
    "Do not output cross reference table">;

defm define_common: B<"define-common",
    "Assign space to common symbols",
    "Do not assign space to common symbols">;

defm demangle: B<"demangle",
    "Demangle symbol names (default)",
    "Do not demangle symbol names">;

def disable_new_dtags: F<"disable-new-dtags">,
  HelpText<"Disable new dynamic tags">;

def discard_all: F<"discard-all">, HelpText<"Delete all local symbols">;

def discard_locals: F<"discard-locals">,
  HelpText<"Delete temporary local symbols">;

def discard_none: F<"discard-none">,
  HelpText<"Keep all symbols in the symbol table">;

defm dynamic_linker: Eq<"dynamic-linker", "Which dynamic linker to use">;

defm dynamic_list: Eq<"dynamic-list", "Read a list of dynamic symbols">;

defm eh_frame_hdr: B<"eh-frame-hdr",
    "Request creation of .eh_frame_hdr section and PT_GNU_EH_FRAME segment header",
    "Do not create .eh_frame_hdr section">;

def emit_relocs: F<"emit-relocs">, HelpText<"Generate relocations in output">;

def enable_new_dtags: F<"enable-new-dtags">,
  HelpText<"Enable new dynamic tags (default)">;

def end_group: F<"end-group">,
  HelpText<"Ignored for compatibility with GNU unless you pass --warn-backrefs">;

def end_lib: F<"end-lib">,
  HelpText<"End a grouping of objects that should be treated as if they were together in an archive">;

defm entry: Eq<"entry", "Name of entry point symbol">,
  MetaVarName<"<entry>">;

defm error_limit:
  Eq<"error-limit", "Maximum number of errors to emit before stopping (0 = no limit)">;

def error_unresolved_symbols: F<"error-unresolved-symbols">,
  HelpText<"Report unresolved symbols as errors">;

defm exclude_libs: Eq<"exclude-libs", "Exclude static libraries from automatic export">;

defm execute_only: B<"execute-only",
    "Mark executable sections unreadable",
    "Mark executable sections readable (default)">;

defm export_dynamic: B<"export-dynamic",
    "Put symbols in the dynamic symbol table",
    "Do not put symbols in the dynamic symbol table (default)">;

defm export_dynamic_symbol: Eq<"export-dynamic-symbol",
    "Put a symbol in the dynamic symbol table">;

defm fatal_warnings: B<"fatal-warnings",
    "Treat warnings as errors",
    "Do not treat warnings as errors (default)">;
"Do not treat warnings as errors (default)">; defm filter: Eq<"filter", "Set DT_FILTER field to the specified name">; defm fini: Eq<"fini", "Specify a finalizer function">, MetaVarName<"">; def fix_cortex_a53_843419: F<"fix-cortex-a53-843419">, HelpText<"Apply fixes for AArch64 Cortex-A53 erratum 843419">; defm format: Eq<"format", "Change the input format of the inputs following this option">, MetaVarName<"[default,elf,binary]">; defm gc_sections: B<"gc-sections", "Enable garbage collection of unused sections", "Disable garbage collection of unused sections (default)">; defm gdb_index: B<"gdb-index", "Generate .gdb_index section", "Do not generate .gdb_index section (default)">; defm gnu_unique: B<"gnu-unique", "Enable STB_GNU_UNIQUE symbol binding (default)", "Disable STB_GNU_UNIQUE symbol binding">; defm hash_style: Eq<"hash-style", "Specify hash style (sysv, gnu or both)">; def help: F<"help">, HelpText<"Print option help">; def icf_all: F<"icf=all">, HelpText<"Enable identical code folding">; def icf_safe: F<"icf=safe">, HelpText<"Enable safe identical code folding">; def icf_none: F<"icf=none">, HelpText<"Disable identical code folding (default)">; def ignore_function_address_equality: F<"ignore-function-address-equality">, HelpText<"lld can break the address equality of functions">; def ignore_data_address_equality: F<"ignore-data-address-equality">, HelpText<"lld can break the address equality of data">; defm image_base: Eq<"image-base", "Set the base address">; defm init: Eq<"init", "Specify an initializer function">, MetaVarName<"">; defm just_symbols: Eq<"just-symbols", "Just link symbols">; defm keep_unique: Eq<"keep-unique", "Do not fold this symbol during ICF">; defm library: Eq<"library", "Root name of library to use">, MetaVarName<"">; def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">; defm Map: Eq<"Map", "Print a link map to the specified file">; defm merge_exidx_entries: B<"merge-exidx-entries", "Enable merging .ARM.exidx entries (default)", "Disable merging .ARM.exidx entries">; def nostdlib: F<"nostdlib">, HelpText<"Only search directories specified on the command line">; def no_color_diagnostics: F<"no-color-diagnostics">, HelpText<"Do not use colors in diagnostics">; def no_dynamic_linker: F<"no-dynamic-linker">, HelpText<"Inhibit output of .interp section">; def noinhibit_exec: F<"noinhibit-exec">, HelpText<"Retain the executable output file whenever it is still usable">; def no_omagic: F<"no-omagic">, MetaVarName<"">, HelpText<"Do not set the text data sections to be writable">; def no_rosegment: F<"no-rosegment">, HelpText<"Do not put read-only non-executable sections in their own segment">; def no_undefined: F<"no-undefined">, HelpText<"Report unresolved symbols even if the linker is creating a shared library">; def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"">, HelpText<"Path to file to write output">; def oformat: Separate<["--"], "oformat">, MetaVarName<"">, HelpText<"Specify the binary format for the output object file">; def omagic: Flag<["--"], "omagic">, MetaVarName<"">, HelpText<"Set the text and data sections to be readable and writable">; defm orphan_handling: Eq<"orphan-handling", "Control how orphan sections are handled when linker script used">; defm pack_dyn_relocs: Eq<"pack-dyn-relocs", "Pack dynamic relocations in the given format">, MetaVarName<"[none,android,relr,android+relr]">; defm use_android_relr_tags: B<"use-android-relr-tags", "Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*", "Use 
SHT_RELR / DT_RELR* tags (default)">; def pic_veneer: F<"pic-veneer">, HelpText<"Always generate position independent thunks (veneers)">; defm pie: B<"pie", "Create a position independent executable", "Do not create a position independent executable (default)">; defm print_gc_sections: B<"print-gc-sections", "List removed unused sections", "Do not list removed unused sections (default)">; defm print_icf_sections: B<"print-icf-sections", "List identical folded sections", "Do not list identical folded sections (default)">; def pop_state: F<"pop-state">, HelpText<"Undo the effect of -push-state">; def push_state: F<"push-state">, HelpText<"Save the current state of -as-needed, -static and -whole-archive">; def print_map: F<"print-map">, HelpText<"Print a link map to the standard output">; defm reproduce: Eq<"reproduce", "Dump linker invocation and input files for debugging">; defm rpath: Eq<"rpath", "Add a DT_RUNPATH to the output">; def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; defm retain_symbols_file: Eq<"retain-symbols-file", "Retain only the symbols listed in the file">, MetaVarName<"">; defm script: Eq<"script", "Read linker script">; defm section_start: Eq<"section-start", "Set address of section">, MetaVarName<"
">; def shared: F<"shared">, HelpText<"Build a shared object">; defm soname: Eq<"soname", "Set DT_SONAME">; defm sort_section: Eq<"sort-section", "Specifies sections sorting rule when linkerscript is used">; def start_group: F<"start-group">, HelpText<"Ignored for compatibility with GNU unless you pass --warn-backrefs">; def start_lib: F<"start-lib">, HelpText<"Start a grouping of objects that should be treated as if they were together in an archive">; def strip_all: F<"strip-all">, HelpText<"Strip all symbols">; def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; defm symbol_ordering_file: Eq<"symbol-ordering-file", "Layout sections to place symbols in the order specified by symbol ordering file">; defm sysroot: Eq<"sysroot", "Set the system root">; def target1_rel: F<"target1-rel">, HelpText<"Interpret R_ARM_TARGET1 as R_ARM_REL32">; def target1_abs: F<"target1-abs">, HelpText<"Interpret R_ARM_TARGET1 as R_ARM_ABS32 (default)">; defm target2: Eq<"target2", "Interpret R_ARM_TARGET2 as , where is one of rel, abs, or got-rel">, MetaVarName<"">; defm threads: B<"threads", "Run the linker multi-threaded (default)", "Do not run the linker multi-threaded">; defm toc_optimize : B<"toc-optimize", "(PowerPC64) Enable TOC related optimizations (default)", "(PowerPC64) Disable TOC related optimizations">; def trace: F<"trace">, HelpText<"Print the names of the input files">; defm trace_symbol: Eq<"trace-symbol", "Trace references to symbols">; defm undefined: Eq<"undefined", "Force undefined symbol during linking">, MetaVarName<"">; defm unresolved_symbols: Eq<"unresolved-symbols", "Determine how to handle unresolved symbols">; defm undefined_version: B<"undefined-version", "Allow unused version in version script (default)", "Report version scripts that refer undefined symbols">; defm rsp_quoting: Eq<"rsp-quoting", "Quoting style for response files">, MetaVarName<"[posix,windows]">; def v: Flag<["-"], "v">, HelpText<"Display the version number">; def verbose: F<"verbose">, HelpText<"Verbose mode">; def version: F<"version">, HelpText<"Display the version number and exit">; defm version_script: Eq<"version-script", "Read a version script">; defm warn_backrefs: B<"warn-backrefs", "Warn about backward symbol references to fetch archive members", "Do not warn about backward symbol references to fetch archive members (default)">; defm warn_common: B<"warn-common", "Warn about duplicate common symbols", "Do not warn about duplicate common symbols (default)">; defm warn_ifunc_textrel: B<"warn-ifunc-textrel", "Warn about using ifunc symbols with text relocations", "Do not warn about using ifunc symbols with text relocations (default)">; defm warn_symbol_ordering: B<"warn-symbol-ordering", "Warn about problems with the symbol ordering file (default)", "Do not warn about problems with the symbol ordering file">; def warn_unresolved_symbols: F<"warn-unresolved-symbols">, HelpText<"Report unresolved symbols as warnings">; defm whole_archive: B<"whole-archive", "Force load of all members in a static library", "Do not force load of all members in a static library (default)">; defm wrap: Eq<"wrap", "Use wrapper functions for symbol">, MetaVarName<"=">; def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"