Index: head/contrib/llvm/lib/Target/PowerPC/PPC.td
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPC.td	(revision 309148)
+++ head/contrib/llvm/lib/Target/PowerPC/PPC.td	(revision 309149)
@@ -1,453 +1,470 @@
//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is the top level entry point for the PowerPC target.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing.
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// PowerPC Subtarget features.
//

//===----------------------------------------------------------------------===//
// CPU Directives
//
//===----------------------------------------------------------------------===//

def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">;
def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
def Directive32  : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
def Directive64  : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
def DirectiveA2  : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
                                       "PPC::DIR_E500mc", "">;
def DirectiveE5500  : SubtargetFeature<"", "DarwinDirective",
                                       "PPC::DIR_E5500", "">;
def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
def DirectivePwr5x
    : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
def DirectivePwr6x
    : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
def DirectivePwr9: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR9", "">;

def Feature64Bit     : SubtargetFeature<"64bit","Has64BitSupport", "true",
                                        "Enable 64-bit instructions">;
-def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
-                                        "Use software emulation for floating point">;
+def FeatureHardFloat : SubtargetFeature<"hard-float", "HasHardFloat", "true",
+                                        "Enable floating-point instructions">;
def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
                              "Enable 64-bit registers usage for ppc32 [beta]">;
def FeatureCRBits    : SubtargetFeature<"crbits", "UseCRBits", "true",
                              "Use condition-register bits individually">;
def FeatureAltivec   : SubtargetFeature<"altivec","HasAltivec", "true",
-                                        "Enable Altivec instructions">;
+                                        "Enable Altivec instructions",
+                                        [FeatureHardFloat]>;
def FeatureSPE       : SubtargetFeature<"spe","HasSPE", "true",
-                                        "Enable SPE instructions">;
+                                        "Enable SPE instructions",
+                                        [FeatureHardFloat]>;
def FeatureMFOCRF    : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
                                        "Enable the MFOCRF instruction">;
def FeatureFSqrt     : SubtargetFeature<"fsqrt","HasFSQRT", "true",
-                                        "Enable the fsqrt instruction">;
+                                        "Enable the fsqrt instruction",
+                                        [FeatureHardFloat]>;
def FeatureFCPSGN    : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true",
-                                        "Enable the fcpsgn instruction">;
+                                        "Enable the fcpsgn instruction",
+                                        [FeatureHardFloat]>;
def FeatureFRE       : SubtargetFeature<"fre", "HasFRE", "true",
-                                        "Enable the fre instruction">;
+                                        "Enable the fre instruction",
+                                        [FeatureHardFloat]>;
def FeatureFRES      : SubtargetFeature<"fres", "HasFRES", "true",
-                                        "Enable the fres instruction">;
+                                        "Enable the fres instruction",
+                                        [FeatureHardFloat]>;
def FeatureFRSQRTE   : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
-                                        "Enable the frsqrte instruction">;
+                                        "Enable the frsqrte instruction",
+                                        [FeatureHardFloat]>;
def FeatureFRSQRTES  : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
-                                        "Enable the frsqrtes instruction">;
+                                        "Enable the frsqrtes instruction",
+                                        [FeatureHardFloat]>;
def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
                              "Assume higher precision reciprocal estimates">;
def FeatureSTFIWX    : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
-                                        "Enable the stfiwx instruction">;
+                                        "Enable the stfiwx instruction",
+                                        [FeatureHardFloat]>;
def FeatureLFIWAX    : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
-                                        "Enable the lfiwax instruction">;
+                                        "Enable the lfiwax instruction",
+                                        [FeatureHardFloat]>;
def FeatureFPRND     : SubtargetFeature<"fprnd", "HasFPRND", "true",
-                                        "Enable the fri[mnpz] instructions">;
+                                        "Enable the fri[mnpz] instructions",
+                                        [FeatureHardFloat]>;
def FeatureFPCVT     : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
-  "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">;
+  "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions",
+  [FeatureHardFloat]>;
def FeatureISEL      : SubtargetFeature<"isel","HasISEL", "true",
                                        "Enable the isel instruction">;
def FeatureBPERMD    : SubtargetFeature<"bpermd", "HasBPERMD", "true",
                                        "Enable the bpermd instruction">;
def FeatureExtDiv    : SubtargetFeature<"extdiv", "HasExtDiv", "true",
                                        "Enable extended divide instructions">;
def FeatureLDBRX     : SubtargetFeature<"ldbrx","HasLDBRX", "true",
                                        "Enable the ldbrx instruction">;
def FeatureCMPB      : SubtargetFeature<"cmpb", "HasCMPB", "true",
                                        "Enable the cmpb instruction">;
def FeatureICBT      : SubtargetFeature<"icbt","HasICBT", "true",
                                        "Enable icbt instruction">;
def FeatureBookE     : SubtargetFeature<"booke", "IsBookE", "true",
                                        "Enable Book E instructions",
                                        [FeatureICBT]>;
def FeatureMSYNC     : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
                              "Has only the msync instruction instead of sync",
                              [FeatureBookE]>;
def FeatureE500      : SubtargetFeature<"e500", "IsE500", "true",
                                        "Enable E500/E500mc instructions">;
def FeaturePPC4xx    : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
                                        "Enable PPC 4xx instructions">;
def FeaturePPC6xx    : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
                                        "Enable PPC 6xx instructions">;
def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
-                                        "Enable QPX instructions">;
+                                        "Enable QPX instructions",
+                                        [FeatureHardFloat]>;
def FeatureVSX       : SubtargetFeature<"vsx","HasVSX", "true",
                                        "Enable VSX instructions",
                                        [FeatureAltivec]>;
def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec",
                                        "true",
                                        "Enable POWER8 Altivec instructions",
                                        [FeatureAltivec]>;
def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
                                       "Enable POWER8 Crypto instructions",
                                       [FeatureP8Altivec]>;
def FeatureP8Vector  : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
                                        "Enable POWER8 vector instructions",
                                        [FeatureVSX, FeatureP8Altivec]>;
def FeatureDirectMove :
  SubtargetFeature<"direct-move", "HasDirectMove", "true",
                   "Enable Power8 direct move instructions",
                   [FeatureVSX]>;
def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
                                             "HasPartwordAtomics", "true",
                                             "Enable l[bh]arx and st[bh]cx.">;
def FeatureInvariantFunctionDescriptors :
  SubtargetFeature<"invariant-function-descriptors",
                   "HasInvariantFunctionDescriptors", "true",
                   "Assume function descriptors are invariant">;
def FeatureLongCall : SubtargetFeature<"longcall", "UseLongCalls", "true",
                                       "Always use indirect calls">;
def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
                        "Enable Hardware Transactional Memory instructions">;
def FeatureMFTB   : SubtargetFeature<"", "FeatureMFTB", "true",
                              "Implement mftb using the mfspr instruction">;
def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
                                     "Target supports add/load integer fusion.">;
def FeatureFloat128 :
  SubtargetFeature<"float128", "HasFloat128", "true",
                   "Enable the __float128 data type for IEEE-754R Binary128.",
                   [FeatureVSX]>;
def FeaturePOPCNTD   : SubtargetFeature<"popcntd","HasPOPCNTD",
                                        "POPCNTD_Fast",
                                        "Enable the popcnt[dw] instructions">;
// Note that for the a2/a2q processor models we should not use popcnt[dw] by
// default. These processors do support the instructions, but they're
// microcoded, and the software emulation is about twice as fast.
def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD",
                                          "POPCNTD_Slow",
                                          "Has slow popcnt[dw] instructions">;

def DeprecatedDST    : SubtargetFeature<"", "DeprecatedDST", "true",
  "Treat vector data stream cache control instructions as deprecated">;

def FeatureISA3_0 : SubtargetFeature<"isa-v30-instructions", "IsISA3_0",
                                     "true",
                                     "Enable instructions added in ISA 3.0.">;
def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec",
                                        "true",
                                        "Enable POWER9 Altivec instructions",
                                        [FeatureISA3_0, FeatureP8Altivec]>;
def FeatureP9Vector  : SubtargetFeature<"power9-vector", "HasP9Vector", "true",
                                        "Enable POWER9 vector instructions",
                                        [FeatureISA3_0, FeatureP8Vector,
                                         FeatureP9Altivec]>;

// Since new processors generally contain a superset of features of those that
// came before them, the idea is to make implementations of new processors
// less error prone and easier to read.
// Namely:
//     list<SubtargetFeature> Power8FeatureList = ...
//     list<SubtargetFeature> FutureProcessorSpecificFeatureList =
//         [ features that Power8 does not support ]
//     list<SubtargetFeature> FutureProcessorFeatureList =
//         !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)

// Makes it explicit and obvious what is new in FutureProcessor vs. Power8 as
// well as providing a single point of definition if the feature set will be
// used elsewhere.
def ProcessorFeatures {
  list<SubtargetFeature> Power7FeatureList =
      [DirectivePwr7, FeatureAltivec, FeatureVSX,
       FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
       FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
       FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
       FeatureFPRND, FeatureFPCVT, FeatureISEL,
       FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
       Feature64Bit /*, Feature64BitRegs */,
       FeatureBPERMD, FeatureExtDiv,
       FeatureMFTB, DeprecatedDST];
  list<SubtargetFeature> Power8SpecificFeatures =
      [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
       FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,
       FeatureFusion];
  list<SubtargetFeature> Power8FeatureList =
      !listconcat(Power7FeatureList, Power8SpecificFeatures);
  list<SubtargetFeature> Power9SpecificFeatures =
      [FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0];
  list<SubtargetFeature> Power9FeatureList =
      !listconcat(Power8FeatureList, Power9SpecificFeatures);
}

// Note: Future features to add when support is extended to more
// recent ISA levels:
//
// DFP          p6, p6x, p7       decimal floating-point instructions
// POPCNTB      p5 through p7     popcntb and related instructions

//===----------------------------------------------------------------------===//
// Classes used for relation maps.
//===----------------------------------------------------------------------===//
// RecFormRel - Filter class used to relate non-record-form instructions with
// their record-form variants.
class RecFormRel;

// AltVSXFMARel - Filter class used to relate the primary addend-killing VSX
// FMA instruction forms with their corresponding factor-killing forms.
class AltVSXFMARel {
  bit IsVSXFMAAlt = 0;
}

//===----------------------------------------------------------------------===//
// Relation Map Definitions.
//===----------------------------------------------------------------------===//

def getRecordFormOpcode : InstrMapping {
  let FilterClass = "RecFormRel";
  // Instructions with the same BaseName and Interpretation64Bit values
  // form a row.
  let RowFields = ["BaseName", "Interpretation64Bit"];
  // Instructions with the same RC value form a column.
  let ColFields = ["RC"];
  // The key column are the non-record-form instructions.
  let KeyCol = ["0"];
  // Value columns RC=1
  let ValueCols = [["1"]];
}

def getNonRecordFormOpcode : InstrMapping {
  let FilterClass = "RecFormRel";
  // Instructions with the same BaseName and Interpretation64Bit values
  // form a row.
  let RowFields = ["BaseName", "Interpretation64Bit"];
  // Instructions with the same RC value form a column.
  let ColFields = ["RC"];
  // The key column are the record-form instructions.
  let KeyCol = ["1"];
  // Value columns are RC=0
  let ValueCols = [["0"]];
}

def getAltVSXFMAOpcode : InstrMapping {
  let FilterClass = "AltVSXFMARel";
  // Instructions with the same BaseName and Interpretation64Bit values
  // form a row.
  let RowFields = ["BaseName"];
  // Instructions with the same RC value form a column.
  let ColFields = ["IsVSXFMAAlt"];
  // The key column are the (default) addend-killing instructions.
  let KeyCol = ["0"];
  // Value columns IsVSXFMAAlt=1
  let ValueCols = [["1"]];
}
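// How these relation maps are consumed (a sketch; the exact generated names
// are an assumption -- TableGen emits them into PPCGenInstrInfo.inc): each
// InstrMapping becomes a lookup function, so C++ passes can hop from an
// opcode to its record-form twin without a hand-written table, roughly:
//
//   // e.g. rewrite "add" as "add." so a following compare can be deleted
//   int NewOpC = PPC::getRecordFormOpcode(MI.getOpcode());
//   if (NewOpC != -1)
//     MI.setDesc(TII->get(NewOpC));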
// -def : Processor<"generic", G3Itineraries, [Directive32, FeatureMFTB]>; +def : Processor<"generic", G3Itineraries, [Directive32, FeatureHardFloat, + FeatureMFTB]>; def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, FeatureICBT, FeatureBookE, FeatureMSYNC, FeatureMFTB]>; def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, FeatureICBT, FeatureBookE, FeatureMSYNC, FeatureMFTB]>; -def : Processor<"601", G3Itineraries, [Directive601]>; -def : Processor<"602", G3Itineraries, [Directive602, +def : Processor<"601", G3Itineraries, [Directive601, FeatureHardFloat]>; +def : Processor<"602", G3Itineraries, [Directive602, FeatureHardFloat, FeatureMFTB]>; def : Processor<"603", G3Itineraries, [Directive603, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"603e", G3Itineraries, [Directive603, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"603ev", G3Itineraries, [Directive603, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"604", G3Itineraries, [Directive604, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"604e", G3Itineraries, [Directive604, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"620", G3Itineraries, [Directive620, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"750", G4Itineraries, [Directive750, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"g3", G3Itineraries, [Directive750, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : ProcessorModel<"970", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; def : ProcessorModel<"g5", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureFRES, FeatureFRSQRTE, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"e500mc", PPCE500mcModel, [DirectiveE500mc, FeatureSTFIWX, FeatureICBT, FeatureBookE, FeatureISEL, FeatureMFTB]>; def : ProcessorModel<"e5500", PPCE5500Model, [DirectiveE5500, FeatureMFOCRF, Feature64Bit, FeatureSTFIWX, FeatureICBT, FeatureBookE, FeatureISEL, FeatureMFTB]>; def : ProcessorModel<"a2", PPCA2Model, [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; def : ProcessorModel<"a2q", PPCA2Model, [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureQPX, FeatureMFTB]>; def : ProcessorModel<"pwr3", G5Model, [DirectivePwr3, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, 
FeatureMFOCRF, FeatureSTFIWX, Feature64Bit]>; def : ProcessorModel<"pwr4", G5Model, [DirectivePwr4, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit, FeatureMFTB]>; def : ProcessorModel<"pwr5", G5Model, [DirectivePwr5, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureSTFIWX, Feature64Bit, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr5x", G5Model, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureSTFIWX, FeatureFPRND, Feature64Bit, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr6", G5Model, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr6x", G5Model, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; // FIXME: Same as P8 until the POWER9 scheduling info is available def : ProcessorModel<"pwr9", P8Model, ProcessorFeatures.Power9FeatureList>; -def : Processor<"ppc", G3Itineraries, [Directive32, FeatureMFTB]>; +def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat, + FeatureMFTB]>; +def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat, + FeatureMFTB]>; def : ProcessorModel<"ppc64", G5Model, [Directive64, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>; //===----------------------------------------------------------------------===// // Calling Conventions //===----------------------------------------------------------------------===// include "PPCCallingConv.td" def PPCInstrInfo : InstrInfo { let isLittleEndianEncoding = 1; // FIXME: Unset this when no longer needed! let decodePositionallyEncodedOperands = 1; let noNamedPositionallyEncodedOperands = 1; } def PPCAsmParser : AsmParser { let ShouldEmitMatchRegisterName = 0; } def PPCAsmParserVariant : AsmParserVariant { int Variant = 0; // We do not use hard coded registers in asm strings. However, some // InstAlias definitions use immediate literals. Set RegisterPrefix // so that those are not misinterpreted as registers. string RegisterPrefix = "%"; string BreakCharacters = "."; } def PPC : Target { // Information about the instructions. 
let InstructionSet = PPCInstrInfo; let AssemblyParsers = [PPCAsmParser]; let AssemblyParserVariants = [PPCAsmParserVariant]; } Index: head/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- head/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp (revision 309148) +++ head/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp (revision 309149) @@ -1,12122 +1,12122 @@ //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the PPCISelLowering class. // //===----------------------------------------------------------------------===// #include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCCallingConv.h" #include "PPCCCState.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" #include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include using namespace llvm; #define DEBUG_TYPE "ppc-lowering" static cl::opt DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); static cl::opt DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); static cl::opt DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden); STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); // FIXME: Remove this once the bug has been fixed! extern cl::opt ANDIGlueBug; PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { // Use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. bool isPPC64 = Subtarget.isPPC64(); setMinStackArgumentAlignment(isPPC64 ? 8:4); // Set up the register classes. 
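  // A minimal sketch (an assumption, not shown in this diff) of how the
  // inverted feature bit surfaces here: the old UseSoftFloat subtarget flag
  // becomes the negation of the new HasHardFloat bit, so every CPU model that
  // carries FeatureHardFloat -- directly or via an implied feature such as
  // FeatureAltivec -- keeps hardware FP, and -mattr=-hard-float disables it.
  //
  //   bool PPCSubtarget::useSoftFloat() const { return !HasHardFloat; }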
addRegisterClass(MVT::i32, &PPC::GPRCRegClass); if (!useSoftFloat()) { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } // PowerPC has an i16 but no i8 (or i1) SEXTLOAD for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); if (Subtarget.useCRBits()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (isPPC64 || Subtarget.hasFPCVT()) { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); } else { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); } // PowerPC does not support direct load / store of condition registers setOperationAction(ISD::LOAD, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); // FIXME: Remove this once the ANDI glue bug is fixed: if (ANDIGlueBug) setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setTruncStoreAction(VT, MVT::i1, Expand); } addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); } // This is used in the ppcf128->int sequence. Note it has different semantics // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); setOperationAction(ISD::FREM, MVT::ppcf128, Expand); // PowerPC has no SREM/UREM instructions setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i64, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Legal); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) setOperationAction(ISD::FSQRT, MVT::f64, Expand); if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); if (Subtarget.hasFCPSGN()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); } else { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); } if (Subtarget.hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); } // PowerPC does not have BSWAP, CTPOP or CTTZ setOperationAction(ISD::BSWAP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { setOperationAction(ISD::CTPOP, MVT::i32 , Legal); setOperationAction(ISD::CTPOP, MVT::i64 , Legal); } else { setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTPOP, MVT::i64 , Expand); } // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); if (!Subtarget.useCRBits()) { // PowerPC does not have Select setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f64, Expand); } // PowerPC wants to turn select_cc of FP into fsel when possible. 
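  // A short aside on the instruction targeted just below: fsel is a
  // register-based floating-point select,
  //   fsel FRT, FRA, FRC, FRB   =>   FRT = (FRA >= 0.0) ? FRC : FRB
  // so an ordered FP select_cc whose condition compares against zero (or can
  // be rewritten as a subtraction that does) folds into a single branch-free
  // instruction, which is what the custom lowering tries to form.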
  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, and etc.. As a result, no
  // other SjLj exception interfaces are implemented and please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }
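  // A sketch of where the setTargetDAGCombine() calls above lead (hedged: the
  // helper name is illustrative, only PerformDAGCombine itself is the real
  // TargetLowering hook): nodes with a registered opcode are fed back to the
  // target, which dispatches on the opcode, roughly:
  //
  //   SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
  //                                                DAGCombinerInfo &DCI) const {
  //     switch (N->getOpcode()) {
  //     default: break;
  //     case ISD::SINT_TO_FP:
  //       return combineConversionToFP(N, DCI);  // hypothetical helper
  //     }
  //     return SDValue();                        // no change
  //   }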
  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP =
            dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ?
0 : 1; for (unsigned i = 0; i != 8; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) return false; } return true; } /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 2; for (unsigned i = 0; i != 8; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) return false; } return true; } /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the /// current subtarget. /// /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { const PPCSubtarget& Subtarget = static_cast(DAG.getSubtarget()); if (!Subtarget.hasP8Vector()) return false; bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 4; for (unsigned i = 0; i != 8; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) return false; } return true; } /// isVMerge - Common function, used to match vmrg* shuffles. 
/// static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { if (N->getValueType(0) != MVT::v16i8) return false; assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), LHSStart+j+i*UnitSize) || !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), RHSStart+j+i*UnitSize)) return false; } return true; } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). /// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 0, 16); else return false; } else { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 8, 24); else return false; } } /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). /// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 8, 24); else return false; } else { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 0, 16); else return false; } } /** * \brief Common function used to match vmrgew and vmrgow shuffles * * The indexOffset determines whether to look for even or odd words in * the shuffle mask. This is based on the endianness of the target * machine. * - Little Endian: * - Use offset of 0 to check for odd elements * - Use offset of 4 to check for even elements * - Big Endian: * - Use offset of 0 to check for even elements * - Use offset of 4 to check for odd elements * A detailed description of the vector element ordering for little endian and * big endian can be found at * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html * Targeting your applications - what little endian and big endian IBM XL C/C++ * compiler differences mean to you * * The mask to the shuffle vector instruction specifies the indices of the * elements from the two input vectors to place in the result. The elements are * numbered in array-access order, starting with the first vector. These vectors * are always of type v16i8, thus each vector will contain 16 elements of size * 8 bits.
More info on the shufflevector instruction can be found in the * Language Reference: * http://llvm.org/docs/LangRef.html#shufflevector-instruction * * The RHSStartValue indicates whether the same input vectors are used (unary) * or two different input vectors are used, based on the following: * - If the instruction uses the same vector for both inputs, the range of the * indices will be 0 to 15. In this case, the RHSStart value passed should * be 0. * - If the instruction has two different vectors then the range of the * indices will be 0 to 31. In this case, the RHSStart value passed should * be 16 (indices 0-15 specify elements in the first vector while indices 16 * to 31 specify elements in the second vector). * * \param[in] N The shuffle vector SD Node to analyze * \param[in] IndexOffset Specifies whether to look for even or odd elements * \param[in] RHSStartValue Specifies the starting index for the right-hand input * vector to the shuffle_vector instruction * \return true iff this shuffle vector represents an even or odd word merge */ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, unsigned RHSStartValue) { if (N->getValueType(0) != MVT::v16i8) return false; for (unsigned i = 0; i < 2; ++i) for (unsigned j = 0; j < 4; ++j) if (!isConstantOrUndef(N->getMaskElt(i*4+j), i*RHSStartValue+j+IndexOffset) || !isConstantOrUndef(N->getMaskElt(i*4+j+8), i*RHSStartValue+j+IndexOffset+8)) return false; return true; } /** * \brief Determine if the specified shuffle mask is suitable for the vmrgew or * vmrgow instructions. * * \param[in] N The shuffle vector SD Node to analyze * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) * \param[in] ShuffleKind Identify the type of merge: * - 0 = big-endian merge with two different inputs; * - 1 = either-endian merge with two identical inputs; * - 2 = little-endian merge with two different inputs (inputs are swapped for * little-endian merges). * \param[in] DAG The current SelectionDAG * \return true iff this shuffle mask represents the requested even or odd * word merge */ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { unsigned indexOffset = CheckEven ? 4 : 0; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); else if (ShuffleKind == 2) // swapped return isVMerge(N, indexOffset, 16); else return false; } else { unsigned indexOffset = CheckEven ? 0 : 4; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); else if (ShuffleKind == 0) // Normal return isVMerge(N, indexOffset, 16); else return false; } return false; } /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. /// The ShuffleKind distinguishes between big-endian operations with two /// different inputs (0), either-endian operations with two identical inputs /// (1), and little-endian operations with two different inputs (2). For the /// latter, the input operands are swapped (see PPCInstrAltivec.td). int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { if (N->getValueType(0) != MVT::v16i8) return -1; ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); // Find the first non-undef value in the shuffle mask. unsigned i; for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) /*search*/; if (i == 16) return -1; // all undef. // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value.
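// Illustrative example (not from the original source): for the big-endian
// two-input case (ShuffleKind == 0), the v16i8 mask
//   <3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18>
// is consecutive starting at 3, so a shift amount of 3 is returned,
// corresponding to "vsldoi vD, vA, vB, 3"; on little-endian subtargets the
// amount is reported as 16 - ShiftAmt instead.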
unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; ShiftAmt -= i; bool isLE = DAG.getDataLayout().isLittleEndian(); if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; } else if (ShuffleKind == 1) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) return -1; } else return -1; if (isLE) ShiftAmt = 16 - ShiftAmt; return ShiftAmt; } /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { assert(N->getValueType(0) == MVT::v16i8 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); // The consecutive indices need to specify an element, not part of two // different elements. So abandon ship early if this isn't the case. if (N->getMaskElt(0) % EltSize != 0) return false; // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. unsigned ElementBase = N->getMaskElt(0); // FIXME: Handle UNDEF elements too! if (ElementBase >= 16) return false; // Check that the indices are consecutive, in the case of a multi-byte element // splatted with a v16i8 mask. for (unsigned i = 1; i != EltSize; ++i) if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) return false; for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { if (N->getMaskElt(i) < 0) continue; for (unsigned j = 0; j != EltSize; ++j) if (N->getMaskElt(i+j) != N->getMaskElt(j)) return false; } return true; } bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE) { // Check that the mask is shuffling words for (unsigned i = 0; i < 4; ++i) { unsigned B0 = N->getMaskElt(i*4); unsigned B1 = N->getMaskElt(i*4+1); unsigned B2 = N->getMaskElt(i*4+2); unsigned B3 = N->getMaskElt(i*4+3); if (B0 % 4) return false; if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) return false; } // Now we look at mask elements 0,4,8,12 unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; unsigned M2 = N->getMaskElt(8) / 4; unsigned M3 = N->getMaskElt(12) / 4; unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; // Below, let H and L be arbitrary elements of the shuffle mask // where H is in the range [4,7] and L is in the range [0,3]. // H, 1, 2, 3 or L, 5, 6, 7 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; InsertAtByte = IsLE ? 12 : 0; Swap = M0 < 4; return true; } // 0, H, 2, 3 or 4, L, 6, 7 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; InsertAtByte = IsLE ? 8 : 4; Swap = M1 < 4; return true; } // 0, 1, H, 3 or 4, 5, L, 7 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; InsertAtByte = IsLE ? 
4 : 8; Swap = M2 < 4; return true; } // 0, 1, 2, H or 4, 5, 6, L if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; InsertAtByte = IsLE ? 0 : 12; Swap = M3 < 4; return true; } // If both vector operands for the shuffle are the same vector, the mask will // contain only elements from the first one and the second one will be undef. if (N->getOperand(1).isUndef()) { ShiftElts = 0; Swap = true; unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { InsertAtByte = IsLE ? 12 : 0; return true; } if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { InsertAtByte = IsLE ? 8 : 4; return true; } if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { InsertAtByte = IsLE ? 4 : 8; return true; } if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { InsertAtByte = IsLE ? 0 : 12; return true; } } return false; } /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); assert(isSplatShuffleMask(SVOp, EltSize)); if (DAG.getDataLayout().isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else return SVOp->getMaskElt(0) / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed /// by using a vspltis[bhw] instruction of the specified element size, return /// the constant being splatted. The ByteSize field indicates the number of /// bytes of each element [124] -> [bhw]. SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { SDValue OpVal(nullptr, 0); // If ByteSize of the splat is bigger than the element size of the // build_vector, then we have a case where we are checking for a splat where // multiple elements of the buildvector are folded together into a single // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8). unsigned EltSize = 16/N->getNumOperands(); if (EltSize < ByteSize) { unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. SDValue UniquedVals[4]; assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); // See if all of the elements in the buildvector agree across. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).isUndef()) continue; // If the element isn't a constant, bail out entirely. if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); if (!UniquedVals[i&(Multiple-1)].getNode()) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) return SDValue(); // no match. } // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains // either constant or undef values that are identical for each chunk. See // if these chunks can form into a larger vspltis*. // Check to see if all of the leading entries are either 0 or -1. If // neither, then this won't fit into the immediate field. bool LeadingZero = true; bool LeadingOnes = true; for (unsigned i = 0; i != Multiple-1; ++i) { if (!UniquedVals[i].getNode()) continue; // Must have been undefs. LeadingZero &= isNullConstant(UniquedVals[i]); LeadingOnes &= isAllOnesConstant(UniquedVals[i]); } // Finally, check the least significant entry.
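// Worked example (illustrative, not from the original source): a v16i8
// build_vector of the repeating byte pair {0, 1} checked with ByteSize == 2
// gives Multiple == 2 and UniquedVals == {0, 1}; the leading entry is zero
// and the final entry (1) fits the immediate field, so this folds into
// "vspltish 1".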
if (LeadingZero) { if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef int Val = cast(UniquedVals[Multiple-1])->getZExtValue(); if (Val < 16) // 0,0,0,4 -> vspltisw(4) return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } if (LeadingOnes) { if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef int Val =cast(UniquedVals[Multiple-1])->getSExtValue(); if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } return SDValue(); } // Check to see if this buildvec has a single non-undef value in its elements. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).isUndef()) continue; if (!OpVal.getNode()) OpVal = N->getOperand(i); else if (OpVal != N->getOperand(i)) return SDValue(); } if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. unsigned ValSizeInBytes = EltSize; uint64_t Value = 0; if (ConstantSDNode *CN = dyn_cast(OpVal)) { Value = CN->getZExtValue(); } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); Value = FloatToBits(CN->getValueAPF().convertToFloat()); } // If the splat value is larger than the element value, then we can never do // this splat. The only case that we could fit the replicated bits into our // immediate field for would be zero, and we prefer to use vxor for it. if (ValSizeInBytes < ByteSize) return SDValue(); // If the element value is larger than the splat value, check if it consists // of a repeated bit pattern of size ByteSize. if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) return SDValue(); // Properly sign extend the value. int MaskVal = SignExtend32(Value, ByteSize * 8); // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. if (MaskVal == 0) return SDValue(); // Finally, if this value fits in a 5 bit sext field, return it if (SignExtend32<5>(MaskVal) == MaskVal) return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); return SDValue(); } /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift /// amount, otherwise return -1. int PPC::isQVALIGNIShuffleMask(SDNode *N) { EVT VT = N->getValueType(0); if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) return -1; ShuffleVectorSDNode *SVOp = cast(N); // Find the first non-undef value in the shuffle mask. unsigned i; for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) /*search*/; if (i == 4) return -1; // all undef. // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value. unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; ShiftAmt -= i; // Check the rest of the elements to see if they are consecutive. for (++i; i != 4; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; return ShiftAmt; } //===----------------------------------------------------------------------===// // Addressing Mode Selection //===----------------------------------------------------------------------===// /// isIntS16Immediate - This method tests to see if the node is either a 32-bit /// or 64-bit immediate, and if the value can be accurately represented as a /// sign extension from a 16-bit value. If so, this returns true and the /// immediate. 
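/// For illustration (hypothetical values, not from the original source): an
/// i64 constant 0xFFFFFFFFFFFF8000 truncates to the 16-bit value -32768 and
/// sign-extends back to the same constant, so Imm is set to -32768 and the
/// function returns true, whereas 0x18000 truncates to -32768 != 0x18000 and
/// is rejected.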
static bool isIntS16Immediate(SDNode *N, short &Imm) { if (!isa<ConstantSDNode>(N)) return false; Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); if (N->getValueType(0) == MVT::i32) return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); else return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); } static bool isIntS16Immediate(SDValue Op, short &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } /// SelectAddressRegReg - Given the specified address, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented with [r+imm]. bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { short imm = 0; if (N.getOpcode() == ISD::ADD) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i if (N.getOperand(1).getOpcode() == PPCISD::Lo) return false; // r+i Base = N.getOperand(0); Index = N.getOperand(1); return true; } else if (N.getOpcode() == ISD::OR) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i; fold into [r+imm] if we can. // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are provably // disjoint. APInt LHSKnownZero, LHSKnownOne; APInt RHSKnownZero, RHSKnownOne; DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if (LHSKnownZero.getBoolValue()) { DAG.computeKnownBits(N.getOperand(1), RHSKnownZero, RHSKnownOne); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (~(LHSKnownZero | RHSKnownZero) == 0) { Base = N.getOperand(0); Index = N.getOperand(1); return true; } } } return false; } // If we happen to be doing an i64 load or store into a stack slot that has // less than a 4-byte alignment, then the frame-index elimination may need to // use an indexed load or store instruction (because the offset may not be a // multiple of 4). The extra register needed to hold the offset comes from the // register scavenger, and it is possible that the scavenger will need to use // an emergency spill slot. As a result, we need to make sure that a spill slot // is allocated when doing an i64 load/store into a less-than-4-byte-aligned // stack slot. static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { // FIXME: This does not handle the LWA case. if (VT != MVT::i64) return; // NOTE: We'll exclude negative FIs here, which come from argument // lowering, because there are no known test cases triggering this problem // using packed structures (or similar). We can remove this exclusion if // we find such a test case. The reason why this is so test-case driven is // because this entire 'fixup' is only to prevent crashes (from the // register scavenger) on not-really-valid inputs. For example, if we have: // %a = alloca i1 // %b = bitcast i1* %a to i64* // store i64* a, i64 b // then the store should really be marked as 'align 1', but is not. If it // were marked as 'align 1' then the indexed form would have been // instruction-selected initially, and the problem this 'fixup' is preventing // won't happen regardless. if (FrameIdx < 0) return; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Align = MFI->getObjectAlignment(FrameIdx); if (Align >= 4) return; PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); FuncInfo->setHasNonRISpills(); } /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better /// represented as reg+reg.
If Aligned is true, only accept displacements /// suitable for STD and friends, i.e. multiples of 4. bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, bool Aligned) const { // FIXME dl should come from parent load or store, not from address SDLoc dl(N); // If this can be more profitably realized as r+r, fail. if (SelectAddressRegReg(N, Disp, Base, DAG)) return false; if (N.getOpcode() == ISD::ADD) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() && "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. assert(Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable); Base = N.getOperand(0); return true; // [&g+r] } } else if (N.getOpcode() == ISD::OR) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); return true; } } } else if (ConstantSDNode *CN = dyn_cast(N)) { // Loading from a constant address. // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" short Imm; if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } // Handle 32-bit sext immediates with LIS + addr mode. if ((CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && (!Aligned || (CN->getZExtValue() & 3) == 0)) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, MVT::i32); unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); return true; } } Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else Base = N; return true; // [r+0] } /// SelectAddressRegRegOnly - Given the specified address, force it to be /// represented as an indexed [r+r] operation. bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { // Check to see if we can easily represent this as an [r+r] address. This // will fail if it thinks that the address is more profitably represented as // reg+imm, e.g. where imm = 0. if (SelectAddressRegReg(N, Base, Index, DAG)) return true; // If the operand is an addition, always emit this as [r+r], since this is // better (for code size, and execution, as the memop does the add for free) // than emitting an explicit add. if (N.getOpcode() == ISD::ADD) { Base = N.getOperand(0); Index = N.getOperand(1); return true; } // Otherwise, do it the hard way, using R0 as the base register. Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; } /// getPreIndexedAddressParts - Returns true by value, and sets the base /// pointer, the offset pointer and the addressing mode by reference, if the /// node's address can be legally represented as a pre-indexed load / store /// address. bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { if (DisablePPCPreinc) return false; bool isLoad = true; SDValue Ptr; EVT VT; unsigned Alignment; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); Alignment = ST->getAlignment(); isLoad = false; } else return false; // PowerPC doesn't have preinc load/store instructions for vectors (except // for QPX, which does have preinc r+r forms). if (VT.isVector()) { if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { return false; } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { AM = ISD::PRE_INC; return true; } } if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { // Common code will reject creating a pre-inc form if the base pointer // is a frame index, or if N is a store and the base pointer is either // the same as or a predecessor of the value being stored. Check for // those situations here, and try with swapped Base/Offset instead. bool Swap = false; if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) Swap = true; else if (!isLoad) { SDValue Val = cast<StoreSDNode>(N)->getValue(); if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) Swap = true; } if (Swap) std::swap(Base, Offset); AM = ISD::PRE_INC; return true; } // LDU/STU can only handle immediates that are a multiple of 4. if (VT != MVT::i64) { if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) return false; } else { // LDU/STU need an address with at least 4-byte alignment. if (Alignment < 4) return false; if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) return false; } if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of // sext i32 to i64 when addr mode is r+i.
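// Illustrative case: "lwaux" exists but there is no immediate-form "lwau",
// so a pre-increment sign-extending i32-to-i64 load whose address matched
// the [r+imm] form above must be rejected here; common code then keeps the
// non-updating form.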
if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && LD->getExtensionType() == ISD::SEXTLOAD && isa(Offset)) return false; } AM = ISD::PRE_INC; return true; } //===----------------------------------------------------------------------===// // LowerOperation implementation //===----------------------------------------------------------------------===// /// Return true if we should reference labels using a PICBase, set the HiOpFlags /// and LoOpFlags to the target MO flags. static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV = nullptr) { HiOpFlags = PPCII::MO_HA; LoOpFlags = PPCII::MO_LO; // Don't use the pic base if not in PIC relocation model. if (IsPIC) { HiOpFlags |= PPCII::MO_PIC_FLAG; LoOpFlags |= PPCII::MO_PIC_FLAG; } // If this is a reference to a global value that requires a non-lazy-ptr, make // sure that instruction lowering adds it. if (GV && Subtarget.hasLazyResolverStub(GV)) { HiOpFlags |= PPCII::MO_NLP_FLAG; LoOpFlags |= PPCII::MO_NLP_FLAG; if (GV->hasHiddenVisibility()) { HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; } } } static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG) { SDLoc DL(HiPart); EVT PtrVT = HiPart.getValueType(); SDValue Zero = DAG.getConstant(0, DL, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); // With PIC, the first instruction is actually "GR+hi(&G)". if (isPIC) Hi = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); // Generate non-pic code that has direct accesses to the constant pool. // The address of the global is just (hi(&g)+lo(&g)). return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); } static void setUsesTOCBasePtr(MachineFunction &MF) { PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setUsesTOCBasePtr(); } static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, SDValue GA) { EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, false, 0); } SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); const Constant *C = CP->getConstVal(); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
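// As a sketch (assembly spelling illustrative, medium code model): the
// TOC_ENTRY node built by getTOCEntry above typically materializes as a
// TOC-relative load such as
//   addis r3, r2, .LC0@toc@ha
//   ld    r3, .LC0@toc@l(r3)
// where r2 is the dedicated TOC pointer (X2 above).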
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); return getTOCEntry(DAG, SDLoc(CP), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(CP), false, GA); } SDValue CPIHi = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); SDValue CPILo = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return getTOCEntry(DAG, SDLoc(JT), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(GA), false, GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); BlockAddressSDNode *BASDN = cast(Op); const BlockAddress *BA = BASDN->getBlockAddress(); // 64-bit SVR4 ABI code is always position-independent. // The actual BlockAddress is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); return getTOCEntry(DAG, SDLoc(BASDN), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME: TLS addresses currently use medium model code sequences, // which is the most useful form. Eventually support for small and // large models could be added if users need it, at the cost of // additional complexity. GlobalAddressSDNode *GA = cast(Op); if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool is64bit = Subtarget.isPPC64(); const Module *M = DAG.getMachineFunction().getFunction()->getParent(); PICLevel::Level picLevel = M->getPICLevel(); TLSModel::Model Model = getTargetMachine().getTLSModel(GV); if (Model == TLSModel::LocalExec) { SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_LO); SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, is64bit ? 
MVT::i64 : MVT::i32); SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } if (Model == TLSModel::InitialExec) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLS); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA); } else GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr); return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } if (Model == TLSModel::GeneralDynamic) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, GOTReg, TGA); } else { if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA); } if (Model == TLSModel::LocalDynamic) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, GOTReg, TGA); } else { if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA); SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); } llvm_unreachable("Unknown TLS model!"); } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast(Op); SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); return getTOCEntry(DAG, DL, true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, DL, false, GA); } SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); // If the global reference is actually to a non-lazy-pointer, we have to do an // extra load to get the address of the global. 
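// Sketch (the Darwin stub spelling below is illustrative): with MO_NLP_FLAG
// set, the Hi/Lo pair computes the address of a non-lazy-pointer slot, e.g.
//   lis r3, ha16(L_g$non_lazy_ptr)
//   lwz r3, lo16(L_g$non_lazy_ptr)(r3)
// which yields the address of _g, hence the extra load emitted below.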
if (MOHiFlag & PPCII::MO_NLP_FLAG) Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); return Ptr; } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc dl(Op); if (Op.getValueType() == MVT::v2i64) { // When the operands themselves are v2i64 values, we need to do something // special because VSX has no underlying comparison operations for these. if (Op.getOperand(0).getValueType() == MVT::v2i64) { // Equality can be handled by casting to the legal type for Altivec // comparisons, everything else needs to be expanded. if (CC == ISD::SETEQ || CC == ISD::SETNE) { return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getSetCC(dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), CC)); } return SDValue(); } // We handle most of these in the usual way. return Op; } // If we're comparing for equality to zero, expose the fact that this is // implemented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes. if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { if (C->isNullValue() && CC == ISD::SETEQ) { EVT VT = Op.getOperand(0).getValueType(); SDValue Zext = Op.getOperand(0); if (VT.bitsLT(MVT::i32)) { VT = MVT::i32; Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); } unsigned Log2b = Log2_32(VT.getSizeInBits()); SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, DAG.getConstant(Log2b, dl, MVT::i32)); return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); } // Leave comparisons against 0 and -1 alone for now, since they're usually // optimized. FIXME: revisit this when we can custom lower all setcc // optimizations. if (C->isAllOnesValue() || C->isNullValue()) return SDValue(); } // If we have an integer seteq/setne, turn it into a compare against zero // by xor'ing the rhs with the lhs, which is faster than setting a // condition register, reading it back out, and masking the correct bit. The // normal approach here uses sub to do this instead of xor. Using xor exposes // the result to other bit-twiddling opportunities. 
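// Worked example (illustrative): for an i32 "seteq %a, %b" this produces
// (setcc (xor %a, %b), 0, seteq), and the ctlz/srl expansion above then
// yields "cntlzw rT, rT" followed by "srwi rT, rT, 5"; cntlzw returns 32
// only for a zero input, so bit 5 is exactly the equality result and no
// condition-register read is needed.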
EVT LHSVT = Op.getOperand(0).getValueType(); if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { EVT VT = Op.getValueType(); SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), Op.getOperand(1)); return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); } return SDValue(); } SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc dl(Node); assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); // gpr_index SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, VAListPtr, MachinePointerInfo(SV), MVT::i8); InChain = GprIndex.getValue(1); if (VT == MVT::i64) { // Check if GprIndex is even SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, DAG.getConstant(1, dl, MVT::i32)); SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, DAG.getConstant(1, dl, MVT::i32)); // Align GprIndex to be even if it isn't GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, GprIndex); } // fpr index is 1 byte after gpr SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(1, dl, MVT::i32)); // fpr SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, FprPtr, MachinePointerInfo(SV), MVT::i8); InChain = FprIndex.getValue(1); SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(8, dl, MVT::i32)); SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(4, dl, MVT::i32)); // areas SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); InChain = OverflowArea.getValue(1); SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); InChain = RegSaveArea.getValue(1); // select overflow_area if index > 8 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); // adjustment constant gpr_index * 4/8 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(VT.isInteger() ? 4 : 8, dl, MVT::i32)); // OurReg = RegSaveArea + RegConstant SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, RegConstant); // Floating types are 32 bytes into RegSaveArea if (VT.isFloatingPoint()) OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, DAG.getConstant(32, dl, MVT::i32)); // increase {f,g}pr_index by 1 (or 2 if VT is i64) SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, MVT::i32)); InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, VT.isInteger() ? VAListPtr : FprPtr, MachinePointerInfo(SV), MVT::i8); // determine if we should load from reg_save_area or overflow_area SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); // increase overflow_area by 4/8 if gpr/fpr > 8 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, DAG.getConstant(VT.isInteger() ? 
4 : 8, dl, MVT::i32)); OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, OverflowAreaPlusN); InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, MachinePointerInfo(), MVT::i32); return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); // We have to copy the entire va_list struct: // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, false, MachinePointerInfo(), MachinePointerInfo()); } SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { return Op.getOperand(0); } SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = IntPtrTy; Entry.Node = Trmp; Args.push_back(Entry); // TrampSize == (isPPC64 ? 48 : 40); Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, isPPC64 ? MVT::i64 : MVT::i32); Args.push_back(Entry); Entry.Node = FPtr; Args.push_back(Entry); Entry.Node = Nest; Args.push_back(Entry); // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); std::pair CallResult = LowerCallTo(CLI); return CallResult.second; } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); SDLoc dl(Op); if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), MachinePointerInfo(SV)); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. // We suppose the given va_list is already allocated. // // typedef struct { // char gpr; /* index into the array of 8 GPRs // * stored in the register save area // * gpr=0 corresponds to r3, // * gpr=1 to r4, etc. // */ // char fpr; /* index into the array of 8 FPRs // * stored in the register save area // * fpr=0 corresponds to f1, // * fpr=1 to f2, etc. 
// */ // char *overflow_arg_area; // /* location on stack that holds // * the next overflow argument // */ // char *reg_save_area; // /* where r3:r10 and f1:f8 (if saved) // * are stored // */ // } va_list[1]; SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); uint64_t FrameOffset = PtrVT.getSizeInBits()/8; SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); uint64_t FPROffset = 1; SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), MachinePointerInfo(SV), MVT::i8); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, MachinePointerInfo(SV, nextOffset), MVT::i8); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, MachinePointerInfo(SV, nextOffset)); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers return DAG.getStore(thirdStore, dl, FR, nextPtr, MachinePointerInfo(SV, nextOffset)); } #include "PPCGenCallingConv.inc" // Function whose sole purpose is to kill compiler warnings // stemming from unused functions included from PPCGenCallingConv.inc. CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; } bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { return true; } bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumArgRegs = array_lengthof(ArgRegs); unsigned RegNum = State.getFirstUnallocated(ArgRegs); // Skip one register if the first unallocated register has an even register // number and there are still argument registers available which have not been // allocated yet. RegNum is actually an index into ArgRegs, which means we // need to skip a register if RegNum is odd. if (RegNum != NumArgRegs && RegNum % 2 == 1) { State.AllocateReg(ArgRegs[RegNum]); } // Always return false here, as this function only makes sure that the first // unallocated register has an odd register number and does not actually // allocate a register for the current argument. 
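// Example (illustrative): if only r3 has been allocated and the next
// argument is an i64 needing an aligned pair, RegNum is 1 (r4); r4 is
// allocated here purely as a skip, so the value lands in the aligned pair
// r5/r6 as the 32-bit SVR4 ABI requires.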
return false; } bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; const unsigned NumArgRegs = array_lengthof(ArgRegs); unsigned RegNum = State.getFirstUnallocated(ArgRegs); // If there is only one Floating-point register left we need to put both f64 // values of a split ppc_fp128 value on the stack. if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { State.AllocateReg(ArgRegs[RegNum]); } // Always return false here, as this function only makes sure that the two f64 // values a ppc_fp128 value is split into are both passed in registers or both // passed on the stack and does not actually allocate a register for the // current argument. return false; } /// FPR - The set of FP registers that should be allocated for arguments, /// on Darwin. static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13}; /// QFPR - The set of QPX registers that should be allocated for arguments. static const MCPhysReg QFPR[] = { PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { unsigned ArgSize = ArgVT.getStoreSize(); if (Flags.isByVal()) ArgSize = Flags.getByValSize(); // Round up to multiples of the pointer size, except for array members, // which are always packed. if (!Flags.isInConsecutiveRegs()) ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; return ArgSize; } /// CalculateStackSlotAlignment - Calculates the alignment of this argument /// on the stack. static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { unsigned Align = PtrByteSize; // Altivec parameters are padded to a 16 byte boundary. if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || ArgVT == MVT::v1i128) Align = 16; // QPX vector types stored in double-precision are padded to a 32 byte // boundary. else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) Align = 32; // ByVal parameters are aligned as requested. if (Flags.isByVal()) { unsigned BVAlign = Flags.getByValAlign(); if (BVAlign > PtrByteSize) { if (BVAlign % PtrByteSize != 0) llvm_unreachable( "ByVal alignment is not a multiple of the pointer size"); Align = BVAlign; } } // Array members are always packed to their original alignment. if (Flags.isInConsecutiveRegs()) { // If the array member was split into multiple registers, the first // needs to be aligned to the size of the full type. (Except for // ppcf128, which is only aligned as its f64 components.) if (Flags.isSplit() && OrigVT != MVT::ppcf128) Align = OrigVT.getStoreSize(); else Align = ArgVT.getStoreSize(); } return Align; } /// CalculateStackSlotUsed - Return whether this argument will use its /// stack slot (instead of being passed in registers). ArgOffset, /// AvailableFPRs, and AvailableVRs must hold the current argument /// position, and will be updated to account for this argument. 
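/// For example, with the usual 64-bit ELFv1 numbers (LinkageSize = 48 and a
/// parameter save area of 8 doublewords = 64 bytes), the ninth doubleword
/// argument would start at offset 112 = 48 + 64 and is therefore flagged as
/// using memory unless it can still be passed in an FPR or VR.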
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs, bool HasQPX) { bool UseMemory = false; // Respect alignment of argument on the stack. unsigned Align = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; // If there's no space left in the argument save area, we must // use memory (this check also catches zero-sized arguments). if (ArgOffset >= LinkageSize + ParamAreaSize) UseMemory = true; // Allocate argument on the stack. ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // If we overran the argument save area, we must use memory // (this check catches arguments passed partially in memory) if (ArgOffset > LinkageSize + ParamAreaSize) UseMemory = true; // However, if the argument is actually passed in an FPR or a VR, // we don't use memory after all. if (!Flags.isByVal()) { if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || // QPX registers overlap with the scalar FP registers. (HasQPX && (ArgVT == MVT::v4f32 || ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1))) if (AvailableFPRs > 0) { --AvailableFPRs; return false; } if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || ArgVT == MVT::v1i128) if (AvailableVRs > 0) { --AvailableVRs; return false; } } return UseMemory; } /// EnsureStackAlignment - Round stack frame size up from NumBytes to /// ensure minimum alignment required for target. static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes) { unsigned TargetAlign = Lowering->getStackAlignment(); unsigned AlignMask = TargetAlign - 1; NumBytes = (NumBytes + AlignMask) & ~AlignMask; return NumBytes; } SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { if (Subtarget.isSVR4ABI()) { if (Subtarget.isPPC64()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); else return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } else { return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } } SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // 32-bit SVR4 ABI Stack Frame Layout: // +-----------------------------------+ // +--> | Back chain | // | +-----------------------------------+ // | | Floating-point register save area | // | +-----------------------------------+ // | | General register save area | // | +-----------------------------------+ // | | CR save word | // | +-----------------------------------+ // | | VRSAVE save word | // | +-----------------------------------+ // | | Alignment padding | // | +-----------------------------------+ // | | Vector register save area | // | +-----------------------------------+ // | | Local variable space | // | +-----------------------------------+ // | | Parameter list area | // | +-----------------------------------+ // | | LR save word | // | +-----------------------------------+ // SP--> +--- | 
Back chain | // +-----------------------------------+ // // Specifications: // System V Application Binary Interface PowerPC Processor Supplement // AltiVec Technology Programming Interface Manual MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 4; // Assign locations to all of the incoming arguments. SmallVector ArgLocs; PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, PtrByteSize); if (useSoftFloat()) CCInfo.PreAnalyzeFormalArguments(Ins); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); CCInfo.clearWasPPCF128(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; // Arguments stored in registers. if (VA.isRegLoc()) { const TargetRegisterClass *RC; EVT ValVT = VA.getValVT(); switch (ValVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); case MVT::i1: case MVT::i32: RC = &PPC::GPRCRegClass; break; case MVT::f32: if (Subtarget.hasP8Vector()) RC = &PPC::VSSRCRegClass; else RC = &PPC::F4RCRegClass; break; case MVT::f64: if (Subtarget.hasVSX()) RC = &PPC::VSFRCRegClass; else RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: RC = &PPC::VRRCRegClass; break; case MVT::v4f32: RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; break; case MVT::v2f64: case MVT::v2i64: RC = &PPC::VSHRCRegClass; break; case MVT::v4f64: RC = &PPC::QFRCRegClass; break; case MVT::v4i1: RC = &PPC::QBRCRegClass; break; } // Transform the arguments stored in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT == MVT::i1 ? MVT::i32 : ValVT); if (ValVT == MVT::i1) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); InVals.push_back(ArgValue); } else { // Argument stored in memory. assert(VA.isMemLoc()); unsigned ArgSize = VA.getLocVT().getStoreSize(); int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), isImmutable); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back( DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); } } // Assign locations to all of the incoming aggregate by value arguments. // Aggregates passed by value are stored in the local variable space of the // caller's stack frame, right above the parameter list area. SmallVector ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); MinReservedArea = std::max(MinReservedArea, LinkageSize); // Set the size that is at least reserved in caller of this function. 
Tail // call optimized function's reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); SmallVector MemOps; // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { static const MCPhysReg GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); static const MCPhysReg FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; unsigned NumFPArgRegs = array_lengthof(FPArgRegs); if (useSoftFloat()) NumFPArgRegs = 0; FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); // Make room for NumGPArgRegs and NumFPArgRegs. int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; FuncInfo->setVarArgsStackOffset( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, CCInfo.getNextStackOffset(), true)); FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // The fixed integer arguments of a variadic function are stored to the // VarArgsFrameIndex on the stack so that they may be loaded by // dereferencing the result of va_next. for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); if (!VReg) VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 // is set. // The double arguments are stored to the VarArgsFrameIndex // on the stack. for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); if (!VReg) VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. 
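// A small illustration: an i32 that the caller sign-extended arrives in the
// low half of an i64 GPR; wrapping the copy in AssertSext before the TRUNCATE
// tells later DAG combines that the upper 32 bits already hold copies of the
// sign bit.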
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, SDValue ArgVal, const SDLoc &dl) const { if (Flags.isSExt()) ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); else if (Flags.isZExt()) ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 8; unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; static const MCPhysReg VSRH[] = { PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; const unsigned Num_GPR_Regs = array_lengthof(GPR); - const unsigned Num_FPR_Regs = 13; + const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; const unsigned Num_VR_Regs = array_lengthof(VR); const unsigned Num_QFPR_Regs = Num_FPR_Regs; // Do a first pass over the arguments to determine whether the ABI // guarantees that our caller has allocated the parameter save area // on its stack frame. In the ELFv1 ABI, this is always the case; // in the ELFv2 ABI, it is true if this is a vararg function or if // any parameter is located in a stack slot. bool HasParameterArea = !isELFv2ABI || isVarArg; unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = Num_FPR_Regs; unsigned AvailableVRs = Num_VR_Regs; for (unsigned i = 0, e = Ins.size(); i != e; ++i) { if (Ins[i].Flags.isNest()) continue; if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs, Subtarget.hasQPX())) HasParameterArea = true; } // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. 
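  // Because the first GPR argument slot shadows the first doubleword after
  // the linkage area, a byte offset converts to a GPR index below as
  // (ArgOffset - LinkageSize) / PtrByteSize; e.g. offset 64 with a 48-byte
  // linkage area maps to index 2, i.e. X5.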
unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; unsigned &QFPR_idx = FPR_idx; SmallVector MemOps; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; EVT OrigVT = Ins[ArgNo].ArgVT; unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Ins[ArgNo].isOrigArg()) { std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[ArgNo].getOrigArgIndex(); } // We re-align the argument offset for each argument, except when using the // fast calling convention, when we need to make sure we do that only when // we'll actually use a stack slot. unsigned CurArgOffset, Align; auto ComputeArgOffset = [&]() { /* Respect alignment of argument on the stack. */ Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; CurArgOffset = ArgOffset; }; if (CallConv != CallingConv::Fast) { ComputeArgOffset(); /* Compute GPR index associated with argument offset. */ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx = std::min(GPR_idx, Num_GPR_Regs); } // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); if (CallConv == CallingConv::Fast) ComputeArgOffset(); // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // Empty aggregate parameters do not take up registers. Examples: // struct { } a; // union { } b; // int c[0]; // etc. However, we have to provide a place-holder in InVals, so // pretend we have an 8-byte item at the current address for that // purpose. if (!ObjSize) { int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); continue; } // Create a stack object covering all stack doublewords occupied // by the argument. If the argument is (fully or partially) on // the stack, or if the argument is fully in registers but the // caller has allocated the parameter save anyway, we can refer // directly to the caller's stack frame. Otherwise, create a // local copy in our own frame. int FI; if (HasParameterArea || ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true); else FI = MFI->CreateStackObject(ArgSize, Align, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); // Handle aggregates smaller than 8 bytes. if (ObjSize < PtrByteSize) { // The value of the object is its address, which differs from the // address of the enclosing doubleword on big-endian systems. SDValue Arg = FIN; if (!isLittleEndian) { SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); } InVals.push_back(Arg); if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; if (ObjSize==1 || ObjSize==2 || ObjSize==4) { EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? 
MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, MachinePointerInfo(&*FuncArg), ObjType); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area // slot. Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg)); } MemOps.push_back(Store); } // Whether we copied from a register or not, advance the offset // into the parameter save area by a full doubleword. ArgOffset += PtrByteSize; continue; } // The value of the object is its address, which is the address of // its first stack doubleword. InVals.push_back(FIN); // Store whatever pieces of the object are in registers to memory. for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { if (GPR_idx == Num_GPR_Regs) break; unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Addr = FIN; if (j) { SDValue Off = DAG.getConstant(j, dl, PtrVT); Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; } ArgOffset += ArgSize; continue; } switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: case MVT::i64: if (Flags.isNest()) { // The 'nest' parameter, if any, is passed in R11. unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); break; } // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; ArgSize = PtrByteSize; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 8; break; case MVT::f32: case MVT::f64: // These can be scalar arguments or elements of a float array type // passed directly. The latter are used to implement ELFv2 homogenous // float aggregates. if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasP8Vector() ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 // once we support fp <-> gpr moves. // This can only ever happen in the presence of f32 array types, // since otherwise we never run out of FPRs before running out // of GPRs. unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::f32) { if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 
4 : 0)) ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, DAG.getConstant(32, dl, MVT::i32)); ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); } ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } // When passing an array of floats, the array occupies consecutive // space in the argument area; only round up to the next doubleword // at the end of the array. Otherwise, each float takes 8 bytes. if (CallConv != CallingConv::Fast || needsLoad) { ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; ArgOffset += ArgSize; if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. if (VR_idx != Num_VR_Regs) { unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++VR_idx; } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 16; break; } // not QPX assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && "Invalid QPX parameter type"); /* fall through */ case MVT::v4f64: case MVT::v4i1: // QPX vectors are treated like their scalar floating-point subregisters // (except that they're larger). unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32; if (QFPR_idx != Num_QFPR_Regs) { const TargetRegisterClass *RC; switch (ObjectVT.getSimpleVT().SimpleTy) { case MVT::v4f64: RC = &PPC::QFRCRegClass; break; case MVT::v4f32: RC = &PPC::QSRCRegClass; break; default: RC = &PPC::QBRCRegClass; break; } unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++QFPR_idx; } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += Sz; break; } // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type. if (needsLoad) { if (ObjSize < ArgSize && !isLittleEndian) CurArgOffset += ArgSize - ObjSize; int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); } InVals.push_back(ArgVal); } // Area that is at least reserved in the caller of this function. unsigned MinReservedArea; if (HasParameterArea) MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); else MinReservedArea = LinkageSize; // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. 
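  // In the vararg case below, every GPR that did not receive a named argument
  // is spilled to consecutive doublewords so va_arg can simply walk memory;
  // e.g. a callee with two named integer arguments spills X5..X10.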
if (isVarArg) { int Depth = ArgOffset; FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(PtrByteSize, Depth, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx < Num_GPR_Regs; ++GPR_idx) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } SDValue PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned ArgOffset = LinkageSize; // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned Num_GPR_Regs = array_lengthof(GPR_32); - const unsigned Num_FPR_Regs = 13; + const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; const unsigned Num_VR_Regs = array_lengthof( VR); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have // too many vectors to fit in registers, something that only occurs in // constructed examples:), but we have to walk the arglist to figure // that out...for the pathological case, compute VecArgOffset as the // start of the vector parameter area. Computing VecArgOffset is the // entire point of the following loop. unsigned VecArgOffset = ArgOffset; if (!isVarArg && !isPPC64) { for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { EVT ObjectVT = Ins[ArgNo].VT; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of regs. 
unsigned ObjSize = Flags.getByValSize(); unsigned ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; VecArgOffset += ArgSize; continue; } switch(ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: case MVT::f32: VecArgOffset += 4; break; case MVT::i64: // PPC64 case MVT::f64: // FIXME: We are guaranteed to be !isPPC64 at this point. // Does MVT::i64 apply? VecArgOffset += 8; break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: // Nothing to do, we're only looking at Nonvector args here. break; } } } // We've found where the vector parameter area in memory is. Skip the // first 12 parameters; these don't use that memory. VecArgOffset = ((VecArgOffset+15)/16)*16; VecArgOffset += 12*16; // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. SmallVector MemOps; unsigned nAltivecParamsAtEnd = 0; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Ins[ArgNo].isOrigArg()) { std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[ArgNo].getOrigArgIndex(); } unsigned CurArgOffset = ArgOffset; // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { if (isVarArg || isPPC64) { MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += CalculateStackSlotSize(ObjectVT, Flags, PtrByteSize); } else nAltivecParamsAtEnd++; } else // Calculate min reserved area. MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, Flags, PtrByteSize); // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // Objects of size 1 and 2 are right justified, everything else is // left justified. This means the memory address is adjusted forwards. if (ObjSize==1 || ObjSize==2) { CurArgOffset = CurArgOffset + (4 - ObjSize); } // The value of the object is its address. int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg), ObjType); MemOps.push_back(Store); ++GPR_idx; } ArgOffset += PtrByteSize; continue; } for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { // Store whatever pieces of the object are in registers // to memory. ArgOffset will be the address of the beginning // of the object. 
if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; } else { ArgOffset += ArgSize - (ArgOffset-CurArgOffset); break; } } continue; } switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); if (ObjectVT == MVT::i1) ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } // All int arguments reserve stack space in the Darwin ABI. ArgOffset += PtrByteSize; break; } // FALLTHROUGH case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } // All int arguments reserve stack space in the Darwin ABI. ArgOffset += 8; break; case MVT::f32: case MVT::f64: // Every 4 bytes of argument space consumes one of the GPRs available for // argument passing. if (GPR_idx != Num_GPR_Regs) { ++GPR_idx; if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) ++GPR_idx; } if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; } else { needsLoad = true; } // All FP arguments reserve stack space in the Darwin ABI. ArgOffset += isPPC64 ? 8 : ObjSize; break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { ArgOffset += PtrByteSize; if (GPR_idx != Num_GPR_Regs) GPR_idx++; } ArgOffset += 16; GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? } ++VR_idx; } else { if (!isVarArg && !isPPC64) { // Vectors go after all the nonvectors. CurArgOffset = VecArgOffset; VecArgOffset += 16; } else { // Vectors are aligned. ArgOffset = ((ArgOffset+15)/16)*16; CurArgOffset = ArgOffset; ArgOffset += 16; } needsLoad = true; } break; } // We need to load the argument to a virtual register if we determined above // that we ran out of physical registers of the appropriate type. 
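  // Right-justification example for the fixed object created below: on
  // big-endian PPC64 Darwin, an i32 that spilled occupies the low half of its
  // 8-byte slot, so the load address is CurArgOffset + (ArgSize - ObjSize)
  // = CurArgOffset + 4.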
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize),
                                      isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in the caller of this function.
  // Tail-call-optimized functions' reserved stack space needs to be aligned
  // so that taking the difference between two stack areas will result in an
  // aligned stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}

static bool isFunctionGlobalAddress(SDValue Callee);

static bool
resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
  // If !G, Callee can be an external symbol.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  if (!G) return false;

  const GlobalValue *GV = G->getGlobal();

  if (GV->isDeclaration()) return false;

  switch(GV->getLinkage()) {
  default: llvm_unreachable("unknown linkage type");
  case GlobalValue::AvailableExternallyLinkage:
  case GlobalValue::ExternalWeakLinkage:
    return false;

  // Callee with weak linkage is allowed if it has hidden or protected
  // visibility
  case GlobalValue::LinkOnceAnyLinkage:
  case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
  case GlobalValue::WeakAnyLinkage:
  case GlobalValue::WeakODRLinkage:     // e.g. c++ template instantiation
    if (GV->hasDefaultVisibility())
      return false;

  case GlobalValue::ExternalLinkage:
  case GlobalValue::InternalLinkage:
  case GlobalValue::PrivateLinkage:
    break;
  }

  // With '-fPIC', calling a default-visibility function requires a 'nop' to
  // be inserted after the call, whether or not the function resides in the
  // same module, so we treat it as being in a different module.
  if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
    return false;

  return true;
}

static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      return true;
  }
  return false;
}

static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
  if (CS->arg_size() != CallerFn->getArgumentList().size())
    return false;

  ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
  ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
    //      }
    // The first argument of the callee is undef and has the same type as the
    // corresponding caller argument.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}

bool
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                    SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    ImmutableCallSite *CS,
                                    bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();

  // Tail or Sibling call optimization (TCO/SCO) requires the callee and the
  // caller to use the same calling convention.
  if (CallerCC != CalleeCC) return false;

  // SCO supports only the C and Fast calling conventions.
  if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
    return false;

  // Callers with byval parameters are not supported.
  if (std::any_of(Ins.begin(), Ins.end(),
                  [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callees with byval parameters are not supported either.
  // Note: This is a quick workaround, because in some cases, e.g. when the
  // caller's stack size is larger than the callee's, we are still able to
  // apply sibling call optimization.
  // See: https://reviews.llvm.org/D23441#513574
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // No TCO/SCO on indirect calls because the caller has to restore its TOC.
  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Check if Callee resides in the same module, because for now, the PPC64
  // SVR4 ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides
  // in another module.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If the callee uses the same argument list as the caller, we can apply
  // SCO in this case. If not, we need to check whether the callee needs
  // stack for passing arguments.
  if (!hasSameArgumentList(MF.getFunction(), CS) &&
      needStackSlotPassParameters(Subtarget, Outs)) {
    return false;
  }

  return true;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing by val parameters are not supported.
    for (unsigned i = 0; i != Ins.size(); i++) {
       ISD::ArgFlagsTy Flags = Ins[i].Flags;
       if (Flags.isByVal()) return false;
    }

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      return G->getGlobal()->hasHiddenVisibility()
          || G->getGlobal()->hasProtectedVisibility();
  }

  return false;
}

/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
      .getNode();
}

namespace {

struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int FrameIdx;

  TailCallArgumentInfo() : FrameIdx(0) {}
};
}

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
MemOpChains.push_back(DAG.getStore( Chain, dl, Arg, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); } } /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to /// the appropriate stack slot for the tail call optimized function call. static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl) { if (SPDiff) { // Calculate the new stack slot for the return address. MachineFunction &MF = DAG.getMachineFunction(); const PPCSubtarget &Subtarget = MF.getSubtarget(); const PPCFrameLowering *FL = Subtarget.getFrameLowering(); bool isPPC64 = Subtarget.isPPC64(); int SlotSize = isPPC64 ? 8 : 4; int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewRetAddrLoc, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, MachinePointerInfo::getFixedStack(MF, NewRetAddr)); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. if (Subtarget.isDarwinABI()) { int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), NewFPIdx)); } } return Chain; } /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate /// the position of the argument. static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; Info.Arg = Arg; Info.FrameIdxOp = FIN; Info.FrameIdx = FI; TailCallArguments.push_back(Info); } /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address /// stack slot. Returns the chain as result and the loaded frame pointers in /// LROpOut/FPOpout. Used when tail calling. SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, const SDLoc &dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. if (Subtarget.isDarwinABI()) { FPOpOut = getFramePointerFrameIndex(DAG); FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); Chain = SDValue(FPOpOut.getNode(), 1); } } return Chain; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" of size "Size". Alignment information is /// specified by the specific parameter attribute. The copy will be passed as /// a byval function parameter. /// Sometimes what we are copying is the end of a larger object, the part that /// does not fit in registers. 
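/// For instance (an illustrative case, not checked here), a 12-byte byval
/// whose first doubleword was passed in the last available GPR would have
/// this memcpy cover only the trailing bytes that spilled to the stack.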
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, false, MachinePointerInfo(),
                       MachinePointerInfo());
}

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case
/// of tail calls.
static void LowerMemOpCallTo(
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
    // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}

static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}

// Is this global address that of a function that can be called by name? (as
// opposed to something that must hold a descriptor for an indirect call).
static bool isFunctionGlobalAddress(SDValue Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
        Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
      return false;

    return G->getGlobal()->getValueType()->isFunctionTy();
  }

  return false;
}

static unsigned
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
            bool isPatchPoint, bool hasNest,
            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
            ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
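  // The code below resolves Callee in one of three ways: an absolute address
  // that fits the BLA immediate field, a direct call through a
  // TargetGlobalAddress/TargetExternalSymbol node, or, failing both, an
  // indirect call materialized as an MTCTR/BCTRL pair.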
unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; if (!isSVR4ABI || !isPPC64) if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { // If this is an absolute destination address, use the munged value. Callee = SDValue(Dest, 0); needIndirectCall = false; } // PC-relative references to external symbols should go through $stub, unless // we're building with the leopard linker or later, which automatically // synthesizes these stubs. const TargetMachine &TM = DAG.getTarget(); const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); const GlobalValue *GV = nullptr; if (auto *G = dyn_cast(Callee)) GV = G->getGlobal(); bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; if (isFunctionGlobalAddress(Callee)) { GlobalAddressSDNode *G = cast(Callee); // A call to a TLS address is actually an indirect call to a // thread-specific pointer. unsigned OpFlags = 0; if (UsePlt) OpFlags = PPCII::MO_PLT; // If the callee is a GlobalAddress/ExternalSymbol node (quite common, // every direct call is) turn it into a TargetGlobalAddress / // TargetExternalSymbol node so that legalize doesn't hack it. Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, Callee.getValueType(), 0, OpFlags); needIndirectCall = false; } if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { unsigned char OpFlags = 0; if (UsePlt) OpFlags = PPCII::MO_PLT; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), OpFlags); needIndirectCall = false; } if (isPatchPoint) { // We'll form an invalid direct call when lowering a patchpoint; the full // sequence for an indirect call is complicated, and many of the // instructions introduced might have side effects (and, thus, can't be // removed later). The call itself will be removed as soon as the // argument/return lowering is complete, so the fact that it has the wrong // kind of operands should not really matter. needIndirectCall = false; } if (needIndirectCall) { // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; if (isSVR4ABI && isPPC64 && !isELFv2ABI) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). // The function descriptor is a three doubleword structure with the // following fields: function entry point, TOC base address and // environment pointer. // Thus for a call through a function pointer, the following actions need // to be performed: // 1. Save the TOC of the caller in the TOC save area of its stack // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). // 2. Load the address of the function entry point from the function // descriptor. // 3. Load the TOC of the callee from the function descriptor into r2. // 4. Load the environment pointer from the function descriptor into // r11. // 5. Branch to the function entry point address. // 6. On return of the callee, the TOC of the caller needs to be // restored (this is done in FinishCall()). // // The loads are scheduled at the beginning of the call sequence, and the // register copies are flagged together to ensure that no other // operations can be scheduled in between. E.g. 
      // Load the address of the function entry point from the function
      // descriptor.
      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
      if (LDChain.getValueType() == MVT::Glue)
        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);

      auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                          ? MachineMemOperand::MOInvariant
                          : MachineMemOperand::MONone;

      MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
                                        /* Alignment = */ 8, MMOFlags);

      // Load environment pointer into r11.
      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
                      /* Alignment = */ 8, MMOFlags);

      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue TOCPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
                      /* Alignment = */ 8, MMOFlags);

      setUsesTOCBasePtr(DAG);
      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, InFlag);
      Chain = TOCVal.getValue(0);
      InFlag = TOCVal.getValue(1);

      // If the function call has an explicit 'nest' parameter, it takes the
      // place of the environment pointer.
      if (!hasNest) {
        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                          InFlag);
        Chain = EnvVal.getValue(0);
        InFlag = EnvVal.getValue(1);
      }

      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer)
    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  if (isSVR4ABI && isPPC64 && !isPatchPoint) {
    setUsesTOCBasePtr(DAG);
    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
  }

  return CallOpc;
}

static bool isLocalCall(const SDValue &Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    return G->getGlobal()->isStrongDefinitionForLinker();
  return false;
}

SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val = DAG.getCopyFromReg(Chain, dl,
                                     VA.getLocReg(), VA.getLocVT(), InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

SDValue PPCTargetLowering::FinishCall(
    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
  std::vector<EVT> NodeTys;
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
                                 SPDiff, isTailCall, isPatchPoint, hasNest,
                                 RegsToPass, Ops, NodeTys, CS, Subtarget);

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CallConv == CallingConv::Fast &&
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit tail call.
  if (isTailCall) {
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
           "Expecting a global address, external symbol, absolute value or "
           "register");

    DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
  }

  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI. At link time, if caller and callee are in a different module and
  // thus have a different TOC, the call will be replaced with a call to a stub
  // function which saves the current TOC, loads the TOC of the callee and
  // branches to the callee. The NOP will be replaced with a load instruction
  // which restores the TOC of the caller from the TOC save slot of the current
  // stack frame. If caller and callee belong to the same module (and have the
  // same TOC), the NOP will remain unchanged.
  if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
      !isPatchPoint) {
    if (CallOpc == PPCISD::BCTRL) {
      // This is a call through a function pointer.
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      CallOpc = PPCISD::BCTRL_LOAD_TOC;

      EVT PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);

      // The address needs to go after the chain input but before the flag (or
      // any other variadic arguments).
      Ops.insert(std::next(Ops.begin()), AddTOC);
    } else if ((CallOpc == PPCISD::CALL) &&
               (!isLocalCall(Callee) ||
                DAG.getTarget().getRelocationModel() == Reloc::PIC_))
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
  }
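  // At link time a cross-TOC call then looks roughly like (a sketch; the
  // exact save offset is ABI-dependent, e.g. 40(r1) under ELFv1 and 24(r1)
  // under ELFv2):
  //   bl callee    ->   bl callee_plt_stub
  //   nop          ->   ld r2, 40(r1)
  // Same-TOC calls keep the nop unchanged.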
  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             InFlag, dl);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}

SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG                     = CLI.DAG;
  SDLoc &dl                             = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  bool &isTailCall                      = CLI.IsTailCall;
  CallingConv::ID CallConv              = CLI.CallConv;
  bool isVarArg                         = CLI.IsVarArg;
  bool isPatchPoint                     = CLI.IsPatchPoint;
  ImmutableCallSite *CS                 = CLI.CS;

  if (isTailCall) {
    if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall()))
      isTailCall = false;
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
      isTailCall =
        IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
                                                 isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                                                     isVarArg, Ins, DAG);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      assert(isa<GlobalAddressSDNode>(Callee) &&
             "Callee should be an llvm::Function object.");
      DEBUG(
        const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
        const unsigned Width = 80 - strlen("TCO caller: ")
                                  - strlen(", callee linkage: 0, 0");
        dbgs() << "TCO caller: "
               << left_justify(DAG.getMachineFunction().getName(), Width)
               << ", callee linkage: "
               << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
      );
    }
  }

  if (!isTailCall && CS && CS->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, isPatchPoint, Outs, OutVals, Ins,
                          dl, DAG, InVals, CS);
}

SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // alloc and for restoring the caller's stack pointer in this function's
  // epilog. This is done because a tail-called function might overwrite the
  // value in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrByteSize);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // variable space where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(NumBytes, dl, true), dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the caller's stack
      // frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart =
        DAG.getCALLSEQ_START(MemcpyCall,
                             CallSeqStart.getNode()->getOperand(1),
                             SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    if (VA.isRegLoc()) {
      if (Arg.getValueType() == MVT::i1)
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);

      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!isTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }
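  // Background on the CR6 convention below: the 32-bit SVR4 ABI uses CR bit 6
  // as an implicit argument on calls to varargs routines. It must be set when
  // floating-point values were passed in registers, so the callee's va_start
  // code knows whether the FPR argument registers need to be saved.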
  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (isVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs.
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
    SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
    SelectionDAG &DAG, const SDLoc &dl) const {
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                        CallSeqStart.getNode()->getOperand(0),
                        Flags, DAG, dl);
  // The MEMCPY must go outside the CALLSEQ_START..END.
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                             CallSeqStart.getNode()->getOperand(1),
                             SDLoc(MemcpyCall));
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                         NewCallSeqStart.getNode());
  return NewCallSeqStart;
}

SDValue PPCTargetLowering::LowerCall_64SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();
  bool hasNest = false;
  bool IsSibCall = false;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
    IsSibCall = true;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // alloc and for restoring the caller's stack pointer in this function's
  // epilog. This is done because a tail-called function might overwrite the
  // value in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  static const MCPhysReg VSRH[] = {
    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
    PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);
  const unsigned NumQFPRs = NumFPRs;
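  // For orientation: the parameter save area begins immediately after the
  // linkage area, so the first stack-passed doubleword lives at 48(r1) under
  // ELFv1 and at 32(r1) under ELFv2; ArgOffset below tracks that running
  // offset, re-aligned per argument by CalculateStackSlotAlignment().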
  // When using the fast calling convention, we don't provide backing for
  // arguments that will be in registers.
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    if (Flags.isNest())
      continue;

    if (CallConv == CallingConv::Fast) {
      if (Flags.isByVal())
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
      else
        switch (ArgVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unexpected ValueType for argument!");
        case MVT::i1:
        case MVT::i32:
        case MVT::i64:
          if (++NumGPRsUsed <= NumGPRs)
            continue;
          break;
        case MVT::v4i32:
        case MVT::v8i16:
        case MVT::v16i8:
        case MVT::v2f64:
        case MVT::v2i64:
        case MVT::v1i128:
          if (++NumVRsUsed <= NumVRs)
            continue;
          break;
        case MVT::v4f32:
          // When using QPX, this is handled like a FP register, otherwise, it
          // is an Altivec register.
          if (Subtarget.hasQPX()) {
            if (++NumFPRsUsed <= NumFPRs)
              continue;
          } else {
            if (++NumVRsUsed <= NumVRs)
              continue;
          }
          break;
        case MVT::f32:
        case MVT::f64:
        case MVT::v4f64: // QPX
        case MVT::v4i1:  // QPX
          if (++NumFPRsUsed <= NumFPRs)
            continue;
          break;
        }
    }

    /* Respect alignment of argument on the stack.  */
    unsigned Align =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = ((NumBytes + Align - 1) / Align) * Align;

    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  }

  unsigned NumBytesActuallyUsed = NumBytes;

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs. Because we cannot tell if this is needed on the caller side, we
  // have to conservatively assume that it is needed.  As such, make sure we
  // have at least enough stack space for the caller to store the 8 GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  int SPDiff = 0;

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  if (!IsSibCall)
    SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getIntPtrConstant(NumBytes, dl, true), dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    auto ComputePtrOff = [&]() {
      /* Respect alignment of argument on the stack.  */
      unsigned Align =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;

      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    };

    if (CallConv != CallingConv::Fast) {
      ComputePtrOff();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);
    }

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      if (CallConv == CallingConv::Fast)
        ComputePtrOff();

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }

      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
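      // Example of the right-justification performed below: for
      //   struct x { short a; char b; };   // Size == 4
      // the aggregate is fetched with an i32 extending load, so its bytes end
      // up in the low-order (rightmost) word of the 64-bit GPR, which is
      // where the big-endian ABI expects small aggregates to sit.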
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load =
            DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
        hasNest = true;
        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += PtrByteSize;
      }
      if (CallConv != CallingConv::Fast)
        ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
        // FIXME: We could avoid these f32 <-> gpr moves.
        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
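      // For a homogeneous float aggregate such as float f[4] under ELFv2, the
      // elements land in successive FPRs; once GPRs come into play (varargs,
      // or FPRs exhausted), the code above packs element pairs into one
      // doubleword GPR via BUILD_PAIR -- e.g. f[0] and f[1] sharing a single
      // register -- with a lone final element shifted to the correct half.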
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) *
                      PtrByteSize;
      }
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogeneous
        // vector aggregates.

        // For a varargs call, named arguments go into VRs or on the stack as
        // usual; unnamed arguments always go to the stack or the corresponding
        // GPRs when within range.  For now, we always put the value in both
        // locations (or even all three).
        if (isVarArg) {
          // We could elide this store in the case where the object fits
          // entirely in R registers.  Maybe later.
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);
          if (VR_idx != NumVRs) {
            SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                       MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));

            unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                             Arg.getSimpleValueType() == MVT::v2i64) ?
                            VSRH[VR_idx] : VR[VR_idx];
            ++VR_idx;

            RegsToPass.push_back(std::make_pair(VReg, Load));
          }
          ArgOffset += 16;
          for (unsigned i=0; i<16; i+=PtrByteSize) {
            if (GPR_idx == NumGPRs)
              break;
            SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                     DAG.getConstant(i, dl, PtrVT));
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          break;
        }

        // Non-varargs Altivec params go into VRs or on the stack.
        if (VR_idx != NumVRs) {
          unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                           Arg.getSimpleValueType() == MVT::v2i64) ?
                          VSRH[VR_idx] : VR[VR_idx];
          ++VR_idx;
          RegsToPass.push_back(std::make_pair(VReg, Arg));
        } else {
          if (CallConv == CallingConv::Fast)
            ComputePtrOff();

          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           true, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          if (CallConv == CallingConv::Fast)
            ArgOffset += 16;
        }

        if (CallConv != CallingConv::Fast)
          ArgOffset += 16;
        break;
      } // not QPX

      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");

      /* fall through */
    case MVT::v4f64:
    case MVT::v4i1: {
      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (QFPR_idx != NumQFPRs) {
          SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
                                     Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
        }
        ArgOffset += (IsF32 ? 16 : 32);
        for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs QPX params go into registers or on the stack.
      if (QFPR_idx != NumQFPRs) {
        RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += (IsF32 ? 16 : 32);
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += (IsF32 ? 16 : 32);
      break;
    }
    }
  }

  assert(NumBytesActuallyUsed == ArgOffset);
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See PrepareCall() for more information about calls through function
  // pointers in the 64-bit SVR4 ABI.
  if (!isTailCall && !isPatchPoint &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee)) {
    // Load r2 into a virtual register and store it to the TOC save area.
    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
    // TOC save area offset.
    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(),
                                     TOCSaveOffset));
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !isPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall && !IsSibCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
                    DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
                    SPDiff, NumBytes, Ins, InVals, CS);
}

SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {
  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // alloc and for restoring the caller's stack pointer in this function's
  // epilog. This is done because a tail-called function might overwrite the
  // value in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
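  // Layout of that linkage area for reference (32-bit shown; the 64-bit
  // layout is the same with doubleword slots): saved SP at 0(SP), CR at
  // 4(SP), LR at 8(SP), and three reserved words at 12/16/20(SP).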
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end;
  // usually they all go in registers, but we must reserve stack space for
  // them for possible use by the caller.  In varargs or 64-bit calls,
  // parameters are assigned stack space in order, with padding so Altivec
  // parameters are 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to a 16 byte
      // boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs. Because we cannot tell if this is needed on the caller side, we
  // have to conservatively assume that it is needed.  As such, make sure we
  // have at least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(NumBytes, dl, true), dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64) {
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load =
                DAG.getLoad(MVT::i32, dl, Store, PtrOff,
                            MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPRs.
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 != 0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, dl, PtrVT));
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                          MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do, they get
  // stack space following the non-Altivec parameters.  We don't track this
  // here because nobody below needs it.  If there are more Altivec parameters
  // than fit in registers, emit the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (!isTailCall &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG))
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_PPC);
}

SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[i];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
    TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {

      if (PPC::G8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (PPC::F8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else if (PPC::CRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
      else if (PPC::VRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}

SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Get the correct type for integers.
  EVT IntVT = Op.getValueType();

  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
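  // DYNAREAOFFSET materializes, for the llvm.get.dynamic.area.offset
  // intrinsic, how far the dynamically-allocated area sits from the stack
  // pointer; like DYNALLOC below, it carries the frame pointer save index so
  // it can be resolved once the final frame layout is known.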
  // Build a DYNAREAOFFSET node.
  SDValue Ops[2] = {Chain, FPSIdx};
  SDVTList VTs = DAG.getVTList(IntVT);
  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
}

SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP =
      DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
}

SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return address save index.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset,
                                                false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current frame pointer save index.  The users of this index will
  // be primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
                                                true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Negate the size.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, dl, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
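  // DYNALLOC must keep the PowerPC back chain valid: its expansion moves SP
  // down by the negated size while re-storing the previous SP value at the
  // new 0(SP) (roughly a stwux/stdux), which is why the frame pointer save
  // index is carried along as an operand.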
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}

SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}

SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, load 8 bits into 32 bits, then truncate to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD =
      DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()),
                     Chain, BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}

SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
                      Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}

// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  SDLoc DL(Op);
  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
                     Op.getOperand(0));
}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction
/// when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;
  // TODO: Propagate flags from the select rather than global settings.
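  // fsel computes "d = (a >= 0.0) ? b : c" with no branch, which is why the
  // lowering below reduces every ordered comparison to a (possibly negated or
  // operand-swapped) >= 0.0 test on LHS - RHS; that subtraction is only safe
  // once infinities and NaNs have been excluded, as checked above.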
  SDNodeFlags Flags;
  Flags.setNoInfs(true);
  Flags.setNoNaNs(true);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)  // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)  // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}

void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}

/// \brief Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);

  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
    break;
  }
  return Tmp;
}

SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment,
                     RLI.IsInvariant ? MachineMemOperand::MOInvariant
                                     : MachineMemOperand::MONone,
                     RLI.AAInfo, RLI.Ranges);
}
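// A scalar sketch of the store/load path used when direct moves are not
// available (the function name is an assumption for illustration): the
// fcti*z result lands in an 8-byte FP stack slot, so a 4-byte integer
// reload needs a +4 bias on big-endian targets, exactly as
// LowerFP_TO_INTForReuse arranges.
#include <cstdint>
#include <cstring>
static int32_t fp_to_sint_via_stack(double D) {
  int64_t Wide = (int64_t)D;        // stands in for fctidz (round toward zero)
  unsigned char Slot[8];
  std::memcpy(Slot, &Wide, sizeof(Wide));   // stfd-style 8-byte store
  // The i32 result is the low word of the slot: offset 0 on little-endian,
  // offset 4 on big-endian -- the same bias the lowering adds.
  const uint16_t Probe = 1;
  bool LittleEndian = *(const unsigned char *)&Probe == 1;
  int32_t Result;
  std::memcpy(&Result, Slot + (LittleEndian ? 0 : 4), sizeof(Result));
  return Result;
}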
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}

// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}

/// \brief Analyze profitability of direct move
/// prefer float load to int load plus direct move
/// when there is no integer use of int load
static bool directMoveIsProfitable(const SDValue &Op) {
  SDNode *Origin = Op.getOperand(0).getNode();
  if (Origin->getOpcode() != ISD::LOAD)
    return true;

  for (SDNode::use_iterator UI = Origin->use_begin(),
                            UE = Origin->use_end();
       UI != UE; ++UI) {

    // Only look at the users of the loaded value.
    if (UI.getUse().get().getResNo() != 0)
      continue;

    if (UI->getOpcode() != ISD::SINT_TO_FP &&
        UI->getOpcode() != ISD::UINT_TO_FP)
      return true;
  }

  return false;
}
/// \brief Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");
  SDValue FP;
  SDValue Src = Op.getOperand(0);
  bool SinglePrec = Op.getValueType() == MVT::f32;
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
                             (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);

  if (WordInt) {
    FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
                     dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }
  else {
    FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }

  return FP;
}

SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
    // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
    SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

    if (Op.getValueType() != MVT::v4f64)
      Value = DAG.getNode(ISD::FP_ROUND, dl,
                          Op.getValueType(), Value,
                          DAG.getIntPtrConstant(1, dl));
    return Value;
  }

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, dl, Op.getValueType()),
                       DAG.getConstantFP(0.0, dl, Op.getValueType()));

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment,
                         RLI.IsInvariant ? MachineMemOperand::MOInvariant
                                         : MachineMemOperand::MONone,
                         RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo *FrameInfo = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                         PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        DAG.getEntryNode(), dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Store, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}
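// Worked scalar model of the double-rounding guard above (the function name
// is an assumption for illustration). For i64 -> f32 without FPCVT the value
// is converted via f64 first; collapsing the low 11 bits into bit 11 keeps
// the intermediate f64 conversion exact whenever the final f32 rounding
// could be affected.
#include <cstdint>
static int64_t prepare_i64_for_f32(int64_t V) {
  uint64_t Round = (uint64_t)V & 2047;   // low 11 bits
  Round += 2047;                         // sets bit 11 iff any low bit was set
  Round |= (uint64_t)V;
  Round &= ~(uint64_t)2047;              // clear the low 11 bits
  // Use the twiddled value only when the top 11 bits are not all sign-bit
  // copies; otherwise V already converts to f64 exactly.
  uint64_t Cond = (uint64_t)(V >> 53) + 1;
  return Cond > 1 ? (int64_t)Round : V;
}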
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
                               MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
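// Exhaustive check of the FLT_ROUNDS mapping used above (helper name
// assumed). FPSCR RN bits: 0 nearest, 1 toward zero, 2 +inf, 3 -inf;
// FLT_ROUNDS wants 1, 0, 2, 3 respectively.
static int flt_rounds_from_rn(unsigned RN) {
  return (int)((RN & 3) ^ ((~RN & 3) >> 1));
}
// flt_rounds_from_rn(0) == 1, (1) == 0, (2) == 2, (3) == 3.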
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
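// A 32-bit scalar model of the SHL_PARTS expansion above (names assumed).
// It leans on the PPC shift semantics encoded by PPCISD::SHL/SRL: a 6-bit
// shift amount, where amounts 32..63 produce zero instead of C's undefined
// behavior, which is what makes the unconditional or-together trick work.
#include <cstdint>
static uint32_t shl_ppc(uint32_t V, uint32_t Amt) {
  Amt &= 63;                             // slw-style 6-bit amount
  return (Amt & 32) ? 0 : V << Amt;
}
static uint32_t srl_ppc(uint32_t V, uint32_t Amt) {
  Amt &= 63;
  return (Amt & 32) ? 0 : V >> Amt;
}
static void shl_parts(uint32_t Lo, uint32_t Hi, uint32_t Amt,
                      uint32_t &OutLo, uint32_t &OutHi) {
  uint32_t T4 = shl_ppc(Hi, Amt) | srl_ppc(Lo, 32 - Amt); // wraps mod 64
  uint32_t T6 = shl_ppc(Lo, Amt - 32);                    // wraps mod 64
  OutHi = T4 | T6;
  OutLo = shl_ppc(Lo, Amt);
}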
//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}

/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
    // We first build an i32 vector, load it into a QPX register,
    // then convert it to a floating-point vector and compare it
    // to a zero vector to get the boolean result.
    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    assert(BVN->getNumOperands() == 4 &&
           "BUILD_VECTOR for v4i1 does not have 4 operands");

    bool IsConst = true;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;
      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
        IsConst = false;
        break;
      }
    }

    if (IsConst) {
      Constant *One =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
      Constant *NegOne =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);

      Constant *CV[4];
      for (unsigned i = 0; i < 4; ++i) {
        if (BVN->getOperand(i).isUndef())
          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
        else if (isNullConstant(BVN->getOperand(i)))
          CV[i] = NegOne;
        else
          CV[i] = One;
      }

      Constant *CP = ConstantVector::get(CV);
      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
                                          16 /* alignment */);

      SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
      SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
      return DAG.getMemIntrinsicNode(
          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    }

    SmallVector<SDValue, 4> Stores;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;

      unsigned Offset = 4*i;
      SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
      if (StoreSize > 4) {
        Stores.push_back(
            DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
                              PtrInfo.getWithOffset(Offset), MVT::i32));
      } else {
        SDValue StoreValue = BVN->getOperand(i);
        if (StoreSize < 4)
          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);

        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
                                      PtrInfo.getWithOffset(Offset)));
      }
    }

    SDValue StoreChain;
    if (!Stores.empty())
      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    else
      StoreChain = DAG.getEntryNode();

    // Now load from v4i32 into the QPX register; this will extend it to
    // v4i64 but not yet convert it to a floating point. Nevertheless, this
    // is typed as v4f64 because the QPX register integer states are not
    // explicitly represented.

    SDValue Ops[] = {StoreChain,
                     DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
                     FIdx};
    SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});

    SDValue LoadedVect = DAG.getMemIntrinsicNode(
        ISD::INTRINSIC_W_CHAIN, dl, VTs, Ops, MVT::v4i32, PtrInfo);
    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
      DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
      LoadedVect);

    SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);

    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
  }

  // All other QPX vectors are handled by generic code.
  if (Subtarget.hasQPX())
    return SDValue();

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
                    (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}
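// Lane-wise model of the vspltisw/vslw trick above (function name assumed).
// vslw shifts each word by the low 5 bits of the matching word of the shift
// operand, so an all-ones vector shifted by itself moves 0xFFFFFFFF left by
// 31 and yields 0x80000000 in every lane; one more xor with the ones vector
// produces 0x7FFFFFFF.
static unsigned splat_signbit_word() {
  unsigned Ones = 0xFFFFFFFFu;           // vspltisw -1
  return Ones << (Ones & 31);            // vslw -> 0x80000000
}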
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
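// Decoder for a perfect-shuffle table entry, matching the field extraction
// above (helper name assumed): bits 31:30 cost, 29:26 opcode, 25:13 LHS id,
// 12:0 RHS id. An operand id encodes four element selectors (0-7, 8 = undef)
// as base-9 digits, which is why OP_COPY compares against (1*9+2)*9+3,
// i.e. the identity <0,1,2,3>.
static void decode_pfentry(unsigned PFEntry, unsigned &Cost, unsigned &OpNum,
                           unsigned &LHSID, unsigned &RHSID) {
  Cost  = PFEntry >> 30;
  OpNum = (PFEntry >> 26) & 0x0F;
  LHSID = (PFEntry >> 13) & ((1u << 13) - 1);
  RHSID = PFEntry & ((1u << 13) - 1);
}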
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  unsigned ShiftElts, InsertAtByte;
  bool Swap;
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  if (Subtarget.hasVSX()) {
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  if (Subtarget.hasQPX()) {
    if (VT.getVectorNumElements() != 4)
      return SDValue();

    if (V2.isUndef()) V2 = V1;

    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
    if (AlignIdx != -1) {
      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
                         DAG.getConstant(AlignIdx, dl, MVT::i32));
    } else if (SVOp->isSplat()) {
      int SplatIdx = SVOp->getSplatIndex();
      if (SplatIdx >= 4) {
        std::swap(V1, V2);
        SplatIdx -= 4;
      }

      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
                         DAG.getConstant(SplatIdx, dl, MVT::i32));
    }

    // Lower this into a qvgpci/qvfperm pair.

    // Compute the qvgpci literal
    unsigned idx = 0;
    for (unsigned i = 0; i < 4; ++i) {
      int m = SVOp->getMaskElt(i);
      unsigned mm = m >= 0 ? (unsigned) m : i;
      idx |= mm << (3-i)*3;
    }

    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
                             DAG.getConstant(idx, dl, MVT::i32));
    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  if (isFourElementShuffle && !isLittleEndian) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }

  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}

/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison.  If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = 1;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = 1;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = 1;
    } else
      return false;
    break;
  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc =  99; break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc =  67; break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc =  83; break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc =  75; break;
      }
      isDot = 1;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:  CompareOpc = 966; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequb: CompareOpc =   6; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequh: CompareOpc =  70; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = 0;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = 0;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = 0;
    } else
      return false;
    break;
  }
  return true;
}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  if (IntrinsicID == Intrinsic::thread_pointer) {
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    bool is64bit = Subtarget.isPPC64();
    return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                           is64bit ? MVT::i64 : MVT::i32);
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  SDLoc dl(Op);
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32),
                              CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
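// Scalar model of the CR6 unpacking above (helper name assumed, and the
// field layout is inferred from the shift amount used above): mfocrf
// leaves the CR6 field in bits 7:4 of the GPR, with EQ at bit 5 and LT at
// bit 7, so field bit B (EQ = 0, LT = 2 in the switch above) sits at GPR
// bit 8 - (3 - B).
static unsigned cr6_predicate_bit(unsigned Flags, unsigned BitNo,
                                  bool InvertBit) {
  unsigned Bit = (Flags >> (8 - (3 - BitNo))) & 1;
  return InvertBit ? Bit ^ 1 : Bit;
}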
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
  // instructions), but for smaller types, we need to first extend up to v2i32
  // before doing going farther.
  if (Op.getValueType() == MVT::v2i64) {
    EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (ExtVT != MVT::v2i32) {
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
                       DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
                                        ExtVT.getVectorElementType(), 4)));
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
                       DAG.getValueType(MVT::v2i32));
    }

    return Op;
  }

  return SDValue();
}

SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal =
      DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));

  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}
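// The QPX boolean remapping above as scalar math (helper name assumed):
// v4i1 lanes materialize as -1.0 (false) or 1.0 (true) after QBFLT, and a
// single fma with the 0.5 constant maps them onto 0.0 / 1.0:
// (V + 1.0) * 0.5 == 0.5 * V + 0.5.
static double qpx_bool_lane_to_01(double V) {
  return V * 0.5 + 0.5;   // -1.0 -> 0.0, 1.0 -> 1.0
}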
/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.
  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());

    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}
Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), Value); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Ops[] = {StoreChain, DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), Value, FIdx}; SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, VTs, Ops, MVT::v4i32, PtrInfo); // Move data into the byte array. SDValue Loads[4], LoadChains[4]; for (unsigned i = 0; i < 4; ++i) { unsigned Offset = 4*i; SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); LoadChains[i] = Loads[i].getValue(1); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); SDValue Stores[4]; for (unsigned i = 0; i < 4; ++i) { SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); Stores[i] = DAG.getTruncStore( StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), SN->getAAInfo()); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); return StoreChain; } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. SDValue RHSSwap = // = vrlw RHS, 16 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); // Shrinkify inputs to v8i16. LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); // Low parts multiplied together, generating 32-bit results (we ignore the // top parts). SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, LHS, RHS, DAG, dl, MVT::v4i32); SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); // Shift the high parts up 16 bits. HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG, dl); return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); } else if (Op.getValueType() == MVT::v8i16) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, LHS, RHS, Zero, DAG, dl); } else if (Op.getValueType() == MVT::v16i8) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); bool isLittleEndian = Subtarget.isLittleEndian(); // Multiply the even 8-bit parts, producing 16-bit sums. SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, LHS, RHS, DAG, dl, MVT::v8i16); EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); // Multiply the odd 8-bit parts, producing 16-bit sums. 
SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, LHS, RHS, DAG, dl, MVT::v8i16); OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); // Merge the results together. Because vmuleub and vmuloub are // instructions with a big-endian bias, we must reverse the // element numbering and reverse the meaning of "odd" and "even" // when generating little endian code. int Ops[16]; for (unsigned i = 0; i != 8; ++i) { if (isLittleEndian) { Ops[i*2 ] = 2*i; Ops[i*2+1] = 2*i+16; } else { Ops[i*2 ] = 2*i+1; Ops[i*2+1] = 2*i+1+16; } } if (isLittleEndian) return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); else return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { llvm_unreachable("Unknown mul to lower!"); } } /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); // Lower 64-bit shifts. case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); // Vector-related lowering. case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); // Frame & Return address. 
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); } } void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { SDLoc dl(N); switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); case ISD::READCYCLECOUNTER: { SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); Results.push_back(RTB); Results.push_back(RTB.getValue(1)); Results.push_back(RTB.getValue(2)); break; } case ISD::INTRINSIC_W_CHAIN: { if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != Intrinsic::ppc_is_decremented_ctr_nonzero) break; assert(N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"); EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), N->getOperand(1)); Results.push_back(NewInt); Results.push_back(NewInt.getValue(1)); break; } case ISD::VAARG: { if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) return; EVT VT = N->getValueType(0); if (VT == MVT::i64) { SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); Results.push_back(NewNode); Results.push_back(NewNode.getValue(1)); } return; } case ISD::FP_ROUND_INREG: { assert(N->getValueType(0) == MVT::ppcf128); assert(N->getOperand(0).getValueType() == MVT::ppcf128); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(0, dl)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(1, dl)); // Add the two halves of the long double in round-to-zero mode. SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); // We know the low half is about to be thrown away, so just use something // convenient. Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, FPreg, FPreg)); return; } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. if (N->getOperand(0).getValueType() == MVT::ppcf128) return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; } } //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Function *Func = Intrinsic::getDeclaration(M, Id); return Builder.CreateCall(Func, {}); } // The mappings for emitLeading/TrailingFence are taken from // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { if (Ord == AtomicOrdering::SequentiallyConsistent) return callIntrinsic(Builder, Intrinsic::ppc_sync); if (isReleaseOrStronger(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); return nullptr; } Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { if (IsLoad && isAcquireOrStronger(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); // FIXME: this is too conservative, a dependent branch + isync is enough.
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. return nullptr; } MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned AtomicSize, unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); auto LoadMnemonic = PPC::LDARX; auto StoreMnemonic = PPC::STDCX; switch (AtomicSize) { default: llvm_unreachable("Unexpected size of atomic entity"); case 1: LoadMnemonic = PPC::LBARX; StoreMnemonic = PPC::STBCX; assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); break; case 2: LoadMnemonic = PPC::LHARX; StoreMnemonic = PPC::STHCX; assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); break; case 4: LoadMnemonic = PPC::LWARX; StoreMnemonic = PPC::STWCX; break; case 8: LoadMnemonic = PPC::LDARX; StoreMnemonic = PPC::STDCX; break; } const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); // loopMBB: // l[wd]arx dest, ptr // add r0, dest, incr // st[wd]cx. r0, ptr // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; return BB; } MachineBasicBlock * PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode) const { // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = Subtarget.isPPC64(); unsigned ZeroReg = is64bit ? 
PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned PtrReg = RegInfo.createVirtualRegister(RC); unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); unsigned ShiftReg = RegInfo.createVirtualRegister(RC); unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); unsigned MaskReg = RegInfo.createVirtualRegister(RC); unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); // The 4-byte load must be aligned, while a char or short may be // anywhere in the word. Hence all this nasty bookkeeping code. // add ptr1, ptrA, ptrB [copy if ptrA==0] // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] // xori shift, shift1, 24 [16] // rlwinm ptr, ptr1, 0, 0, 29 // slw incr2, incr, shift // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] // slw mask, mask2, shift // loopMBB: // lwarx tmpDest, ptr // add tmp, tmpDest, incr2 // andc tmp2, tmpDest, mask // and tmp3, tmp, mask // or tmp4, tmp3, tmp2 // stwcx. tmp4, ptr // bne- loopMBB // fallthrough --> exitMBB // srw dest, tmpDest, shift if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); } else { Ptr1Reg = ptrB; } BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) .addReg(incr).addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg).addReg(ShiftReg); BB = loopMBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) .addReg(Incr2Reg).addReg(TmpDestReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? 
PPC::AND8 : PPC::AND), Tmp3Reg) .addReg(TmpReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) .addReg(ShiftReg); return BB; } llvm::MachineBasicBlock * PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(RC->hasType(MVT::i32) && "Invalid destination!"); unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate // // thisMBB: // SjLjSetup mainMBB // bl mainMBB // v_restore = 1 // b sinkMBB // // mainMBB: // buf[LabelOffset] = LR // v_main = 0 // // sinkMBB: // v = phi(main, restore) // MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); MF->insert(I, sinkMBB); MachineInstrBuilder MIB; // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // Note that the structure of the jmp_buf used here is not compatible // with that used by libc, and is not designed to be. Specifically, it // stores only those 'reserved' registers that LLVM does not otherwise // understand how to spill. Also, by convention, by the time this // intrinsic is called, Clang has already stored the frame address in the // first slot of the buffer and stack address in the third. Following the // X86 target code, we'll store the jump address in the second slot. We also // need to save the TOC pointer (R2) to handle jumps between shared // libraries, and that will be stored in the fourth slot. The thread // identifier (R13) is not affected. // thisMBB: const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); unsigned LabelReg = MRI.createVirtualRegister(PtrRC); unsigned BufReg = MI.getOperand(1).getReg(); if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); } // Naked functions never have a base pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. 
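// A naked function has no prologue and never materializes a separate base
// pointer, so storing the stack pointer keeps the buffer slot meaningful for
// the longjmp side.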
unsigned BaseReg; if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; else BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; MIB = BuildMI(*thisMBB, MI, DL, TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) .addReg(BaseReg) .addImm(BPOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) .addMBB(mainMBB); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); // mainMBB: // mainDstReg = 0 MIB = BuildMI(mainMBB, DL, TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); // Store IP if (Subtarget.isPPC64()) { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) .addReg(LabelReg) .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) .addReg(LabelReg) .addImm(LabelOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); mainMBB->addSuccessor(sinkMBB); // sinkMBB: BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(PPC::PHI), DstReg) .addReg(mainDstReg).addMBB(mainMBB) .addReg(restoreDstReg).addMBB(thisMBB); MI.eraseFromParent(); return sinkMBB; } MachineBasicBlock * PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); const TargetRegisterClass *RC = (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; unsigned BP = (PVT == MVT::i64) ? PPC::X30 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29 : PPC::R30); MachineInstrBuilder MIB; const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t SPOffset = 2 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); unsigned BufReg = MI.getOperand(0).getReg(); // Reload FP (the jumped-to function may not have had a // frame pointer, and if so, then its r31 will be restored // as necessary). 
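// The loads below mirror the slot layout used by emitEHSjLjSetJmp: with
// 64-bit pointers the frame pointer is reloaded from offset 0, the target IP
// from 8, the stack pointer from 16, and the TOC and base pointers from 24
// and 32; the 32-bit layout is identical with a 4-byte scale.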
if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) .addImm(0) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) .addImm(0) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload IP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) .addImm(LabelOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload SP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) .addImm(SPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) .addImm(SPOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload BP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) .addImm(BPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) .addImm(BPOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload TOC if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); } // Jump BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); MI.eraseFromParent(); return MBB; } MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && MI.getOpcode() == TargetOpcode::PATCHPOINT) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. It can't however, because there is no // way to mark the dependence as implicit there, and so the stackmap code // will confuse it with a regular operand. Instead, add the dependence // here. setUsesTOCBasePtr(*BB->getParent()); MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); } return emitPatchPoint(MI, BB); } if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { return emitEHSjLjSetJmp(MI, BB); } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { return emitEHSjLjLongJmp(MI, BB); } const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // To "insert" these instructions we actually have to insert their // control-flow patterns. 
const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); MachineFunction *F = BB->getParent(); if (Subtarget.hasISEL() && (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) { SmallVector<MachineOperand, 2> Cond; if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8) Cond.push_back(MI.getOperand(4)); else Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); Cond.push_back(MI.getOperand(1)); DebugLoc dl = MI.getDebugLoc(); TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_CC_F4 || MI.getOpcode() == PPC::SELECT_CC_F8 || MI.getOpcode() == PPC::SELECT_CC_QFRC || MI.getOpcode() == PPC::SELECT_CC_QSRC || MI.getOpcode() == PPC::SELECT_CC_QBRC || MI.getOpcode() == PPC::SELECT_CC_VRRC || MI.getOpcode() == PPC::SELECT_CC_VSFRC || MI.getOpcode() == PPC::SELECT_CC_VSSRC || MI.getOpcode() == PPC::SELECT_CC_VSRC || MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_QFRC || MI.getOpcode() == PPC::SELECT_QSRC || MI.getOpcode() == PPC::SELECT_QBRC || MI.getOpcode() == PPC::SELECT_VRRC || MI.getOpcode() == PPC::SELECT_VSFRC || MI.getOpcode() == PPC::SELECT_VSSRC || MI.getOpcode() == PPC::SELECT_VSRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. // thisMBB: // ... // TrueVal = ... // cmpTY ccX, r1, r2 // bCC copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); DebugLoc dl = MI.getDebugLoc(); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_QFRC || MI.getOpcode() == PPC::SELECT_QSRC || MI.getOpcode() == PPC::SELECT_QBRC || MI.getOpcode() == PPC::SELECT_VRRC || MI.getOpcode() == PPC::SELECT_VSFRC || MI.getOpcode() == PPC::SELECT_VSSRC || MI.getOpcode() == PPC::SELECT_VSRC) { BuildMI(BB, dl, TII->get(PPC::BC)) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } else { unsigned SelectPred = MI.getOperand(4).getImm(); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(SelectPred) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ...
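// Note that the BC/BCC above branches to sinkMBB when the condition holds,
// so the PHI built below takes the TrueValue (operand 2 of the pseudo) along
// the thisMBB edge; a false condition falls through to copy0MBB, which
// supplies the FalseValue (operand 3).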
BB = sinkMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(3).getReg()) .addMBB(copy0MBB) .addReg(MI.getOperand(2).getReg()) .addMBB(thisMBB); } else if (MI.getOpcode() == PPC::ReadTB) { // To read the 64-bit time-base register on a 32-bit target, we read the // two halves. Should the counter have wrapped while it was being read, we // need to try again. // ... // readLoop: // mfspr Rx,TBU # load from TBU // mfspr Ry,TB # load from TB // mfspr Rz,TBU # load from TBU // cmpw crX,Rx,Rz # check if 'old'='new' // bne readLoop # branch if they're not equal // ... MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); DebugLoc dl = MI.getDebugLoc(); F->insert(It, readMBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(readMBB); BB = readMBB; MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); unsigned LoReg = MI.getOperand(0).getReg(); unsigned HiReg = MI.getOperand(1).getReg(); BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) .addReg(HiReg).addReg(ReadAgainReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); BB->addSuccessor(readMBB); BB->addSuccessor(sinkMBB); } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI.getOpcode() == 
PPC::ATOMIC_LOAD_NAND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) BB = EmitAtomicBinary(MI, BB, 4, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) BB = EmitAtomicBinary(MI, BB, 8, 0); else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || (Subtarget.hasPartwordAtomics() && MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || (Subtarget.hasPartwordAtomics() && MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; auto LoadMnemonic = PPC::LDARX; auto StoreMnemonic = PPC::STDCX; switch (MI.getOpcode()) { default: llvm_unreachable("Compare and swap of unknown size"); case PPC::ATOMIC_CMP_SWAP_I8: LoadMnemonic = PPC::LBARX; StoreMnemonic = PPC::STBCX; assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); break; case PPC::ATOMIC_CMP_SWAP_I16: LoadMnemonic = PPC::LHARX; StoreMnemonic = PPC::STHCX; assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); break; case PPC::ATOMIC_CMP_SWAP_I32: LoadMnemonic = PPC::LWARX; StoreMnemonic = PPC::STWCX; break; case PPC::ATOMIC_CMP_SWAP_I64: LoadMnemonic = PPC::LDARX; StoreMnemonic = PPC::STDCX; break; } unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned oldval = MI.getOperand(3).getReg(); unsigned newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loop1MBB); F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loop1MBB); // loop1MBB: // l[bhwd]arx dest, ptr // cmp[wd] dest, oldval // bne- midMBB // loop2MBB: // st[bhwd]cx. newval, ptr // bne- loopMBB // b exitBB // midMBB: // st[bhwd]cx. dest, ptr // exitBB: BB = loop1MBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(is64bit ? 
PPC::CMPD : PPC::CMPW), PPC::CR0) .addReg(oldval).addReg(dest); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(newval).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(dest).addReg(ptrA).addReg(ptrB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { // We must use 64-bit registers for addresses when targeting 64-bit, // since we're actually doing arithmetic on them. Other registers // can be 32-bit. bool is64bit = Subtarget.isPPC64(); bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned oldval = MI.getOperand(3).getReg(); unsigned newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loop1MBB); F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned PtrReg = RegInfo.createVirtualRegister(RC); unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); unsigned ShiftReg = RegInfo.createVirtualRegister(RC); unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); unsigned MaskReg = RegInfo.createVirtualRegister(RC); unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loop1MBB); // The 4-byte load must be aligned, while a char or short may be // anywhere in the word. Hence all this nasty bookkeeping code. // add ptr1, ptrA, ptrB [copy if ptrA==0] // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] // xori shift, shift1, 24 [16] // rlwinm ptr, ptr1, 0, 0, 29 // slw newval2, newval, shift // slw oldval2, oldval,shift // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] // slw mask, mask2, shift // and newval3, newval2, mask // and oldval3, oldval2, mask // loop1MBB: // lwarx tmpDest, ptr // and tmp, tmpDest, mask // cmpw tmp, oldval3 // bne- midMBB // loop2MBB: // andc tmp2, tmpDest, mask // or tmp4, tmp2, newval3 // stwcx. tmp4, ptr // bne- loop1MBB // b exitBB // midMBB: // stwcx. 
tmpDest, ptr // exitBB: // srw dest, tmpDest, shift if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); } else { Ptr1Reg = ptrB; } BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) .addReg(newval).addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) .addReg(oldval).addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) .addReg(Mask3Reg).addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg).addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) .addReg(NewVal2Reg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) .addReg(OldVal2Reg).addReg(MaskReg); BB = loop1MBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) .addReg(TmpReg).addReg(OldVal3Reg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) .addReg(Tmp2Reg).addReg(NewVal3Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) .addReg(ShiftReg); } else if (MI.getOpcode() == PPC::FADDrtz) { // This pseudo performs an FADD with rounding mode temporarily forced // to round-to-zero. We emit this via custom inserter since the FPSCR // is not modeled at the SelectionDAG level. unsigned Dest = MI.getOperand(0).getReg(); unsigned Src1 = MI.getOperand(1).getReg(); unsigned Src2 = MI.getOperand(2).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); // Set rounding mode to round-to-zero. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); // Perform addition. BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); // Restore FPSCR value. 
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || MI.getOpcode() == PPC::ANDIo_1_GT_BIT || MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) { unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) ? PPC::ANDIo8 : PPC::ANDIo; bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass); DebugLoc dl = MI.getDebugLoc(); BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) .addReg(MI.getOperand(1).getReg()) .addImm(1); BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); } else if (MI.getOpcode() == PPC::TCHECK_RET) { DebugLoc Dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); return BB; } else { llvm_unreachable("Unexpected instr type to insert"); } MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } //===----------------------------------------------------------------------===// // Target Optimization Hooks //===----------------------------------------------------------------------===// static std::string getRecipOp(const char *Base, EVT VT) { std::string RecipOp(Base); if (VT.getScalarType() == MVT::f64) RecipOp += "d"; else RecipOp += "f"; if (VT.isVector()) RecipOp = "vec-" + RecipOp; return RecipOp; } SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, bool &UseOneConstNR) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; std::string RecipOp = getRecipOp("sqrt", VT); if (!Recips.isEnabled(RecipOp)) return SDValue(); RefinementSteps = Recips.getRefinementSteps(RecipOp); UseOneConstNR = true; return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); } SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; std::string RecipOp = getRecipOp("div", VT); if (!Recips.isEnabled(RecipOp)) return SDValue(); RefinementSteps = Recips.getRefinementSteps(RecipOp); return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); } unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Note: This functionality is used only when unsafe-fp-math is enabled, and // on cores with reciprocal estimates (which are used when unsafe-fp-math is // enabled for division), this functionality is redundant with the default // combiner logic (once the division -> reciprocal/multiply transformation // has 
taken place). As a result, this matters more for older cores than for // newer ones. // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal if there are two or more FDIVs (for embedded cores with only // one FP pipeline) or three or more FDIVs (for generic OOO cores). switch (Subtarget.getDarwinDirective()) { default: return 3; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: return 2; } } // isConsecutiveLSLoc needs to work even if all adds have not yet been // collapsed, and so we need to look through chains of them. static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t& Offset, SelectionDAG &DAG) { if (DAG.isBaseWithConstantOffset(Loc)) { Base = Loc.getOperand(0); Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); // The base might itself be a base plus an offset, and if so, accumulate // that as well. getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); } } static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG) { if (VT.getSizeInBits() / 8 != Bytes) return false; SDValue BaseLoc = Base->getBasePtr(); if (Loc.getOpcode() == ISD::FrameIndex) { if (BaseLoc.getOpcode() != ISD::FrameIndex) return false; const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); int FS = MFI->getObjectSize(FI); int BFS = MFI->getObjectSize(BFI); if (FS != BFS || FS != (int)Bytes) return false; return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } SDValue Base1 = Loc, Base2 = BaseLoc; int64_t Offset1 = 0, Offset2 = 0; getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) return true; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const GlobalValue *GV1 = nullptr; const GlobalValue *GV2 = nullptr; Offset1 = 0; Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) return Offset1 == (Offset2 + Dist*Bytes); return false; } // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does // not enforce equality of the chain operands.
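// For example, with Bytes == 16 and Dist == 1, a location that decomposes
// (via getBaseWithConstantOffset) to the same base as Base's pointer plus an
// extra 16 bytes is consecutive, even when the two nodes' chains differ.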
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG) { if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { EVT VT = LS->getMemoryVT(); SDValue Loc = LS->getBasePtr(); return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); } if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { EVT VT; switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { default: return false; case Intrinsic::ppc_qpx_qvlfd: case Intrinsic::ppc_qpx_qvlfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfs: case Intrinsic::ppc_qpx_qvlfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcd: case Intrinsic::ppc_qpx_qvlfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcs: case Intrinsic::ppc_qpx_qvlfcsa: VT = MVT::v2f32; break; case Intrinsic::ppc_qpx_qvlfiwa: case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_vsx_lxvw4x: VT = MVT::v4i32; break; case Intrinsic::ppc_vsx_lxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_altivec_lvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_lvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_lvewx: VT = MVT::i32; break; } return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); } if (N->getOpcode() == ISD::INTRINSIC_VOID) { EVT VT; switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { default: return false; case Intrinsic::ppc_qpx_qvstfd: case Intrinsic::ppc_qpx_qvstfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfs: case Intrinsic::ppc_qpx_qvstfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvstfcd: case Intrinsic::ppc_qpx_qvstfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfcs: case Intrinsic::ppc_qpx_qvstfcsa: VT = MVT::v2f32; break; case Intrinsic::ppc_qpx_qvstfiw: case Intrinsic::ppc_qpx_qvstfiwa: case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_vsx_stxvw4x: VT = MVT::v4i32; break; case Intrinsic::ppc_vsx_stxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_altivec_stvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_stvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_stvewx: VT = MVT::i32; break; } return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); } return false; } // Return true if there is a nearby consecutive load to the one provided // (regardless of alignment). We search up and down the chain, looking through // token factors and other loads (but nothing else). As a result, a true return // value indicates that it is safe to create a new consecutive load adjacent to // the load provided. static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { SDValue Chain = LD->getChain(); EVT VT = LD->getMemoryVT(); SmallSet<SDNode *, 16> LoadRoots; SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); SmallSet<SDNode *, 16> Visited; // First, search up the chain, branching to follow all token-factor operands. // If we find a consecutive load, then we're done, otherwise, record all // nodes just above the top-level loads and token factors.
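// For example, if LD's chain is a TokenFactor merging two earlier loads,
// both loads are tested for adjacency and their own chains are queued; any
// node that is neither a memory operation nor a TokenFactor terminates the
// upward walk and is recorded in LoadRoots as a starting point for the
// downward phase.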
while (!Queue.empty()) { SDNode *ChainNext = Queue.pop_back_val(); if (!Visited.insert(ChainNext).second) continue; if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; if (!Visited.count(ChainLD->getChain().getNode())) Queue.push_back(ChainLD->getChain().getNode()); } else if (ChainNext->getOpcode() == ISD::TokenFactor) { for (const SDUse &O : ChainNext->ops()) if (!Visited.count(O.getNode())) Queue.push_back(O.getNode()); } else LoadRoots.insert(ChainNext); } // Second, search down the chain, starting from the top-level nodes recorded // in the first phase. These top-level nodes are the nodes just above all // loads and token factors. Starting with their uses, recursively look through // all loads (just the chain uses) and token factors to find a consecutive // load. Visited.clear(); Queue.clear(); for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), IE = LoadRoots.end(); I != IE; ++I) { Queue.push_back(*I); while (!Queue.empty()) { SDNode *LoadRoot = Queue.pop_back_val(); if (!Visited.insert(LoadRoot).second) continue; if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; for (SDNode::use_iterator UI = LoadRoot->use_begin(), UE = LoadRoot->use_end(); UI != UE; ++UI) if (((isa<MemSDNode>(*UI) && cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) Queue.push_back(*UI); } } return false; } SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); // If we're tracking CR bits, we need to be careful that we don't have: // trunc(binary-ops(zext(x), zext(y))) // or // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) // such that we're unnecessarily moving things into GPRs when it would be // better to keep them in CR bits. // Note that trunc here can be an actual i1 trunc, or can be the effective // truncation that comes from a setcc or select_cc. if (N->getOpcode() == ISD::TRUNCATE && N->getValueType(0) != MVT::i1) return SDValue(); if (N->getOperand(0).getValueType() != MVT::i32 && N->getOperand(0).getValueType() != MVT::i64) return SDValue(); if (N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) { // If we're looking at a comparison, then we need to make sure that the // high bits (all except for the first) don't affect the result. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand( N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); unsigned OpBits = N->getOperand(0).getValueSizeInBits(); if (ISD::isSignedIntSetCC(CC)) { if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) return SDValue(); } else if (ISD::isUnsignedIntSetCC(CC)) { if (!DAG.MaskedValueIsZero(N->getOperand(0), APInt::getHighBitsSet(OpBits, OpBits-1)) || !DAG.MaskedValueIsZero(N->getOperand(1), APInt::getHighBitsSet(OpBits, OpBits-1))) return SDValue(); } else { // This is neither a signed nor an unsigned comparison, just make sure // that the high bits are equal. APInt Op1Zero, Op1One; APInt Op2Zero, Op2One; DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); // We don't really care about what is known about the first bit (if // anything), so clear it in all masks prior to comparing them.
Op1Zero.clearBit(0); Op1One.clearBit(0); Op2Zero.clearBit(0); Op2One.clearBit(0); if (Op1Zero != Op2Zero || Op1One != Op2One) return SDValue(); } } // We now know that the higher-order bits are irrelevant; we just need to // make sure that all of the intermediate operations are bit operations, and // all inputs are extensions. if (N->getOperand(0).getOpcode() != ISD::AND && N->getOperand(0).getOpcode() != ISD::OR && N->getOperand(0).getOpcode() != ISD::XOR && N->getOperand(0).getOpcode() != ISD::SELECT && N->getOperand(0).getOpcode() != ISD::SELECT_CC && N->getOperand(0).getOpcode() != ISD::TRUNCATE && N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) return SDValue(); if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && N->getOperand(1).getOpcode() != ISD::AND && N->getOperand(1).getOpcode() != ISD::OR && N->getOperand(1).getOpcode() != ISD::XOR && N->getOperand(1).getOpcode() != ISD::SELECT && N->getOperand(1).getOpcode() != ISD::SELECT_CC && N->getOperand(1).getOpcode() != ISD::TRUNCATE && N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) return SDValue(); SmallVector<SDValue, 4> Inputs; SmallVector<SDValue, 8> BinOps, PromOps; SmallPtrSet<SDNode *, 16> Visited; for (unsigned i = 0; i < 2; ++i) { if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa<ConstantSDNode>(N->getOperand(i))) Inputs.push_back(N->getOperand(i)); else BinOps.push_back(N->getOperand(i)); if (N->getOpcode() == ISD::TRUNCATE) break; } // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by extensions. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { // The condition of the select is not promoted. if (BinOp.getOpcode() == ISD::SELECT && i == 0) continue; if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) continue; if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa<ConstantSDNode>(BinOp.getOperand(i))) { Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || BinOp.getOperand(i).getOpcode() == ISD::SELECT || BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { BinOps.push_back(BinOp.getOperand(i)); } else { // We have an input that is not an extension or another binary // operation; we'll abort this transformation. return SDValue(); } } } // Make sure that this is a self-contained cluster of operations (which // is not quite the same thing as saying that everything has only one // use).
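// In other words, every non-constant input and intermediate operation may
// be used only by N or by other nodes already in the cluster; any outside
// use would still require the original i32/i64 value and would defeat the
// promotion.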
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), UE = Inputs[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // Make sure that we're not going to promote the non-output-value // operand(s) or SELECT or SELECT_CC. // FIXME: Although we could sometimes handle this, and it does occur in // practice that one of the condition inputs to the select is also one of // the outputs, we currently can't deal with this. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == Inputs[i]) return SDValue(); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == Inputs[i] || User->getOperand(1) == Inputs[i]) return SDValue(); } } } for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), UE = PromOps[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // Make sure that we're not going to promote the non-output-value // operand(s) or SELECT or SELECT_CC. // FIXME: Although we could sometimes handle this, and it does occur in // practice that one of the condition inputs to the select is also one of // the outputs, we currently can't deal with this. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == PromOps[i]) return SDValue(); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == PromOps[i] || User->getOperand(1) == PromOps[i]) return SDValue(); } } } // Replace all inputs with the extension operand. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { // Constants may have users outside the cluster of to-be-promoted nodes, // and so we need to replace those as we do the promotions. if (isa(Inputs[i])) continue; else DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); } std::list PromOpHandles; for (auto &PromOp : PromOps) PromOpHandles.emplace_back(PromOp); // Replace all operations (these are all the same, but have a different // (i1) return type). DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. Any intermediate truncations or // extensions disappear. while (!PromOpHandles.empty()) { SDValue PromOp = PromOpHandles.back().getValue(); PromOpHandles.pop_back(); if (PromOp.getOpcode() == ISD::TRUNCATE || PromOp.getOpcode() == ISD::SIGN_EXTEND || PromOp.getOpcode() == ISD::ZERO_EXTEND || PromOp.getOpcode() == ISD::ANY_EXTEND) { if (!isa(PromOp.getOperand(0)) && PromOp.getOperand(0).getValueType() != MVT::i1) { // The operand is not yet ready (see comment below). 
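// [Editor's aside -- illustrative sketch, not part of the original patch.]
// The promotion loop around this point pops nodes off the back of the
// handle list and, when a node's operands have not been rewritten yet,
// re-queues it at the *front* so it is retried after everything else. A
// standalone model of that deferral pattern (hypothetical data, plain C++):
#if 0
#include <cassert>
#include <deque>
#include <vector>

int main() {
  // ready[i]: node i's operands are already promoted. Node 2 depends on
  // node 0, so it must be deferred until node 0 has been processed.
  std::vector<bool> ready = {true, true, false};
  std::deque<int> Work = {0, 1, 2}; // popped from the back: 2 first
  std::vector<int> Order;
  while (!Work.empty()) {
    int N = Work.back();
    Work.pop_back();
    if (!ready[N]) {      // operands not ready:
      Work.push_front(N); // retry after the rest of the list
      continue;
    }
    Order.push_back(N);
    if (N == 0)
      ready[2] = true;    // promoting node 0 unblocks node 2
  }
  assert((Order == std::vector<int>{1, 0, 2})); // 2 was deferred, not lost
  return 0;
}
#endif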
PromOpHandles.emplace_front(PromOp); continue; } SDValue RepValue = PromOp.getOperand(0); if (isa(RepValue)) RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); continue; } unsigned C; switch (PromOp.getOpcode()) { default: C = 0; break; case ISD::SELECT: C = 1; break; case ISD::SELECT_CC: C = 2; break; } if ((!isa(PromOp.getOperand(C)) && PromOp.getOperand(C).getValueType() != MVT::i1) || (!isa(PromOp.getOperand(C+1)) && PromOp.getOperand(C+1).getValueType() != MVT::i1)) { // The to-be-promoted operands of this node have not yet been // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). PromOpHandles.emplace_front(PromOp); continue; } SmallVector Ops(PromOp.getNode()->op_begin(), PromOp.getNode()->op_end()); // If there are any constant inputs, make sure they're replaced now. for (unsigned i = 0; i < 2; ++i) if (isa(Ops[C+i])) Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); DAG.ReplaceAllUsesOfValueWith(PromOp, DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); } // Now we're left with the initial truncation itself. if (N->getOpcode() == ISD::TRUNCATE) return N->getOperand(0); // Otherwise, this is a comparison. The operands to be compared have just // changed type (to i1), but everything else is the same. return SDValue(N, 0); } SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); // If we're tracking CR bits, we need to be careful that we don't have: // zext(binary-ops(trunc(x), trunc(y))) // or // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) // such that we're unnecessarily moving things into CR bits that can more // efficiently stay in GPRs. Note that if we're not certain that the high // bits are set as required by the final extension, we still may need to do // some masking to get the proper behavior. // This same functionality is important on PPC64 when dealing with // 32-to-64-bit extensions; these occur often when 32-bit values are used as // the return values of functions. Because it is so similar, it is handled // here as well. if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return SDValue(); if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) return SDValue(); if (N->getOperand(0).getOpcode() != ISD::AND && N->getOperand(0).getOpcode() != ISD::OR && N->getOperand(0).getOpcode() != ISD::XOR && N->getOperand(0).getOpcode() != ISD::SELECT && N->getOperand(0).getOpcode() != ISD::SELECT_CC) return SDValue(); SmallVector Inputs; SmallVector BinOps(1, N->getOperand(0)), PromOps; SmallPtrSet Visited; // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by truncations. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { // The condition of the select is not promoted. 
if (BinOp.getOpcode() == ISD::SELECT && i == 0) continue; if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) continue; if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || isa(BinOp.getOperand(i))) { Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || BinOp.getOperand(i).getOpcode() == ISD::SELECT || BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { BinOps.push_back(BinOp.getOperand(i)); } else { // We have an input that is not a truncation or another binary // operation; we'll abort this transformation. return SDValue(); } } } // The operands of a select that must be truncated when the select is // promoted because the operand is actually part of the to-be-promoted set. DenseMap SelectTruncOp[2]; // Make sure that this is a self-contained cluster of operations (which // is not quite the same thing as saying that everything has only one // use). for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), UE = Inputs[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // If we're going to promote the non-output-value operand(s) or SELECT or // SELECT_CC, record them for truncation. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == Inputs[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == Inputs[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); if (User->getOperand(1) == Inputs[i]) SelectTruncOp[1].insert(std::make_pair(User, User->getOperand(1).getValueType())); } } } for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), UE = PromOps[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // If we're going to promote the non-output-value operand(s) or SELECT or // SELECT_CC, record them for truncation. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == PromOps[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == PromOps[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); if (User->getOperand(1) == PromOps[i]) SelectTruncOp[1].insert(std::make_pair(User, User->getOperand(1).getValueType())); } } } unsigned PromBits = N->getOperand(0).getValueSizeInBits(); bool ReallyNeedsExt = false; if (N->getOpcode() != ISD::ANY_EXTEND) { // If all of the inputs are not already sign/zero extended, then // we'll still need to do that at the end. 
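// [Editor's aside -- illustrative sketch, not part of the original patch.]
// The walk above collects AND/OR/XOR (and selects) fed only by truncations.
// Recomputing those bitwise operations in the wider type is sound because
// they commute with truncation: truncating a wide AND/OR/XOR gives the same
// bits as the narrow operation on truncated inputs. A standalone check:
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x1234567890ABCDEFULL, Y = 0x0F0F0F0F0F0F0F0FULL;
  assert((uint32_t)(X & Y) == ((uint32_t)X & (uint32_t)Y));
  assert((uint32_t)(X | Y) == ((uint32_t)X | (uint32_t)Y));
  assert((uint32_t)(X ^ Y) == ((uint32_t)X ^ (uint32_t)Y));
  return 0;
}
#endif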
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; unsigned OpBits = Inputs[i].getOperand(0).getValueSizeInBits(); assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); if ((N->getOpcode() == ISD::ZERO_EXTEND && !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), APInt::getHighBitsSet(OpBits, OpBits-PromBits))) || (N->getOpcode() == ISD::SIGN_EXTEND && DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < (OpBits-(PromBits-1)))) { ReallyNeedsExt = true; break; } } } // Replace all inputs, either with the truncation operand, or a // truncation or extension to the final output type. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { // Constant inputs need to be replaced with the to-be-promoted nodes that // use them because they might have users outside of the cluster of // promoted nodes. if (isa(Inputs[i])) continue; SDValue InSrc = Inputs[i].getOperand(0); if (Inputs[i].getValueType() == N->getValueType(0)) DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); else if (N->getOpcode() == ISD::SIGN_EXTEND) DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); else if (N->getOpcode() == ISD::ZERO_EXTEND) DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); else DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); } std::list PromOpHandles; for (auto &PromOp : PromOps) PromOpHandles.emplace_back(PromOp); // Replace all operations (these are all the same, but have a different // (promoted) return type). DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. while (!PromOpHandles.empty()) { SDValue PromOp = PromOpHandles.back().getValue(); PromOpHandles.pop_back(); unsigned C; switch (PromOp.getOpcode()) { default: C = 0; break; case ISD::SELECT: C = 1; break; case ISD::SELECT_CC: C = 2; break; } if ((!isa(PromOp.getOperand(C)) && PromOp.getOperand(C).getValueType() != N->getValueType(0)) || (!isa(PromOp.getOperand(C+1)) && PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { // The to-be-promoted operands of this node have not yet been // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). PromOpHandles.emplace_front(PromOp); continue; } // For SELECT and SELECT_CC nodes, we do a similar check for any // to-be-promoted comparison inputs. if (PromOp.getOpcode() == ISD::SELECT || PromOp.getOpcode() == ISD::SELECT_CC) { if ((SelectTruncOp[0].count(PromOp.getNode()) && PromOp.getOperand(0).getValueType() != N->getValueType(0)) || (SelectTruncOp[1].count(PromOp.getNode()) && PromOp.getOperand(1).getValueType() != N->getValueType(0))) { PromOpHandles.emplace_front(PromOp); continue; } } SmallVector Ops(PromOp.getNode()->op_begin(), PromOp.getNode()->op_end()); // If this node has constant inputs, then they'll need to be promoted here. 
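// [Editor's aside -- illustrative sketch, not part of the original patch.]
// Standalone model of the ReallyNeedsExt test above and of the final
// re-extension emitted further below: a zero-extend is redundant when the
// bits above PromBits are already zero; when still needed, the zero-extend
// is an AND with the low-bits mask and the sign-extend is a SHL/SRA pair.
// Helper names are hypothetical; assumes PromBits < 64 and arithmetic
// right shift of signed values (true on the targets this code serves).
#if 0
#include <cassert>
#include <cstdint>

static bool zextRedundant(uint64_t V, unsigned PromBits) {
  return (V & (~0ULL << PromBits)) == 0; // MaskedValueIsZero on high bits
}
static uint64_t zextLow(uint64_t X, unsigned PromBits) {
  return X & (~0ULL >> (64 - PromBits)); // AND with getLowBitsSet(...)
}
static int64_t sextLow(uint64_t X, unsigned PromBits) {
  unsigned Sh = 64 - PromBits;           // the ShiftCst used below
  return (int64_t)(X << Sh) >> Sh;       // SHL then SRA
}

int main() {
  assert(zextRedundant(0x00000000DEADBEEFULL, 32));
  assert(!zextRedundant(0x100000000ULL, 32));
  assert(zextLow(~0ULL, 1) == 1);        // i1 true zero-extends to 1
  assert(sextLow(1, 1) == -1);           // i1 true sign-extends to all ones
  assert(sextLow(0x80000000ULL, 32) == (int64_t)0xFFFFFFFF80000000LL);
  return 0;
}
#endif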
for (unsigned i = 0; i < 2; ++i) { if (!isa(Ops[C+i])) continue; if (Ops[C+i].getValueType() == N->getValueType(0)) continue; if (N->getOpcode() == ISD::SIGN_EXTEND) Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); else if (N->getOpcode() == ISD::ZERO_EXTEND) Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); else Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); } // If we've promoted the comparison inputs of a SELECT or SELECT_CC, // truncate them again to the original value type. if (PromOp.getOpcode() == ISD::SELECT || PromOp.getOpcode() == ISD::SELECT_CC) { auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); if (SI0 != SelectTruncOp[0].end()) Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); if (SI1 != SelectTruncOp[1].end()) Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); } DAG.ReplaceAllUsesOfValueWith(PromOp, DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); } // Now we're left with the initial extension itself. if (!ReallyNeedsExt) return N->getOperand(0); // To zero extend, just mask off everything except for the first bit (in the // i1 case). if (N->getOpcode() == ISD::ZERO_EXTEND) return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), DAG.getConstant(APInt::getLowBitsSet( N->getValueSizeInBits(0), PromBits), dl, N->getValueType(0))); assert(N->getOpcode() == ISD::SIGN_EXTEND && "Invalid extension type"); EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); return DAG.getNode( ISD::SRA, dl, N->getValueType(0), DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), ShiftCst); } SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX()) return SDValue(); // Looking for: // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP && N->getOperand(0).getOpcode() != ISD::UINT_TO_FP) return SDValue(); if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) return SDValue(); if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode()) return SDValue(); SDValue Ext1 = N->getOperand(0).getOperand(0); SDValue Ext2 = N->getOperand(1).getOperand(0); if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); ConstantSDNode *Ext1Op = dyn_cast(Ext1.getOperand(1)); ConstantSDNode *Ext2Op = dyn_cast(Ext2.getOperand(1)); if (!Ext1Op || !Ext2Op) return SDValue(); if (Ext1.getValueType() != MVT::i32 || Ext2.getValueType() != MVT::i32) if (Ext1.getOperand(0) != Ext2.getOperand(0)) return SDValue(); int FirstElem = Ext1Op->getZExtValue(); int SecondElem = Ext2Op->getZExtValue(); int SubvecIdx; if (FirstElem == 0 && SecondElem == 1) SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; else if (FirstElem == 2 && SecondElem == 3) SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; else return SDValue(); SDValue SrcVec = Ext1.getOperand(0); auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? 
                    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(NodeType, dl, MVT::v2f64, SrcVec,
                     DAG.getIntPtrConstant(SubvecIdx, dl));
}

SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

-  if (!Subtarget.has64BitSupport())
+  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or i1 conversions.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return SDValue();

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float to an int and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}

// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want.
Get operand 2 instead. Base = Intrin->getOperand(2); MMO = Intrin->getMemOperand(); break; } } MVT VecTy = N->getValueType(0).getSimpleVT(); SDValue LoadOps[] = { Chain, Base }; SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, DAG.getVTList(MVT::v2f64, MVT::Other), LoadOps, MVT::v2f64, MMO); DCI.AddToWorklist(Load.getNode()); Chain = Load.getValue(1); SDValue Swap = DAG.getNode( PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); DCI.AddToWorklist(Swap.getNode()); // Add a bitcast if the resulting load type doesn't match v2f64. if (VecTy != MVT::v2f64) { SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); DCI.AddToWorklist(N.getNode()); // Package {bitcast value, swap's chain} to match Load's shape. return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), N, Swap.getValue(1)); } return Swap; } // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for // builtins) into stores with swaps. SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Chain; SDValue Base; unsigned SrcOpnd; MachineMemOperand *MMO; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode for little endian VSX store"); case ISD::STORE: { StoreSDNode *ST = cast(N); Chain = ST->getChain(); Base = ST->getBasePtr(); MMO = ST->getMemOperand(); SrcOpnd = 1; // If the MMO suggests this isn't a store of a full vector, leave // things alone. For a built-in, we have to make the change for // correctness, so if there is a size problem that will be a bug. if (MMO->getSize() < 16) return SDValue(); break; } case ISD::INTRINSIC_VOID: { MemIntrinsicSDNode *Intrin = cast(N); Chain = Intrin->getChain(); // Intrin->getBasePtr() oddly does not get what we want. Base = Intrin->getOperand(3); MMO = Intrin->getMemOperand(); SrcOpnd = 2; break; } } SDValue Src = N->getOperand(SrcOpnd); MVT VecTy = Src.getValueType().getSimpleVT(); // All stores are done as v2f64 and possible bit cast. if (VecTy != MVT::v2f64) { Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); DCI.AddToWorklist(Src.getNode()); } SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); DCI.AddToWorklist(Swap.getNode()); Chain = Swap.getValue(1); SDValue StoreOps[] = { Chain, Swap, Base }; SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, DAG.getVTList(MVT::Other), StoreOps, VecTy, MMO); DCI.AddToWorklist(Store.getNode()); return Store; } SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); switch (N->getOpcode()) { default: break; case PPCISD::SHL: if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. return N->getOperand(0); break; case PPCISD::SRL: if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. return N->getOperand(0); break; case PPCISD::SRA: if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { if (C->isNullValue() || // 0 >>s V -> 0. C->isAllOnesValue()) // -1 >>s V -> -1. return N->getOperand(0); } break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: case ISD::SETCC: case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineFPToIntToFP(N, DCI); case ISD::STORE: { // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 
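// [Editor's aside -- illustrative sketch, not part of the original patch;
// the STORE combine whose comment appears just above continues below.]
// Byte-order model of expandVSXLoadForLE/expandVSXStoreForLE above: lxvd2x
// and stxvd2x move two doublewords in big-endian element order, so on a
// little-endian target the combines pair them with an xxswapd, which swaps
// the two 64-bit halves of the vector. Simplified to a plain array:
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Mem[2] = {0x1111111111111111ULL, 0x2222222222222222ULL};
  // lxvd2x as seen through LE lane numbering: elements arrive swapped.
  uint64_t Reg[2] = {Mem[1], Mem[0]};
  // xxswapd restores the expected element order.
  uint64_t Vec[2] = {Reg[1], Reg[0]};
  assert(Vec[0] == Mem[0] && Vec[1] == Mem[1]);
  return 0;
}
#endif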
    if (Subtarget.hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      SDValue Ops[] = {
        N->getOperand(0), Val, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };

      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops,
              cast<StoreSDNode>(N)->getMemoryVT(),
              cast<StoreSDNode>(N)->getMemOperand());
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    EVT VT = N->getOperand(1).getValueType();
    if (VT.isSimple()) {
      MVT StoreVT = VT.getSimpleVT();
      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    // We sometimes end up with a 64-bit integer load, from which we extract
    // two single-precision floating-point numbers. This happens with
    // std::complex<float>, and other similar structures, because of the way
    // we canonicalize structure copies. However, if we lack direct moves,
    // then the final bitcasts from the extracted integer values to the
    // floating-point numbers turn into store/load pairs. Even with direct
    // moves, just loading the two floating-point numbers is likely better.
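// [Editor's aside -- illustrative sketch, not part of the original patch;
// the two-float-load rewrite described just above follows below.]
// The STORE (BSWAP) combine above rests on this identity: a byte-reversed
// store (sthbrx/stwbrx/stdbrx) writes the same bytes as a normal store of
// bswap(x), whatever the host byte order. Portable check of the 32-bit case:
#if 0
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) | (V << 24);
}

int main() {
  uint32_t V = 0x01020304u;
  unsigned char Norm[4], Swapped[4], Reversed[4];
  std::memcpy(Norm, &V, 4);            // plain stw
  uint32_t S = bswap32(V);
  std::memcpy(Swapped, &S, 4);         // stw (bswap V)
  for (int i = 0; i < 4; ++i)          // stwbrx: same bytes, reversed order
    Reversed[i] = Norm[3 - i];
  assert(std::memcmp(Swapped, Reversed, 4) == 0);
  return 0;
}
#endif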
auto ReplaceTwoFloatLoad = [&]() { if (VT != MVT::i64) return false; if (LD->getExtensionType() != ISD::NON_EXTLOAD || LD->isVolatile()) return false; // We're looking for a sequence like this: // t13: i64,ch = load t0, t6, undef:i64 // t16: i64 = srl t13, Constant:i32<32> // t17: i32 = truncate t16 // t18: f32 = bitcast t17 // t19: i32 = truncate t13 // t20: f32 = bitcast t19 if (!LD->hasNUsesOfValue(2, 0)) return false; auto UI = LD->use_begin(); while (UI.getUse().getResNo() != 0) ++UI; SDNode *Trunc = *UI++; while (UI.getUse().getResNo() != 0) ++UI; SDNode *RightShift = *UI; if (Trunc->getOpcode() != ISD::TRUNCATE) std::swap(Trunc, RightShift); if (Trunc->getOpcode() != ISD::TRUNCATE || Trunc->getValueType(0) != MVT::i32 || !Trunc->hasOneUse()) return false; if (RightShift->getOpcode() != ISD::SRL || !isa(RightShift->getOperand(1)) || RightShift->getConstantOperandVal(1) != 32 || !RightShift->hasOneUse()) return false; SDNode *Trunc2 = *RightShift->use_begin(); if (Trunc2->getOpcode() != ISD::TRUNCATE || Trunc2->getValueType(0) != MVT::i32 || !Trunc2->hasOneUse()) return false; SDNode *Bitcast = *Trunc->use_begin(); SDNode *Bitcast2 = *Trunc2->use_begin(); if (Bitcast->getOpcode() != ISD::BITCAST || Bitcast->getValueType(0) != MVT::f32) return false; if (Bitcast2->getOpcode() != ISD::BITCAST || Bitcast2->getValueType(0) != MVT::f32) return false; if (Subtarget.isLittleEndian()) std::swap(Bitcast, Bitcast2); // Bitcast has the second float (in memory-layout order) and Bitcast2 // has the first one. SDValue BasePtr = LD->getBasePtr(); if (LD->isIndexed()) { assert(LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"); BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, LD->getOffset()); } auto MMOFlags = LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->getAlignment(), MMOFlags, LD->getAAInfo()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(4, dl)); SDValue FloatLoad2 = DAG.getLoad( MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, LD->getPointerInfo().getWithOffset(4), MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); if (LD->isIndexed()) { // Note that DAGCombine should re-form any pre-increment load(s) from // what is produced here if that makes sense. DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); } DCI.CombineTo(Bitcast2, FloatLoad); DCI.CombineTo(Bitcast, FloatLoad2); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1), SDValue(FloatLoad2.getNode(), 1)); return true; }; if (ReplaceTwoFloatLoad()) return SDValue(N, 0); EVT MemVT = LD->getMemoryVT(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); if (LD->isUnindexed() && VT.isVector() && ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && // P8 and later hardware should just use LOAD. !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v4f32)) || (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && LD->getAlignment() >= ScalarABIAlignment)) && LD->getAlignment() < ABIAlignment) { // This is a type-legal unaligned Altivec or QPX load. 
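// [Editor's aside -- illustrative sketch, not part of the original patch.]
// What ReplaceTwoFloatLoad above rewrites, modeled in memory terms: the
// matched pattern extracts the two halves of an i64 load (srl/truncate)
// and bitcasts each to f32; the replacement simply loads the two floats
// directly at offsets 0 and 4 (the combine swaps the two results on
// little-endian targets, as the std::swap above does):
#if 0
#include <cassert>
#include <cstring>

int main() {
  float Pair[2] = {1.5f, -2.25f};
  unsigned char Mem[8];
  std::memcpy(Mem, Pair, 8);       // the i64 load reads these 8 bytes

  float F0, F1;
  std::memcpy(&F0, Mem + 0, 4);    // first f32 load
  std::memcpy(&F1, Mem + 4, 4);    // second f32 load at offset 4
  assert(F0 == 1.5f && F1 == -2.25f);
  return 0;
}
#endif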
SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); bool isLittleEndian = Subtarget.isLittleEndian(); // This implements the loading of unaligned vectors as described in // the venerable Apple Velocity Engine overview. Specifically: // https://developer.apple.com/hardwaredrivers/ve/alignment.html // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html // // The general idea is to expand a sequence of one or more unaligned // loads into an alignment-based permutation-control instruction (lvsl // or lvsr), a series of regular vector loads (which always truncate // their input address to an aligned address), and a series of // permutations. The results of these permutations are the requested // loaded values. The trick is that the last "extra" load is not taken // from the address you might suspect (sizeof(vector) bytes after the // last requested load), but rather sizeof(vector) - 1 bytes after the // last requested vector. The point of this is to avoid a page fault if // the base address happened to be aligned. This works because if the // base address is aligned, then adding less than a full vector length // will cause the last vector in the sequence to be (re)loaded. // Otherwise, the next vector will be fetched as you might suspect was // necessary. // We might be able to reuse the permutation generation from // a different base address offset from this one by an aligned amount. // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this // optimization later. Intrinsic::ID Intr, IntrLD, IntrPerm; MVT PermCntlTy, PermTy, LDTy; if (Subtarget.hasAltivec()) { Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl; IntrLD = Intrinsic::ppc_altivec_lvx; IntrPerm = Intrinsic::ppc_altivec_vperm; PermCntlTy = MVT::v16i8; PermTy = MVT::v4i32; LDTy = MVT::v4i32; } else { Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : Intrinsic::ppc_qpx_qvlpcls; IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : Intrinsic::ppc_qpx_qvlfs; IntrPerm = Intrinsic::ppc_qpx_qvfperm; PermCntlTy = MVT::v4f64; PermTy = MVT::v4f64; LDTy = MemVT.getSimpleVT(); } SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); // Create the new MMO for the new base load. It is like the original MMO, // but represents an area in memory almost twice the vector size centered // on the original address. If the address is unaligned, we might start // reading up to (sizeof(vector)-1) bytes below the address of the // original unaligned load. MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *BaseMMO = MF.getMachineMemOperand(LD->getMemOperand(), -(long)MemVT.getStoreSize()+1, 2*MemVT.getStoreSize()-1); // Create the new base load. SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue BaseLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, DAG.getVTList(PermTy, MVT::Other), BaseLoadOps, LDTy, BaseMMO); // Note that the value of IncOffset (which is provided to the next // load's pointer info offset value, and thus used to calculate the // alignment), and the value of IncValue (which is actually used to // increment the pointer value) are different! This is because we // require the next load to appear to be aligned, even though it // is actually offset from the base pointer by a lesser amount. 
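// [Editor's aside -- illustrative sketch, not part of the original patch.]
// Standalone model of the expansion described above: lvx truncates its
// address to a 16-byte boundary, so an unaligned 16-byte load becomes two
// aligned loads plus a byte permute selecting 16 consecutive bytes starting
// at (addr & 15). The extra load uses addr + 15 rather than addr + 16 so an
// already-aligned base never touches the next (possibly unmapped) block.
#if 0
#include <cassert>
#include <cstring>

int main() {
  alignas(16) unsigned char Buf[48];
  for (int i = 0; i < 48; ++i) Buf[i] = (unsigned char)i;

  unsigned Addr = 5; // unaligned offset into Buf
  const unsigned char *Base  = Buf + (Addr & ~15u);        // lvx(addr)
  const unsigned char *Extra = Buf + ((Addr + 15) & ~15u); // lvx(addr+15)
  unsigned Sh = Addr & 15u;                                // lvsl control

  unsigned char Out[16];
  for (unsigned i = 0; i < 16; ++i)                        // vperm
    Out[i] = (Sh + i < 16) ? Base[Sh + i] : Extra[Sh + i - 16];
  assert(std::memcmp(Out, Buf + Addr, 16) == 0);

  // If Addr were already 16-byte aligned, Addr + 15 stays inside the same
  // 16-byte block, so Base == Extra and no extra memory is touched.
  return 0;
}
#endif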
int IncOffset = VT.getSizeInBits() / 8; int IncValue = IncOffset; // Walk (both up and down) the chain looking for another load at the real // (aligned) offset (the alignment of the other load does not matter in // this case). If found, then do not use the offset reduction trick, as // that will prevent the loads from being later combined (as they would // otherwise be duplicates). if (!findConsecutiveLoad(LD, DAG)) --IncValue; SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); MachineMemOperand *ExtraMMO = MF.getMachineMemOperand(LD->getMemOperand(), 1, 2*MemVT.getStoreSize()-1); SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue ExtraLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, DAG.getVTList(PermTy, MVT::Other), ExtraLoadOps, LDTy, ExtraMMO); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, BaseLoad.getValue(1), ExtraLoad.getValue(1)); // Because vperm has a big-endian bias, we must reverse the order // of the input vectors and complement the permute control vector // when generating little endian code. We have already handled the // latter by using lvsr instead of lvsl, so just reverse BaseLoad // and ExtraLoad here. SDValue Perm; if (isLittleEndian) Perm = BuildIntrinsicOp(IntrPerm, ExtraLoad, BaseLoad, PermCntl, DAG, dl); else Perm = BuildIntrinsicOp(IntrPerm, BaseLoad, ExtraLoad, PermCntl, DAG, dl); if (VT != PermTy) Perm = Subtarget.hasAltivec() ? DAG.getNode(ISD::BITCAST, dl, VT, Perm) : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX DAG.getTargetConstant(1, dl, MVT::i64)); // second argument is 1 because this rounding // is always exact. // The output of the permutation is our loaded result, the TokenFactor is // our new chain. DCI.CombineTo(N, Perm, TF); return SDValue(N, 0); } } break; case ISD::INTRINSIC_WO_CHAIN: { bool isLittleEndian = Subtarget.isLittleEndian(); unsigned IID = cast(N->getOperand(0))->getZExtValue(); Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); if ((IID == Intr || IID == Intrinsic::ppc_qpx_qvlpcld || IID == Intrinsic::ppc_qpx_qvlpcls) && N->getOperand(1)->getOpcode() == ISD::ADD) { SDValue Add = N->getOperand(1); int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; if (DAG.MaskedValueIsZero( Add->getOperand(1), APInt::getAllOnesValue(Bits /* alignment */) .zext( Add.getValueType().getScalarType().getSizeInBits()))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast(UI->getOperand(0))->getZExtValue() == IID) { // We've found another LVSL/LVSR, and this address is an aligned // multiple of that one. The results will be the same, so use the // one we've just found instead. 
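// [Editor's aside -- illustrative sketch, not part of the original patch;
// the early return this combine performs follows below.]
// Why an existing lvsl/lvsr result can be reused: the permute control
// depends only on addr & 15 (addr & 31 for the QPX variants), so adding any
// multiple of the alignment -- which is what the MaskedValueIsZero test
// above proves about the other address -- cannot change it.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Addr = 0x1003;
  for (uint64_t K = 0; K < 8; ++K)                 // any multiple of 16...
    assert(((Addr + 16 * K) & 15) == (Addr & 15)); // ...same permute control
  return 0;
}
#endif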
return SDValue(*UI, 0); } } } if (isa(Add->getOperand(1))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::ADD && isa(UI->getOperand(1)) && (cast(Add->getOperand(1))->getZExtValue() - cast(UI->getOperand(1))->getZExtValue()) % (1ULL << Bits) == 0) { SDNode *OtherAdd = *UI; for (SDNode::use_iterator VI = OtherAdd->use_begin(), VE = OtherAdd->use_end(); VI != VE; ++VI) { if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast(VI->getOperand(0))->getZExtValue() == IID) { return SDValue(*VI, 0); } } } } } } } break; case ISD::INTRINSIC_W_CHAIN: { // For little endian, VSX loads require generating lxvd2x/xxswapd. if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { switch (cast(N->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: return expandVSXLoadForLE(N, DCI); } } break; } case ISD::INTRINSIC_VOID: { // For little endian, VSX stores require generating xxswapd/stxvd2x. if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { switch (cast(N->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::ppc_vsx_stxvw4x: case Intrinsic::ppc_vsx_stxvd2x: return expandVSXStoreForLE(N, DCI); } } break; } case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && N->getValueType(0) == MVT::i64))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the byte-swapping load. SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr(), // Ptr DAG.getValueType(N->getValueType(0)) // VT }; SDValue BSLoad = DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, DAG.getVTList(N->getValueType(0) == MVT::i64 ? MVT::i64 : MVT::i32, MVT::Other), Ops, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; if (N->getValueType(0) == MVT::i16) ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); // First, combine the bswap away. This makes the value produced by the // load dead. DCI.CombineTo(N, ResVal); // Next, combine the load away, we give it a bogus result value but a real // chain result. The result value is dead because the bswap is dead. DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); // Return N so it doesn't get rechecked! return SDValue(N, 0); } break; case PPCISD::VCMP: { // If a VCMPo node already exists with exactly the same operands as this // node, use its result instead of this node (VCMPo computes both a CR6 and // a normal output). // if (!N->getOperand(0).hasOneUse() && !N->getOperand(1).hasOneUse() && !N->getOperand(2).hasOneUse()) { // Scan all of the users of the LHS, looking for VCMPo's that match. SDNode *VCMPoNode = nullptr; SDNode *LHSN = N->getOperand(0).getNode(); for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); UI != E; ++UI) if (UI->getOpcode() == PPCISD::VCMPo && UI->getOperand(1) == N->getOperand(1) && UI->getOperand(2) == N->getOperand(2) && UI->getOperand(0) == N->getOperand(0)) { VCMPoNode = *UI; break; } // If there is no VCMPo node, or if the flag value has a single use, don't // transform this. if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) break; // Look at the (necessarily single) use of the flag value. If it has a // chain, this transformation is more complex. 
Note that multiple things // could use the value result, which we should ignore. SDNode *FlagUser = nullptr; for (SDNode::use_iterator UI = VCMPoNode->use_begin(); FlagUser == nullptr; ++UI) { assert(UI != VCMPoNode->use_end() && "Didn't find user!"); SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { FlagUser = User; break; } } } // If the user is a MFOCRF instruction, we know this is safe. // Otherwise we give up for right now. if (FlagUser->getOpcode() == PPCISD::MFOCRF) return SDValue(VCMPoNode, 0); } break; } case ISD::BRCOND: { SDValue Cond = N->getOperand(1); SDValue Target = N->getOperand(2); if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(Cond.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero) { // We now need to make the intrinsic dead (it cannot be instruction // selected). DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); assert(Cond.getNode()->hasOneUse() && "Counter decrement has more than one use"); return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, N->getOperand(0), Target); } } break; case ISD::BR_CC: { // If this is a branch on an altivec predicate comparison, lower this so // that we don't have to do a MFOCRF: instead, branch directly on CR6. This // lowering is done pre-legalize, because the legalizer lowers the predicate // compare down to code that is difficult to reassemble. ISD::CondCode CC = cast(N->getOperand(1))->get(); SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); // Sometimes the promoted value of the intrinsic is ANDed by some non-zero // value. If so, pass-through the AND to get to the intrinsic. if (LHS.getOpcode() == ISD::AND && LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(LHS.getOperand(0).getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(LHS.getOperand(1)) && !isNullConstant(LHS.getOperand(1))) LHS = LHS.getOperand(0); if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(LHS.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(RHS)) { assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"); unsigned Val = cast(RHS)->getZExtValue(); bool isBDNZ = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val); // We now need to make the intrinsic dead (it cannot be instruction // selected). DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); assert(LHS.getNode()->hasOneUse() && "Counter decrement has more than one use"); return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } int CompareOpc; bool isDot; if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know // that the condition is never/always true. unsigned Val = cast(RHS)->getZExtValue(); if (Val != 0 && Val != 1) { if (CC == ISD::SETEQ) // Cond never true, remove branch. return N->getOperand(0); // Always !=, turn it into an unconditional branch. return DAG.getNode(ISD::BR, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); // Create the PPCISD altivec 'dot' comparison node. 
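// [Editor's aside -- illustrative sketch, not part of the original patch;
// the construction of the 'dot' comparison node follows below.]
// Truth table behind BranchOnWhenPredTrue above: the branch tests the
// predicate result against Val, so "== 1" and "!= 0" both mean "branch when
// the predicate is true", while "== 0" and "!= 1" mean the opposite.
#if 0
#include <cassert>

int main() {
  enum CondCode { SETEQ, SETNE };
  auto BranchOnTrue = [](CondCode CC, unsigned Val) {
    return bool((CC == SETEQ) ^ (Val == 0));
  };
  assert(BranchOnTrue(SETEQ, 1) == true);   // pred == 1 -> branch on true
  assert(BranchOnTrue(SETNE, 0) == true);   // pred != 0 -> branch on true
  assert(BranchOnTrue(SETEQ, 0) == false);  // pred == 0 -> branch on false
  assert(BranchOnTrue(SETNE, 1) == false);  // pred != 1 -> branch on false
  return 0;
}
#endif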
SDValue Ops[] = { LHS.getOperand(2), // LHS of compare LHS.getOperand(3), // RHS of compare DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; switch (cast(LHS.getOperand(1))->getZExtValue()) { default: // Can't happen, don't crash on invalid number though. case 0: // Branch on the value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; break; case 1: // Branch on the inverted value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; break; case 2: // Branch on the value of the LT bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; break; case 3: // Branch on the inverted value of the LT bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; break; } return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), DAG.getConstant(CompOpc, dl, MVT::i32), DAG.getRegister(PPC::CR6, MVT::i32), N->getOperand(4), CompNode.getValue(1)); } break; } case ISD::BUILD_VECTOR: return DAGCombineBuildVector(N, DCI); } return SDValue(); } SDValue PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const { // fold (sdiv X, pow2) EVT VT = N->getValueType(0); if (VT == MVT::i64 && !Subtarget.isPPC64()) return SDValue(); if ((VT != MVT::i32 && VT != MVT::i64) || !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) return SDValue(); SDLoc DL(N); SDValue N0 = N->getOperand(0); bool IsNegPow2 = (-Divisor).isPowerOf2(); unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); if (Created) Created->push_back(Op.getNode()); if (IsNegPow2) { Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); if (Created) Created->push_back(Op.getNode()); } return Op; } //===----------------------------------------------------------------------===// // Inline Assembly Support //===----------------------------------------------------------------------===// void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { // lhbrx is known to have the top bits cleared out. if (cast(Op.getOperand(2))->getVT() == MVT::i16) KnownZero = 0xFFFF0000; break; } case ISD::INTRINSIC_WO_CHAIN: { switch (cast(Op.getOperand(0))->getZExtValue()) { default: break; case Intrinsic::ppc_altivec_vcmpbfp_p: case Intrinsic::ppc_altivec_vcmpeqfp_p: case Intrinsic::ppc_altivec_vcmpequb_p: case Intrinsic::ppc_altivec_vcmpequh_p: case Intrinsic::ppc_altivec_vcmpequw_p: case Intrinsic::ppc_altivec_vcmpequd_p: case Intrinsic::ppc_altivec_vcmpgefp_p: case Intrinsic::ppc_altivec_vcmpgtfp_p: case Intrinsic::ppc_altivec_vcmpgtsb_p: case Intrinsic::ppc_altivec_vcmpgtsh_p: case Intrinsic::ppc_altivec_vcmpgtsw_p: case Intrinsic::ppc_altivec_vcmpgtsd_p: case Intrinsic::ppc_altivec_vcmpgtub_p: case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: case Intrinsic::ppc_altivec_vcmpgtud_p: KnownZero = ~1U; // All bits but the low one are known to be zero. 
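// [Editor's aside -- illustrative sketch, not part of the original patch.]
// Standalone model of the SRA_ADDZE pattern emitted by BuildSDIVPow2 above:
// srawi/sradi shift right arithmetically and record in CA whether a
// negative dividend had any one bits shifted out; addze then adds that
// carry, turning the round-toward-negative-infinity shift into the
// round-toward-zero division C requires.
#if 0
#include <cassert>
#include <cstdint>

static int64_t sdivPow2(int64_t X, unsigned Lg2) {
  int64_t Shifted = X >> Lg2; // arithmetic shift (sradi)
  int64_t Carry =             // CA: negative dividend with remainder bits
      (X < 0 && (X & ((1LL << Lg2) - 1)) != 0) ? 1 : 0;
  return Shifted + Carry;     // addze
}

int main() {
  for (int64_t X : {-9LL, -8LL, -1LL, 0LL, 1LL, 9LL})
    assert(sdivPow2(X, 3) == X / 8);
  return 0;
}
#endif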
break; } } } } unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { switch (Subtarget.getDarwinDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_PWR4: case PPC::DIR_PWR5: case PPC::DIR_PWR5X: case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: { if (!ML) break; const PPCInstrInfo *TII = Subtarget.getInstrInfo(); // For small loops (between 5 and 8 instructions), align to a 32-byte // boundary so that the entire loop fits in one instruction-cache line. uint64_t LoopSize = 0; for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { LoopSize += TII->GetInstSizeInBytes(*J); if (LoopSize > 32) break; } if (LoopSize > 16 && LoopSize <= 32) return 5; break; } } return TargetLowering::getPrefLoopAlignment(ML); } /// getConstraintType - Given a constraint, return the type of /// constraint it is for this target. PPCTargetLowering::ConstraintType PPCTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; case 'b': case 'r': case 'f': case 'd': case 'v': case 'y': return C_RegisterClass; case 'Z': // FIXME: While Z does indicate a memory constraint, it specifically // indicates an r+r address (used in conjunction with the 'y' modifier // in the replacement string). Currently, we're forcing the base // register to be r0 in the asm printer (which is interpreted as zero) // and forming the complete address in the second register. This is // suboptimal. return C_Memory; } } else if (Constraint == "wc") { // individual CR bits. return C_RegisterClass; } else if (Constraint == "wa" || Constraint == "wd" || Constraint == "wf" || Constraint == "ws") { return C_RegisterClass; // VSX registers. } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight PPCTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) return CW_Register; // an individual CR bit. 
else if ((StringRef(constraint) == "wa" || StringRef(constraint) == "wd" || StringRef(constraint) == "wf") && type->isVectorTy()) return CW_Register; else if (StringRef(constraint) == "ws" && type->isDoubleTy()) return CW_Register; switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'b': if (type->isIntegerTy()) weight = CW_Register; break; case 'f': if (type->isFloatTy()) weight = CW_Register; break; case 'd': if (type->isDoubleTy()) weight = CW_Register; break; case 'v': if (type->isVectorTy()) weight = CW_Register; break; case 'y': weight = CW_Register; break; case 'Z': weight = CW_Memory; break; } return weight; } std::pair PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); // 'd' and 'f' constraints are both defined to be "the floating point // registers", where one is for 32-bit and the other for 64-bit. We don't // really care overly much here so just give them all the same reg classes. case 'd': case 'f': if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::F4RCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::F8RCRegClass); if (VT == MVT::v4f64 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QFRCRegClass); if (VT == MVT::v4f32 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QSRCRegClass); break; case 'v': if (VT == MVT::v4f64 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QFRCRegClass); if (VT == MVT::v4f32 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QSRCRegClass); if (Subtarget.hasAltivec()) return std::make_pair(0U, &PPC::VRRCRegClass); case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); } } else if (Constraint == "wc" && Subtarget.useCRBits()) { // An individual CR bit. return std::make_pair(0U, &PPC::CRBITRCRegClass); } else if ((Constraint == "wa" || Constraint == "wd" || Constraint == "wf") && Subtarget.hasVSX()) { return std::make_pair(0U, &PPC::VSRCRegClass); } else if (Constraint == "ws" && Subtarget.hasVSX()) { if (VT == MVT::f32 && Subtarget.hasP8Vector()) return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); } std::pair R = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers // (which we call X[0-9]+). If a 64-bit value has been requested, and a // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent // register. // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use // the AsmName field from *RegisterInfo.td, then this would not be necessary. if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && PPC::GPRCRegClass.contains(R.first)) return std::make_pair(TRI->getMatchingSuperReg(R.first, PPC::sub_32, &PPC::G8RCRegClass), &PPC::G8RCRegClass); // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. 
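// [Editor's aside -- illustrative sketch, not part of the original patch;
// the "cc" aliasing noted above is handled right below.]
// Standalone versions of a few of the immediate tests behind the 'I'..'P'
// inline-asm constraint letters handled in LowerAsmOperandForConstraint
// below (isInt<16>, isShiftedUInt<16, 16>, and friends):
#if 0
#include <cassert>
#include <cstdint>

static bool isInt16(int64_t V) { return V >= -32768 && V <= 32767; }
static bool isShiftedUInt16_16(int64_t V) { // only bits 16..31 may be set
  return (V & ~0xFFFF0000LL) == 0;
}

int main() {
  assert(isInt16(-32768));                 // 'I': signed 16-bit constant
  assert(isShiftedUInt16_16(0x00010000));  // 'J': high-order 16 bits only
  assert(!isShiftedUInt16_16(0x00000001));
  assert(!isInt16(-(-32768LL)));           // 'P': negation must fit in i16
  return 0;
}
#endif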
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing.
  switch (AM.Scale) {
    case 0:  // "r+i" or just "i", depending on HasBaseReg.
      break;
    case 1:
      if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
        return false;
      // Otherwise we have r+r or r+i.
      break;
    case 2:
      if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
        return false;
      // Allow 2*r as r+r.
      break;
    default:
      // No other scales are supported.
return false; } return true; } SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); // Make sure the function does not optimize away the store of the RA to // the stack. PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setLRStoreRequired(); bool isPPC64 = Subtarget.isPPC64(); auto PtrVT = getPointerTy(MF.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, isPPC64 ? MVT::i64 : MVT::i32); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo()); } // Just load the return address off the stack. SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT PtrVT = getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Naked functions never have a frame pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned FrameReg; if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) FrameReg = isPPC64 ? PPC::X1 : PPC::R1; else FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); while (Depth--) FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); return FrameAddr; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || (!isPPC64 && VT != MVT::i32)) report_fatal_error("Invalid register global variable type"); bool is64Bit = isPPC64 && VT == MVT::i64; unsigned Reg = StringSwitch(RegName) .Case("r1", is64Bit ? PPC::X1 : PPC::R1) .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : (is64Bit ? PPC::X13 : PPC::R13)) .Default(0); if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } bool PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The PowerPC target isn't yet aware of offsets. 
return false; } bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfd: case Intrinsic::ppc_qpx_qvlfs: case Intrinsic::ppc_qpx_qvlfcd: case Intrinsic::ppc_qpx_qvlfcs: case Intrinsic::ppc_qpx_qvlfiwa: case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_altivec_lvebx: case Intrinsic::ppc_altivec_lvehx: case Intrinsic::ppc_altivec_lvewx: case Intrinsic::ppc_vsx_lxvd2x: case Intrinsic::ppc_vsx_lxvw4x: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_lvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_lvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_lvewx: VT = MVT::i32; break; case Intrinsic::ppc_vsx_lxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfd: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfs: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcd: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcs: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = VT; Info.ptrVal = I.getArgOperand(0); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = 1; Info.vol = false; Info.readMem = true; Info.writeMem = false; return true; } case Intrinsic::ppc_qpx_qvlfda: case Intrinsic::ppc_qpx_qvlfsa: case Intrinsic::ppc_qpx_qvlfcda: case Intrinsic::ppc_qpx_qvlfcsa: case Intrinsic::ppc_qpx_qvlfiwaa: case Intrinsic::ppc_qpx_qvlfiwza: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcsa: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = VT; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.size = VT.getStoreSize(); Info.align = 1; Info.vol = false; Info.readMem = true; Info.writeMem = false; return true; } case Intrinsic::ppc_qpx_qvstfd: case Intrinsic::ppc_qpx_qvstfs: case Intrinsic::ppc_qpx_qvstfcd: case Intrinsic::ppc_qpx_qvstfcs: case Intrinsic::ppc_qpx_qvstfiw: case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_altivec_stvebx: case Intrinsic::ppc_altivec_stvehx: case Intrinsic::ppc_altivec_stvewx: case Intrinsic::ppc_vsx_stxvd2x: case Intrinsic::ppc_vsx_stxvw4x: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_stvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_stvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_stvewx: VT = MVT::i32; break; case Intrinsic::ppc_vsx_stxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfd: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfs: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvstfcd: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfcs: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_VOID; Info.memVT = VT; Info.ptrVal = I.getArgOperand(1); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = 1; Info.vol = false; Info.readMem = false; Info.writeMem = true; return true; } case Intrinsic::ppc_qpx_qvstfda: case Intrinsic::ppc_qpx_qvstfsa: case Intrinsic::ppc_qpx_qvstfcda: case Intrinsic::ppc_qpx_qvstfcsa: case Intrinsic::ppc_qpx_qvstfiwa: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_qpx_qvstfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfsa: VT = MVT::v4f32; break; case 
Intrinsic::ppc_qpx_qvstfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfcsa: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_VOID; Info.memVT = VT; Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.size = VT.getStoreSize(); Info.align = 1; Info.vol = false; Info.readMem = false; Info.writeMem = true; return true; } default: break; } return false; } /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If 'IsMemset' is /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { const Function *F = MF.getFunction(); // When expanding a memset, require at least two QPX instructions to cover // the cost of loading the value to be stored from the constant pool. if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && !F->hasFnAttribute(Attribute::NoImplicitFloat)) { return MVT::v4f64; } // We should use Altivec/VSX loads and stores when available. For unaligned // addresses, unaligned VSX loads are only fast starting with the P8. if (Subtarget.hasAltivec() && Size >= 16 && (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) return MVT::v4i32; } if (Subtarget.isPPC64()) { return MVT::i64; } return MVT::i32; } /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); return !(BitSize == 0 || BitSize > 64); } bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); return NumBits1 == 64 && NumBits2 == 32; } bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); return NumBits1 == 64 && NumBits2 == 32; } bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { // Generally speaking, zexts are not free, but they are free when they can be // folded with other operations. if (LoadSDNode *LD = dyn_cast(Val)) { EVT MemVT = LD->getMemoryVT(); if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || (Subtarget.isPPC64() && MemVT == MVT::i32)) && (LD->getExtensionType() == ISD::NON_EXTLOAD || LD->getExtensionType() == ISD::ZEXTLOAD)) return true; } // FIXME: Add other cases... 
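The negative offset and oversized size recorded above for the vector load and store intrinsics encode that lvx-class instructions ignore the low bits of the address. An editor's sketch of the arithmetic, assuming VT = v4i32 (16-byte store size):

// lvx clears the low four address bits, so a conservative range for the
// bytes touched is [Ptr - 15, Ptr + 15]: offset -15 with size 31, exactly
// the Info.offset/Info.size values computed above.
constexpr int StoreSize = 16;
constexpr int Offset    = -StoreSize + 1;     // -15
constexpr int Size      = 2 * StoreSize - 1;  // 31
static_assert(Offset == -15 && Size == 31, "conservative lvx window");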
// - 32-bit shifts with a zext to i64 // - zext after ctlz, bswap, etc. // - zext after and by a constant mask return TargetLowering::isZExtFree(Val, VT2); } bool PPCTargetLowering::isFPExtFree(EVT VT) const { assert(VT.isFloatingPoint()); return true; } bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, bool *Fast) const { if (DisablePPCUnaligned) return false; // PowerPC supports unaligned memory access for simple non-vector types. // Although accessing unaligned addresses is not as efficient as accessing // aligned addresses, it is generally more efficient than manual expansion, // and generally only traps for software emulation when crossing page // boundaries. if (!VT.isSimple()) return false; if (VT.getSimpleVT().isVector()) { if (Subtarget.hasVSX()) { if (VT != MVT::v2f64 && VT != MVT::v2i64 && VT != MVT::v4f32 && VT != MVT::v4i32) return false; } else { return false; } } if (VT == MVT::ppcf128) return false; if (Fast) *Fast = true; return true; } bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) return false; switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: case MVT::f64: return true; default: break; } return false; } const MCPhysReg * PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { // LR is a callee-save register, but we must treat it as clobbered by any call // site. Hence we include LR in the scratch registers, which are in turn added // as implicit-defs for stackmaps and patchpoints. The same reasoning applies // to CTR, which is used by any indirect call. static const MCPhysReg ScratchRegs[] = { PPC::X12, PPC::LR8, PPC::CTR8, 0 }; return ScratchRegs; } unsigned PPCTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; } unsigned PPCTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; } bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { if (VT == MVT::v2i64) return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves if (Subtarget.hasVSX() || Subtarget.hasQPX()) return true; return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); } Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { if (DisableILPPref || Subtarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); return Sched::ILP; } // Create a fast isel object. 
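isLegalICmpImmediate above accepts either range because the compare instructions come in signed (cmpwi/cmpdi) and unsigned (cmplwi/cmpldi) forms, each taking a 16-bit immediate. A standalone restatement (editor's sketch, not LLVM API):

#include <cstdint>
bool legalICmpImm(int64_t Imm) {
  bool SignedFits   = Imm >= -32768 && Imm <= 32767;  // isInt<16>(Imm)
  bool UnsignedFits = Imm >= 0 && Imm <= 65535;       // isUInt<16>(Imm)
  return SignedFits || UnsignedFits;
}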
FastISel * PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { return PPC::createFastISel(FuncInfo, LibInfo); } void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { if (Subtarget.isDarwinABI()) return; if (!Subtarget.isPPC64()) return; // Update IsSplitCSR in PPCFunctionInfo PPCFunctionInfo *PFI = Entry->getParent()->getInfo(); PFI->setIsSplitCSR(true); } void PPCTargetLowering::insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const { const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); if (!IStart) return; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (PPC::G8RCRegClass.contains(*I)) RC = &PPC::G8RCRegClass; else if (PPC::F8RCRegClass.contains(*I)) RC = &PPC::F8RCRegClass; else if (PPC::CRRCRegClass.contains(*I)) RC = &PPC::CRRCRegClass; else if (PPC::VRRCRegClass.contains(*I)) RC = &PPC::VRRCRegClass; else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); unsigned NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. assert(Entry->getParent()->getFunction()->hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); // Insert the copy-back instructions right before the terminator for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } // Override to enable LOAD_STACK_GUARD lowering on Linux. bool PPCTargetLowering::useLoadStackGuardNode() const { if (!Subtarget.isTargetLinux()) return TargetLowering::useLoadStackGuardNode(); return true; } // Override to disable global variable loading on Linux. void PPCTargetLowering::insertSSPDeclarations(Module &M) const { if (!Subtarget.isTargetLinux()) return TargetLowering::insertSSPDeclarations(M); } Index: head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp =================================================================== --- head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp (revision 309148) +++ head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp (revision 309149) @@ -1,252 +1,252 @@ //===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the PPC specific subclass of TargetSubtargetInfo. 
// //===----------------------------------------------------------------------===// #include "PPCSubtarget.h" #include "PPC.h" #include "PPCRegisterInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" #include using namespace llvm; #define DEBUG_TYPE "ppc-subtarget" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "PPCGenSubtargetInfo.inc" static cl::opt UseSubRegLiveness("ppc-track-subreg-liveness", cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden); static cl::opt QPXStackUnaligned("qpx-stack-unaligned", cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"), cl::Hidden); PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); initSubtargetFeatures(CPU, FS); return *this; } PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const PPCTargetMachine &TM) : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), IsPPC64(TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le), TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)), InstrInfo(*this), TLInfo(TM, *this) {} void PPCSubtarget::initializeEnvironment() { StackAlignment = 16; DarwinDirective = PPC::DIR_NONE; HasMFOCRF = false; Has64BitSupport = false; Use64BitRegs = false; UseCRBits = false; - UseSoftFloat = false; + HasHardFloat = false; HasAltivec = false; HasSPE = false; HasQPX = false; HasVSX = false; HasP8Vector = false; HasP8Altivec = false; HasP8Crypto = false; HasP9Vector = false; HasP9Altivec = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; HasFRES = false; HasFRSQRTE = false; HasFRSQRTES = false; HasRecipPrec = false; HasSTFIWX = false; HasLFIWAX = false; HasFPRND = false; HasFPCVT = false; HasISEL = false; HasBPERMD = false; HasExtDiv = false; HasCMPB = false; HasLDBRX = false; IsBookE = false; HasOnlyMSYNC = false; IsPPC4xx = false; IsPPC6xx = false; IsE500 = false; FeatureMFTB = false; DeprecatedDST = false; HasLazyResolverStubs = false; HasICBT = false; HasInvariantFunctionDescriptors = false; HasPartwordAtomics = false; HasDirectMove = false; IsQPXStackUnaligned = false; HasHTM = false; HasFusion = false; HasFloat128 = false; IsISA3_0 = false; UseLongCalls = false; HasPOPCNTD = POPCNTD_Unavailable; } void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Determine default and user specified characteristics std::string CPUName = CPU; if (CPUName.empty() || CPU == "generic") { // If cross-compiling with -march=ppc64le without -mcpu if (TargetTriple.getArch() == Triple::ppc64le) CPUName = "ppc64le"; else CPUName = "generic"; } // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); // Parse features string. ParseSubtargetFeatures(CPUName, FS); // If the user requested use of 64-bit regs, but the cpu selected doesn't // support it, ignore. if (IsPPC64 && has64BitSupport()) Use64BitRegs = true; // Set up darwin-specific properties. if (isDarwin()) HasLazyResolverStubs = true; // QPX requires a 32-byte aligned stack. 
Note that we need to do this if // we're compiling for a BG/Q system regardless of whether or not QPX // is enabled because external functions will assume this alignment. IsQPXStackUnaligned = QPXStackUnaligned; StackAlignment = getPlatformStackAlignment(); // Determine endianness. // FIXME: Part of the TargetMachine. IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le); } /// Return true if accesses to the specified global have to go through a dyld /// lazy resolution stub. This means that an extra load is required to get the /// address of the global. bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const { if (!HasLazyResolverStubs) return false; if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) return true; // 32 bit macho has no relocation for a-b if a is undefined, even if b is in // the section that is being relocated. This means we have to use o load even // for GVs that are known to be local to the dso. if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) return true; return false; } // Embedded cores need aggressive scheduling (and some others also benefit). static bool needsAggressiveScheduling(unsigned Directive) { switch (Directive) { default: return false; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: case PPC::DIR_PWR7: case PPC::DIR_PWR8: // FIXME: Same as P8 until POWER9 scheduling info is available case PPC::DIR_PWR9: return true; } } bool PPCSubtarget::enableMachineScheduler() const { // Enable MI scheduling for the embedded cores. // FIXME: Enable this for all cores (some additional modeling // may be necessary). return needsAggressiveScheduling(DarwinDirective); } // This overrides the PostRAScheduler bit in the SchedModel for each CPU. bool PPCSubtarget::enablePostRAScheduler() const { return true; } PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const { return TargetSubtargetInfo::ANTIDEP_ALL; } void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { CriticalPathRCs.clear(); CriticalPathRCs.push_back(isPPC64() ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); } void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const { if (needsAggressiveScheduling(DarwinDirective)) { Policy.OnlyTopDown = false; Policy.OnlyBottomUp = false; } // Spilling is generally expensive on all PPC cores, so always enable // register-pressure tracking. Policy.ShouldTrackPressure = true; } bool PPCSubtarget::useAA() const { // Use AA during code generation for the embedded cores. return needsAggressiveScheduling(DarwinDirective); } bool PPCSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; } unsigned char PPCSubtarget::classifyGlobalReference( const GlobalValue *GV) const { // Note that currently we don't generate non-pic references. // If a caller wants that, this will have to be updated. // Large code model always uses the TOC even for local symbols. if (TM.getCodeModel() == CodeModel::Large) return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; unsigned char flags = PPCII::MO_PIC_FLAG; // Only if the relocation mode is PIC do we have to worry about // interposition. In all other cases we can use a slightly looser standard to // decide how to access the symbol. if (TM.getRelocationModel() == Reloc::PIC_) { // If it's local, or it's non-default, it can't be interposed. 
if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility()) { flags |= PPCII::MO_NLP_FLAG; } return flags; } if (GV->isStrongDefinitionForLinker()) return flags; return flags | PPCII::MO_NLP_FLAG; } bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); } bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); } Index: head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h (revision 309148) +++ head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h (revision 309149) @@ -1,319 +1,319 @@ //===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file declares the PowerPC specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H #define LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H #include "PPCFrameLowering.h" #include "PPCISelLowering.h" #include "PPCInstrInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" #include #define GET_SUBTARGETINFO_HEADER #include "PPCGenSubtargetInfo.inc" // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC namespace llvm { class StringRef; namespace PPC { // -m directive values. enum { DIR_NONE, DIR_32, DIR_440, DIR_601, DIR_602, DIR_603, DIR_7400, DIR_750, DIR_970, DIR_A2, DIR_E500mc, DIR_E5500, DIR_PWR3, DIR_PWR4, DIR_PWR5, DIR_PWR5X, DIR_PWR6, DIR_PWR6X, DIR_PWR7, DIR_PWR8, DIR_PWR9, DIR_64 }; } class GlobalValue; class TargetMachine; class PPCSubtarget : public PPCGenSubtargetInfo { public: enum POPCNTDKind { POPCNTD_Unavailable, POPCNTD_Slow, POPCNTD_Fast }; protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned StackAlignment; /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; /// Which cpu directive was used. 
unsigned DarwinDirective; /// Used by the ISel to turn in optimizations for POWER4-derived architectures bool HasMFOCRF; bool Has64BitSupport; bool Use64BitRegs; bool UseCRBits; - bool UseSoftFloat; + bool HasHardFloat; bool IsPPC64; bool HasAltivec; bool HasSPE; bool HasQPX; bool HasVSX; bool HasP8Vector; bool HasP8Altivec; bool HasP8Crypto; bool HasP9Vector; bool HasP9Altivec; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; bool HasRecipPrec; bool HasSTFIWX; bool HasLFIWAX; bool HasFPRND; bool HasFPCVT; bool HasISEL; bool HasBPERMD; bool HasExtDiv; bool HasCMPB; bool HasLDBRX; bool IsBookE; bool HasOnlyMSYNC; bool IsE500; bool IsPPC4xx; bool IsPPC6xx; bool FeatureMFTB; bool DeprecatedDST; bool HasLazyResolverStubs; bool IsLittleEndian; bool HasICBT; bool HasInvariantFunctionDescriptors; bool HasPartwordAtomics; bool HasDirectMove; bool HasHTM; bool HasFusion; bool HasFloat128; bool IsISA3_0; bool UseLongCalls; POPCNTDKind HasPOPCNTD; /// When targeting QPX running a stock PPC64 Linux kernel where the stack /// alignment has not been changed, we need to keep the 16-byte alignment /// of the stack. bool IsQPXStackUnaligned; const PPCTargetMachine &TM; PPCFrameLowering FrameLowering; PPCInstrInfo InstrInfo; PPCTargetLowering TLInfo; SelectionDAGTargetInfo TSInfo; public: /// This constructor initializes the data members to match that /// of the specified triple. /// PPCSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const PPCTargetMachine &TM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. unsigned getStackAlignment() const { return StackAlignment; } /// getDarwinDirective - Returns the -m directive specified for the cpu. /// unsigned getDarwinDirective() const { return DarwinDirective; } /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. const InstrItineraryData *getInstrItineraryData() const override { return &InstrItins; } const PPCFrameLowering *getFrameLowering() const override { return &FrameLowering; } const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; } const PPCTargetLowering *getTargetLowering() const override { return &TLInfo; } const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { return &TSInfo; } const PPCRegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } const PPCTargetMachine &getTargetMachine() const { return TM; } /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); private: void initializeEnvironment(); void initSubtargetFeatures(StringRef CPU, StringRef FS); public: /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. /// bool isPPC64() const; /// has64BitSupport - Return true if the selected CPU supports 64-bit /// instructions, regardless of whether we are in 32-bit or 64-bit mode. bool has64BitSupport() const { return Has64BitSupport; } // useSoftFloat - Return true if soft-float option is turned on. 
- bool useSoftFloat() const { return UseSoftFloat; } + bool useSoftFloat() const { return !HasHardFloat; } /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit /// registers in 32-bit mode when possible. This can only true if /// has64BitSupport() returns true. bool use64BitRegs() const { return Use64BitRegs; } /// useCRBits - Return true if we should store and manipulate i1 values in /// the individual condition register bits. bool useCRBits() const { return UseCRBits; } /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. This means that an extra load /// is required to get the address of the global. bool hasLazyResolverStub(const GlobalValue *GV) const; // isLittleEndian - True if generating little-endian code bool isLittleEndian() const { return IsLittleEndian; } // Specific obvious features. bool hasFCPSGN() const { return HasFCPSGN; } bool hasFSQRT() const { return HasFSQRT; } bool hasFRE() const { return HasFRE; } bool hasFRES() const { return HasFRES; } bool hasFRSQRTE() const { return HasFRSQRTE; } bool hasFRSQRTES() const { return HasFRSQRTES; } bool hasRecipPrec() const { return HasRecipPrec; } bool hasSTFIWX() const { return HasSTFIWX; } bool hasLFIWAX() const { return HasLFIWAX; } bool hasFPRND() const { return HasFPRND; } bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasSPE() const { return HasSPE; } bool hasQPX() const { return HasQPX; } bool hasVSX() const { return HasVSX; } bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } bool hasP8Crypto() const { return HasP8Crypto; } bool hasP9Vector() const { return HasP9Vector; } bool hasP9Altivec() const { return HasP9Altivec; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasBPERMD() const { return HasBPERMD; } bool hasExtDiv() const { return HasExtDiv; } bool hasCMPB() const { return HasCMPB; } bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } bool hasOnlyMSYNC() const { return HasOnlyMSYNC; } bool isPPC4xx() const { return IsPPC4xx; } bool isPPC6xx() const { return IsPPC6xx; } bool isE500() const { return IsE500; } bool isFeatureMFTB() const { return FeatureMFTB; } bool isDeprecatedDST() const { return DeprecatedDST; } bool hasICBT() const { return HasICBT; } bool hasInvariantFunctionDescriptors() const { return HasInvariantFunctionDescriptors; } bool hasPartwordAtomics() const { return HasPartwordAtomics; } bool hasDirectMove() const { return HasDirectMove; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } unsigned getPlatformStackAlignment() const { if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned()) return 32; return 16; } bool hasHTM() const { return HasHTM; } bool hasFusion() const { return HasFusion; } bool hasFloat128() const { return HasFloat128; } bool isISA3_0() const { return IsISA3_0; } bool useLongCalls() const { return UseLongCalls; } POPCNTDKind hasPOPCNTD() const { return HasPOPCNTD; } const Triple &getTargetTriple() const { return TargetTriple; } /// isDarwin - True if this is any darwin platform. bool isDarwin() const { return TargetTriple.isMacOSX(); } /// isBGQ - True if this is a BG/Q platform. 
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; } bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isDarwinABI() const { return isTargetMachO() || isDarwin(); } bool isSVR4ABI() const { return !isDarwinABI(); } bool isELFv2ABI() const; bool enableEarlyIfConversion() const override { return hasISEL(); } // Scheduling customization. bool enableMachineScheduler() const override; // This overrides the PostRAScheduler bit in the SchedModel for each CPU. bool enablePostRAScheduler() const override; AntiDepBreakMode getAntiDepBreakMode() const override; void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; bool useAA() const override; bool enableSubRegLiveness() const override; /// classifyGlobalReference - Classify a global variable reference for the /// current subtarget accourding to how we should reference it. unsigned char classifyGlobalReference(const GlobalValue *GV) const; }; } // End llvm namespace #endif Index: head/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- head/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (revision 309148) +++ head/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (revision 309149) @@ -1,442 +1,442 @@ //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Top-level implementation for the PowerPC target. 
// //===----------------------------------------------------------------------===// #include "PPCTargetMachine.h" #include "PPC.h" #include "PPCTargetObjectFile.h" #include "PPCTargetTransformInfo.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; static cl:: opt DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, cl::desc("Disable CTR loops for PPC")); static cl:: opt DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden, cl::desc("Disable PPC loop preinc prep")); static cl::opt VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early")); static cl:: opt DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden, cl::desc("Disable VSX Swap Removal for PPC")); static cl:: opt DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden, cl::desc("Disable QPX load splat simplification")); static cl:: opt DisableMIPeephole("disable-ppc-peephole", cl::Hidden, cl::desc("Disable machine peepholes for PPC")); static cl::opt EnableGEPOpt("ppc-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), cl::init(true)); static cl::opt EnablePrefetch("enable-ppc-prefetching", cl::desc("disable software prefetching on PPC"), cl::init(false), cl::Hidden); static cl::opt EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps", cl::desc("Add extra TOC register dependencies"), cl::init(true), cl::Hidden); static cl::opt EnableMachineCombinerPass("ppc-machine-combiner", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine A(ThePPC32Target); RegisterTargetMachine B(ThePPC64Target); RegisterTargetMachine C(ThePPC64LETarget); PassRegistry &PR = *PassRegistry::getPassRegistry(); initializePPCBoolRetToIntPass(PR); } /// Return the datalayout string of a subtarget. static std::string getDataLayoutString(const Triple &T) { bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le; std::string Ret; // Most PPC* platforms are big endian, PPC64LE is little endian. if (T.getArch() == Triple::ppc64le) Ret = "e"; else Ret = "E"; Ret += DataLayout::getManglingComponent(T); // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit // pointers. if (!is64Bit || T.getOS() == Triple::Lv2) Ret += "-p:32:32"; // Note, the alignment values for f64 and i64 on ppc64 in Darwin // documentation are wrong; these are correct (i.e. "what gcc does"). if (is64Bit || !T.isOSDarwin()) Ret += "-i64:64"; else Ret += "-f64:32:64"; // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. 
if (is64Bit) Ret += "-n32:64"; else Ret += "-n32"; return Ret; } static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, const Triple &TT) { std::string FullFS = FS; // Make sure 64-bit features are available when CPUname is generic if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) { if (!FullFS.empty()) FullFS = "+64bit," + FullFS; else FullFS = "+64bit"; } if (OL >= CodeGenOpt::Default) { if (!FullFS.empty()) FullFS = "+crbits," + FullFS; else FullFS = "+crbits"; } if (OL != CodeGenOpt::None) { if (!FullFS.empty()) FullFS = "+invariant-function-descriptors," + FullFS; else FullFS = "+invariant-function-descriptors"; } return FullFS; } static std::unique_ptr createTLOF(const Triple &TT) { // If it isn't a Mach-O file then it's going to be a linux ELF // object file. if (TT.isOSDarwin()) return make_unique(); return make_unique(); } static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, const TargetOptions &Options) { if (Options.MCOptions.getABIName().startswith("elfv1")) return PPCTargetMachine::PPC_ABI_ELFv1; else if (Options.MCOptions.getABIName().startswith("elfv2")) return PPCTargetMachine::PPC_ABI_ELFv2; assert(Options.MCOptions.getABIName().empty() && "Unknown target-abi option!"); if (!TT.isMacOSX()) { switch (TT.getArch()) { case Triple::ppc64le: return PPCTargetMachine::PPC_ABI_ELFv2; case Triple::ppc64: return PPCTargetMachine::PPC_ABI_ELFv1; default: // Fallthrough. ; } } return PPCTargetMachine::PPC_ABI_UNKNOWN; } static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional RM) { if (!RM.hasValue()) { if (TT.isOSDarwin()) return Reloc::DynamicNoPIC; return Reloc::Static; } return *RM; } // The FeatureString here is a little subtle. We are modifying the feature // string with what are (currently) non-function specific overrides as it goes // into the LLVMTargetMachine constructor and then using the stored value in the // Subtarget constructor below it. PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, computeFSAdditions(FS, OL, TT), Options, getEffectiveRelocModel(TT, RM), CM, OL), TLOF(createTLOF(getTargetTriple())), TargetABI(computeTargetABI(TT, Options)), Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) { // For the estimates, convergence is quadratic, so we essentially double the // number of digits correct after every iteration. For both FRE and FRSQRTE, // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), // this is 2^-14. IEEE float has 23 digits and double has 52 digits. unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 
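computeFSAdditions above prepends each default, so the most recently added feature ends up first in the final string. A standalone sketch (editor's example; prependFeature is an assumed helper, not LLVM API):

#include <string>
static std::string prependFeature(std::string FS, const std::string &F) {
  return FS.empty() ? F : F + "," + FS;
}
// For a ppc64le triple at -O2, starting from an empty feature string:
//   ""  ->  "+64bit"
//       ->  "+crbits,+64bit"
//       ->  "+invariant-function-descriptors,+crbits,+64bit"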
1 : 3, RefinementSteps64 = RefinementSteps + 1; this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps); this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps); this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps); this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps); this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64); this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64); this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64); this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64); initAsmInfo(); } PPCTargetMachine::~PPCTargetMachine() {} void PPC32TargetMachine::anchor() { } PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} void PPC64TargetMachine::anchor() { } PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} const PPCSubtarget * PPCTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); std::string CPU = !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString().str() : TargetCPU; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString().str() : TargetFS; // FIXME: This is related to the code below to reset the target options, // we need to know whether or not the soft float flag is set on the // function before we can generate a subtarget. We also need to use // it as a key for the subtarget since that can be the only difference // between two functions. bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsString() == "true"; // If the soft float attribute is set on the function turn on the soft float // subtarget feature. if (SoftFloat) - FS += FS.empty() ? "+soft-float" : ",+soft-float"; + FS += FS.empty() ? "-hard-float" : ",-hard-float"; auto &I = SubtargetMap[CPU + FS]; if (!I) { // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique( TargetTriple, CPU, // FIXME: It would be good to have the subtarget additions here // not necessary. Anything that turns them on/off (overrides) ends // up being put at the end of the feature string, but the defaults // shouldn't require adding them. Fixing this means pulling Feature64Bit // out of most of the target cpus in the .td file and making it set only // as part of initialization via the TargetTriple. computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this); } return I.get(); } //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// namespace { /// PPC Code Generator Pass Configuration Options. 
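Because the per-function subtarget cache in getSubtargetImpl is keyed on CPU + FS, two functions that differ only in the "use-soft-float" attribute map to different subtargets. An editor's sketch with assumed CPU and feature values:

#include <string>
std::string subtargetKey(bool SoftFloat) {
  std::string CPU = "ppc64", FS = "+64bit";  // assumed inputs
  if (SoftFloat)
    FS += FS.empty() ? "-hard-float" : ",-hard-float";
  return CPU + FS;  // "ppc64+64bit,-hard-float" when SoftFloat is set
}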
class PPCPassConfig : public TargetPassConfig {
public:
  PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  PPCTargetMachine &getPPCTargetMachine() const {
    return getTM<PPCTargetMachine>();
  }

  void addIRPasses() override;
  bool addPreISel() override;
  bool addILPOpts() override;
  bool addInstSelector() override;
  void addMachineSSAOptimization() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};
} // namespace

TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new PPCPassConfig(this, PM);
}

void PPCPassConfig::addIRPasses() {
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createPPCBoolRetToIntPass());
  addPass(createAtomicExpandPass(&getPPCTargetMachine()));

  // For the BG/Q (or if explicitly requested), add explicit data prefetch
  // intrinsics.
  bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
                        getOptLevel() != CodeGenOpt::None;
  if (EnablePrefetch.getNumOccurrences() > 0)
    UsePrefetching = EnablePrefetch;
  if (UsePrefetching)
    addPass(createLoopDataPrefetchPass());

  if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
    // Call SeparateConstOffsetFromGEP pass to extract constants within indices
    // and lower a GEP with multiple indices to either arithmetic operations or
    // multiple GEPs with single index.
    addPass(createSeparateConstOffsetFromGEPPass(TM, true));
    // Call EarlyCSE pass to find and remove subexpressions in the lowered
    // result.
    addPass(createEarlyCSEPass());
    // Do loop invariant code motion in case part of the lowered result is
    // invariant.
    addPass(createLICMPass());
  }

  TargetPassConfig::addIRPasses();
}

bool PPCPassConfig::addPreISel() {
  if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
    addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));

  if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
    addPass(createPPCCTRLoops(getPPCTargetMachine()));

  return false;
}

bool PPCPassConfig::addILPOpts() {
  addPass(&EarlyIfConverterID);

  if (EnableMachineCombinerPass)
    addPass(&MachineCombinerID);

  return true;
}

bool PPCPassConfig::addInstSelector() {
  // Install an instruction selector.
  addPass(createPPCISelDag(getPPCTargetMachine()));

#ifndef NDEBUG
  if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
    addPass(createPPCCTRLoopsVerify());
#endif

  addPass(createPPCVSXCopyPass());
  return false;
}

void PPCPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();
  // For little endian, remove where possible the vector swap instructions
  // introduced at code generation to normalize vector element order.
  if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
      !DisableVSXSwapRemoval)
    addPass(createPPCVSXSwapRemovalPass());
  // Target-specific peephole cleanups performed after instruction
  // selection.
  if (!DisableMIPeephole) {
    addPass(createPPCMIPeepholePass());
    addPass(&DeadMachineInstructionElimID);
  }
}

void PPCPassConfig::addPreRegAlloc() {
  if (getOptLevel() != CodeGenOpt::None) {
    initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
    insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
               &PPCVSXFMAMutateID);
  }

  // FIXME: We probably don't need to run these for -fPIE.
  if (getPPCTargetMachine().isPositionIndependent()) {
    // FIXME: LiveVariables should not be necessary here!
    // PPCTLSDynamicCallPass uses LiveIntervals, which previously depended on
    // LiveVariables.
This (unnecessary) dependency has been removed now, // however a stage-2 clang build fails without LiveVariables computed here. addPass(&LiveVariablesID, false); addPass(createPPCTLSDynamicCallPass()); } if (EnableExtraTOCRegDeps) addPass(createPPCTOCRegDepsPass()); } void PPCPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) { addPass(&IfConverterID); // This optimization must happen after anything that might do store-to-load // forwarding. Here we're after RA (and, thus, when spills are inserted) // but before post-RA scheduling. if (!DisableQPXLoadSplat) addPass(createPPCQPXLoadSplatPass()); } } void PPCPassConfig::addPreEmitPass() { if (getOptLevel() != CodeGenOpt::None) addPass(createPPCEarlyReturnPass(), false); // Must run branch selection immediately preceding the asm printer. addPass(createPPCBranchSelectionPass(), false); } TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() { return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(PPCTTIImpl(this, F)); }); } Index: head/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- head/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp (revision 309148) +++ head/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp (revision 309149) @@ -1,8048 +1,8060 @@ //===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // These classes wrap the information about a call or function // definition used to handle ABI compliancy. // //===----------------------------------------------------------------------===// #include "TargetInfo.h" #include "ABIInfo.h" #include "CGCXXABI.h" #include "CGValue.h" #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" #include // std::sort using namespace clang; using namespace CodeGen; static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, llvm::Value *Array, llvm::Value *Value, unsigned FirstIndex, unsigned LastIndex) { // Alternatively, we could emit this as a loop in the source. for (unsigned I = FirstIndex; I <= LastIndex; ++I) { llvm::Value *Cell = Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I); Builder.CreateAlignedStore(Value, Cell, CharUnits::One()); } } static bool isAggregateTypeForABI(QualType T) { return !CodeGenFunction::hasScalarEvaluationKind(T) || T->isMemberFunctionPointerType(); } ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByRef, bool Realign, llvm::Type *Padding) const { return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByRef, Realign, Padding); } ABIArgInfo ABIInfo::getNaturalAlignIndirectInReg(QualType Ty, bool Realign) const { return ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty), /*ByRef*/ false, Realign); } Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return Address::invalid(); } ABIInfo::~ABIInfo() {} /// Does the given lowering require more than the given number of /// registers when expanded? 
///
/// This is intended to be the basis of a reasonable basic implementation
/// of should{Pass,Return}IndirectlyForSwift.
///
/// For most targets, a limit of four total registers is reasonable; this
/// limits the amount of code required in order to move around the value
/// in case it wasn't produced immediately prior to the call by the caller
/// (or wasn't produced in exactly the right registers) or isn't used
/// immediately within the callee.  But some targets may need to further
/// limit the register count due to an inability to support that many
/// return registers.
static bool occupiesMoreThan(CodeGenTypes &cgt,
                             ArrayRef<llvm::Type*> scalarTypes,
                             unsigned maxAllRegisters) {
  unsigned intCount = 0, fpCount = 0;
  for (llvm::Type *type : scalarTypes) {
    if (type->isPointerTy()) {
      intCount++;
    } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
      auto ptrWidth = cgt.getTarget().getPointerWidth(0);
      intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
    } else {
      assert(type->isVectorTy() || type->isFloatingPointTy());
      fpCount++;
    }
  }

  return (intCount + fpCount > maxAllRegisters);
}

bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
                                             llvm::Type *eltTy,
                                             unsigned numElts) const {
  // The default implementation of this assumes that the target guarantees
  // 128-bit SIMD support but nothing more.
  return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16);
}

static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
                                              CGCXXABI &CXXABI) {
  const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
  if (!RD)
    return CGCXXABI::RAA_Default;
  return CXXABI.getRecordArgABI(RD);
}

static CGCXXABI::RecordArgABI getRecordArgABI(QualType T,
                                              CGCXXABI &CXXABI) {
  const RecordType *RT = T->getAs<RecordType>();
  if (!RT)
    return CGCXXABI::RAA_Default;
  return getRecordArgABI(RT, CXXABI);
}

/// Pass transparent unions as if they were the type of the first element. Sema
/// should ensure that all elements of the union have the same "machine type".
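A worked instance of the counting rule in occupiesMoreThan above, assuming 64-bit pointers (editor's sketch, not part of this change):

bool occupiesExample() {
  unsigned ptrWidth = 64, intBits = 128;                    // assumed target/type
  unsigned intCount = (intBits + ptrWidth - 1) / ptrWidth;  // i128 -> 2 registers
  unsigned fpCount  = 1;                                    // one double
  return (intCount + fpCount) > 4;                          // false: still fits
}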
static QualType useFirstFieldIfTransparentUnion(QualType Ty) { if (const RecordType *UT = Ty->getAsUnionType()) { const RecordDecl *UD = UT->getDecl(); if (UD->hasAttr()) { assert(!UD->field_empty() && "sema created an empty transparent union"); return UD->field_begin()->getType(); } } return Ty; } CGCXXABI &ABIInfo::getCXXABI() const { return CGT.getCXXABI(); } ASTContext &ABIInfo::getContext() const { return CGT.getContext(); } llvm::LLVMContext &ABIInfo::getVMContext() const { return CGT.getLLVMContext(); } const llvm::DataLayout &ABIInfo::getDataLayout() const { return CGT.getDataLayout(); } const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); } bool ABIInfo:: isAndroid() const { return getTarget().getTriple().isAndroid(); } bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return false; } bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return false; } bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const { return false; } LLVM_DUMP_METHOD void ABIArgInfo::dump() const { raw_ostream &OS = llvm::errs(); OS << "(ABIArgInfo Kind="; switch (TheKind) { case Direct: OS << "Direct Type="; if (llvm::Type *Ty = getCoerceToType()) Ty->print(OS); else OS << "null"; break; case Extend: OS << "Extend"; break; case Ignore: OS << "Ignore"; break; case InAlloca: OS << "InAlloca Offset=" << getInAllocaFieldIndex(); break; case Indirect: OS << "Indirect Align=" << getIndirectAlign().getQuantity() << " ByVal=" << getIndirectByVal() << " Realign=" << getIndirectRealign(); break; case Expand: OS << "Expand"; break; case CoerceAndExpand: OS << "CoerceAndExpand Type="; getCoerceAndExpandType()->print(OS); break; } OS << ")\n"; } // Dynamically round a pointer up to a multiple of the given alignment. static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF, llvm::Value *Ptr, CharUnits Align) { llvm::Value *PtrAsInt = Ptr; // OverflowArgArea = (OverflowArgArea + Align - 1) & -Align; PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy); PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt, llvm::ConstantInt::get(CGF.IntPtrTy, Align.getQuantity() - 1)); PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt, llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity())); PtrAsInt = CGF.Builder.CreateIntToPtr(PtrAsInt, Ptr->getType(), Ptr->getName() + ".aligned"); return PtrAsInt; } /// Emit va_arg for a platform using the common void* representation, /// where arguments are simply emitted in an array of slots on the stack. /// /// This version implements the core direct-value passing rules. /// /// \param SlotSize - The size and alignment of a stack slot. /// Each argument will be allocated to a multiple of this number of /// slots, and all the slots will be aligned to this value. /// \param AllowHigherAlign - The slot alignment is not a cap; /// an argument type with an alignment greater than the slot size /// will be emitted on a higher-alignment address, potentially /// leaving one or more empty slots behind as padding. If this /// is false, the returned address might be less-aligned than /// DirectAlign. static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm::Type *DirectTy, CharUnits DirectSize, CharUnits DirectAlign, CharUnits SlotSize, bool AllowHigherAlign) { // Cast the element type to i8* if necessary. Some platforms define // va_list as a struct containing an i8* instead of just an i8*. 
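The rounding in emitRoundPointerUpToAlignment above is the standard align-up idiom for power-of-two alignments; & -Align and & ~(Align - 1) compute the same mask. A self-contained sketch (editor's example):

#include <cstdint>
uintptr_t alignUp(uintptr_t P, uintptr_t Align) {
  return (P + Align - 1) & ~(Align - 1);  // alignUp(0x1001, 16) == 0x1010
}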
if (VAListAddr.getElementType() != CGF.Int8PtrTy) VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy); llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur"); // If the CC aligns values higher than the slot size, do so if needed. Address Addr = Address::invalid(); if (AllowHigherAlign && DirectAlign > SlotSize) { Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign), DirectAlign); } else { Addr = Address(Ptr, SlotSize); } // Advance the pointer past the argument, then store that back. CharUnits FullDirectSize = DirectSize.alignTo(SlotSize); llvm::Value *NextPtr = CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), FullDirectSize, "argp.next"); CGF.Builder.CreateStore(NextPtr, VAListAddr); // If the argument is smaller than a slot, and this is a big-endian // target, the argument will be right-adjusted in its slot. if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() && !DirectTy->isStructTy()) { Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize); } Addr = CGF.Builder.CreateElementBitCast(Addr, DirectTy); return Addr; } /// Emit va_arg for a platform using the common void* representation, /// where arguments are simply emitted in an array of slots on the stack. /// /// \param IsIndirect - Values of this type are passed indirectly. /// \param ValueInfo - The size and alignment of this type, generally /// computed with getContext().getTypeInfoInChars(ValueTy). /// \param SlotSizeAndAlign - The size and alignment of a stack slot. /// Each argument will be allocated to a multiple of this number of /// slots, and all the slots will be aligned to this value. /// \param AllowHigherAlign - The slot alignment is not a cap; /// an argument type with an alignment greater than the slot size /// will be emitted on a higher-alignment address, potentially /// leaving one or more empty slots behind as padding. static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType ValueTy, bool IsIndirect, std::pair ValueInfo, CharUnits SlotSizeAndAlign, bool AllowHigherAlign) { // The size and alignment of the value that was passed directly. CharUnits DirectSize, DirectAlign; if (IsIndirect) { DirectSize = CGF.getPointerSize(); DirectAlign = CGF.getPointerAlign(); } else { DirectSize = ValueInfo.first; DirectAlign = ValueInfo.second; } // Cast the address we've calculated to the right type. llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy); if (IsIndirect) DirectTy = DirectTy->getPointerTo(0); Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize, DirectAlign, SlotSizeAndAlign, AllowHigherAlign); if (IsIndirect) { Addr = Address(CGF.Builder.CreateLoad(Addr), ValueInfo.second); } return Addr; } static Address emitMergePHI(CodeGenFunction &CGF, Address Addr1, llvm::BasicBlock *Block1, Address Addr2, llvm::BasicBlock *Block2, const llvm::Twine &Name = "") { assert(Addr1.getType() == Addr2.getType()); llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name); PHI->addIncoming(Addr1.getPointer(), Block1); PHI->addIncoming(Addr2.getPointer(), Block2); CharUnits Align = std::min(Addr1.getAlignment(), Addr2.getAlignment()); return Address(PHI, Align); } TargetCodeGenInfo::~TargetCodeGenInfo() { delete Info; } // If someone can figure out a general rule for this, that would be great. // It's probably just doomed to be platform-dependent, though. 
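Two size computations in emitVoidPtrDirectVAArg are worth making concrete: the slot advance rounds the value size up to a whole number of slots, and on big-endian targets a small argument is right-adjusted within its slot. Editor's sketch with assumed sizes:

// An i8 argument in an 8-byte slot: argp.next advances by 8, and on a
// big-endian target the load address is bumped by SlotSize - DirectSize.
unsigned vaArgLoadAdjust(unsigned SlotSize, unsigned DirectSize, bool BE) {
  return (BE && DirectSize < SlotSize) ? SlotSize - DirectSize : 0;
}
// vaArgLoadAdjust(8, 1, true) == 7: read the last byte of the slot.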
unsigned TargetCodeGenInfo::getSizeOfUnwindException() const { // Verified for: // x86-64 FreeBSD, Linux, Darwin // x86-32 FreeBSD, Linux, Darwin // PowerPC Linux, Darwin // ARM Darwin (*not* EABI) // AArch64 Linux return 32; } bool TargetCodeGenInfo::isNoProtoCallVariadic(const CallArgList &args, const FunctionNoProtoType *fnType) const { // The following conventions are known to require this to be false: // x86_stdcall // MIPS // For everything else, we just prefer false unless we opt out. return false; } void TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const { // This assumes the user is passing a library name like "rt" instead of a // filename like "librt.a/so", and that they don't care whether it's static or // dynamic. Opt = "-l"; Opt += Lib; } unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::C; } static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); /// isEmptyField - Return true iff a the field is "empty", that is it /// is an unnamed bit-field or an (array of) empty record(s). static bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays) { if (FD->isUnnamedBitfield()) return true; QualType FT = FD->getType(); // Constant arrays of empty records count as empty, strip them off. // Constant arrays of zero length always count as empty. if (AllowArrays) while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) { if (AT->getSize() == 0) return true; FT = AT->getElementType(); } const RecordType *RT = FT->getAs(); if (!RT) return false; // C++ record fields are never empty, at least in the Itanium ABI. // // FIXME: We should use a predicate for whether this behavior is true in the // current ABI. if (isa(RT->getDecl())) return false; return isEmptyRecord(Context, FT, AllowArrays); } /// isEmptyRecord - Return true iff a structure contains only empty /// fields. Note that a structure with a flexible array member is not /// considered empty. static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { const RecordType *RT = T->getAs(); if (!RT) return false; const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) if (!isEmptyRecord(Context, I.getType(), true)) return false; for (const auto *I : RD->fields()) if (!isEmptyField(Context, I, AllowArrays)) return false; return true; } /// isSingleElementStruct - Determine if a structure is a "single /// element struct", i.e. it has exactly one non-empty field or /// exactly one field which is itself a single element /// struct. Structures with flexible array members are never /// considered single element structs. /// /// \return The field declaration for the single non-empty field, if /// it exists. static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { const RecordType *RT = T->getAs(); if (!RT) return nullptr; const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return nullptr; const Type *Found = nullptr; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { for (const auto &I : CXXRD->bases()) { // Ignore empty records. if (isEmptyRecord(Context, I.getType(), true)) continue; // If we already found an element then this isn't a single-element struct. 
      if (Found)
        return nullptr;

      // If this is non-empty and not a single element struct, the composite
      // cannot be a single element struct.
      Found = isSingleElementStruct(I.getType(), Context);
      if (!Found)
        return nullptr;
    }
  }

  // Check for single element.
  for (const auto *FD : RD->fields()) {
    QualType FT = FD->getType();

    // Ignore empty fields.
    if (isEmptyField(Context, FD, true))
      continue;

    // If we already found an element then this isn't a single-element
    // struct.
    if (Found)
      return nullptr;

    // Treat single element arrays as the element.
    while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
      if (AT->getSize().getZExtValue() != 1)
        break;
      FT = AT->getElementType();
    }

    if (!isAggregateTypeForABI(FT)) {
      Found = FT.getTypePtr();
    } else {
      Found = isSingleElementStruct(FT, Context);
      if (!Found)
        return nullptr;
    }
  }

  // We don't consider a struct a single-element struct if it has
  // padding beyond the element type.
  if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T))
    return nullptr;

  return Found;
}
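// A few illustrative cases for the predicate above (a sketch):
//   struct A { struct { double d; } inner; };  // single-element: double
//   struct B { int x; struct {} empty; };      // single-element: int (in C)
//   struct C { float f; int i; };              // two elements -> nullptr
//   struct D { double d[1]; };                 // array of one -> double
// A struct whose total size exceeds the found element's size (trailing
// padding) is rejected by the final getTypeSize check.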
namespace {
Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                       const ABIArgInfo &AI) {
  // This default implementation defers to the llvm backend's va_arg
  // instruction. It can handle only passing arguments directly
  // (typically only handled in the backend for primitive types), or
  // aggregates passed indirectly by pointer (NOTE: if the "byval"
  // flag has ABI impact in the callee, this implementation cannot
  // work.)

  // Only a few cases are covered here at the moment -- those needed
  // by the default abi.
  llvm::Value *Val;

  if (AI.isIndirect()) {
    assert(!AI.getPaddingType() &&
           "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
    assert(
        !AI.getIndirectRealign() &&
        "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!");

    auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty);
    CharUnits TyAlignForABI = TyInfo.second;

    llvm::Type *BaseTy =
        llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
    llvm::Value *Addr =
        CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy);
    return Address(Addr, TyAlignForABI);
  } else {
    assert((AI.isDirect() || AI.isExtend()) &&
           "Unexpected ArgInfo Kind in generic VAArg emitter!");

    assert(!AI.getInReg() &&
           "Unexpected InReg seen in arginfo in generic VAArg emitter!");
    assert(!AI.getPaddingType() &&
           "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
    assert(!AI.getDirectOffset() &&
           "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!");
    assert(!AI.getCoerceToType() &&
           "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!");

    Address Temp = CGF.CreateMemTemp(Ty, "varet");
    Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(), CGF.ConvertType(Ty));
    CGF.Builder.CreateStore(Val, Temp);
    return Temp;
  }
}

/// DefaultABIInfo - The default implementation for ABI specific
/// details. This implementation provides information which results in
/// self-consistent and sensible LLVM IR generation, but does not
/// conform to any particular ABI.
class DefaultABIInfo : public ABIInfo {
public:
  DefaultABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType RetTy) const;

  void computeInfo(CGFunctionInfo &FI) const override {
    if (!getCXXABI().classifyReturnType(FI))
      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
    for (auto &I : FI.arguments())
      I.info = classifyArgumentType(I.type);
  }

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override {
    return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
  }
};

class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
      : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
};

ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (isAggregateTypeForABI(Ty)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // passed by value.
    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);

    return getNaturalAlignIndirect(Ty);
  }

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                        : ABIArgInfo::getDirect());
}

ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (isAggregateTypeForABI(RetTy))
    return getNaturalAlignIndirect(RetTy);

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                           : ABIArgInfo::getDirect());
}
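// Sketch of what this default lowering produces:
//   void f(short);     // promotable integer -> Extend (sign/zero-extended)
//   void f(int);       // not promotable     -> Direct
//   void f(struct S);  // aggregate          -> natural-align Indirect
//   struct S f(void);  // aggregate return   -> natural-align Indirect (sret)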
//===----------------------------------------------------------------------===//
// WebAssembly ABI Implementation
//
// This is a very simple ABI that relies a lot on DefaultABIInfo.
//===----------------------------------------------------------------------===//

class WebAssemblyABIInfo final : public DefaultABIInfo {
public:
  explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT)
      : DefaultABIInfo(CGT) {}

private:
  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  // DefaultABIInfo's classifyReturnType and classifyArgumentType are
  // non-virtual, but computeInfo and EmitVAArg are virtual, so we
  // overload them.
  void computeInfo(CGFunctionInfo &FI) const override {
    if (!getCXXABI().classifyReturnType(FI))
      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
    for (auto &Arg : FI.arguments())
      Arg.info = classifyArgumentType(Arg.type);
  }

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
};

class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
public:
  explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
      : TargetCodeGenInfo(new WebAssemblyABIInfo(CGT)) {}
};

/// \brief Classify argument of given type \p Ty.
ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (isAggregateTypeForABI(Ty)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // passed by value.
    if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();
    // Lower single-element structs to just pass a regular value. TODO: We
    // could do reasonable-size multiple-element structs too, using
    // getExpand(), though watch out for things like bitfields.
    if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
      return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
  }

  // Otherwise just do the default thing.
  return DefaultABIInfo::classifyArgumentType(Ty);
}

ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
  if (isAggregateTypeForABI(RetTy)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // returned by value.
    if (!getRecordArgABI(RetTy, getCXXABI())) {
      // Ignore empty structs/unions.
      if (isEmptyRecord(getContext(), RetTy, true))
        return ABIArgInfo::getIgnore();
      // Lower single-element structs to just return a regular value. TODO: We
      // could do reasonable-size multiple-element structs too, using
      // ABIArgInfo::getDirect().
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
    }
  }

  // Otherwise just do the default thing.
  return DefaultABIInfo::classifyReturnType(RetTy);
}

Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                      QualType Ty) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect=*/ false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(4),
                          /*AllowHigherAlign=*/ true);
}

//===----------------------------------------------------------------------===//
// le32/PNaCl bitcode ABI Implementation
//
// This is a simplified version of the x86_32 ABI.  Arguments and return values
// are always passed on the stack.
//===----------------------------------------------------------------------===//

class PNaClABIInfo : public ABIInfo {
public:
  PNaClABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType RetTy) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
};

class PNaClTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
      : TargetCodeGenInfo(new PNaClABIInfo(CGT)) {}
};

void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &I : FI.arguments())
    I.info = classifyArgumentType(I.type);
}

Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                QualType Ty) const {
  // The PNaCL ABI is a bit odd, in that varargs don't use normal
  // function classification. Structs get passed directly for varargs
  // functions, through a rewriting transform in
  // pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows
  // this target to actually support va_arg instructions with an
  // aggregate type, unlike other targets.
  return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
}

/// \brief Classify argument of given type \p Ty.
ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const {
  if (isAggregateTypeForABI(Ty)) {
    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
    return getNaturalAlignIndirect(Ty);
  } else if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
    // Treat an enum type as its underlying type.
    Ty = EnumTy->getDecl()->getIntegerType();
  } else if (Ty->isFloatingType()) {
    // Floating-point types don't go inreg.
    return ABIArgInfo::getDirect();
  }

  return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                        : ABIArgInfo::getDirect());
}

ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  // In the PNaCl ABI we always return records/structures on the stack.
  if (isAggregateTypeForABI(RetTy))
    return getNaturalAlignIndirect(RetTy);

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                           : ABIArgInfo::getDirect());
}

/// IsX86_MMXType - Return true if this is an MMX type.
bool IsX86_MMXType(llvm::Type *IRType) {
  // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
  return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
    cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
    IRType->getScalarSizeInBits() != 64;
}

static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                          StringRef Constraint,
                                          llvm::Type* Ty) {
  if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) {
    if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) {
      // Invalid MMX constraint
      return nullptr;
    }

    return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
  }

  // No operation needed
  return Ty;
}

/// Returns true if this type can be passed in SSE registers with the
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half)
      return true;
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
    // registers specially.
    unsigned VecSize = Context.getTypeSize(VT);
    if (VecSize == 128 || VecSize == 256 || VecSize == 512)
      return true;
  }
  return false;
}

/// Returns true if this aggregate is small enough to be passed in SSE registers
/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
  return NumMembers <= 4;
}
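// For example (a sketch; the homogeneous-aggregate walk itself lives in
// ABIInfo::isHomogeneousAggregate):
//   struct HVA2 { __m128 a, b; };           // 2 x 128-bit vector -> SSE regs
//   struct HVA5 { __m128 a, b, c, d, e; };  // 5 members, too many -> memory
// float and double qualify as base types, but half (__fp16) does not.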
//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//

/// \brief Similar to llvm::CCState, but for Clang.
struct CCState {
  CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {}

  unsigned CC;
  unsigned FreeRegs;
  unsigned FreeSSERegs;
};

/// X86_32ABIInfo - The X86-32 ABI information.
class X86_32ABIInfo : public SwiftABIInfo {
  enum Class {
    Integer,
    Float
  };

  static const unsigned MinABIStackAlignInBytes = 4;

  bool IsDarwinVectorABI;
  bool IsRetSmallStructInRegABI;
  bool IsWin32StructABI;
  bool IsSoftFloatABI;
  bool IsMCUABI;
  unsigned DefaultNumRegisterParameters;

  static bool isRegisterSize(unsigned Size) {
    return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
  }

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;

  /// getIndirectResult - Give a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;

  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;

  /// \brief Return the alignment to use for the given type on the stack.
  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;

  Class classify(QualType Ty) const;
  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
  ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;

  /// \brief Updates the number of available free registers, returns
  /// true if any registers were allocated.
  bool updateFreeRegs(QualType Ty, CCState &State) const;

  bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
                                bool &NeedsPadding) const;
  bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;

  bool canExpandIndirectArgument(QualType Ty) const;

  /// \brief Rewrite the function info so that all memory arguments use
  /// inalloca.
  void rewriteWithInAlloca(CGFunctionInfo &FI) const;

  void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                           CharUnits &StackOffset, ABIArgInfo &Info,
                           QualType Type) const;

public:

  void computeInfo(CGFunctionInfo &FI) const override;
  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;

  X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                bool RetSmallStructInRegABI, bool Win32StructABI,
                unsigned NumRegisterParameters, bool SoftFloatABI)
    : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
      IsRetSmallStructInRegABI(RetSmallStructInRegABI),
      IsWin32StructABI(Win32StructABI),
      IsSoftFloatABI(SoftFloatABI),
      IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
      DefaultNumRegisterParameters(NumRegisterParameters) {}

  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
                                    ArrayRef<llvm::Type *> scalars,
                                    bool asReturnValue) const override {
    // LLVM's x86-32 lowering currently only assigns up to three
    // integer registers and three fp registers.  Oddly, it'll use up to
    // four vector registers for vectors, but those can overlap with the
    // scalar registers.
    return occupiesMoreThan(CGT, scalars, /*total*/ 3);
  }
};

class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                          bool RetSmallStructInRegABI, bool Win32StructABI,
                          unsigned NumRegisterParameters, bool SoftFloatABI)
      : TargetCodeGenInfo(new X86_32ABIInfo(
            CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
            NumRegisterParameters, SoftFloatABI)) {}

  static bool isStructReturnInRegABI(
      const llvm::Triple &Triple, const CodeGenOptions &Opts);

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    // Darwin uses different dwarf register numbers for EH.
    if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
    return 4;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override;

  llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type* Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
                                std::string &Constraints,
                                std::vector<llvm::Type *> &ResultRegTypes,
                                std::vector<llvm::Type *> &ResultTruncRegTypes,
                                std::vector<LValue> &ResultRegDests,
                                std::string &AsmString,
                                unsigned NumOutputs) const override;

  llvm::Constant *
  getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
    unsigned Sig = (0xeb << 0) |  // jmp rel8
                   (0x06 << 8) |  //           .+0x08
                   ('F' << 16) |
                   ('T' << 24);
    return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
  }

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "movl\t%ebp, %ebp"
           "\t\t## marker for objc_retainAutoreleaseReturnValue";
  }
};

}

/// Rewrite input constraint references after adding some output constraints.
/// In the case where there is one output and one input and we add one output,
/// we need to replace all operand references greater than or equal to 1:
///     mov $0, $1
///     mov eax, $1
/// The result will be:
///     mov $0, $2
///     mov eax, $2
static void rewriteInputConstraintReferences(unsigned FirstIn,
                                             unsigned NumNewOuts,
                                             std::string &AsmString) {
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  size_t Pos = 0;
  while (Pos < AsmString.size()) {
    size_t DollarStart = AsmString.find('$', Pos);
    if (DollarStart == std::string::npos)
      DollarStart = AsmString.size();
    size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
    if (DollarEnd == std::string::npos)
      DollarEnd = AsmString.size();
    OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
    Pos = DollarEnd;
    size_t NumDollars = DollarEnd - DollarStart;
    if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
      // We have an operand reference.
      size_t DigitStart = Pos;
      size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
      if (DigitEnd == std::string::npos)
        DigitEnd = AsmString.size();
      StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
      unsigned OperandIndex;
      if (!OperandStr.getAsInteger(10, OperandIndex)) {
        if (OperandIndex >= FirstIn)
          OperandIndex += NumNewOuts;
        OS << OperandIndex;
      } else {
        OS << OperandStr;
      }
      Pos = DigitEnd;
    }
  }
  AsmString = std::move(OS.str());
}

/// Add output constraints for EAX:EDX because they are return registers.
void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
    CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
    std::vector<llvm::Type *> &ResultRegTypes,
    std::vector<llvm::Type *> &ResultTruncRegTypes,
    std::vector<LValue> &ResultRegDests, std::string &AsmString,
    unsigned NumOutputs) const {
  uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());

  // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
  // larger.
  if (!Constraints.empty())
    Constraints += ',';
  if (RetWidth <= 32) {
    Constraints += "={eax}";
    ResultRegTypes.push_back(CGF.Int32Ty);
  } else {
    // Use the 'A' constraint for EAX:EDX.
    Constraints += "=A";
    ResultRegTypes.push_back(CGF.Int64Ty);
  }

  // Truncate EAX or EAX:EDX to an integer of the appropriate size.
  llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
  ResultTruncRegTypes.push_back(CoerceTy);

  // Coerce the integer by bitcasting the return slot pointer.
  ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(),
                                                  CoerceTy->getPointerTo()));
  ResultRegDests.push_back(ReturnSlot);

  rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
}

/// shouldReturnTypeInRegister - Determine if the given type should be
/// returned in a register (for the Darwin and MCU ABI).
bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
                                               ASTContext &Context) const {
  uint64_t Size = Context.getTypeSize(Ty);

  // For i386, type must be register sized.
  // For the MCU ABI, it only needs to be <= 8-byte
  if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
    return false;

  if (Ty->isVectorType()) {
    // 64- and 128- bit vectors inside structures are not returned in
    // registers.
    if (Size == 64 || Size == 128)
      return false;

    return true;
  }

  // If this is a builtin, pointer, enum, complex type, member pointer, or
  // member function pointer it is ok.
  if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
      Ty->isAnyComplexType() || Ty->isEnumeralType() ||
      Ty->isBlockPointerType() || Ty->isMemberPointerType())
    return true;

  // Arrays are treated like records.
  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
    return shouldReturnTypeInRegister(AT->getElementType(), Context);

  // Otherwise, it must be a record type.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT) return false;

  // FIXME: Traverse bases here too.

  // Structure types are passed in register if all fields would be
  // passed in a register.
  for (const auto *FD : RT->getDecl()->fields()) {
    // Empty fields are ignored.
    if (isEmptyField(Context, FD, true))
      continue;

    // Check fields recursively.
    if (!shouldReturnTypeInRegister(FD->getType(), Context))
      return false;
  }
  return true;
}

static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
  // Treat complex types as the element type.
  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
    Ty = CTy->getElementType();

  // Check for a type which we know has a simple scalar argument-passing
  // convention without any padding.  (We're specifically looking for 32
  // and 64-bit integer and integer-equivalents, float, and double.)
  if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
      !Ty->isEnumeralType() && !Ty->isBlockPointerType())
    return false;

  uint64_t Size = Context.getTypeSize(Ty);
  return Size == 32 || Size == 64;
}

/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
/// optimizations.
bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
  // We can only expand structure types.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
    if (!IsWin32StructABI) {
      // On non-Windows, we have to conservatively match our old bitcode
      // prototypes in order to be ABI-compatible at the bitcode level.
      if (!CXXRD->isCLike())
        return false;
    } else {
      // Don't do this for dynamic classes.
      if (CXXRD->isDynamicClass())
        return false;
      // Don't do this if there are any non-empty bases.
      for (const CXXBaseSpecifier &Base : CXXRD->bases()) {
        if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true))
          return false;
      }
    }
  }

  uint64_t Size = 0;

  for (const auto *FD : RD->fields()) {
    // Scalar arguments on the stack get 4 byte alignment on x86. If the
    // argument is smaller than 32-bits, expanding the struct will create
    // alignment padding.
    if (!is32Or64BitBasicType(FD->getType(), getContext()))
      return false;

    // FIXME: Reject bit-fields wholesale; there are two problems, we don't
    // know how to expand them yet, and the predicate for telling if a bitfield
    // still counts as "basic" is more complicated than what we were doing
    // previously.
    if (FD->isBitField())
      return false;

    Size += getContext().getTypeSize(FD->getType());
  }

  // We can do this if there was no alignment padding.
  return Size == getContext().getTypeSize(Ty);
}
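// Expansion examples for the test above (a sketch):
//   struct P { int x; int y; };      // two 32-bit scalars, no padding -> yes
//   struct Q { short s; int i; };    // 16-bit field forces padding    -> no
//   struct R { int x; int b : 3; };  // bit-field                      -> no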
ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy,
                                                  CCState &State) const {
  // If the return value is indirect, then the hidden argument is consuming one
  // integer register.
  if (State.FreeRegs) {
    --State.FreeRegs;
    if (!IsMCUABI)
      return getNaturalAlignIndirectInReg(RetTy);
  }
  return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
}

ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
                                             CCState &State) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if (State.CC == llvm::CallingConv::X86_VectorCall &&
      isHomogeneousAggregate(RetTy, Base, NumElts)) {
    // The LLVM struct type for such an aggregate should lower properly.
    return ABIArgInfo::getDirect();
  }

  if (const VectorType *VT = RetTy->getAs<VectorType>()) {
    // On Darwin, some vectors are returned in registers.
    if (IsDarwinVectorABI) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // 128-bit vectors are a special case; they are returned in
      // registers and we need to make sure to pick a type the LLVM
      // backend will like.
      if (Size == 128)
        return ABIArgInfo::getDirect(llvm::VectorType::get(
                  llvm::Type::getInt64Ty(getVMContext()), 2));

      // Always return in register if it fits in a general purpose
      // register, or if it is 64 bits and has a single element.
      if ((Size == 8 || Size == 16 || Size == 32) ||
          (Size == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                            Size));

      return getIndirectReturnResult(RetTy, State);
    }

    return ABIArgInfo::getDirect();
  }

  if (isAggregateTypeForABI(RetTy)) {
    if (const RecordType *RT = RetTy->getAs<RecordType>()) {
      // Structures with flexible arrays are always indirect.
      if (RT->getDecl()->hasFlexibleArrayMember())
        return getIndirectReturnResult(RetTy, State);
    }

    // If specified, structs and unions are always indirect.
    if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
      return getIndirectReturnResult(RetTy, State);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), RetTy, true))
      return ABIArgInfo::getIgnore();

    // Small structures which are register sized are generally returned
    // in a register.
    if (shouldReturnTypeInRegister(RetTy, getContext())) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // As a special-case, if the struct is a "single-element" struct, and
      // the field is of type "float" or "double", return it in a
      // floating-point register. (MSVC does not apply this special case.)
      // We apply a similar transformation for pointer types to improve the
      // quality of the generated IR.
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
            || SeltTy->hasPointerRepresentation())
          return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      // FIXME: We should be able to narrow this integer in cases with dead
      // padding.
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size));
    }

    return getIndirectReturnResult(RetTy, State);
  }

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                           : ABIArgInfo::getDirect());
}

static bool isSSEVectorType(ASTContext &Context, QualType Ty) {
  return Ty->getAs<VectorType>() && Context.getTypeSize(Ty) == 128;
}

static bool isRecordWithSSEVectorType(ASTContext &Context, QualType Ty) {
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT)
    return 0;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const auto &I : CXXRD->bases())
      if (!isRecordWithSSEVectorType(Context, I.getType()))
        return false;

  for (const auto *i : RD->fields()) {
    QualType FT = i->getType();

    if (isSSEVectorType(Context, FT))
      return true;

    if (isRecordWithSSEVectorType(Context, FT))
      return true;
  }

  return false;
}

unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
                                                 unsigned Align) const {
  // Otherwise, if the alignment is less than or equal to the minimum ABI
  // alignment, just use the default; the backend will handle this.
  if (Align <= MinABIStackAlignInBytes)
    return 0; // Use default alignment.

  // On non-Darwin, the stack type alignment is always 4.
  if (!IsDarwinVectorABI) {
    // Set explicit alignment, since we may need to realign the top.
    return MinABIStackAlignInBytes;
  }

  // Otherwise, if the type contains an SSE vector type, the alignment is 16.
  if (Align >= 16 && (isSSEVectorType(getContext(), Ty) ||
                      isRecordWithSSEVectorType(getContext(), Ty)))
    return 16;

  return MinABIStackAlignInBytes;
}

ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
                                            CCState &State) const {
  if (!ByVal) {
    if (State.FreeRegs) {
      --State.FreeRegs; // Non-byval indirects just use one pointer.
      if (!IsMCUABI)
        return getNaturalAlignIndirectInReg(Ty);
    }
    return getNaturalAlignIndirect(Ty, false);
  }

  // Compute the byval alignment.
  unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
  unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
  if (StackAlign == 0)
    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true);

  // If the stack alignment is less than the type alignment, realign the
  // argument.
  bool Realign = TypeAlign > StackAlign;
  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
                                 /*ByVal=*/true, Realign);
}

X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
  const Type *T = isSingleElementStruct(Ty, getContext());
  if (!T)
    T = Ty.getTypePtr();

  if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
    BuiltinType::Kind K = BT->getKind();
    if (K == BuiltinType::Float || K == BuiltinType::Double)
      return Float;
  }
  return Integer;
}

bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
  if (!IsSoftFloatABI) {
    Class C = classify(Ty);
    if (C == Float)
      return false;
  }

  unsigned Size = getContext().getTypeSize(Ty);
  unsigned SizeInRegs = (Size + 31) / 32;

  if (SizeInRegs == 0)
    return false;

  if (!IsMCUABI) {
    if (SizeInRegs > State.FreeRegs) {
      State.FreeRegs = 0;
      return false;
    }
  } else {
    // The MCU psABI allows passing parameters in-reg even if there are
    // earlier parameters that are passed on the stack. Also,
    // it does not allow passing >8-byte structs in-register,
    // even if there are 3 free registers available.
    if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
      return false;
  }

  State.FreeRegs -= SizeInRegs;
  return true;
}

bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
                                             bool &InReg,
                                             bool &NeedsPadding) const {
  // On Windows, aggregates other than HFAs are never passed in registers, and
  // they do not consume register slots. Homogenous floating-point aggregates
  // (HFAs) have already been dealt with at this point.
  if (IsWin32StructABI && isAggregateTypeForABI(Ty))
    return false;

  NeedsPadding = false;
  InReg = !IsMCUABI;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (IsMCUABI)
    return true;

  if (State.CC == llvm::CallingConv::X86_FastCall ||
      State.CC == llvm::CallingConv::X86_VectorCall) {
    if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
      NeedsPadding = true;

    return false;
  }

  return true;
}

bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
  if (!updateFreeRegs(Ty, State))
    return false;

  if (IsMCUABI)
    return false;

  if (State.CC == llvm::CallingConv::X86_FastCall ||
      State.CC == llvm::CallingConv::X86_VectorCall) {
    if (getContext().getTypeSize(Ty) > 32)
      return false;

    return (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
        Ty->isReferenceType());
  }

  return true;
}
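// Register budgets for the helpers above are set up in computeInfo below
// (a sketch): fastcall and vectorcall start with 2 integer register slots
// (ECX and EDX), the MCU psABI with 3, and regparm(N) with N. Under
// fastcall/vectorcall only integral or pointer types of 32 bits or less
// actually land in a register; a 64-bit scalar would burn both slots and
// still be passed on the stack.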
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
                                               CCState &State) const {
  // FIXME: Set alignment on indirect arguments.

  Ty = useFirstFieldIfTransparentUnion(Ty);

  // Check with the C++ ABI first.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
    if (RAA == CGCXXABI::RAA_Indirect) {
      return getIndirectResult(Ty, false, State);
    } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
      // The field index doesn't matter, we'll fix it up later.
      return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
    }
  }

  // vectorcall adds the concept of a homogenous vector aggregate, similar
  // to other targets.
  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if (State.CC == llvm::CallingConv::X86_VectorCall &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (State.FreeSSERegs >= NumElts) {
      State.FreeSSERegs -= NumElts;
      if (Ty->isBuiltinType() || Ty->isVectorType())
        return ABIArgInfo::getDirect();
      return ABIArgInfo::getExpand();
    }
    return getIndirectResult(Ty, /*ByVal=*/false, State);
  }

  if (isAggregateTypeForABI(Ty)) {
    // Structures with flexible arrays are always indirect.
    // FIXME: This should not be byval!
    if (RT && RT->getDecl()->hasFlexibleArrayMember())
      return getIndirectResult(Ty, true, State);

    // Ignore empty structs/unions on non-Windows.
    if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();

    llvm::LLVMContext &LLVMContext = getVMContext();
    llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
    bool NeedsPadding = false;
    bool InReg;
    if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
      unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
      SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
      llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
      if (InReg)
        return ABIArgInfo::getDirectInReg(Result);
      else
        return ABIArgInfo::getDirect(Result);
    }
    llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;

    // Expand small (<= 128-bit) record types when we know that the stack layout
    // of those arguments will match the struct. This is important because the
    // LLVM backend isn't smart enough to remove byval, which inhibits many
    // optimizations.
    // Don't do this for the MCU if there are still free integer registers
    // (see X86_64 ABI for full explanation).
    if (getContext().getTypeSize(Ty) <= 4 * 32 &&
        (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
      return ABIArgInfo::getExpandWithPadding(
          State.CC == llvm::CallingConv::X86_FastCall ||
              State.CC == llvm::CallingConv::X86_VectorCall,
          PaddingType);

    return getIndirectResult(Ty, true, State);
  }

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // On Darwin, some vectors are passed in memory, we handle this by passing
    // it as an i8/i16/i32/i64.
    if (IsDarwinVectorABI) {
      uint64_t Size = getContext().getTypeSize(Ty);
      if ((Size == 8 || Size == 16 || Size == 32) ||
          (Size == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                            Size));
    }

    if (IsX86_MMXType(CGT.ConvertType(Ty)))
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));

    return ABIArgInfo::getDirect();
  }


  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  bool InReg = shouldPrimitiveUseInReg(Ty, State);

  if (Ty->isPromotableIntegerType()) {
    if (InReg)
      return ABIArgInfo::getExtendInReg();
    return ABIArgInfo::getExtend();
  }

  if (InReg)
    return ABIArgInfo::getDirectInReg();
  return ABIArgInfo::getDirect();
}

void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  CCState State(FI.getCallingConvention());
  if (IsMCUABI)
    State.FreeRegs = 3;
  else if (State.CC == llvm::CallingConv::X86_FastCall)
    State.FreeRegs = 2;
  else if (State.CC == llvm::CallingConv::X86_VectorCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 6;
  } else if (FI.getHasRegParm())
    State.FreeRegs = FI.getRegParm();
  else
    State.FreeRegs = DefaultNumRegisterParameters;

  if (!getCXXABI().classifyReturnType(FI)) {
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
  } else if (FI.getReturnInfo().isIndirect()) {
    // The C++ ABI is not aware of register usage, so we have to check if the
    // return value was sret and put it in a register ourselves if appropriate.
    if (State.FreeRegs) {
      --State.FreeRegs;  // The sret parameter consumes a register.
      if (!IsMCUABI)
        FI.getReturnInfo().setInReg(true);
    }
  }

  // The chain argument effectively gives us another free register.
  if (FI.isChainCall())
    ++State.FreeRegs;

  bool UsedInAlloca = false;
  for (auto &I : FI.arguments()) {
    I.info = classifyArgumentType(I.type, State);
    UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
  }

  // If we needed to use inalloca for any argument, do a second pass and
  // rewrite all the memory arguments to use inalloca.
  if (UsedInAlloca)
    rewriteWithInAlloca(FI);
}

void
X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                                   CharUnits &StackOffset, ABIArgInfo &Info,
                                   QualType Type) const {
  // Arguments are always 4-byte-aligned.
  CharUnits FieldAlign = CharUnits::fromQuantity(4);

  assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct");
  Info = ABIArgInfo::getInAlloca(FrameFields.size());
  FrameFields.push_back(CGT.ConvertTypeForMem(Type));
  StackOffset += getContext().getTypeSizeInChars(Type);

  // Insert padding bytes to respect alignment.
  CharUnits FieldEnd = StackOffset;
  StackOffset = FieldEnd.alignTo(FieldAlign);
  if (StackOffset != FieldEnd) {
    CharUnits NumBytes = StackOffset - FieldEnd;
    llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
    Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
    FrameFields.push_back(Ty);
  }
}

static bool isArgInAlloca(const ABIArgInfo &Info) {
  // Leave ignored and inreg arguments alone.
  switch (Info.getKind()) {
  case ABIArgInfo::InAlloca:
    return true;
  case ABIArgInfo::Indirect:
    assert(Info.getIndirectByVal());
    return true;
  case ABIArgInfo::Ignore:
    return false;
  case ABIArgInfo::Direct:
  case ABIArgInfo::Extend:
    if (Info.getInReg())
      return false;
    return true;
  case ABIArgInfo::Expand:
  case ABIArgInfo::CoerceAndExpand:
    // These are aggregate types which are never passed in registers when
    // inalloca is involved.
    return true;
  }
  llvm_unreachable("invalid enum");
}

void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
  assert(IsWin32StructABI && "inalloca only supported on win32");

  // Build a packed struct type for all of the arguments in memory.
  SmallVector<llvm::Type *, 6> FrameFields;

  // The stack alignment is always 4.
  CharUnits StackAlign = CharUnits::fromQuantity(4);

  CharUnits StackOffset;
  CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();

  // Put 'this' into the struct before 'sret', if necessary.
  bool IsThisCall =
      FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
  ABIArgInfo &Ret = FI.getReturnInfo();
  if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
      isArgInAlloca(I->info)) {
    addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
    ++I;
  }

  // Put the sret parameter into the inalloca struct if it's in memory.
  if (Ret.isIndirect() && !Ret.getInReg()) {
    CanQualType PtrTy = getContext().getPointerType(FI.getReturnType());
    addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy);
    // On Windows, the hidden sret parameter is always returned in eax.
    Ret.setInAllocaSRet(IsWin32StructABI);
  }

  // Skip the 'this' parameter in ecx.
  if (IsThisCall)
    ++I;

  // Put arguments passed in memory into the struct.
  for (; I != E; ++I) {
    if (isArgInAlloca(I->info))
      addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
  }

  FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
                                        /*isPacked=*/true),
                  StackAlign);
}

Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF,
                                 Address VAListAddr, QualType Ty) const {

  auto TypeInfo = getContext().getTypeInfoInChars(Ty);

  // x86-32 changes the alignment of certain arguments on the stack.
  //
  // Just messing with TypeInfo like this works because we never pass
  // anything indirectly.
  TypeInfo.second = CharUnits::fromQuantity(
                getTypeStackAlignInBytes(Ty, TypeInfo.second.getQuantity()));

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
                          TypeInfo, CharUnits::fromQuantity(4),
                          /*AllowHigherAlign*/ true);
}

bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
    const llvm::Triple &Triple, const CodeGenOptions &Opts) {
  assert(Triple.getArch() == llvm::Triple::x86);

  switch (Opts.getStructReturnConvention()) {
  case CodeGenOptions::SRCK_Default:
    break;
  case CodeGenOptions::SRCK_OnStack:  // -fpcc-struct-return
    return false;
  case CodeGenOptions::SRCK_InRegs:  // -freg-struct-return
    return true;
  }

  if (Triple.isOSDarwin() || Triple.isOSIAMCU())
    return true;

  switch (Triple.getOS()) {
  case llvm::Triple::DragonFly:
  case llvm::Triple::FreeBSD:
  case llvm::Triple::OpenBSD:
  case llvm::Triple::Bitrig:
  case llvm::Triple::Win32:
    return true;
  default:
    return false;
  }
}
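// Sketch of the resulting defaults: -freg-struct-return and
// -fpcc-struct-return map to SRCK_InRegs and SRCK_OnStack above; with
// neither flag, small structs are returned in registers on Darwin, IAMCU,
// DragonFly, FreeBSD, OpenBSD, Bitrig, and Win32, and on the stack
// elsewhere (e.g. Linux).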
void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
                                                  llvm::GlobalValue *GV,
                                            CodeGen::CodeGenModule &CGM) const {
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
    if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
      // Get the LLVM function.
      llvm::Function *Fn = cast<llvm::Function>(GV);

      // Now add the 'alignstack' attribute with a value of 16.
      llvm::AttrBuilder B;
      B.addStackAlignmentAttr(16);
      Fn->addAttributes(llvm::AttributeSet::FunctionIndex,
                      llvm::AttributeSet::get(CGM.getLLVMContext(),
                                              llvm::AttributeSet::FunctionIndex,
                                              B));
    }
    if (FD->hasAttr<AnyX86InterruptAttr>()) {
      llvm::Function *Fn = cast<llvm::Function>(GV);
      Fn->setCallingConv(llvm::CallingConv::X86_INTR);
    }
  }
}

bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
                                               CodeGen::CodeGenFunction &CGF,
                                               llvm::Value *Address) const {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);

  // 0-7 are the eight integer registers;  the order is different
  //   on Darwin (for EH), but the range is the same.
  // 8 is %eip.
  AssignToArrayRange(Builder, Address, Four8, 0, 8);

  if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
    // 12-16 are st(0..4).  Not sure why we stop at 4.
    // These have size 16, which is sizeof(long double) on
    // platforms with 8-byte alignment for that type.
    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
    AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);

  } else {
    // 9 is %eflags, which doesn't get a size on Darwin for some
    // reason.
    Builder.CreateAlignedStore(
        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
                               CharUnits::One());

    // 11-16 are st(0..5).  Not sure why we stop at 5.
    // These have size 12, which is sizeof(long double) on
    // platforms with 4-byte alignment for that type.
    llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
    AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// X86-64 ABI Implementation
//===----------------------------------------------------------------------===//


namespace {

/// The AVX ABI level for X86 targets.
enum class X86AVXABILevel {
  None,
  AVX,
  AVX512
};

/// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
  switch (AVXLevel) {
  case X86AVXABILevel::AVX512:
    return 512;
  case X86AVXABILevel::AVX:
    return 256;
  case X86AVXABILevel::None:
    return 128;
  }
  llvm_unreachable("Unknown AVXLevel");
}

/// X86_64ABIInfo - The X86_64 ABI information.
class X86_64ABIInfo : public SwiftABIInfo {
  enum Class {
    Integer = 0,
    SSE,
    SSEUp,
    X87,
    X87Up,
    ComplexX87,
    NoClass,
    Memory
  };

  /// merge - Implement the X86_64 ABI merging algorithm.
  ///
  /// Merge an accumulating classification \arg Accum with a field
  /// classification \arg Field.
  ///
  /// \param Accum - The accumulating classification. This should
  /// always be either NoClass or the result of a previous merge
  /// call. In addition, this should never be Memory (the caller
  /// should just return Memory for the aggregate).
  static Class merge(Class Accum, Class Field);

  /// postMerge - Implement the X86_64 ABI post merging algorithm.
  ///
  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
  /// final MEMORY or SSE classes when necessary.
  ///
  /// \param AggregateSize - The size of the current aggregate in
  /// the classification process.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the higher words of the containing object.
  ///
  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;

  /// classify - Determine the x86_64 register classes in which the
  /// given type T should be passed.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the high word of the containing object.
  ///
  /// \param OffsetBase - The bit offset of this type in the
  /// containing object.  Some parameters are classified different
  /// depending on whether they straddle an eightbyte boundary.
  ///
  /// \param isNamedArg - Whether the argument in question is a "named"
  /// argument, as used in AMD64-ABI 3.5.7.
  ///
  /// If a word is unused its result will be NoClass; if a type should
  /// be passed in Memory then at least the classification of \arg Lo
  /// will be Memory.
  ///
  /// The \arg Lo class will be NoClass iff the argument is ignored.
  ///
  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
  /// also be ComplexX87.
  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
                bool isNamedArg) const;

  llvm::Type *GetByteVectorType(QualType Ty) const;
  llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
                                 unsigned IROffset, QualType SourceTy,
                                 unsigned SourceOffset) const;
  llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
                                     unsigned IROffset, QualType SourceTy,
                                     unsigned SourceOffset) const;

  /// getIndirectResult - Give a source type \arg Ty, return a suitable result
  /// such that the argument will be returned in memory.
  ABIArgInfo getIndirectReturnResult(QualType Ty) const;

  /// getIndirectResult - Give a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ///
  /// \param freeIntRegs - The number of free integer registers remaining
  /// available.
  ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;

  ABIArgInfo classifyReturnType(QualType RetTy) const;

  ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
                                  unsigned &neededInt, unsigned &neededSSE,
                                  bool isNamedArg) const;

  bool IsIllegalVectorType(QualType Ty) const;

  /// The 0.98 ABI revision clarified a lot of ambiguities,
  /// unfortunately in ways that were not always consistent with
  /// certain previous compilers.  In particular, platforms which
  /// required strict binary compatibility with older versions of GCC
  /// may need to exempt themselves.
  bool honorsRevision0_98() const {
    return !getTarget().getTriple().isOSDarwin();
  }

  /// GCC classifies <1 x long long> as SSE but compatibility with older clang
  /// compilers requires us to classify it as INTEGER.
  bool classifyIntegerMMXAsSSE() const {
    const llvm::Triple &Triple = getTarget().getTriple();
    if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4)
      return false;
    if (Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 10)
      return false;
    return true;
  }

  X86AVXABILevel AVXLevel;
  // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
  // 64-bit hardware.
  bool Has64BitPointers;

public:
  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) :
      SwiftABIInfo(CGT), AVXLevel(AVXLevel),
      Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {
  }
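  // Net effect of classifyIntegerMMXAsSSE (a sketch): on Darwin, PS4, and
  // FreeBSD >= 10, a <1 x long long> argument keeps the legacy INTEGER
  // class and travels in a GPR; everywhere else it is classified SSE to
  // match GCC.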
  bool isPassedUsingAVXType(QualType type) const {
    unsigned neededInt, neededSSE;
    // The freeIntRegs argument doesn't matter here.
    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
                                           /*isNamedArg*/true);
    if (info.isDirect()) {
      llvm::Type *ty = info.getCoerceToType();
      if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
        return (vectorTy->getBitWidth() > 128);
    }
    return false;
  }

  void computeInfo(CGFunctionInfo &FI) const override;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
  Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                      QualType Ty) const override;

  bool has64BitPointers() const {
    return Has64BitPointers;
  }

  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
                                    ArrayRef<llvm::Type *> scalars,
                                    bool asReturnValue) const override {
    return occupiesMoreThan(CGT, scalars, /*total*/ 4);
  }
};

/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public ABIInfo {
public:
  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT)
      : ABIInfo(CGT),
        IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}

  void computeInfo(CGFunctionInfo &FI) const override;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

private:
  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs,
                      bool IsReturnType) const;

  bool IsMingw64;
};

class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : TargetCodeGenInfo(new X86_64ABIInfo(CGT, AVXLevel)) {}

  const X86_64ABIInfo &getABIInfo() const {
    return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
  }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    return 7;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override {
    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);

    // 0-15 are the 16 integer registers.
    // 16 is %rip.
    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
    return false;
  }

  llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type* Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  bool isNoProtoCallVariadic(const CallArgList &args,
                             const FunctionNoProtoType *fnType) const override {
    // The default CC on x86-64 sets %al to the number of SSE
    // registers used, and GCC sets this when calling an unprototyped
    // function, so we override the default behavior.  However, don't do
    // that when AVX types are involved: the ABI explicitly states it is
    // undefined, and it doesn't work in practice because of how the ABI
    // defines varargs anyway.
    if (fnType->getCallConv() == CC_C) {
      bool HasAVXType = false;
      for (CallArgList::const_iterator
             it = args.begin(), ie = args.end(); it != ie; ++it) {
        if (getABIInfo().isPassedUsingAVXType(it->Ty)) {
          HasAVXType = true;
          break;
        }
      }

      if (!HasAVXType)
        return true;
    }

    return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
  }

  llvm::Constant *
  getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
    unsigned Sig;
    if (getABIInfo().has64BitPointers())
      Sig = (0xeb << 0) |  // jmp rel8
            (0x0a << 8) |  //           .+0x0c
            ('F' << 16) |
            ('T' << 24);
    else
      Sig = (0xeb << 0) |  // jmp rel8
            (0x06 << 8) |  //           .+0x08
            ('F' << 16) |
            ('T' << 24);
    return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
  }

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override {
    if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
      if (FD->hasAttr<AnyX86InterruptAttr>()) {
        llvm::Function *Fn = cast<llvm::Function>(GV);
        Fn->setCallingConv(llvm::CallingConv::X86_INTR);
      }
    }
  }
};

class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo {
public:
  PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
    : X86_64TargetCodeGenInfo(CGT, AVXLevel) {}

  void getDependentLibraryOption(llvm::StringRef Lib,
                                 llvm::SmallString<24> &Opt) const override {
    Opt = "\01";
    // If the argument contains a space, enclose it in quotes.
    if (Lib.find(" ") != StringRef::npos)
      Opt += "\"" + Lib.str() + "\"";
    else
      Opt += Lib;
  }
};

static std::string qualifyWindowsLibrary(llvm::StringRef Lib) {
  // If the argument does not end in .lib, automatically add the suffix.
  // If the argument contains a space, enclose it in quotes.
  // This matches the behavior of MSVC.
  bool Quote = (Lib.find(" ") != StringRef::npos);
  std::string ArgStr = Quote ? "\"" : "";
  ArgStr += Lib;
  if (!Lib.endswith_lower(".lib"))
    ArgStr += ".lib";
  ArgStr += Quote ? "\"" : "";
"\"" : ""; return ArgStr; } class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo { public: WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters) : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, NumRegisterParameters, false) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:"; Opt += qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; static void addStackProbeSizeTargetAttribute(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) { if (D && isa(D)) { if (CGM.getCodeGenOpts().StackProbeSize != 4096) { llvm::Function *Fn = cast(GV); Fn->addFnAttr("stack-probe-size", llvm::utostr(CGM.getCodeGenOpts().StackProbeSize)); } } } void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); addStackProbeSizeTargetAttribute(D, GV, CGM); } class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { public: WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override { llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); // 0-15 are the 16 integer registers. // 16 is %rip. AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); return false; } void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:"; Opt += qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); if (const FunctionDecl *FD = dyn_cast_or_null(D)) { if (FD->hasAttr()) { llvm::Function *Fn = cast(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); } } addStackProbeSizeTargetAttribute(D, GV, CGM); } } void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const { // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done: // // (a) If one of the classes is Memory, the whole argument is passed in // memory. // // (b) If X87UP is not preceded by X87, the whole argument is passed in // memory. // // (c) If the size of the aggregate exceeds two eightbytes and the first // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole // argument is passed in memory. NOTE: This is necessary to keep the // ABI working for processors that don't support the __m256 type. // // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. // // Some of these are enforced by the merging logic. 
void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
                              Class &Hi) const {
  // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
  //
  // (a) If one of the classes is Memory, the whole argument is passed in
  //     memory.
  //
  // (b) If X87UP is not preceded by X87, the whole argument is passed in
  //     memory.
  //
  // (c) If the size of the aggregate exceeds two eightbytes and the first
  //     eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
  //     argument is passed in memory. NOTE: This is necessary to keep the
  //     ABI working for processors that don't support the __m256 type.
  //
  // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
  //
  // Some of these are enforced by the merging logic.  Others can arise
  // only with unions; for example:
  //   union { _Complex double; unsigned; }
  //
  // Note that clauses (b) and (c) were added in 0.98.
  //
  if (Hi == Memory)
    Lo = Memory;
  if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
    Lo = Memory;
  if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp))
    Lo = Memory;
  if (Hi == SSEUp && Lo != SSE)
    Hi = SSE;
}

X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
  // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
  // classified recursively so that always two fields are
  // considered. The resulting class is calculated according to
  // the classes of the fields in the eightbyte:
  //
  // (a) If both classes are equal, this is the resulting class.
  //
  // (b) If one of the classes is NO_CLASS, the resulting class is
  // the other class.
  //
  // (c) If one of the classes is MEMORY, the result is the MEMORY
  // class.
  //
  // (d) If one of the classes is INTEGER, the result is the
  // INTEGER.
  //
  // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class,
  // MEMORY is used as class.
  //
  // (f) Otherwise class SSE is used.

  // Accum should never be memory (we should have returned) or
  // ComplexX87 (because this cannot be passed in a structure).
  assert((Accum != Memory && Accum != ComplexX87) &&
         "Invalid accumulated classification during merge.");
  if (Accum == Field || Field == NoClass)
    return Accum;
  if (Field == Memory)
    return Memory;
  if (Accum == NoClass)
    return Field;
  if (Accum == Integer || Field == Integer)
    return Integer;
  if (Field == X87 || Field == X87Up || Field == ComplexX87 ||
      Accum == X87 || Accum == X87Up)
    return Memory;
  return SSE;
}
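// Worked merge examples (a sketch):
//   merge(NoClass, Integer) == Integer   // rule (b)
//   merge(Integer, SSE)     == Integer   // rule (d)
//   merge(SSE, X87)         == Memory    // rule (e)
// So for union { double d; unsigned u; }, the single eightbyte merges
// SSE with INTEGER and ends up INTEGER, i.e. passed in a GPR.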
If that // straddles an eightbyte boundary, Hi should be classified as well. uint64_t EB_FuncPtr = (OffsetBase) / 64; uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; if (EB_FuncPtr != EB_ThisAdj) { Lo = Hi = Integer; } else { Current = Integer; } } } else { Current = Integer; } return; } if (const VectorType *VT = Ty->getAs()) { uint64_t Size = getContext().getTypeSize(VT); if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { // gcc passes the following as integer: // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> // 2 bytes - <2 x char>, <1 x short> // 1 byte - <1 x char> Current = Integer; // If this type crosses an eightbyte boundary, it should be // split. uint64_t EB_Lo = (OffsetBase) / 64; uint64_t EB_Hi = (OffsetBase + Size - 1) / 64; if (EB_Lo != EB_Hi) Hi = Lo; } else if (Size == 64) { QualType ElementType = VT->getElementType(); // gcc passes <1 x double> in memory. :( if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) return; // gcc passes <1 x long long> as SSE but clang used to unconditionally // pass them as integer. For platforms where clang is the de facto // platform compiler, we must continue to use integer. if (!classifyIntegerMMXAsSSE() && (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || ElementType->isSpecificBuiltinType(BuiltinType::Long) || ElementType->isSpecificBuiltinType(BuiltinType::ULong))) Current = Integer; else Current = SSE; // If this type crosses an eightbyte boundary, it should be // split. if (OffsetBase && OffsetBase != 64) Hi = Lo; } else if (Size == 128 || (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { // Arguments of 256-bits are split into four eightbyte chunks. The // least significant one belongs to class SSE and all the others to class // SSEUP. The original Lo and Hi design considers that types can't be // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. // This design isn't correct for 256-bits, but since there're no cases // where the upper parts would need to be inspected, avoid adding // complexity and just consider Hi to match the 64-256 part. // // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in // registers if they are "named", i.e. not part of the "..." of a // variadic function. // // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are // split into eight eightbyte chunks, one SSE and seven SSEUP. Lo = SSE; Hi = SSEUp; } return; } if (const ComplexType *CT = Ty->getAs()) { QualType ET = getContext().getCanonicalType(CT->getElementType()); uint64_t Size = getContext().getTypeSize(Ty); if (ET->isIntegralOrEnumerationType()) { if (Size <= 64) Current = Integer; else if (Size <= 128) Lo = Hi = Integer; } else if (ET == getContext().FloatTy) { Current = SSE; } else if (ET == getContext().DoubleTy) { Lo = Hi = SSE; } else if (ET == getContext().LongDoubleTy) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::IEEEquad) Current = Memory; else if (LDF == &llvm::APFloat::x87DoubleExtended) Current = ComplexX87; else if (LDF == &llvm::APFloat::IEEEdouble) Lo = Hi = SSE; else llvm_unreachable("unexpected long double representation!"); } // If this complex type crosses an eightbyte boundary then it // should be split. 
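// A few worked examples of the rules above, assuming the hypothetical C
// types below (not part of this change; the classes follow from the code
// quoted here, per AMD64-ABI 3.2.3):
//
//   typedef int v4si __attribute__((vector_size(16)));
//   typedef long long v1di __attribute__((vector_size(8)));
//
//   __int128        -> Lo = Integer, Hi = Integer  (two GPRs)
//   v4si            -> Lo = SSE, Hi = SSEUp        (one full XMM register)
//   v1di            -> Integer if !classifyIntegerMMXAsSSE(), otherwise SSE
//   _Complex float  -> Current = SSE               (both halves, one eightbyte)
//   _Complex double -> Lo = SSE, Hi = SSE          (split across eightbytes)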
  uint64_t EB_Real = (OffsetBase) / 64;
  uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64;
  if (Hi == NoClass && EB_Real != EB_Imag)
    Hi = Lo;

  return;
}

if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
  // Arrays are treated like structures.

  uint64_t Size = getContext().getTypeSize(Ty);

  // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
  // than four eightbytes, ..., it has class MEMORY.
  if (Size > 256)
    return;

  // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
  // fields, it has class MEMORY.
  //
  // Only need to check alignment of array base.
  if (OffsetBase % getContext().getTypeAlign(AT->getElementType()))
    return;

  // Otherwise implement simplified merge. We could be smarter about
  // this, but it isn't worth it and would be harder to verify.
  Current = NoClass;
  uint64_t EltSize = getContext().getTypeSize(AT->getElementType());
  uint64_t ArraySize = AT->getSize().getZExtValue();

  // The only case a 256-bit wide vector could be used is when the array
  // contains a single 256-bit element. Since Lo and Hi logic isn't extended
  // to work for sizes wider than 128, early check and fallback to memory.
  if (Size > 128 && EltSize != 256)
    return;

  for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
    Class FieldLo, FieldHi;
    classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg);
    Lo = merge(Lo, FieldLo);
    Hi = merge(Hi, FieldHi);
    if (Lo == Memory || Hi == Memory)
      break;
  }

  postMerge(Size, Lo, Hi);
  assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification.");
  return;
}

if (const RecordType *RT = Ty->getAs<RecordType>()) {
  uint64_t Size = getContext().getTypeSize(Ty);

  // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
  // than four eightbytes, ..., it has class MEMORY.
  if (Size > 256)
    return;

  // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
  // copy constructor or a non-trivial destructor, it is passed by invisible
  // reference.
  if (getRecordArgABI(RT, getCXXABI()))
    return;

  const RecordDecl *RD = RT->getDecl();

  // Assume variable sized types are passed in memory.
  if (RD->hasFlexibleArrayMember())
    return;

  const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);

  // Reset Lo class, this will be recomputed.
  Current = NoClass;

  // If this is a C++ record, classify the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
    for (const auto &I : CXXRD->bases()) {
      assert(!I.isVirtual() && !I.getType()->isDependentType() &&
             "Unexpected base class!");
      const CXXRecordDecl *Base =
        cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());

      // Classify this field.
      //
      // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a
      // single eightbyte, each is classified separately. Each eightbyte gets
      // initialized to class NO_CLASS.
      Class FieldLo, FieldHi;
      uint64_t Offset =
        OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
      classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg);
      Lo = merge(Lo, FieldLo);
      Hi = merge(Hi, FieldHi);
      if (Lo == Memory || Hi == Memory) {
        postMerge(Size, Lo, Hi);
        return;
      }
    }
  }

  // Classify the fields one at a time, merging the results.
  unsigned idx = 0;
  for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
       i != e; ++i, ++idx) {
    uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
    bool BitField = i->isBitField();

    // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
    // four eightbytes, or it contains unaligned fields, it has class MEMORY.
    //
    // The only case a 256-bit wide vector could be used is when the struct
    // contains a single 256-bit element.
Since Lo and Hi logic isn't extended // to work for sizes wider than 128, early check and fallback to memory. // if (Size > 128 && getContext().getTypeSize(i->getType()) != 256) { Lo = Memory; postMerge(Size, Lo, Hi); return; } // Note, skip this test for bit-fields, see below. if (!BitField && Offset % getContext().getTypeAlign(i->getType())) { Lo = Memory; postMerge(Size, Lo, Hi); return; } // Classify this field. // // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate // exceeds a single eightbyte, each is classified // separately. Each eightbyte gets initialized to class // NO_CLASS. Class FieldLo, FieldHi; // Bit-fields require special handling, they do not force the // structure to be passed in memory even if unaligned, and // therefore they can straddle an eightbyte. if (BitField) { // Ignore padding bit-fields. if (i->isUnnamedBitfield()) continue; uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); uint64_t Size = i->getBitWidthValue(getContext()); uint64_t EB_Lo = Offset / 64; uint64_t EB_Hi = (Offset + Size - 1) / 64; if (EB_Lo) { assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes."); FieldLo = NoClass; FieldHi = Integer; } else { FieldLo = Integer; FieldHi = EB_Hi ? Integer : NoClass; } } else classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg); Lo = merge(Lo, FieldLo); Hi = merge(Hi, FieldHi); if (Lo == Memory || Hi == Memory) break; } postMerge(Size, Lo, Hi); } } ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { // If this is a scalar LLVM value then assume LLVM will pass it in the right // place naturally. if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } return getNaturalAlignIndirect(Ty); } bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { if (const VectorType *VecTy = Ty->getAs()) { uint64_t Size = getContext().getTypeSize(VecTy); unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); if (Size <= 64 || Size > LargestVector) return true; } return false; } ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, unsigned freeIntRegs) const { // If this is a scalar LLVM value then assume LLVM will pass it in the right // place naturally. // // This assumption is optimistic, as there could be free registers available // when we need to pass this argument in memory, and LLVM could try to pass // the argument in the free register. This does not seem to happen currently, // but this code would be much safer if we could mark the argument with // 'onstack'. See PR12193. if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Compute the byval alignment. We specify the alignment of the byval in all // cases so that the mid-level optimizer knows the alignment of the byval. unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U); // Attempt to avoid passing indirect results using byval when possible. This // is important for good codegen. 
// // We do this by coercing the value into a scalar type which the backend can // handle naturally (i.e., without using byval). // // For simplicity, we currently only do this when we have exhausted all of the // free integer registers. Doing this when there are free integer registers // would require more care, as we would have to ensure that the coerced value // did not claim the unused register. That would require either reording the // arguments to the function (so that any subsequent inreg values came first), // or only doing this optimization when there were no following arguments that // might be inreg. // // We currently expect it to be rare (particularly in well written code) for // arguments to be passed on the stack when there are still free integer // registers available (this would typically imply large structs being passed // by value), so this seems like a fair tradeoff for now. // // We can revisit this if the backend grows support for 'onstack' parameter // attributes. See PR12193. if (freeIntRegs == 0) { uint64_t Size = getContext().getTypeSize(Ty); // If this type fits in an eightbyte, coerce it into the matching integral // type, which will end up on the stack (with alignment 8). if (Align == 8 && Size <= 64) return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align)); } /// The ABI specifies that a value should be passed in a full vector XMM/YMM /// register. Pick an LLVM IR type that will be passed as a vector register. llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { // Wrapper structs/arrays that only contain vectors are passed just like // vectors; strip them off if present. if (const Type *InnerTy = isSingleElementStruct(Ty, getContext())) Ty = QualType(InnerTy, 0); llvm::Type *IRType = CGT.ConvertType(Ty); if (isa(IRType) || IRType->getTypeID() == llvm::Type::FP128TyID) return IRType; // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); assert((Size == 128 || Size == 256) && "Invalid type found!"); // Return a LLVM IR vector type based on the size of 'Ty'. return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), Size / 64); } /// BitsContainNoUserData - Return true if the specified [start,end) bit range /// is known to either be off the end of the specified type or being in /// alignment padding. The user type specified is known to be at most 128 bits /// in size, and have passed through X86_64ABIInfo::classify with a successful /// classification that put one of the two halves in the INTEGER class. /// /// It is conservatively correct to return false. static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, unsigned EndBit, ASTContext &Context) { // If the bytes being queried are off the end of the type, there is no user // data hiding here. This handles analysis of builtins, vectors and other // types that don't contain interesting padding. unsigned TySize = (unsigned)Context.getTypeSize(Ty); if (TySize <= StartBit) return true; if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType()); unsigned NumElts = (unsigned)AT->getSize().getZExtValue(); // Check each element to see if the element overlaps with the queried range. for (unsigned i = 0; i != NumElts; ++i) { // If the element is after the span we care about, then we're done.. 
unsigned EltOffset = i*EltSize; if (EltOffset >= EndBit) break; unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0; if (!BitsContainNoUserData(AT->getElementType(), EltStart, EndBit-EltOffset, Context)) return false; } // If it overlaps no elements, then it is safe to process as padding. return true; } if (const RecordType *RT = Ty->getAs()) { const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { for (const auto &I : CXXRD->bases()) { assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); const CXXRecordDecl *Base = cast(I.getType()->getAs()->getDecl()); // If the base is after the span we care about, ignore it. unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); if (BaseOffset >= EndBit) continue; unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0; if (!BitsContainNoUserData(I.getType(), BaseStart, EndBit-BaseOffset, Context)) return false; } } // Verify that no field has data that overlaps the region of interest. Yes // this could be sped up a lot by being smarter about queried fields, // however we're only looking at structs up to 16 bytes, so we don't care // much. unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx); // If we found a field after the region we care about, then we're done. if (FieldOffset >= EndBit) break; unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0; if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset, Context)) return false; } // If nothing in this record overlapped the area of interest, then we're // clean. return true; } return false; } /// ContainsFloatAtOffset - Return true if the specified LLVM IR type has a /// float member at the specified offset. For example, {int,{float}} has a /// float at offset 4. It is conservatively correct for this routine to return /// false. static bool ContainsFloatAtOffset(llvm::Type *IRType, unsigned IROffset, const llvm::DataLayout &TD) { // Base case if we find a float. if (IROffset == 0 && IRType->isFloatTy()) return true; // If this is a struct, recurse into the field at the specified offset. if (llvm::StructType *STy = dyn_cast(IRType)) { const llvm::StructLayout *SL = TD.getStructLayout(STy); unsigned Elt = SL->getElementContainingOffset(IROffset); IROffset -= SL->getElementOffset(Elt); return ContainsFloatAtOffset(STy->getElementType(Elt), IROffset, TD); } // If this is an array, recurse into the field at the specified offset. if (llvm::ArrayType *ATy = dyn_cast(IRType)) { llvm::Type *EltTy = ATy->getElementType(); unsigned EltSize = TD.getTypeAllocSize(EltTy); IROffset -= IROffset/EltSize*EltSize; return ContainsFloatAtOffset(EltTy, IROffset, TD); } return false; } /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the /// low 8 bytes of an XMM register, corresponding to the SSE class. llvm::Type *X86_64ABIInfo:: GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { // The only three choices we have are either double, <2 x float>, or float. We // pass as float if the last 4 bytes is just padding. This happens for // structs that contain 3 floats. 
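// For example, assuming the hypothetical type below (consistent with the
// comment about structs of three floats, but not taken from this patch):
//   struct Vec3 { float x, y, z; };   // 12 bytes, classes {SSE, SSE}
// the low eightbyte is lowered as <2 x float> (x, y) and the high one as a
// lone float (z), because bits 96-128 are tail padding and
// BitsContainNoUserData() reports them free.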
if (BitsContainNoUserData(SourceTy, SourceOffset*8+32, SourceOffset*8+64, getContext())) return llvm::Type::getFloatTy(getVMContext()); // We want to pass as <2 x float> if the LLVM IR type contains a float at // offset+0 and offset+4. Walk the LLVM IR type to find out if this is the // case. if (ContainsFloatAtOffset(IRType, IROffset, getDataLayout()) && ContainsFloatAtOffset(IRType, IROffset+4, getDataLayout())) return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2); return llvm::Type::getDoubleTy(getVMContext()); } /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in /// an 8-byte GPR. This means that we either have a scalar or we are talking /// about the high or low part of an up-to-16-byte struct. This routine picks /// the best LLVM IR type to represent this, which may be i64 or may be anything /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, /// etc). /// /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for /// the source type. IROffset is an offset in bytes into the LLVM IR type that /// the 8-byte value references. PrefType may be null. /// /// SourceTy is the source-level type for the entire argument. SourceOffset is /// an offset into this that we're processing (which is always either 0 or 8). /// llvm::Type *X86_64ABIInfo:: GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { // If we're dealing with an un-offset LLVM IR type, then it means that we're // returning an 8-byte unit starting with it. See if we can safely use it. if (IROffset == 0) { // Pointers and int64's always fill the 8-byte unit. if ((isa(IRType) && Has64BitPointers) || IRType->isIntegerTy(64)) return IRType; // If we have a 1/2/4-byte integer, we can use it only if the rest of the // goodness in the source type is just tail padding. This is allowed to // kick in for struct {double,int} on the int, but not on // struct{double,int,int} because we wouldn't return the second int. We // have to do this analysis on the source type because we can't depend on // unions being lowered a specific way etc. if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || IRType->isIntegerTy(32) || (isa(IRType) && !Has64BitPointers)) { unsigned BitWidth = isa(IRType) ? 32 : cast(IRType)->getBitWidth(); if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, SourceOffset*8+64, getContext())) return IRType; } } if (llvm::StructType *STy = dyn_cast(IRType)) { // If this is a struct, recurse into the field at the specified offset. const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy); if (IROffset < SL->getSizeInBytes()) { unsigned FieldIdx = SL->getElementContainingOffset(IROffset); IROffset -= SL->getElementOffset(FieldIdx); return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset, SourceTy, SourceOffset); } } if (llvm::ArrayType *ATy = dyn_cast(IRType)) { llvm::Type *EltTy = ATy->getElementType(); unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy); unsigned EltOffset = IROffset/EltSize*EltSize; return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy, SourceOffset); } // Okay, we don't have any better idea of what to pass, so we pass this in an // integer register that isn't too big to fit the rest of the struct. 
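// A sketch of that fallback, assuming a hypothetical packed type:
//   struct __attribute__((packed)) P { double d; char tail[3]; }; // 11 bytes
// Here the high eightbyte plausibly yields min(11 - 8, 8) * 8 = 24 bits, so
// the argument would be coerced to { double, i24 } rather than claiming a
// full i64's worth of bits past the end of the object.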
unsigned TySizeInBytes = (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity(); assert(TySizeInBytes != SourceOffset && "Empty field?"); // It is always safe to classify this as an integer type up to i64 that // isn't larger than the structure. return llvm::IntegerType::get(getVMContext(), std::min(TySizeInBytes-SourceOffset, 8U)*8); } /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally /// be used as elements of a two register pair to pass or return, return a /// first class aggregate to represent them. For example, if the low part of /// a by-value argument should be passed as i32* and the high part as float, /// return {i32*, float}. static llvm::Type * GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, const llvm::DataLayout &TD) { // In order to correctly satisfy the ABI, we need to the high part to start // at offset 8. If the high and low parts we inferred are both 4-byte types // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have // the second element at offset 8. Check for this: unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); unsigned HiAlign = TD.getABITypeAlignment(Hi); unsigned HiStart = llvm::alignTo(LoSize, HiAlign); assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); // To handle this, we have to increase the size of the low part so that the // second element will start at an 8 byte offset. We can't increase the size // of the second element because it might make us access off the end of the // struct. if (HiStart != 8) { // There are usually two sorts of types the ABI generation code can produce // for the low part of a pair that aren't 8 bytes in size: float or // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and // NaCl). // Promote these to a larger type. if (Lo->isFloatTy()) Lo = llvm::Type::getDoubleTy(Lo->getContext()); else { assert((Lo->isIntegerTy() || Lo->isPointerTy()) && "Invalid/unknown lo type"); Lo = llvm::Type::getInt64Ty(Lo->getContext()); } } llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr); // Verify that the second element is at an 8-byte offset. assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && "Invalid x86-64 argument pair!"); return Result; } ABIArgInfo X86_64ABIInfo:: classifyReturnType(QualType RetTy) const { // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the // classification algorithm. X86_64ABIInfo::Class Lo, Hi; classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); // Check some invariants. assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); llvm::Type *ResType = nullptr; switch (Lo) { case NoClass: if (Hi == NoClass) return ABIArgInfo::getIgnore(); // If the low part is just padding, it takes no register, leave ResType // null. assert((Hi == SSE || Hi == Integer || Hi == X87Up) && "Unknown missing lo part"); break; case SSEUp: case X87Up: llvm_unreachable("Invalid classification for lo word."); // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via // hidden argument. case Memory: return getIndirectReturnResult(RetTy); // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next // available register of the sequence %rax, %rdx is used. case Integer: ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); // If we have a sign or zero extended integer, make sure to return Extend // so that the parameter gets the right LLVM IR attributes. 
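// For instance (illustrative, not from this change): for
//   short f(void);
// the return lowers to "define signext i16 @f()", so callers may rely on the
// value already being sign-extended in the return register.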
if (Hi == NoClass && isa(ResType)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); if (RetTy->isIntegralOrEnumerationType() && RetTy->isPromotableIntegerType()) return ABIArgInfo::getExtend(); } break; // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next // available SSE register of the sequence %xmm0, %xmm1 is used. case SSE: ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); break; // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is // returned on the X87 stack in %st0 as 80-bit x87 number. case X87: ResType = llvm::Type::getX86_FP80Ty(getVMContext()); break; // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real // part of the value is returned in %st0 and the imaginary part in // %st1. case ComplexX87: assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), llvm::Type::getX86_FP80Ty(getVMContext()), nullptr); break; } llvm::Type *HighPart = nullptr; switch (Hi) { // Memory was handled previously and X87 should // never occur as a hi class. case Memory: case X87: llvm_unreachable("Invalid classification for hi word."); case ComplexX87: // Previously handled. case NoClass: break; case Integer: HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; case SSE: HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte // is passed in the next available eightbyte chunk if the last used // vector register. // // SSEUP should always be preceded by SSE, just widen. case SSEUp: assert(Lo == SSE && "Unexpected SSEUp classification."); ResType = GetByteVectorType(RetTy); break; // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is // returned together with the previous X87 value in %st0. case X87Up: // If X87Up is preceded by X87, we don't need to do // anything. However, in some cases with unions it may not be // preceded by X87. In such situations we follow gcc and pass the // extra bits in an SSE reg. if (Lo != X87) { HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); } break; } // If a high part was specified, merge it together with the low part. It is // known to pass in the high eightbyte of the result. We do this by forming a // first class struct aggregate with the high and low part: {low, high} if (HighPart) ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); return ABIArgInfo::getDirect(ResType); } ABIArgInfo X86_64ABIInfo::classifyArgumentType( QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE, bool isNamedArg) const { Ty = useFirstFieldIfTransparentUnion(Ty); X86_64ABIInfo::Class Lo, Hi; classify(Ty, 0, Lo, Hi, isNamedArg); // Check some invariants. // FIXME: Enforce these by construction. 
assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); neededInt = 0; neededSSE = 0; llvm::Type *ResType = nullptr; switch (Lo) { case NoClass: if (Hi == NoClass) return ABIArgInfo::getIgnore(); // If the low part is just padding, it takes no register, leave ResType // null. assert((Hi == SSE || Hi == Integer || Hi == X87Up) && "Unknown missing lo part"); break; // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument // on the stack. case Memory: // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or // COMPLEX_X87, it is passed in memory. case X87: case ComplexX87: if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect) ++neededInt; return getIndirectResult(Ty, freeIntRegs); case SSEUp: case X87Up: llvm_unreachable("Invalid classification for lo word."); // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 // and %r9 is used. case Integer: ++neededInt; // Pick an 8-byte type based on the preferred type. ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); // If we have a sign or zero extended integer, make sure to return Extend // so that the parameter gets the right LLVM IR attributes. if (Hi == NoClass && isa(ResType)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); if (Ty->isIntegralOrEnumerationType() && Ty->isPromotableIntegerType()) return ABIArgInfo::getExtend(); } break; // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next // available SSE register is used, the registers are taken in the // order from %xmm0 to %xmm7. case SSE: { llvm::Type *IRType = CGT.ConvertType(Ty); ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0); ++neededSSE; break; } } llvm::Type *HighPart = nullptr; switch (Hi) { // Memory was handled previously, ComplexX87 and X87 should // never occur as hi classes, and X87Up must be preceded by X87, // which is passed in memory. case Memory: case X87: case ComplexX87: llvm_unreachable("Invalid classification for hi word."); case NoClass: break; case Integer: ++neededInt; // Pick an 8-byte type based on the preferred type. HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; // X87Up generally doesn't occur here (long double is passed in // memory), except in situations involving unions. case X87Up: case SSE: HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); ++neededSSE; break; // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the // eightbyte is passed in the upper half of the last used SSE // register. This only happens when 128-bit vectors are passed. case SSEUp: assert(Lo == SSE && "Unexpected SSEUp classification"); ResType = GetByteVectorType(Ty); break; } // If a high part was specified, merge it together with the low part. It is // known to pass in the high eightbyte of the result. 
We do this by forming a // first class struct aggregate with the high and low part: {low, high} if (HighPart) ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); return ABIArgInfo::getDirect(ResType); } void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); // Keep track of the number of assigned registers. unsigned freeIntRegs = 6, freeSSERegs = 8; // If the return value is indirect, then the hidden argument is consuming one // integer register. if (FI.getReturnInfo().isIndirect()) --freeIntRegs; // The chain argument effectively gives us another free register. if (FI.isChainCall()) ++freeIntRegs; unsigned NumRequiredArgs = FI.getNumRequiredArgs(); // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers // get assigned (in left-to-right order) for passing as follows... unsigned ArgNo = 0; for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); it != ie; ++it, ++ArgNo) { bool IsNamedArg = ArgNo < NumRequiredArgs; unsigned neededInt, neededSSE; it->info = classifyArgumentType(it->type, freeIntRegs, neededInt, neededSSE, IsNamedArg); // AMD64-ABI 3.2.3p3: If there are no registers available for any // eightbyte of an argument, the whole argument is passed on the // stack. If registers have already been assigned for some // eightbytes of such an argument, the assignments get reverted. if (freeIntRegs >= neededInt && freeSSERegs >= neededSSE) { freeIntRegs -= neededInt; freeSSERegs -= neededSSE; } else { it->info = getIndirectResult(it->type, freeIntRegs); } } } static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) { Address overflow_arg_area_p = CGF.Builder.CreateStructGEP( VAListAddr, 2, CharUnits::fromQuantity(8), "overflow_arg_area_p"); llvm::Value *overflow_arg_area = CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 // byte boundary if alignment needed by type exceeds 8 byte boundary. // It isn't stated explicitly in the standard, but in practice we use // alignment greater than 16 where necessary. CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); if (Align > CharUnits::fromQuantity(8)) { overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, Align); } // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); llvm::Value *Res = CGF.Builder.CreateBitCast(overflow_arg_area, llvm::PointerType::getUnqual(LTy)); // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: // l->overflow_arg_area + sizeof(type). // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to // an 8 byte boundary. uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8; llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7); overflow_arg_area = CGF.Builder.CreateGEP(overflow_arg_area, Offset, "overflow_arg_area.next"); CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. 
return Address(Res, Align); } Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // Assume that va_list type is correct; should be pointer to LLVM type: // struct { // i32 gp_offset; // i32 fp_offset; // i8* overflow_arg_area; // i8* reg_save_area; // }; unsigned neededInt, neededSSE; Ty = getContext().getCanonicalType(Ty); ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE, /*isNamedArg*/false); // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed // in the registers. If not go to step 7. if (!neededInt && !neededSSE) return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of // general purpose registers needed to pass type and num_fp to hold // the number of floating point registers needed. // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into // registers. In the case: l->gp_offset > 48 - num_gp * 8 or // l->fp_offset > 304 - num_fp * 16 go to step 7. // // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of // register save space). llvm::Value *InRegs = nullptr; Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid(); llvm::Value *gp_offset = nullptr, *fp_offset = nullptr; if (neededInt) { gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, CharUnits::Zero(), "gp_offset_p"); gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8); InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp"); } if (neededSSE) { fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, CharUnits::fromQuantity(4), "fp_offset_p"); fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset"); llvm::Value *FitsInFP = llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16); FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp"); InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP; } llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); // Emit code to load the value if it was passed in registers. CGF.EmitBlock(InRegBlock); // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with // an offset of l->gp_offset and/or l->fp_offset. This may require // copying to a temporary location in case the parameter is passed // in different register classes or requires an alignment greater // than 8 for general purpose registers and 16 for XMM registers. // // FIXME: This really results in shameful code when we end up needing to // collect arguments from different places; often what should result in a // simple assembling of a structure from scattered addresses has many more // loads than necessary. Can we clean this up? llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); llvm::Value *RegSaveArea = CGF.Builder.CreateLoad( CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(16)), "reg_save_area"); Address RegAddr = Address::invalid(); if (neededInt && neededSSE) { // FIXME: Cleanup. 
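// What this block handles, concretely, assuming a hypothetical type:
//   struct IntDouble { int i; double d; };   // classifies {Integer, SSE}
// Such an argument arrives split between a GPR and an XMM register, so
// va_arg must gather one eightbyte from reg_save_area + gp_offset and the
// other from reg_save_area + fp_offset into a temporary before use.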
assert(AI.isDirect() && "Unexpected ABI info for mixed regs"); llvm::StructType *ST = cast(AI.getCoerceToType()); Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs"); llvm::Type *TyLo = ST->getElementType(0); llvm::Type *TyHi = ST->getElementType(1); assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) && "Unexpected ABI info for mixed regs"); llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo); llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi); llvm::Value *GPAddr = CGF.Builder.CreateGEP(RegSaveArea, gp_offset); llvm::Value *FPAddr = CGF.Builder.CreateGEP(RegSaveArea, fp_offset); llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr; llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; // Copy the first element. llvm::Value *V = CGF.Builder.CreateDefaultAlignedLoad( CGF.Builder.CreateBitCast(RegLoAddr, PTyLo)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); // Copy the second element. V = CGF.Builder.CreateDefaultAlignedLoad( CGF.Builder.CreateBitCast(RegHiAddr, PTyHi)); CharUnits Offset = CharUnits::fromQuantity( getDataLayout().getStructLayout(ST)->getElementOffset(1)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, Offset)); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); } else if (neededInt) { RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, gp_offset), CharUnits::fromQuantity(8)); RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy); // Copy to a temporary if necessary to ensure the appropriate alignment. std::pair SizeAlign = getContext().getTypeInfoInChars(Ty); uint64_t TySize = SizeAlign.first.getQuantity(); CharUnits TyAlign = SizeAlign.second; // Copy into a temporary if the type is more aligned than the // register save area. if (TyAlign.getQuantity() > 8) { Address Tmp = CGF.CreateMemTemp(Ty); CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false); RegAddr = Tmp; } } else if (neededSSE == 1) { RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset), CharUnits::fromQuantity(16)); RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy); } else { assert(neededSSE == 2 && "Invalid number of needed registers!"); // SSE registers are spaced 16 bytes apart in the register save // area, we need to collect the two eightbytes together. // The ABI isn't explicit about this, but it seems reasonable // to assume that the slots are 16-byte aligned, since the stack is // naturally 16-byte aligned and the prologue is expected to store // all the SSE registers to the RSA. Address RegAddrLo = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset), CharUnits::fromQuantity(16)); Address RegAddrHi = CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, CharUnits::fromQuantity(16)); llvm::Type *DoubleTy = CGF.DoubleTy; llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr); llvm::Value *V; Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); V = CGF.Builder.CreateLoad( CGF.Builder.CreateElementBitCast(RegAddrLo, DoubleTy)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); V = CGF.Builder.CreateLoad( CGF.Builder.CreateElementBitCast(RegAddrHi, DoubleTy)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8))); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); } // AMD64-ABI 3.5.7p5: Step 5. 
Set: // l->gp_offset = l->gp_offset + num_gp * 8 // l->fp_offset = l->fp_offset + num_fp * 16. if (neededInt) { llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8); CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset), gp_offset_p); } if (neededSSE) { llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16); CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset), fp_offset_p); } CGF.EmitBranch(ContBlock); // Emit code to load the value if it was passed in memory. CGF.EmitBlock(InMemBlock); Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); // Return the appropriate result. CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, "vaarg.addr"); return ResAddr; } Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, CGF.getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(8), /*allowHigherAlign*/ false); } ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); TypeInfo Info = getContext().getTypeInfo(Ty); uint64_t Width = Info.Width; CharUnits Align = getContext().toCharUnitsFromBits(Info.Align); const RecordType *RT = Ty->getAs(); if (RT) { if (!IsReturnType) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } if (RT->getDecl()->hasFlexibleArrayMember()) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } // vectorcall adds the concept of a homogenous vector aggregate, similar to // other targets. const Type *Base = nullptr; uint64_t NumElts = 0; if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) { if (FreeSSERegs >= NumElts) { FreeSSERegs -= NumElts; if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); return ABIArgInfo::getExpand(); } return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } if (Ty->isMemberPointerType()) { // If the member pointer is represented by an LLVM int or ptr, pass it // directly. llvm::Type *LLTy = CGT.ConvertType(Ty); if (LLTy->isPointerTy() || LLTy->isIntegerTy()) return ABIArgInfo::getDirect(); } if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) { // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is // not 1, 2, 4, or 8 bytes, must be passed by reference." if (Width > 64 || !llvm::isPowerOf2_64(Width)) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Otherwise, coerce it to a small integer. return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width)); } // Bool type is always extended to the ABI, other builtin types are not // extended. const BuiltinType *BT = Ty->getAs(); if (BT && BT->getKind() == BuiltinType::Bool) return ABIArgInfo::getExtend(); // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It // passes them indirectly through memory. 
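// Worked examples of the MS x64 size rule quoted above, assuming the
// hypothetical types below (not part of this change):
//   struct S4  { char c[4];  };   // 4 bytes, power of 2 -> coerced to i32
//   struct S8  { char c[8];  };   // 8 bytes             -> coerced to i64
//   struct S12 { char c[12]; };   // not 1/2/4/8 bytes   -> passed by reference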
if (IsMingw64 && BT && BT->getKind() == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::x87DoubleExtended) return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } return ABIArgInfo::getDirect(); } void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { bool IsVectorCall = FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall; // We can use up to 4 SSE return registers with vectorcall. unsigned FreeSSERegs = IsVectorCall ? 4 : 0; if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true); // We can use up to 6 SSE register parameters with vectorcall. FreeSSERegs = IsVectorCall ? 6 : 0; for (auto &I : FI.arguments()) I.info = classify(I.type, FreeSSERegs, false); } Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, CGF.getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(8), /*allowHigherAlign*/ false); } // PowerPC-32 namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. class PPC32_SVR4_ABIInfo : public DefaultABIInfo { bool IsSoftFloatABI; public: PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI) : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {} Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI) : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT, SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // TODO: this implementation is now likely redundant with // DefaultABIInfo::EmitVAArg. Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, QualType Ty) const { const unsigned OverflowLimit = 8; if (const ComplexType *CTy = Ty->getAs()) { // TODO: Implement this. For now ignore. (void)CTy; return Address::invalid(); // FIXME? } // struct __va_list_tag { // unsigned char gpr; // unsigned char fpr; // unsigned short reserved; // void *overflow_arg_area; // void *reg_save_area; // }; bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64; bool isInt = Ty->isIntegerType() || Ty->isPointerType() || Ty->isAggregateType(); bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64; // All aggregates are passed indirectly? That doesn't seem consistent // with the argument-lowering code. bool isIndirect = Ty->isAggregateType(); CGBuilderTy &Builder = CGF.Builder; // The calling convention either uses 1-2 GPRs or 1 FPR. Address NumRegsAddr = Address::invalid(); if (isInt || IsSoftFloatABI) { NumRegsAddr = Builder.CreateStructGEP(VAList, 0, CharUnits::Zero(), "gpr"); } else { NumRegsAddr = Builder.CreateStructGEP(VAList, 1, CharUnits::One(), "fpr"); } llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs"); // "Align" the register count when TY is i64. 
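// Illustration of the rounding below, assuming the 32-bit SVR4 convention in
// this file: with gpr == 3 slots already used, an i64 must start on an even
// register pair, so the count first rounds up:
//   NumRegs = (3 + 1) & ~1;   // == 4, i.e. the value lands in r7:r8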
if (isI64 || (isF64 && IsSoftFloatABI)) { NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1)); NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t) ~1U)); } llvm::Value *CC = Builder.CreateICmpULT(NumRegs, Builder.getInt8(OverflowLimit), "cond"); llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs"); llvm::BasicBlock *UsingOverflow = CGF.createBasicBlock("using_overflow"); llvm::BasicBlock *Cont = CGF.createBasicBlock("cont"); Builder.CreateCondBr(CC, UsingRegs, UsingOverflow); llvm::Type *DirectTy = CGF.ConvertType(Ty); if (isIndirect) DirectTy = DirectTy->getPointerTo(0); // Case 1: consume registers. Address RegAddr = Address::invalid(); { CGF.EmitBlock(UsingRegs); Address RegSaveAreaPtr = Builder.CreateStructGEP(VAList, 4, CharUnits::fromQuantity(8)); RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr), CharUnits::fromQuantity(8)); assert(RegAddr.getElementType() == CGF.Int8Ty); // Floating-point registers start after the general-purpose registers. if (!(isInt || IsSoftFloatABI)) { RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr, CharUnits::fromQuantity(32)); } // Get the address of the saved value by scaling the number of // registers we've used by the number of CharUnits RegSize = CharUnits::fromQuantity((isInt || IsSoftFloatABI) ? 4 : 8); llvm::Value *RegOffset = Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity())); RegAddr = Address(Builder.CreateInBoundsGEP(CGF.Int8Ty, RegAddr.getPointer(), RegOffset), RegAddr.getAlignment().alignmentOfArrayElement(RegSize)); RegAddr = Builder.CreateElementBitCast(RegAddr, DirectTy); // Increase the used-register count. NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8((isI64 || (isF64 && IsSoftFloatABI)) ? 2 : 1)); Builder.CreateStore(NumRegs, NumRegsAddr); CGF.EmitBranch(Cont); } // Case 2: consume space in the overflow area. Address MemAddr = Address::invalid(); { CGF.EmitBlock(UsingOverflow); Builder.CreateStore(Builder.getInt8(OverflowLimit), NumRegsAddr); // Everything in the overflow area is rounded up to a size of at least 4. CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4); CharUnits Size; if (!isIndirect) { auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty); Size = TypeInfo.first.alignTo(OverflowAreaAlign); } else { Size = CGF.getPointerSize(); } Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3, CharUnits::fromQuantity(4)); Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), OverflowAreaAlign); // Round up address of argument to alignment CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); if (Align > OverflowAreaAlign) { llvm::Value *Ptr = OverflowArea.getPointer(); OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align), Align); } MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy); // Increase the overflow area. OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea, Size); Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr); CGF.EmitBranch(Cont); } CGF.EmitBlock(Cont); // Merge the cases with a phi. Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr, UsingOverflow, "vaarg.addr"); // Load the pointer if the argument was passed indirectly. if (isIndirect) { Result = Address(Builder.CreateLoad(Result, "aggr"), getContext().getTypeAlignInChars(Ty)); } return Result; } bool PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { // This is calculated from the LLVM and GCC tables and verified // against gcc output. 
AFAIK all ABIs use the same encoding. CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::IntegerType *i8 = CGF.Int8Ty; llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); // 0-31: r0-31, the 4-byte general-purpose registers AssignToArrayRange(Builder, Address, Four8, 0, 31); // 32-63: fp0-31, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 32, 63); // 64-76 are various 4-byte special-purpose registers: // 64: mq // 65: lr // 66: ctr // 67: ap // 68-75 cr0-7 // 76: xer AssignToArrayRange(Builder, Address, Four8, 64, 76); // 77-108: v0-31, the 16-byte vector registers AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); // 109: vrsave // 110: vscr // 111: spe_acc // 112: spefscr // 113: sfp AssignToArrayRange(Builder, Address, Four8, 109, 113); return false; } // PowerPC-64 namespace { /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. class PPC64_SVR4_ABIInfo : public ABIInfo { public: enum ABIKind { ELFv1 = 0, ELFv2 }; private: static const unsigned GPRBits = 64; ABIKind Kind; bool HasQPX; + bool IsSoftFloatABI; // A vector of float or double will be promoted to <4 x f32> or <4 x f64> and // will be passed in a QPX register. bool IsQPXVectorTy(const Type *Ty) const { if (!HasQPX) return false; if (const VectorType *VT = Ty->getAs()) { unsigned NumElements = VT->getNumElements(); if (NumElements == 1) return false; if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double)) { if (getContext().getTypeSize(Ty) <= 256) return true; } else if (VT->getElementType()-> isSpecificBuiltinType(BuiltinType::Float)) { if (getContext().getTypeSize(Ty) <= 128) return true; } } return false; } bool IsQPXVectorTy(QualType Ty) const { return IsQPXVectorTy(Ty.getTypePtr()); } public: - PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX) - : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {} + PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX), + IsSoftFloatABI(SoftFloatABI) {} bool isPromotableTypeForABI(QualType Ty) const; CharUnits getParamTypeAlignment(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; // TODO: We can add more logic to computeInfo to improve performance. // Example: For aggregate arguments that fit in a register, we could // use getDirectInReg (as is done below for structs containing a single // floating-point value) to avoid pushing them to memory on function // entry. This would require changing the logic in PPCISelLowering // when lowering the parameters in the caller and args in the callee. void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) { // We rely on the default argument classification for the most part. // One exception: An aggregate containing a single floating-point // or vector item must be passed in a register if one is available. 
const Type *T = isSingleElementStruct(I.type, getContext()); if (T) { const BuiltinType *BT = T->getAs(); if (IsQPXVectorTy(T) || (T->isVectorType() && getContext().getTypeSize(T) == 128) || (BT && BT->isFloatingPoint())) { QualType QT(T, 0); I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT)); continue; } } I.info = classifyArgumentType(I.type); } } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, - PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX) - : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)) {} + PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX, + SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo { public: PPC64TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // Return true if the ABI requires Ty to be passed sign- or zero- // extended to 64 bits. bool PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. if (Ty->isPromotableIntegerType()) return true; // In addition to the usual promotable integer types, we also need to // extend all 32-bit types, since the ABI requires promotion to 64 bits. if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Int: case BuiltinType::UInt: return true; default: break; } return false; } /// isAlignedParamType - Determine whether a type requires 16-byte or /// higher alignment in the parameter area. Always returns at least 8. CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { // Complex types are passed just like their elements. if (const ComplexType *CTy = Ty->getAs()) Ty = CTy->getElementType(); // Only vector types of size 16 bytes need alignment (larger types are // passed via reference, smaller types are not aligned). if (IsQPXVectorTy(Ty)) { if (getContext().getTypeSize(Ty) > 128) return CharUnits::fromQuantity(32); return CharUnits::fromQuantity(16); } else if (Ty->isVectorType()) { return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8); } // For single-element float/vector structs, we consider the whole type // to have the same alignment requirements as its single element. const Type *AlignAsType = nullptr; const Type *EltType = isSingleElementStruct(Ty, getContext()); if (EltType) { const BuiltinType *BT = EltType->getAs(); if (IsQPXVectorTy(EltType) || (EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || (BT && BT->isFloatingPoint())) AlignAsType = EltType; } // Likewise for ELFv2 homogeneous aggregates. 
  const Type *Base = nullptr;
  uint64_t Members = 0;
  if (!AlignAsType && Kind == ELFv2 &&
      isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members))
    AlignAsType = Base;

  // With special case aggregates, only vector base types need alignment.
  if (AlignAsType && IsQPXVectorTy(AlignAsType)) {
    if (getContext().getTypeSize(AlignAsType) > 128)
      return CharUnits::fromQuantity(32);

    return CharUnits::fromQuantity(16);
  } else if (AlignAsType) {
    return CharUnits::fromQuantity(AlignAsType->isVectorType() ? 16 : 8);
  }

  // Otherwise, we only need alignment for any aggregate type that
  // has an alignment requirement of >= 16 bytes.
  if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) {
    if (HasQPX && getContext().getTypeAlign(Ty) >= 256)
      return CharUnits::fromQuantity(32);
    return CharUnits::fromQuantity(16);
  }

  return CharUnits::fromQuantity(8);
}

/// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
/// aggregate.  Base is set to the base element type, and Members is set
/// to the number of base elements.
bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
                                     uint64_t &Members) const {
  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
    uint64_t NElements = AT->getSize().getZExtValue();
    if (NElements == 0)
      return false;
    if (!isHomogeneousAggregate(AT->getElementType(), Base, Members))
      return false;
    Members *= NElements;
  } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
    const RecordDecl *RD = RT->getDecl();
    if (RD->hasFlexibleArrayMember())
      return false;

    Members = 0;

    // If this is a C++ record, check the bases first.
    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
      for (const auto &I : CXXRD->bases()) {
        // Ignore empty records.
        if (isEmptyRecord(getContext(), I.getType(), true))
          continue;

        uint64_t FldMembers;
        if (!isHomogeneousAggregate(I.getType(), Base, FldMembers))
          return false;

        Members += FldMembers;
      }
    }

    for (const auto *FD : RD->fields()) {
      // Ignore (non-zero arrays of) empty records.
      QualType FT = FD->getType();
      while (const ConstantArrayType *AT =
                 getContext().getAsConstantArrayType(FT)) {
        if (AT->getSize().getZExtValue() == 0)
          return false;
        FT = AT->getElementType();
      }
      if (isEmptyRecord(getContext(), FT, true))
        continue;

      // For compatibility with GCC, ignore empty bitfields in C++ mode.
      if (getContext().getLangOpts().CPlusPlus &&
          FD->isBitField() && FD->getBitWidthValue(getContext()) == 0)
        continue;

      uint64_t FldMembers;
      if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers))
        return false;

      Members = (RD->isUnion() ?
                 std::max(Members, FldMembers) : Members + FldMembers);
    }

    if (!Base)
      return false;

    // Ensure there is no padding.
    if (getContext().getTypeSize(Base) * Members !=
        getContext().getTypeSize(Ty))
      return false;
  } else {
    Members = 1;
    if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
      Members = 2;
      Ty = CT->getElementType();
    }

    // Most ABIs only support float, double, and some vector type widths.
    if (!isHomogeneousAggregateBaseType(Ty))
      return false;

    // The base type must be the same for all members.  Types that
    // agree in both total size and mode (float vs. vector) are
    // treated as being equivalent here.
    const Type *TyPtr = Ty.getTypePtr();
    if (!Base) {
      Base = TyPtr;
      // If it's a non-power-of-2 vector, its size is already a power-of-2,
      // so make sure to widen it explicitly.
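      //
      // Illustrative example (for exposition only): a three-element vector
      //
      //   typedef float v3f32 __attribute__((ext_vector_type(3)));
      //
      // already measures 16 bytes, so the base type recorded here is the
      // widened <4 x float>, which keeps the size comparison below honest.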
      if (const VectorType *VT = Base->getAs<VectorType>()) {
        QualType EltTy = VT->getElementType();
        unsigned NumElements =
            getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy);
        Base = getContext()
                   .getVectorType(EltTy, NumElements, VT->getVectorKind())
                   .getTypePtr();
      }
    }

    if (Base->isVectorType() != TyPtr->isVectorType() ||
        getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
      return false;
  }
  return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);
}

bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  // Homogeneous aggregates for ELFv2 must have base types of float,
  // double, long double, or 128-bit vectors.
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->getKind() == BuiltinType::Float ||
        BT->getKind() == BuiltinType::Double ||
-        BT->getKind() == BuiltinType::LongDouble)
+        BT->getKind() == BuiltinType::LongDouble) {
+      if (IsSoftFloatABI)
+        return false;
      return true;
+    }
  }
  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty))
      return true;
  }
  return false;
}

bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough(
    const Type *Base, uint64_t Members) const {
  // Vector types require one register, floating point types require one
  // or two registers depending on their size.
  uint32_t NumRegs =
      Base->isVectorType() ? 1 : (getContext().getTypeSize(Base) + 63) / 64;

  // Homogeneous Aggregates may occupy at most 8 registers.
  return Members * NumRegs <= 8;
}

ABIArgInfo
PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (Ty->isAnyComplexType())
    return ABIArgInfo::getDirect();

  // Non-Altivec vector types are passed in GPRs (smaller than 16 bytes)
  // or via reference (larger than 16 bytes).
  if (Ty->isVectorType() && !IsQPXVectorTy(Ty)) {
    uint64_t Size = getContext().getTypeSize(Ty);
    if (Size > 128)
      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
    else if (Size < 128) {
      llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
      return ABIArgInfo::getDirect(CoerceTy);
    }
  }

  if (isAggregateTypeForABI(Ty)) {
    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);

    uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity();
    uint64_t TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();

    // ELFv2 homogeneous aggregates are passed as array types.
    const Type *Base = nullptr;
    uint64_t Members = 0;
    if (Kind == ELFv2 &&
        isHomogeneousAggregate(Ty, Base, Members)) {
      llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
      llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
      return ABIArgInfo::getDirect(CoerceTy);
    }

    // If an aggregate may end up fully in registers, we do not
    // use the ByVal method, but pass the aggregate as array.
    // This is usually beneficial since we avoid forcing the
    // back-end to store the argument to memory.
    uint64_t Bits = getContext().getTypeSize(Ty);
    if (Bits > 0 && Bits <= 8 * GPRBits) {
      llvm::Type *CoerceTy;

      // Types up to 8 bytes are passed as integer type (which will be
      // properly aligned in the argument save area doubleword).
      if (Bits <= GPRBits)
        CoerceTy =
            llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
      // Larger types are passed as arrays, with the base type selected
      // according to the required alignment in the save area.
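      // Illustrative example (for exposition only): a 24-byte struct of
      // three 'long' fields has Bits == 192 and ABIAlign == 8, so it is
      // coerced to [3 x i64] and can travel entirely in GPRs when they
      // are free, instead of being forced out to memory byval.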
      else {
        uint64_t RegBits = ABIAlign * 8;
        uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits;
        llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits);
        CoerceTy = llvm::ArrayType::get(RegTy, NumRegs);
      }

      return ABIArgInfo::getDirect(CoerceTy);
    }

    // All other aggregates are passed ByVal.
    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
                                   /*ByVal=*/true,
                                   /*Realign=*/TyAlign > ABIAlign);
  }

  return (isPromotableTypeForABI(Ty) ?
          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
}

ABIArgInfo
PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (RetTy->isAnyComplexType())
    return ABIArgInfo::getDirect();

  // Non-Altivec vector types are returned in GPRs (smaller than 16 bytes)
  // or via reference (larger than 16 bytes).
  if (RetTy->isVectorType() && !IsQPXVectorTy(RetTy)) {
    uint64_t Size = getContext().getTypeSize(RetTy);
    if (Size > 128)
      return getNaturalAlignIndirect(RetTy);
    else if (Size < 128) {
      llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
      return ABIArgInfo::getDirect(CoerceTy);
    }
  }

  if (isAggregateTypeForABI(RetTy)) {
    // ELFv2 homogeneous aggregates are returned as array types.
    const Type *Base = nullptr;
    uint64_t Members = 0;
    if (Kind == ELFv2 &&
        isHomogeneousAggregate(RetTy, Base, Members)) {
      llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
      llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
      return ABIArgInfo::getDirect(CoerceTy);
    }

    // ELFv2 small aggregates are returned in up to two registers.
    uint64_t Bits = getContext().getTypeSize(RetTy);
    if (Kind == ELFv2 && Bits <= 2 * GPRBits) {
      if (Bits == 0)
        return ABIArgInfo::getIgnore();

      llvm::Type *CoerceTy;
      if (Bits > GPRBits) {
        CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
        CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr);
      } else
        CoerceTy =
            llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
      return ABIArgInfo::getDirect(CoerceTy);
    }

    // All other aggregates are returned indirectly.
    return getNaturalAlignIndirect(RetTy);
  }

  return (isPromotableTypeForABI(RetTy) ?
          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
}

// Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine.
Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                      QualType Ty) const {
  auto TypeInfo = getContext().getTypeInfoInChars(Ty);
  TypeInfo.second = getParamTypeAlignment(Ty);

  CharUnits SlotSize = CharUnits::fromQuantity(8);

  // If we have a complex type and the base type is smaller than 8 bytes,
  // the ABI calls for the real and imaginary parts to be right-adjusted
  // in separate doublewords.  However, Clang expects us to produce a
  // pointer to a structure with the two parts packed tightly.  So generate
  // loads of the real and imaginary parts relative to the va_list pointer,
  // and store them to a temporary structure.
  if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
    CharUnits EltSize = TypeInfo.first / 2;
    if (EltSize < SlotSize) {
      Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty,
                                            SlotSize * 2, SlotSize,
                                            SlotSize, /*AllowHigher*/ true);

      Address RealAddr = Addr;
      Address ImagAddr = RealAddr;
      if (CGF.CGM.getDataLayout().isBigEndian()) {
        RealAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr,
                                                          SlotSize - EltSize);
        ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
                                                      2 * SlotSize - EltSize);
      } else {
        ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize);
      }

      llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
      RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
      ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
      llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
      llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");

      Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
      CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
                             /*init*/ true);
      return Temp;
    }
  }

  // Otherwise, just use the general rule.
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
                          TypeInfo, SlotSize, /*AllowHigher*/ true);
}

static bool
PPC64_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                              llvm::Value *Address) {
  // This is calculated from the LLVM and GCC tables and verified
  // against gcc output.  AFAIK all ABIs use the same encoding.

  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  llvm::IntegerType *i8 = CGF.Int8Ty;
  llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
  llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
  llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16);

  // 0-31: r0-31, the 8-byte general-purpose registers
  AssignToArrayRange(Builder, Address, Eight8, 0, 31);

  // 32-63: fp0-31, the 8-byte floating-point registers
  AssignToArrayRange(Builder, Address, Eight8, 32, 63);

  // 64-76 are various 4-byte special-purpose registers:
  // 64: mq
  // 65: lr
  // 66: ctr
  // 67: ap
  // 68-75 cr0-7
  // 76: xer
  AssignToArrayRange(Builder, Address, Four8, 64, 76);

  // 77-108: v0-31, the 16-byte vector registers
  AssignToArrayRange(Builder, Address, Sixteen8, 77, 108);

  // 109: vrsave
  // 110: vscr
  // 111: spe_acc
  // 112: spefscr
  // 113: sfp
  AssignToArrayRange(Builder, Address, Four8, 109, 113);

  return false;
}

bool
PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable(
  CodeGen::CodeGenFunction &CGF,
  llvm::Value *Address) const {
  return PPC64_initDwarfEHRegSizeTable(CGF, Address);
}

bool
PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                                llvm::Value *Address) const {
  return PPC64_initDwarfEHRegSizeTable(CGF, Address);
}

//===----------------------------------------------------------------------===//
// AArch64 ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class AArch64ABIInfo : public SwiftABIInfo {
public:
  enum ABIKind {
    AAPCS = 0,
    DarwinPCS
  };

private:
  ABIKind Kind;

public:
  AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind)
    : SwiftABIInfo(CGT), Kind(Kind) {}

private:
  ABIKind getABIKind() const { return Kind; }
  bool isDarwinPCS() const { return Kind == DarwinPCS; }

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType RetTy) const;
  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t Members) const override;
  bool isIllegalVectorType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override {
    if (!getCXXABI().classifyReturnType(FI))
      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

    for (auto &it : FI.arguments())
      it.info = classifyArgumentType(it.type);
  }

  Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
                          CodeGenFunction &CGF) const;

  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
                         CodeGenFunction &CGF) const;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override {
    return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
  }

  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
                                    ArrayRef<llvm::Type*> scalars,
                                    bool asReturnValue) const override {
    return occupiesMoreThan(CGT, scalars, /*total*/ 4);
  }
};

class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind)
      : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {}

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue";
  }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
    return 31;
  }

  bool doesReturnSlotInterfereWithArgs() const override { return false; }
};
}

ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  // Handle illegal vector types here.
  if (isIllegalVectorType(Ty)) {
    uint64_t Size = getContext().getTypeSize(Ty);
    // Android promotes <2 x i8> to i16, not i32
    if (isAndroid() && (Size <= 16)) {
      llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
      return ABIArgInfo::getDirect(ResType);
    }
    if (Size <= 32) {
      llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
      return ABIArgInfo::getDirect(ResType);
    }
    if (Size == 64) {
      llvm::Type *ResType =
          llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
      return ABIArgInfo::getDirect(ResType);
    }
    if (Size == 128) {
      llvm::Type *ResType =
          llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
      return ABIArgInfo::getDirect(ResType);
    }
    return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
  }

  if (!isAggregateTypeForABI(Ty)) {
    // Treat an enum type as its underlying type.
    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
      Ty = EnumTy->getDecl()->getIntegerType();

    return (Ty->isPromotableIntegerType() && isDarwinPCS()
                ? ABIArgInfo::getExtend()
                : ABIArgInfo::getDirect());
  }

  // Structures with either a non-trivial destructor or a non-trivial
  // copy constructor are always indirect.
  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
    return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
                                           CGCXXABI::RAA_DirectInMemory);
  }

  // Empty records are always ignored on Darwin, but actually passed in C++
  // mode elsewhere for GNU compatibility.
  if (isEmptyRecord(getContext(), Ty, true)) {
    if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
      return ABIArgInfo::getIgnore();

    return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
  }

  // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
  const Type *Base = nullptr;
  uint64_t Members = 0;
  if (isHomogeneousAggregate(Ty, Base, Members)) {
    return ABIArgInfo::getDirect(
        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
  }

  // Aggregates <= 16 bytes are passed directly in registers or on the stack.
  uint64_t Size = getContext().getTypeSize(Ty);
  if (Size <= 128) {
    unsigned Alignment = getContext().getTypeAlign(Ty);
    Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes

    // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128. if (Alignment < 128 && Size == 128) { llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); } return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) return getNaturalAlignIndirect(RetTy); if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() && isDarwinPCS() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(RetTy, Base, Members)) // Homogeneous Floating-point Aggregates (HFAs) are returned directly. return ABIArgInfo::getDirect(); // Aggregates <= 16 bytes are returned directly in registers or on the stack. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 128) { unsigned Alignment = getContext().getTypeAlign(RetTy); Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. if (Alignment < 128 && Size == 128) { llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); } return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return getNaturalAlignIndirect(RetTy); } /// isIllegalVectorType - check whether the vector type is legal for AArch64. bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs()) { // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); // NumElements should be power of 2. if (!llvm::isPowerOf2_32(NumElements)) return true; return Size != 64 && (Size != 128 || NumElements == 1); } return false; } bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS64 must have base types of a floating // point type or a short-vector type. This is the same as the 32-bit ABI, // but with the difference that any floating-point type is allowed, // including __fp16. 
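  //
  // Illustrative example (hypothetical type, for exposition only):
  //
  //   struct Halves { __fp16 A, B, C, D; };
  //
  // qualifies as a homogeneous aggregate under AAPCS64 and is passed in
  // four consecutive SIMD/FP registers, even though the 32-bit AAPCS
  // would reject __fp16 as a base type.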
if (const BuiltinType *BT = Ty->getAs()) { if (BT->isFloatingPoint()) return true; } else if (const VectorType *VT = Ty->getAs()) { unsigned VecSize = getContext().getTypeSize(VT); if (VecSize == 64 || VecSize == 128) return true; } return false; } bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return Members <= 4; } Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const { ABIArgInfo AI = classifyArgumentType(Ty); bool IsIndirect = AI.isIndirect(); llvm::Type *BaseTy = CGF.ConvertType(Ty); if (IsIndirect) BaseTy = llvm::PointerType::getUnqual(BaseTy); else if (AI.getCoerceToType()) BaseTy = AI.getCoerceToType(); unsigned NumRegs = 1; if (llvm::ArrayType *ArrTy = dyn_cast(BaseTy)) { BaseTy = ArrTy->getElementType(); NumRegs = ArrTy->getNumElements(); } bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy(); // The AArch64 va_list type and handling is specified in the Procedure Call // Standard, section B.4: // // struct { // void *__stack; // void *__gr_top; // void *__vr_top; // int __gr_offs; // int __vr_offs; // }; llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); auto TyInfo = getContext().getTypeInfoInChars(Ty); CharUnits TyAlign = TyInfo.second; Address reg_offs_p = Address::invalid(); llvm::Value *reg_offs = nullptr; int reg_top_index; CharUnits reg_top_offset; int RegSize = IsIndirect ? 8 : TyInfo.first.getQuantity(); if (!IsFPR) { // 3 is the field number of __gr_offs reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24), "gr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); reg_top_index = 1; // field number for __gr_top reg_top_offset = CharUnits::fromQuantity(8); RegSize = llvm::alignTo(RegSize, 8); } else { // 4 is the field number of __vr_offs. reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, CharUnits::fromQuantity(28), "vr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); reg_top_index = 2; // field number for __vr_top reg_top_offset = CharUnits::fromQuantity(16); RegSize = 16 * NumRegs; } //======================================= // Find out where argument was passed //======================================= // If reg_offs >= 0 we're already using the stack for this type of // argument. We don't want to keep updating reg_offs (in case it overflows, // though anyone passing 2GB of arguments, each at most 16 bytes, deserves // whatever they get). llvm::Value *UsingStack = nullptr; UsingStack = CGF.Builder.CreateICmpSGE( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0)); CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); // Otherwise, at least some kind of argument could go in these registers, the // question is whether this particular type is too big. CGF.EmitBlock(MaybeRegBlock); // Integer arguments may need to correct register alignment (for example a // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we // align __gr_offs to calculate the potential address. 
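  //
  // Illustrative walk-through (for exposition only): if __gr_offs is -40
  // after an 8-byte argument and a 16-byte-aligned __int128 is requested,
  // the add-and-mask below rounds -40 up to -32, so the value is reloaded
  // from an even/odd register pair rather than straddling a misaligned one.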
if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) { int Align = TyAlign.getQuantity(); reg_offs = CGF.Builder.CreateAdd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), "align_regoffs"); reg_offs = CGF.Builder.CreateAnd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align), "aligned_regoffs"); } // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. // The fact that this is done unconditionally reflects the fact that // allocating an argument to the stack also uses up all the remaining // registers of the appropriate kind. llvm::Value *NewOffset = nullptr; NewOffset = CGF.Builder.CreateAdd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs"); CGF.Builder.CreateStore(NewOffset, reg_offs_p); // Now we're in a position to decide whether this argument really was in // registers or not. llvm::Value *InRegs = nullptr; InRegs = CGF.Builder.CreateICmpSLE( NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); //======================================= // Argument was in registers //======================================= // Now we emit the code for if the argument was originally passed in // registers. First start the appropriate block: CGF.EmitBlock(InRegBlock); llvm::Value *reg_top = nullptr; Address reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, reg_top_offset, "reg_top_p"); reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); Address BaseAddr(CGF.Builder.CreateInBoundsGEP(reg_top, reg_offs), CharUnits::fromQuantity(IsFPR ? 16 : 8)); Address RegAddr = Address::invalid(); llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty); if (IsIndirect) { // If it's been passed indirectly (actually a struct), whatever we find from // stored registers or on the stack will actually be a struct **. MemTy = llvm::PointerType::getUnqual(MemTy); } const Type *Base = nullptr; uint64_t NumMembers = 0; bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers); if (IsHFA && NumMembers > 1) { // Homogeneous aggregates passed in registers will have their elements split // and stored 16-bytes apart regardless of size (they're notionally in qN, // qN+1, ...). We reload and store into a temporary local variable // contiguously. assert(!IsIndirect && "Homogeneous aggregates should be passed directly"); auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0)); llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); Address Tmp = CGF.CreateTempAlloca(HFATy, std::max(TyAlign, BaseTyInfo.second)); // On big-endian platforms, the value will be right-aligned in its slot. int Offset = 0; if (CGF.CGM.getDataLayout().isBigEndian() && BaseTyInfo.first.getQuantity() < 16) Offset = 16 - BaseTyInfo.first.getQuantity(); for (unsigned i = 0; i < NumMembers; ++i) { CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset); Address LoadAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset); LoadAddr = CGF.Builder.CreateElementBitCast(LoadAddr, BaseTy); Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i, BaseTyInfo.first); llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); CGF.Builder.CreateStore(Elem, StoreAddr); } RegAddr = CGF.Builder.CreateElementBitCast(Tmp, MemTy); } else { // Otherwise the object is contiguous in memory. // It might be right-aligned in its slot. 
CharUnits SlotSize = BaseAddr.getAlignment(); if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect && (IsHFA || !isAggregateTypeForABI(Ty)) && TyInfo.first < SlotSize) { CharUnits Offset = SlotSize - TyInfo.first; BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset); } RegAddr = CGF.Builder.CreateElementBitCast(BaseAddr, MemTy); } CGF.EmitBranch(ContBlock); //======================================= // Argument was on the stack //======================================= CGF.EmitBlock(OnStackBlock); Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, CharUnits::Zero(), "stack_p"); llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack"); // Again, stack arguments may need realignment. In this case both integer and // floating-point ones might be affected. if (!IsIndirect && TyAlign.getQuantity() > 8) { int Align = TyAlign.getQuantity(); OnStackPtr = CGF.Builder.CreatePtrToInt(OnStackPtr, CGF.Int64Ty); OnStackPtr = CGF.Builder.CreateAdd( OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), "align_stack"); OnStackPtr = CGF.Builder.CreateAnd( OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, -Align), "align_stack"); OnStackPtr = CGF.Builder.CreateIntToPtr(OnStackPtr, CGF.Int8PtrTy); } Address OnStackAddr(OnStackPtr, std::max(CharUnits::fromQuantity(8), TyAlign)); // All stack slots are multiples of 8 bytes. CharUnits StackSlotSize = CharUnits::fromQuantity(8); CharUnits StackSize; if (IsIndirect) StackSize = StackSlotSize; else StackSize = TyInfo.first.alignTo(StackSlotSize); llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize); llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(OnStackPtr, StackSizeC, "new_stack"); // Write the new value of __stack for the next call to va_arg CGF.Builder.CreateStore(NewStack, stack_p); if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) && TyInfo.first < StackSlotSize) { CharUnits Offset = StackSlotSize - TyInfo.first; OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset); } OnStackAddr = CGF.Builder.CreateElementBitCast(OnStackAddr, MemTy); CGF.EmitBranch(ContBlock); //======================================= // Tidy up //======================================= CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr, OnStackBlock, "vaargs.addr"); if (IsIndirect) return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), TyInfo.second); return ResAddr; } Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const { // The backend's lowering doesn't support va_arg for aggregates or // illegal vector types. Lower VAArg here for these cases and use // the LLVM va_arg instruction for everything else. if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); CharUnits SlotSize = CharUnits::fromQuantity(8); // Empty records are ignored for parameter passing purposes. if (isEmptyRecord(getContext(), Ty, true)) { Address Addr(CGF.Builder.CreateLoad(VAListAddr, "ap.cur"), SlotSize); Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); return Addr; } // The size of the actual thing passed, which might end up just // being a pointer for indirect types. auto TyInfo = getContext().getTypeInfoInChars(Ty); // Arguments bigger than 16 bytes which aren't homogeneous // aggregates should be passed indirectly. 
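  //
  // Illustrative example (hypothetical type, for exposition only): a
  // 32-byte
  //
  //   struct Big { long A, B, C, D; };
  //
  // is not a homogeneous aggregate, so only a pointer to caller-owned
  // storage sits in the Darwin argument area and the value is loaded
  // through it.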
bool IsIndirect = false; if (TyInfo.first.getQuantity() > 16) { const Type *Base = nullptr; uint64_t Members = 0; IsIndirect = !isHomogeneousAggregate(Ty, Base, Members); } return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// namespace { class ARMABIInfo : public SwiftABIInfo { public: enum ABIKind { APCS = 0, AAPCS = 1, AAPCS_VFP = 2, AAPCS16_VFP = 3, }; private: ABIKind Kind; public: ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : SwiftABIInfo(CGT), Kind(_Kind) { setCCs(); } bool isEABI() const { switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::Android: case llvm::Triple::EABI: case llvm::Triple::EABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABI: case llvm::Triple::MuslEABIHF: return true; default: return false; } } bool isEABIHF() const { switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABIHF: return true; default: return false; } } ABIKind getABIKind() const { return Kind; } private: ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const; ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const; bool isIllegalVectorType(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); bool shouldPassIndirectlyForSwift(CharUnits totalSize, ArrayRef scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { public: ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) :TargetCodeGenInfo(new ARMABIInfo(CGT, K)) {} const ARMABIInfo &getABIInfo() const { return static_cast(TargetCodeGenInfo::getABIInfo()); } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 13; } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue"; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override { llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-15 are the 16 integer registers. 
AssignToArrayRange(CGF.Builder, Address, Four8, 0, 15); return false; } unsigned getSizeOfUnwindException() const override { if (getABIInfo().isEABI()) return 88; return TargetCodeGenInfo::getSizeOfUnwindException(); } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; const ARMInterruptAttr *Attr = FD->getAttr(); if (!Attr) return; const char *Kind; switch (Attr->getInterrupt()) { case ARMInterruptAttr::Generic: Kind = ""; break; case ARMInterruptAttr::IRQ: Kind = "IRQ"; break; case ARMInterruptAttr::FIQ: Kind = "FIQ"; break; case ARMInterruptAttr::SWI: Kind = "SWI"; break; case ARMInterruptAttr::ABORT: Kind = "ABORT"; break; case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break; } llvm::Function *Fn = cast(GV); Fn->addFnAttr("interrupt", Kind); ARMABIInfo::ABIKind ABI = cast(getABIInfo()).getABIKind(); if (ABI == ARMABIInfo::APCS) return; // AAPCS guarantees that sp will be 8-byte aligned on any public interface, // however this is not necessarily true on taking any interrupt. Instruct // the backend to perform a realignment as part of the function prologue. llvm::AttrBuilder B; B.addStackAlignmentAttr(8); Fn->addAttributes(llvm::AttributeSet::FunctionIndex, llvm::AttributeSet::get(CGM.getLLVMContext(), llvm::AttributeSet::FunctionIndex, B)); } }; class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo { public: WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) : ARMTargetCodeGenInfo(CGT, K) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); addStackProbeSizeTargetAttribute(D, GV, CGM); } } void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, FI.isVariadic()); // Always honor user-specified calling convention. if (FI.getCallingConvention() != llvm::CallingConv::C) return; llvm::CallingConv::ID cc = getRuntimeCC(); if (cc != llvm::CallingConv::C) FI.setEffectiveCallingConvention(cc); } /// Return the default calling convention that LLVM will use. llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { // The default calling convention that LLVM will infer. if (isEABIHF() || getTarget().getTriple().isWatchABI()) return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) return llvm::CallingConv::ARM_AAPCS; else return llvm::CallingConv::ARM_APCS; } /// Return the calling convention that our ABI would like us to use /// as the C calling convention. 
llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const { switch (getABIKind()) { case APCS: return llvm::CallingConv::ARM_APCS; case AAPCS: return llvm::CallingConv::ARM_AAPCS; case AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; case AAPCS16_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; } llvm_unreachable("bad ABI kind"); } void ARMABIInfo::setCCs() { assert(getRuntimeCC() == llvm::CallingConv::C); // Don't muddy up the IR with a ton of explicit annotations if // they'd just match what LLVM will infer from the triple. llvm::CallingConv::ID abiCC = getABIDefaultCC(); if (abiCC != getLLVMDefaultCC()) RuntimeCC = abiCC; // AAPCS apparently requires runtime support functions to be soft-float, but // that's almost certainly for historic reasons (Thumb1 not supporting VFP // most likely). It's more convenient for AAPCS16_VFP to be hard-float. switch (getABIKind()) { case APCS: case AAPCS16_VFP: if (abiCC != getLLVMDefaultCC()) BuiltinCC = abiCC; break; case AAPCS: case AAPCS_VFP: BuiltinCC = llvm::CallingConv::ARM_AAPCS; break; } } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic) const { // 6.1.2.1 The following argument types are VFP CPRCs: // A single-precision floating-point type (including promoted // half-precision types); A double-precision floating-point type; // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate // with a Base Type of a single- or double-precision floating-point type, // 64-bit containerized vectors or 128-bit containerized vectors with one // to four Elements. bool IsEffectivelyAAPCS_VFP = getABIKind() == AAPCS_VFP && !isVariadic; Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. if (isIllegalVectorType(Ty)) { uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (Size == 64) { llvm::Type *ResType = llvm::VectorType::get( llvm::Type::getInt32Ty(getVMContext()), 2); return ABIArgInfo::getDirect(ResType); } if (Size == 128) { llvm::Type *ResType = llvm::VectorType::get( llvm::Type::getInt32Ty(getVMContext()), 4); return ABIArgInfo::getDirect(ResType); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } // __fp16 gets passed as if it were an int or float, but with the top 16 bits // unspecified. This is not done for OpenCL as it handles the half type // natively, and does not need to interwork with AAPCS code. if (Ty->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) { Ty = EnumTy->getDecl()->getIntegerType(); } return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); if (IsEffectivelyAAPCS_VFP) { // Homogeneous Aggregates need to be expanded when we can fit the aggregate // into VFP registers. 
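    //
    // Illustrative example (hypothetical type, for exposition only): with
    // the hard-float AAPCS variant,
    //
    //   struct Quad { float F[4]; };
    //
    // is a homogeneous aggregate occupying s0-s3; the same call made
    // variadic drops back to the integer rules above.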
const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Base can be a floating-point or a vector. return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { // WatchOS does have homogeneous aggregates. Note that we intentionally use // this convention even for a variadic function: the backend will use GPRs // if needed. const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && Members <= 4 && "unexpected homogeneous aggregate"); llvm::Type *Ty = llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members); return ABIArgInfo::getDirect(Ty, 0, nullptr, false); } } if (getABIKind() == ARMABIInfo::AAPCS16_VFP && getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(16)) { // WatchOS is adopting the 64-bit AAPCS rule on composite types: if they're // bigger than 128-bits, they get placed in space allocated by the caller, // and a pointer is passed. return ABIArgInfo::getIndirect( CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false); } // Support byval for ARM. // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at // most 8-byte. We realign the indirect argument if type alignment is bigger // than ABI alignment. uint64_t ABIAlign = 4; uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8; if (getABIKind() == ARMABIInfo::AAPCS_VFP || getABIKind() == ARMABIInfo::AAPCS) ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8); if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { assert(getABIKind() != ARMABIInfo::AAPCS16_VFP && "unexpected byval"); return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); } // Otherwise, pass by coercing to a structure of the appropriate size. llvm::Type* ElemTy; unsigned SizeRegs; // FIXME: Try to match the types of the arguments more accurately where // we can. if (getContext().getTypeAlign(Ty) <= 32) { ElemTy = llvm::Type::getInt32Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32; } else { ElemTy = llvm::Type::getInt64Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64; } return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs)); } static bool isIntegerLikeType(QualType Ty, ASTContext &Context, llvm::LLVMContext &VMContext) { // APCS, C Language Calling Conventions, Non-Simple Return Values: A structure // is called integer-like if its size is less than or equal to one word, and // the offset of each of its addressable sub-fields is zero. uint64_t Size = Context.getTypeSize(Ty); // Check that the type fits in a word. if (Size > 32) return false; // FIXME: Handle vector types! if (Ty->isVectorType()) return false; // Float types are never treated as "integer like". if (Ty->isRealFloatingType()) return false; // If this is a builtin or pointer type then it is ok. if (Ty->getAs() || Ty->isPointerType()) return true; // Small complex integer types are "integer like". if (const ComplexType *CT = Ty->getAs()) return isIntegerLikeType(CT->getElementType(), Context, VMContext); // Single element and zero sized arrays should be allowed, by the definition // above, but they are not. // Otherwise, it must be a record type. const RecordType *RT = Ty->getAs(); if (!RT) return false; // Ignore records with flexible arrays. 
const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; // Check that all sub-fields are at offset 0, and are themselves "integer // like". const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); bool HadField = false; unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { const FieldDecl *FD = *i; // Bit-fields are not addressable, we only need to verify they are "integer // like". We still have to disallow a subsequent non-bitfield, for example: // struct { int : 0; int x } // is non-integer like according to gcc. if (FD->isBitField()) { if (!RD->isUnion()) HadField = true; if (!isIntegerLikeType(FD->getType(), Context, VMContext)) return false; continue; } // Check if this field is at offset 0. if (Layout.getFieldOffset(idx) != 0) return false; if (!isIntegerLikeType(FD->getType(), Context, VMContext)) return false; // Only allow at most one field in a structure. This doesn't match the // wording above, but follows gcc in situations with a field following an // empty structure. if (!RD->isUnion()) { if (HadField) return false; HadField = true; } } return true; } ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic) const { bool IsEffectivelyAAPCS_VFP = (getABIKind() == AAPCS_VFP || getABIKind() == AAPCS16_VFP) && !isVariadic; if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) { return getNaturalAlignIndirect(RetTy); } // __fp16 gets returned as if it were an int or float, but with the top 16 // bits unspecified. This is not done for OpenCL as it handles the half type // natively, and does not need to interwork with AAPCS code. if (RetTy->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect(); } // Are we following APCS? if (getABIKind() == APCS) { if (isEmptyRecord(getContext(), RetTy, false)) return ABIArgInfo::getIgnore(); // Complex types are all returned as packed integers. // // FIXME: Consider using 2 x vector types if the back end handles them // correctly. if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(llvm::IntegerType::get( getVMContext(), getContext().getTypeSize(RetTy))); // Integer like structures are returned in r0. if (isIntegerLikeType(RetTy, getContext(), getVMContext())) { // Return in the smallest viable integer type. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); } // Otherwise return in memory. return getNaturalAlignIndirect(RetTy); } // Otherwise this is an AAPCS variant. if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Check for homogeneous aggregates with AAPCS-VFP. 
  if (IsEffectivelyAAPCS_VFP) {
    const Type *Base = nullptr;
    uint64_t Members = 0;
    if (isHomogeneousAggregate(RetTy, Base, Members)) {
      assert(Base && "Base class should be set for homogeneous aggregate");
      // Homogeneous Aggregates are returned directly.
      return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
    }
  }

  // Aggregates <= 4 bytes are returned in r0; other aggregates
  // are returned indirectly.
  uint64_t Size = getContext().getTypeSize(RetTy);
  if (Size <= 32) {
    if (getDataLayout().isBigEndian())
      // Return in 32-bit integer type (as if loaded by LDR, AAPCS 5.4)
      return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));

    // Return in the smallest viable integer type.
    if (Size <= 8)
      return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
    if (Size <= 16)
      return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
    return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
  } else if (Size <= 128 && getABIKind() == AAPCS16_VFP) {
    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext());
    llvm::Type *CoerceTy =
        llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32);
    return ABIArgInfo::getDirect(CoerceTy);
  }

  return getNaturalAlignIndirect(RetTy);
}

/// isIllegalVector - check whether Ty is an illegal vector type.
bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    if (isAndroid()) {
      // Android shipped using Clang 3.1, which supported a slightly
      // different vector ABI. The primary differences were that 3-element
      // vector types were legal, and so were sub 32-bit vectors (i.e.
      // <2 x i8>). This path accepts that legacy behavior for Android only.
      // Check whether VT is legal.
      unsigned NumElements = VT->getNumElements();
      // NumElements should be power of 2 or equal to 3.
      if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3)
        return true;
    } else {
      // Check whether VT is legal.
      unsigned NumElements = VT->getNumElements();
      uint64_t Size = getContext().getTypeSize(VT);
      // NumElements should be power of 2.
      if (!llvm::isPowerOf2_32(NumElements))
        return true;
      // Size should be greater than 32 bits.
      return Size <= 32;
    }
  }
  return false;
}

bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  // Homogeneous aggregates for AAPCS-VFP must have base types of float,
  // double, or 64-bit or 128-bit vectors.
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->getKind() == BuiltinType::Float ||
        BT->getKind() == BuiltinType::Double ||
        BT->getKind() == BuiltinType::LongDouble)
      return true;
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    unsigned VecSize = getContext().getTypeSize(VT);
    if (VecSize == 64 || VecSize == 128)
      return true;
  }
  return false;
}

bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
                                                   uint64_t Members) const {
  return Members <= 4;
}

Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                              QualType Ty) const {
  CharUnits SlotSize = CharUnits::fromQuantity(4);

  // Empty records are ignored for parameter passing purposes.
  if (isEmptyRecord(getContext(), Ty, true)) {
    Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize);
    Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
    return Addr;
  }

  auto TyInfo = getContext().getTypeInfoInChars(Ty);
  CharUnits TyAlignForABI = TyInfo.second;

  // Use indirect if size of the illegal vector is bigger than 16 bytes.
bool IsIndirect = false; const Type *Base = nullptr; uint64_t Members = 0; if (TyInfo.first > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) { IsIndirect = true; // ARMv7k passes structs bigger than 16 bytes indirectly, in space // allocated by the caller. } else if (TyInfo.first > CharUnits::fromQuantity(16) && getABIKind() == ARMABIInfo::AAPCS16_VFP && !isHomogeneousAggregate(Ty, Base, Members)) { IsIndirect = true; // Otherwise, bound the type's ABI alignment. // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte. // Our callers should be prepared to handle an under-aligned address. } else if (getABIKind() == ARMABIInfo::AAPCS_VFP || getABIKind() == ARMABIInfo::AAPCS) { TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(8)); } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { // ARMv7k allows type alignment up to 16 bytes. TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(16)); } else { TyAlignForABI = CharUnits::fromQuantity(4); } TyInfo.second = TyAlignForABI; return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } //===----------------------------------------------------------------------===// // NVPTX ABI Implementation //===----------------------------------------------------------------------===// namespace { class NVPTXABIInfo : public ABIInfo { public: NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { public: NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; private: // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand); }; ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // note: this is different from default ABI if (!RetTy->isScalarType()) return ABIArgInfo::getDirect(); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Return aggregates type as indirect by value if (isAggregateTypeForABI(Ty)) return getNaturalAlignIndirect(Ty, /* byval */ true); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); // Always honor user-specified calling convention. 
if (FI.getCallingConvention() != llvm::CallingConv::C) return; FI.setEffectiveCallingConvention(getRuntimeCC()); } Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { llvm_unreachable("NVPTX does not support varargs"); } void NVPTXTargetCodeGenInfo:: setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const{ const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; llvm::Function *F = cast(GV); // Perform special handling in OpenCL mode if (M.getLangOpts().OpenCL) { // Use OpenCL function attributes to check for kernel functions // By default, all functions are device functions if (FD->hasAttr()) { // OpenCL __kernel functions get kernel metadata // Create !{, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); } } // Perform special handling in CUDA mode. if (M.getLangOpts().CUDA) { // CUDA __global__ functions get a kernel metadata entry. Since // __global__ functions cannot be called from the device, we do not // need to set the noinline attribute. if (FD->hasAttr()) { // Create !{, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); } if (CUDALaunchBoundsAttr *Attr = FD->getAttr()) { // Create !{, metadata !"maxntidx", i32 } node llvm::APSInt MaxThreads(32); MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext()); if (MaxThreads > 0) addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was // not specified in __launch_bounds__ or if the user specified a 0 value, // we don't have to add a PTX directive. if (Attr->getMinBlocks()) { llvm::APSInt MinBlocks(32); MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext()); if (MinBlocks > 0) // Create !{, metadata !"minctasm", i32 } node addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); } } } } void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand) { llvm::Module *M = F->getParent(); llvm::LLVMContext &Ctx = M->getContext(); // Get "nvvm.annotations" metadata node llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); llvm::Metadata *MDVals[] = { llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } } //===----------------------------------------------------------------------===// // SystemZ ABI Implementation //===----------------------------------------------------------------------===// namespace { class SystemZABIInfo : public SwiftABIInfo { bool HasVector; public: SystemZABIInfo(CodeGenTypes &CGT, bool HV) : SwiftABIInfo(CGT), HasVector(HV) {} bool isPromotableIntegerType(QualType Ty) const; bool isCompoundType(QualType Ty) const; bool isVectorArgumentType(QualType Ty) const; bool isFPArgumentType(QualType Ty) const; QualType GetSingleElementType(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType ArgTy) const; void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; 
bool shouldPassIndirectlyForSwift(CharUnits totalSize, ArrayRef scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } }; class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { public: SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector) : TargetCodeGenInfo(new SystemZABIInfo(CGT, HasVector)) {} }; } bool SystemZABIInfo::isPromotableIntegerType(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. if (Ty->isPromotableIntegerType()) return true; // 32-bit values must also be promoted. if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Int: case BuiltinType::UInt: return true; default: return false; } return false; } bool SystemZABIInfo::isCompoundType(QualType Ty) const { return (Ty->isAnyComplexType() || Ty->isVectorType() || isAggregateTypeForABI(Ty)); } bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const { return (HasVector && Ty->isVectorType() && getContext().getTypeSize(Ty) <= 128); } bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Float: case BuiltinType::Double: return true; default: return false; } return false; } QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const { if (const RecordType *RT = Ty->getAsStructureType()) { const RecordDecl *RD = RT->getDecl(); QualType Found; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) { QualType Base = I.getType(); // Empty bases don't affect things either way. if (isEmptyRecord(getContext(), Base, true)) continue; if (!Found.isNull()) return Ty; Found = GetSingleElementType(Base); } // Check the fields. for (const auto *FD : RD->fields()) { // For compatibility with GCC, ignore empty bitfields in C++ mode. // Unlike isSingleElementStruct(), empty structure and array fields // do count. So do anonymous bitfields that aren't zero-sized. if (getContext().getLangOpts().CPlusPlus && FD->isBitField() && FD->getBitWidthValue(getContext()) == 0) continue; // Unlike isSingleElementStruct(), arrays do not count. // Nested structures still do though. if (!Found.isNull()) return Ty; Found = GetSingleElementType(FD->getType()); } // Unlike isSingleElementStruct(), trailing padding is allowed. // An 8-byte aligned struct s { float f; } is passed as a double. if (!Found.isNull()) return Found; } return Ty; } Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // Assume that va_list type is correct; should be pointer to LLVM type: // struct { // i64 __gpr; // i64 __fpr; // i8 *__overflow_arg_area; // i8 *__reg_save_area; // }; // Every non-vector argument occupies 8 bytes and is passed by preference // in either GPRs or FPRs. Vector arguments occupy 8 or 16 bytes and are // always passed on the stack. 
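  //
  // Illustrative walk-through (for exposition only): the first five integer
  // va_args are satisfied from the register save area (r2-r6, with __gpr
  // counting 0 through 4); once __gpr reaches 5, the code below switches
  // to __overflow_arg_area and advances it in 8-byte slots.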
Ty = getContext().getCanonicalType(Ty); auto TyInfo = getContext().getTypeInfoInChars(Ty); llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty); llvm::Type *DirectTy = ArgTy; ABIArgInfo AI = classifyArgumentType(Ty); bool IsIndirect = AI.isIndirect(); bool InFPRs = false; bool IsVector = false; CharUnits UnpaddedSize; CharUnits DirectAlign; if (IsIndirect) { DirectTy = llvm::PointerType::getUnqual(DirectTy); UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8); } else { if (AI.getCoerceToType()) ArgTy = AI.getCoerceToType(); InFPRs = ArgTy->isFloatTy() || ArgTy->isDoubleTy(); IsVector = ArgTy->isVectorTy(); UnpaddedSize = TyInfo.first; DirectAlign = TyInfo.second; } CharUnits PaddedSize = CharUnits::fromQuantity(8); if (IsVector && UnpaddedSize > PaddedSize) PaddedSize = CharUnits::fromQuantity(16); assert((UnpaddedSize <= PaddedSize) && "Invalid argument size."); CharUnits Padding = (PaddedSize - UnpaddedSize); llvm::Type *IndexTy = CGF.Int64Ty; llvm::Value *PaddedSizeV = llvm::ConstantInt::get(IndexTy, PaddedSize.getQuantity()); if (IsVector) { // Work out the address of a vector argument on the stack. // Vector arguments are always passed in the high bits of a // single (8 byte) or double (16 byte) stack slot. Address OverflowArgAreaPtr = CGF.Builder.CreateStructGEP(VAListAddr, 2, CharUnits::fromQuantity(16), "overflow_arg_area_ptr"); Address OverflowArgArea = Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), TyInfo.second); Address MemAddr = CGF.Builder.CreateElementBitCast(OverflowArgArea, DirectTy, "mem_addr"); // Update overflow_arg_area_ptr pointer llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(OverflowArgArea.getPointer(), PaddedSizeV, "overflow_arg_area"); CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); return MemAddr; } assert(PaddedSize.getQuantity() == 8); unsigned MaxRegs, RegCountField, RegSaveIndex; CharUnits RegPadding; if (InFPRs) { MaxRegs = 4; // Maximum of 4 FPR arguments RegCountField = 1; // __fpr RegSaveIndex = 16; // save offset for f0 RegPadding = CharUnits(); // floats are passed in the high bits of an FPR } else { MaxRegs = 5; // Maximum of 5 GPR arguments RegCountField = 0; // __gpr RegSaveIndex = 2; // save offset for r2 RegPadding = Padding; // values are passed in the low bits of a GPR } Address RegCountPtr = CGF.Builder.CreateStructGEP( VAListAddr, RegCountField, RegCountField * CharUnits::fromQuantity(8), "reg_count_ptr"); llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count"); llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs); llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV, "fits_in_regs"); llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); // Emit code to load the value if it was passed in registers. CGF.EmitBlock(InRegBlock); // Work out the address of an argument register. 
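// Illustrative note (not in the original source): the computation below
// amounts to
//   reg_save_area + RegSaveIndex * 8 + RegPadding + RegCount * 8,
// i.e. the saved argument-register image (starting at r2 for GPRs and f0
// for FPRs, per the save offsets above) indexed by the number of register
// arguments already consumed.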
llvm::Value *ScaledRegCount = CGF.Builder.CreateMul(RegCount, PaddedSizeV, "scaled_reg_count"); llvm::Value *RegBase = llvm::ConstantInt::get(IndexTy, RegSaveIndex * PaddedSize.getQuantity() + RegPadding.getQuantity()); llvm::Value *RegOffset = CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset"); Address RegSaveAreaPtr = CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24), "reg_save_area_ptr"); llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area"); Address RawRegAddr(CGF.Builder.CreateGEP(RegSaveArea, RegOffset, "raw_reg_addr"), PaddedSize); Address RegAddr = CGF.Builder.CreateElementBitCast(RawRegAddr, DirectTy, "reg_addr"); // Update the register count llvm::Value *One = llvm::ConstantInt::get(IndexTy, 1); llvm::Value *NewRegCount = CGF.Builder.CreateAdd(RegCount, One, "reg_count"); CGF.Builder.CreateStore(NewRegCount, RegCountPtr); CGF.EmitBranch(ContBlock); // Emit code to load the value if it was passed in memory. CGF.EmitBlock(InMemBlock); // Work out the address of a stack argument. Address OverflowArgAreaPtr = CGF.Builder.CreateStructGEP( VAListAddr, 2, CharUnits::fromQuantity(16), "overflow_arg_area_ptr"); Address OverflowArgArea = Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), PaddedSize); Address RawMemAddr = CGF.Builder.CreateConstByteGEP(OverflowArgArea, Padding, "raw_mem_addr"); Address MemAddr = CGF.Builder.CreateElementBitCast(RawMemAddr, DirectTy, "mem_addr"); // Update overflow_arg_area_ptr pointer llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(OverflowArgArea.getPointer(), PaddedSizeV, "overflow_arg_area"); CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); CGF.EmitBranch(ContBlock); // Return the appropriate result. CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, "va_arg.addr"); if (IsIndirect) ResAddr = Address(CGF.Builder.CreateLoad(ResAddr, "indirect_arg"), TyInfo.second); return ResAddr; } ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (isVectorArgumentType(RetTy)) return ABIArgInfo::getDirect(); if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64) return getNaturalAlignIndirect(RetTy); return (isPromotableIntegerType(RetTy) ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { // Handle the generic C++ ABI. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Integers and enums are extended to full register width. if (isPromotableIntegerType(Ty)) return ABIArgInfo::getExtend(); // Handle vector types and vector-like structure types. Note that // as opposed to float-like structure types, we do not allow any // padding for vector-like structures, so verify the sizes match. uint64_t Size = getContext().getTypeSize(Ty); QualType SingleElementTy = GetSingleElementType(Ty); if (isVectorArgumentType(SingleElementTy) && getContext().getTypeSize(SingleElementTy) == Size) return ABIArgInfo::getDirect(CGT.ConvertType(SingleElementTy)); // Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly. if (Size != 8 && Size != 16 && Size != 32 && Size != 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Handle small structures. 
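// For example (illustrative, not in the original source): struct { float f; }
// has a single FP element and Size == 32, so it is passed directly as a
// float, while a 4-byte struct with no single FP element is passed as the
// unextended integer i32.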
if (const RecordType *RT = Ty->getAs<RecordType>()) { // Structures with flexible arrays have variable length, so really // fail the size test above. const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // The structure is passed as an unextended integer, a float, or a double. llvm::Type *PassTy; if (isFPArgumentType(SingleElementTy)) { assert(Size == 32 || Size == 64); if (Size == 32) PassTy = llvm::Type::getFloatTy(getVMContext()); else PassTy = llvm::Type::getDoubleTy(getVMContext()); } else PassTy = llvm::IntegerType::get(getVMContext(), Size); return ABIArgInfo::getDirect(PassTy); } // Non-structure compounds are passed indirectly. if (isCompoundType(Ty)) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); return ABIArgInfo::getDirect(nullptr); } //===----------------------------------------------------------------------===// // MSP430 ABI Implementation //===----------------------------------------------------------------------===// namespace { class MSP430TargetCodeGenInfo : public TargetCodeGenInfo { public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; } void MSP430TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) { // Handle 'interrupt' attribute: llvm::Function *F = cast<llvm::Function>(GV); // Step 1: Set ISR calling convention. F->setCallingConv(llvm::CallingConv::MSP430_INTR); // Step 2: Add attributes goodness. F->addFnAttr(llvm::Attribute::NoInline); // Step 3: Emit ISR vector alias. unsigned Num = attr->getNumber() / 2; llvm::GlobalAlias::create(llvm::Function::ExternalLinkage, "__isr_" + Twine(Num), F); } } } //===----------------------------------------------------------------------===// // MIPS ABI Implementation. This works for both little-endian and // big-endian variants. //===----------------------------------------------------------------------===// namespace { class MipsABIInfo : public ABIInfo { bool IsO32; unsigned MinABIStackAlignInBytes, StackAlignInBytes; void CoerceToIntArgs(uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const; llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; llvm::Type* returnAggregateInRegs(QualType RetTy, uint64_t Size) const; llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; public: MipsABIInfo(CodeGenTypes &CGT, bool _IsO32) : ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8), StackAlignInBytes(IsO32 ? 8 : 16) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool shouldSignExtUnsignedType(QualType Ty) const override; }; class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { unsigned SizeOfUnwindException; public: MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32) : TargetCodeGenInfo(new MipsABIInfo(CGT, IsO32)), SizeOfUnwindException(IsO32 ?
24 : 32) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 29; } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *Fn = cast<llvm::Function>(GV); if (FD->hasAttr<Mips16Attr>()) { Fn->addFnAttr("mips16"); } else if (FD->hasAttr<NoMips16Attr>()) { Fn->addFnAttr("nomips16"); } const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>(); if (!Attr) return; const char *Kind; switch (Attr->getInterrupt()) { case MipsInterruptAttr::eic: Kind = "eic"; break; case MipsInterruptAttr::sw0: Kind = "sw0"; break; case MipsInterruptAttr::sw1: Kind = "sw1"; break; case MipsInterruptAttr::hw0: Kind = "hw0"; break; case MipsInterruptAttr::hw1: Kind = "hw1"; break; case MipsInterruptAttr::hw2: Kind = "hw2"; break; case MipsInterruptAttr::hw3: Kind = "hw3"; break; case MipsInterruptAttr::hw4: Kind = "hw4"; break; case MipsInterruptAttr::hw5: Kind = "hw5"; break; } Fn->addFnAttr("interrupt", Kind); } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; unsigned getSizeOfUnwindException() const override { return SizeOfUnwindException; } }; } void MipsABIInfo::CoerceToIntArgs( uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const { llvm::IntegerType *IntTy = llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); // Add (TySize / MinABIStackAlignInBytes) args of IntTy. for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) ArgList.push_back(IntTy); // If necessary, add one more integer type to ArgList. unsigned R = TySize % (MinABIStackAlignInBytes * 8); if (R) ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); } // In N32/64, an aligned double precision floating point field is passed in // a register. llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { SmallVector<llvm::Type*, 8> ArgList, IntArgList; if (IsO32) { CoerceToIntArgs(TySize, ArgList); return llvm::StructType::get(getVMContext(), ArgList); } if (Ty->isComplexType()) return CGT.ConvertType(Ty); const RecordType *RT = Ty->getAs<RecordType>(); // Unions/vectors are passed in integer registers. if (!RT || !RT->isStructureOrClassType()) { CoerceToIntArgs(TySize, ArgList); return llvm::StructType::get(getVMContext(), ArgList); } const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); assert(!(TySize % 8) && "Size of structure must be multiple of 8."); uint64_t LastOffset = 0; unsigned idx = 0; llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); // Iterate over fields in the struct/class and check if there are any aligned // double fields. for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { const QualType Ty = i->getType(); const BuiltinType *BT = Ty->getAs<BuiltinType>(); if (!BT || BT->getKind() != BuiltinType::Double) continue; uint64_t Offset = Layout.getFieldOffset(idx); if (Offset % 64) // Ignore doubles that are not aligned. continue; // Add ((Offset - LastOffset) / 64) args of type i64. for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) ArgList.push_back(I64); // Add double type.
ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); LastOffset = Offset + 64; } CoerceToIntArgs(TySize - LastOffset, IntArgList); ArgList.append(IntArgList.begin(), IntArgList.end()); return llvm::StructType::get(getVMContext(), ArgList); } llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset, uint64_t Offset) const { if (OrigOffset + MinABIStackAlignInBytes > Offset) return nullptr; return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); } ABIArgInfo MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { Ty = useFirstFieldIfTransparentUnion(Ty); uint64_t OrigOffset = Offset; uint64_t TySize = getContext().getTypeSize(Ty); uint64_t Align = getContext().getTypeAlign(Ty) / 8; Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), (uint64_t)StackAlignInBytes); unsigned CurrOffset = llvm::alignTo(Offset, Align); Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) { // Ignore empty aggregates. if (TySize == 0) return ABIArgInfo::getIgnore(); if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { Offset = OrigOffset + MinABIStackAlignInBytes; return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } // If we have reached here, aggregates are passed directly by coercing to // another structure type. Padding is inserted if the offset of the // aggregate is unaligned. ABIArgInfo ArgInfo = ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, getPaddingType(OrigOffset, CurrOffset)); ArgInfo.setInReg(true); return ArgInfo; } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); // All integral types are promoted to the GPR width. if (Ty->isIntegralOrEnumerationType()) return ABIArgInfo::getExtend(); return ABIArgInfo::getDirect( nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset)); } llvm::Type* MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const { const RecordType *RT = RetTy->getAs<RecordType>(); SmallVector<llvm::Type*, 8> RTList; if (RT && RT->isStructureOrClassType()) { const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); unsigned FieldCnt = Layout.getFieldCount(); // N32/64 returns struct/classes in floating point registers if the // following conditions are met: // 1. The size of the struct/class is no larger than 128-bit. // 2. The struct/class has one or two fields all of which are floating // point types. // 3. The offset of the first field is zero (this follows what gcc does). // // Any other composite results are returned in integer registers. // if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) { RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end(); for (; b != e; ++b) { const BuiltinType *BT = b->getType()->getAs<BuiltinType>(); if (!BT || !BT->isFloatingPoint()) break; RTList.push_back(CGT.ConvertType(b->getType())); } if (b == e) return llvm::StructType::get(getVMContext(), RTList, RD->hasAttr<PackedAttr>()); RTList.clear(); } } CoerceToIntArgs(Size, RTList); return llvm::StructType::get(getVMContext(), RTList); } ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { uint64_t Size = getContext().getTypeSize(RetTy); if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // O32 doesn't treat zero-sized structs differently from other structs. // However, N32/N64 ignores zero sized return values.
if (!IsO32 && Size == 0) return ABIArgInfo::getIgnore(); if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) { if (Size <= 128) { if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(); // O32 returns integer vectors in registers and N32/N64 returns all small // aggregates in registers. if (!IsO32 || (RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) { ABIArgInfo ArgInfo = ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size)); ArgInfo.setInReg(true); return ArgInfo; } } return getNaturalAlignIndirect(RetTy); } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const { ABIArgInfo &RetInfo = FI.getReturnInfo(); if (!getCXXABI().classifyReturnType(FI)) RetInfo = classifyReturnType(FI.getReturnType()); // Check if a pointer to an aggregate is passed as a hidden argument. uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0; for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, Offset); } Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType OrigTy) const { QualType Ty = OrigTy; // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64. // Pointers are also promoted in the same way but this only matters for N32. unsigned SlotSizeInBits = IsO32 ? 32 : 64; unsigned PtrWidth = getTarget().getPointerWidth(0); bool DidPromote = false; if ((Ty->isIntegerType() && getContext().getIntWidth(Ty) < SlotSizeInBits) || (Ty->isPointerType() && PtrWidth < SlotSizeInBits)) { DidPromote = true; Ty = getContext().getIntTypeForBitwidth(SlotSizeInBits, Ty->isSignedIntegerType()); } auto TyInfo = getContext().getTypeInfoInChars(Ty); // The alignment of things in the argument area is never larger than // StackAlignInBytes. TyInfo.second = std::min(TyInfo.second, CharUnits::fromQuantity(StackAlignInBytes)); // MinABIStackAlignInBytes is the size of argument slots on the stack. CharUnits ArgSlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes); Address Addr = emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, TyInfo, ArgSlotSize, /*AllowHigherAlign*/ true); // If there was a promotion, "unpromote" into a temporary. // TODO: can we just use a pointer into a subset of the original slot? if (DidPromote) { Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp"); llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr); // Truncate down to the right width. llvm::Type *IntTy = (OrigTy->isIntegerType() ? Temp.getElementType() : CGF.IntPtrTy); llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy); if (OrigTy->isPointerType()) V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType()); CGF.Builder.CreateStore(V, Temp); Addr = Temp; } return Addr; } bool MipsABIInfo::shouldSignExtUnsignedType(QualType Ty) const { int TySize = getContext().getTypeSize(Ty); // MIPS64 ABI requires unsigned 32 bit integers to be sign extended. if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) return true; return false; } bool MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { // This information comes from gcc's implementation, which seems to // be as canonical as it gets. // Everything on MIPS is 4 bytes. Double-precision FP registers // are aliased to pairs of single-precision FP registers.
llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-31 are the general purpose registers, $0 - $31. // 32-63 are the floating-point registers, $f0 - $f31. // 64 and 65 are the multiply/divide registers, $hi and $lo. // 66 is the (notional, I think) register for signal-handler return. AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65); // 67-74 are the floating-point status registers, $fcc0 - $fcc7. // They are one bit wide and ignored here. // 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31. // (coprocessor 1 is the FP unit) // 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31. // 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31. // 176-181 are the DSP accumulator registers. AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181); return false; } //===----------------------------------------------------------------------===// // TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults. // Currently subclassed only to implement custom OpenCL C function attribute // handling. //===----------------------------------------------------------------------===// namespace { class TCETargetCodeGenInfo : public DefaultTargetCodeGenInfo { public: TCETargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; void TCETargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *F = cast<llvm::Function>(GV); if (M.getLangOpts().OpenCL) { if (FD->hasAttr<OpenCLKernelAttr>()) { // OpenCL C Kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>(); if (Attr) { // Convert the reqd_work_group_size() attributes to metadata. llvm::LLVMContext &Context = F->getContext(); llvm::NamedMDNode *OpenCLMetadata = M.getModule().getOrInsertNamedMetadata( "opencl.kernel_wg_size_info"); SmallVector<llvm::Metadata *, 5> Operands; Operands.push_back(llvm::ConstantAsMetadata::get(F)); Operands.push_back( llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( M.Int32Ty, llvm::APInt(32, Attr->getXDim())))); Operands.push_back( llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( M.Int32Ty, llvm::APInt(32, Attr->getYDim())))); Operands.push_back( llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( M.Int32Ty, llvm::APInt(32, Attr->getZDim())))); // Add a boolean constant operand for "required" (true) or "hint" // (false) for implementing the work_group_size_hint attr later. // Currently always true as the hint is not yet implemented.
Operands.push_back( llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context))); OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands)); } } } } } //===----------------------------------------------------------------------===// // Hexagon ABI Implementation //===----------------------------------------------------------------------===// namespace { class HexagonABIInfo : public ABIInfo { public: HexagonABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class HexagonTargetCodeGenInfo : public TargetCodeGenInfo { public: HexagonTargetCodeGenInfo(CodeGenTypes &CGT) :TargetCodeGenInfo(new HexagonABIInfo(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 29; } }; } void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const { if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); uint64_t Size = getContext().getTypeSize(Ty); if (Size > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); // Pass in the smallest viable integer type. else if (Size > 32) return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); else if (Size > 16) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); else if (Size > 8) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); else return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); } ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 64) return getNaturalAlignIndirect(RetTy); if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Aggregates <= 8 bytes are returned in r0; other aggregates // are returned indirectly. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 64) { // Return in the smallest viable integer type.
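// Illustrative mapping for the ladder below (not in the original source):
// a 1-byte struct is returned in i8, a 3-byte struct in i32, and a 5- to
// 8-byte struct in i64.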
if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); if (Size <= 32) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); } return getNaturalAlignIndirect(RetTy, /*ByVal=*/true); } Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // FIXME: Someone needs to audit that this handles alignment correctly. return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(4), /*AllowHigherAlign*/ true); } //===----------------------------------------------------------------------===// // Lanai ABI Implementation //===----------------------------------------------------------------------===// namespace { class LanaiABIInfo : public DefaultABIInfo { public: LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} bool shouldUseInReg(QualType Ty, CCState &State) const; void computeInfo(CGFunctionInfo &FI) const override { CCState State(FI.getCallingConvention()); // Lanai uses 4 registers to pass arguments unless the function has the // regparm attribute set. if (FI.getHasRegParm()) { State.FreeRegs = FI.getRegParm(); } else { State.FreeRegs = 4; } if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, State); } ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; }; } // end anonymous namespace bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const { unsigned Size = getContext().getTypeSize(Ty); unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U; if (SizeInRegs == 0) return false; if (SizeInRegs > State.FreeRegs) { State.FreeRegs = 0; return false; } State.FreeRegs -= SizeInRegs; return true; } ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal, CCState &State) const { if (!ByVal) { if (State.FreeRegs) { --State.FreeRegs; // Non-byval indirects just use one pointer. return getNaturalAlignIndirectInReg(Ty); } return getNaturalAlignIndirect(Ty, false); } // Compute the byval alignment. const unsigned MinABIStackAlignInBytes = 4; unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, /*Realign=*/TypeAlign > MinABIStackAlignInBytes); } ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // Check with the C++ ABI first. const RecordType *RT = Ty->getAs<RecordType>(); if (RT) { CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); if (RAA == CGCXXABI::RAA_Indirect) { return getIndirectResult(Ty, /*ByVal=*/false, State); } else if (RAA == CGCXXABI::RAA_DirectInMemory) { return getNaturalAlignIndirect(Ty, /*ByRef=*/true); } } if (isAggregateTypeForABI(Ty)) { // Structures with flexible arrays are always indirect. if (RT && RT->getDecl()->hasFlexibleArrayMember()) return getIndirectResult(Ty, /*ByVal=*/true, State); // Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; if (SizeInRegs <= State.FreeRegs) { llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); State.FreeRegs -= SizeInRegs; return ABIArgInfo::getDirectInReg(Result); } else { State.FreeRegs = 0; } return getIndirectResult(Ty, true, State); } // Treat an enum type as its underlying type. if (const auto *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); bool InReg = shouldUseInReg(Ty, State); if (Ty->isPromotableIntegerType()) { if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getExtend(); } if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getDirect(); } namespace { class LanaiTargetCodeGenInfo : public TargetCodeGenInfo { public: LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new LanaiABIInfo(CGT)) {} }; } //===----------------------------------------------------------------------===// // AMDGPU ABI Implementation //===----------------------------------------------------------------------===// namespace { class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; }; } void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; if (const auto Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) { llvm::Function *F = cast<llvm::Function>(GV); uint32_t NumVGPR = Attr->getNumVGPR(); if (NumVGPR != 0) F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR)); } if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) { llvm::Function *F = cast<llvm::Function>(GV); unsigned NumSGPR = Attr->getNumSGPR(); if (NumSGPR != 0) F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR)); } } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::AMDGPU_KERNEL; } //===----------------------------------------------------------------------===// // SPARC v8 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. // // Ensures that complex values are passed in registers. // namespace { class SparcV8ABIInfo : public DefaultABIInfo { public: SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override; }; } // end anonymous namespace ABIArgInfo SparcV8ABIInfo::classifyReturnType(QualType Ty) const { if (Ty->isAnyComplexType()) { return ABIArgInfo::getDirect(); } else { return DefaultABIInfo::classifyReturnType(Ty); } } void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &Arg : FI.arguments()) Arg.info = classifyArgumentType(Arg.type); } namespace { class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV8TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new SparcV8ABIInfo(CGT)) {} }; } // end anonymous namespace //===----------------------------------------------------------------------===// // SPARC v9 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1. // // Function arguments are mapped to a nominal "parameter array" and promoted to // registers depending on their type. Each argument occupies 8 or 16 bytes in // the array; structs larger than 16 bytes are passed indirectly. // // One case requires special care: // // struct mixed { // int i; // float f; // }; // // When a struct mixed is passed by value, it only occupies 8 bytes in the // parameter array, but the int is passed in an integer register, and the float // is passed in a floating point register. This is represented as two arguments // with the LLVM IR inreg attribute: // // declare void f(i32 inreg %i, float inreg %f) // // The code generator will only allocate 4 bytes from the parameter array for // the inreg arguments. All other arguments are allocated a multiple of 8 // bytes. // namespace { class SparcV9ABIInfo : public ABIInfo { public: SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} private: ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; // Coercion type builder for structs passed in registers. The coercion type // serves two purposes: // // 1. Pad structs to a multiple of 64 bits, so they are passed 'left-aligned' // in registers. // 2. Expose aligned floating point elements as first-level elements, so the // code generator knows to pass them in floating point registers. // // We also compute the InReg flag which indicates that the struct contains // aligned 32-bit floats. // struct CoerceBuilder { llvm::LLVMContext &Context; const llvm::DataLayout &DL; SmallVector<llvm::Type*, 8> Elems; uint64_t Size; bool InReg; CoerceBuilder(llvm::LLVMContext &c, const llvm::DataLayout &dl) : Context(c), DL(dl), Size(0), InReg(false) {} // Pad Elems with integers until Size is ToSize. void pad(uint64_t ToSize) { assert(ToSize >= Size && "Cannot remove elements"); if (ToSize == Size) return; // Finish the current 64-bit word. uint64_t Aligned = llvm::alignTo(Size, 64); if (Aligned > Size && Aligned <= ToSize) { Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size)); Size = Aligned; } // Add whole 64-bit words. while (Size + 64 <= ToSize) { Elems.push_back(llvm::Type::getInt64Ty(Context)); Size += 64; } // Final in-word padding. if (Size < ToSize) { Elems.push_back(llvm::IntegerType::get(Context, ToSize - Size)); Size = ToSize; } } // Add a floating point element at Offset. void addFloat(uint64_t Offset, llvm::Type *Ty, unsigned Bits) { // Unaligned floats are treated as integers. if (Offset % Bits) return; // The InReg flag is only required if there are any floats < 64 bits. if (Bits < 64) InReg = true; pad(Offset); Elems.push_back(Ty); Size = Offset + Bits; } // Add a struct type to the coercion type, starting at Offset (in bits).
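// Illustrative note (not in the original source): for the 'struct mixed'
// example above, the integer member falls through the switch in addStruct()
// and is covered by a padding i32 from pad(), addFloat() then appends the
// 32-bit float and sets InReg, and the resulting coercion type
// { i32, float } yields the two inreg arguments shown in the declare line.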
void addStruct(uint64_t Offset, llvm::StructType *StrTy) { const llvm::StructLayout *Layout = DL.getStructLayout(StrTy); for (unsigned i = 0, e = StrTy->getNumElements(); i != e; ++i) { llvm::Type *ElemTy = StrTy->getElementType(i); uint64_t ElemOffset = Offset + Layout->getElementOffsetInBits(i); switch (ElemTy->getTypeID()) { case llvm::Type::StructTyID: addStruct(ElemOffset, cast<llvm::StructType>(ElemTy)); break; case llvm::Type::FloatTyID: addFloat(ElemOffset, ElemTy, 32); break; case llvm::Type::DoubleTyID: addFloat(ElemOffset, ElemTy, 64); break; case llvm::Type::FP128TyID: addFloat(ElemOffset, ElemTy, 128); break; case llvm::Type::PointerTyID: if (ElemOffset % 64 == 0) { pad(ElemOffset); Elems.push_back(ElemTy); Size += 64; } break; default: break; } } } // Check if Ty is a usable substitute for the coercion type. bool isUsableType(llvm::StructType *Ty) const { return llvm::makeArrayRef(Elems) == Ty->elements(); } // Get the coercion type as a literal struct type. llvm::Type *getType() const { if (Elems.size() == 1) return Elems.front(); else return llvm::StructType::get(Context, Elems); } }; }; } // end anonymous namespace ABIArgInfo SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); uint64_t Size = getContext().getTypeSize(Ty); // Anything too big to fit in registers is passed with an explicit indirect // pointer / sret pointer. if (Size > SizeLimit) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); // Integer types smaller than a register are extended. if (Size < 64 && Ty->isIntegerType()) return ABIArgInfo::getExtend(); // Other non-aggregates go in registers. if (!isAggregateTypeForABI(Ty)) return ABIArgInfo::getDirect(); // If a C++ object has either a non-trivial copy constructor or a non-trivial // destructor, it is passed with an explicit indirect pointer / sret pointer. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // This is a small aggregate type that should be passed in registers. // Build a coercion type from the LLVM struct type. llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)); if (!StrTy) return ABIArgInfo::getDirect(); CoerceBuilder CB(getVMContext(), getDataLayout()); CB.addStruct(0, StrTy); CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64)); // Try to use the original type for coercion. llvm::Type *CoerceTy = CB.isUsableType(StrTy) ?
StrTy : CB.getType(); if (CB.InReg) return ABIArgInfo::getDirectInReg(CoerceTy); else return ABIArgInfo::getDirect(CoerceTy); } Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { ABIArgInfo AI = classifyType(Ty, 16 * 8); llvm::Type *ArgTy = CGT.ConvertType(Ty); if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) AI.setCoerceToType(ArgTy); CharUnits SlotSize = CharUnits::fromQuantity(8); CGBuilderTy &Builder = CGF.Builder; Address Addr(Builder.CreateLoad(VAListAddr, "ap.cur"), SlotSize); llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); auto TypeInfo = getContext().getTypeInfoInChars(Ty); Address ArgAddr = Address::invalid(); CharUnits Stride; switch (AI.getKind()) { case ABIArgInfo::Expand: case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Extend: { Stride = SlotSize; CharUnits Offset = SlotSize - TypeInfo.first; ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend"); break; } case ABIArgInfo::Direct: { auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType()); Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize); ArgAddr = Addr; break; } case ABIArgInfo::Indirect: Stride = SlotSize; ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect"); ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), TypeInfo.second); break; case ABIArgInfo::Ignore: return Address(llvm::UndefValue::get(ArgPtrTy), TypeInfo.second); } // Update VAList. llvm::Value *NextPtr = Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), Stride, "ap.next"); Builder.CreateStore(NextPtr, VAListAddr); return Builder.CreateBitCast(ArgAddr, ArgPtrTy, "arg.addr"); } void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8); for (auto &I : FI.arguments()) I.info = classifyType(I.type, 16 * 8); } namespace { class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV9TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new SparcV9ABIInfo(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 14; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // end anonymous namespace bool SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { // This is calculated from the LLVM and GCC tables and verified // against gcc output. AFAIK all ABIs use the same encoding. 
CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::IntegerType *i8 = CGF.Int8Ty; llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); // 0-31: the 8-byte general-purpose registers AssignToArrayRange(Builder, Address, Eight8, 0, 31); // 32-63: f0-31, the 4-byte floating-point registers AssignToArrayRange(Builder, Address, Four8, 32, 63); // Y = 64 // PSR = 65 // WIM = 66 // TBR = 67 // PC = 68 // NPC = 69 // FSR = 70 // CSR = 71 AssignToArrayRange(Builder, Address, Eight8, 64, 71); // 72-87: d0-15, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 72, 87); return false; } //===----------------------------------------------------------------------===// // XCore ABI Implementation //===----------------------------------------------------------------------===// namespace { /// A SmallStringEnc instance is used to build up the TypeString by passing /// it by reference between functions that append to it. typedef llvm::SmallString<128> SmallStringEnc; /// TypeStringCache caches the meta encodings of Types. /// /// The reason for caching TypeStrings is twofold: /// 1. To cache a type's encoding for later uses; /// 2. As a means to break recursive member type inclusion. /// /// A cache Entry can have a Status of: /// NonRecursive: The type encoding is not recursive; /// Recursive: The type encoding is recursive; /// Incomplete: An incomplete TypeString; /// IncompleteUsed: An incomplete TypeString that has been used in a /// Recursive type encoding. /// /// A NonRecursive entry will have all of its sub-members expanded as fully /// as possible. Whilst it may contain types which are recursive, the type /// itself is not recursive and thus its encoding may be safely used whenever /// the type is encountered. /// /// A Recursive entry will have all of its sub-members expanded as fully as /// possible. The type itself is recursive and it may contain other types which /// are recursive. The Recursive encoding must not be used during the expansion /// of a recursive type's recursive branch. For simplicity the code uses /// IncompleteCount to reject all usage of Recursive encodings for member types. /// /// An Incomplete entry is always a RecordType and only encodes its /// identifier e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and /// are placed into the cache during type expansion as a means to identify and /// handle recursive inclusion of types as sub-members. If there is recursion /// the entry becomes IncompleteUsed. /// /// During the expansion of a RecordType's members: /// /// If the cache contains a NonRecursive encoding for the member type, the /// cached encoding is used; /// /// If the cache contains a Recursive encoding for the member type, the /// cached encoding is 'Swapped' out, as it may be incorrect, and... /// /// If the member is a RecordType, an Incomplete encoding is placed into the /// cache to break potential recursive inclusion of itself as a sub-member; /// /// Once a member RecordType has been expanded, its temporary incomplete /// entry is removed from the cache. If a Recursive encoding was swapped out /// it is swapped back in; /// /// If an incomplete entry is used to expand a sub-member, the incomplete /// entry is marked as IncompleteUsed.
The cache keeps count of how many /// IncompleteUsed entries it currently contains in IncompleteUsedCount; /// /// If a member's encoding is found to be a NonRecursive or Recursive viz: /// IncompleteUsedCount==0, the member's encoding is added to the cache. /// Else the member is part of a recursive type and thus the recursion has /// been exited too soon for the encoding to be correct for the member. /// class TypeStringCache { enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed}; struct Entry { std::string Str; // The encoded TypeString for the type. enum Status State; // Information about the encoding in 'Str'. std::string Swapped; // A temporary placeholder for a Recursive encoding // during the expansion of RecordType's members. }; std::map<const IdentifierInfo *, struct Entry> Map; unsigned IncompleteCount; // Number of Incomplete entries in the Map. unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map. public: TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {} void addIncomplete(const IdentifierInfo *ID, std::string StubEnc); bool removeIncomplete(const IdentifierInfo *ID); void addIfComplete(const IdentifierInfo *ID, StringRef Str, bool IsRecursive); StringRef lookupStr(const IdentifierInfo *ID); }; /// TypeString encodings for enum & union fields must be ordered. /// FieldEncoding is a helper for this ordering process. class FieldEncoding { bool HasName; std::string Enc; public: FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {} StringRef str() {return Enc.c_str();} bool operator<(const FieldEncoding &rhs) const { if (HasName != rhs.HasName) return HasName; return Enc < rhs.Enc; } }; class XCoreABIInfo : public DefaultABIInfo { public: XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class XCoreTargetCodeGenInfo : public TargetCodeGenInfo { mutable TypeStringCache TSC; public: XCoreTargetCodeGenInfo(CodeGenTypes &CGT) :TargetCodeGenInfo(new XCoreABIInfo(CGT)) {} void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; } // End anonymous namespace. // TODO: this implementation is likely now redundant with the default // EmitVAArg. Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CGBuilderTy &Builder = CGF.Builder; // Get the VAList. CharUnits SlotSize = CharUnits::fromQuantity(4); Address AP(Builder.CreateLoad(VAListAddr), SlotSize); // Handle the argument.
ABIArgInfo AI = classifyArgumentType(Ty); CharUnits TypeAlign = getContext().getTypeAlignInChars(Ty); llvm::Type *ArgTy = CGT.ConvertType(Ty); if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) AI.setCoerceToType(ArgTy); llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); Address Val = Address::invalid(); CharUnits ArgSize = CharUnits::Zero(); switch (AI.getKind()) { case ABIArgInfo::Expand: case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Ignore: Val = Address(llvm::UndefValue::get(ArgPtrTy), TypeAlign); ArgSize = CharUnits::Zero(); break; case ABIArgInfo::Extend: case ABIArgInfo::Direct: Val = Builder.CreateBitCast(AP, ArgPtrTy); ArgSize = CharUnits::fromQuantity( getDataLayout().getTypeAllocSize(AI.getCoerceToType())); ArgSize = ArgSize.alignTo(SlotSize); break; case ABIArgInfo::Indirect: Val = Builder.CreateElementBitCast(AP, ArgPtrTy); Val = Address(Builder.CreateLoad(Val), TypeAlign); ArgSize = SlotSize; break; } // Increment the VAList. if (!ArgSize.isZero()) { llvm::Value *APN = Builder.CreateConstInBoundsByteGEP(AP.getPointer(), ArgSize); Builder.CreateStore(APN, VAListAddr); } return Val; } /// During the expansion of a RecordType, an incomplete TypeString is placed /// into the cache as a means to identify and break recursion. /// If there is a Recursive encoding in the cache, it is swapped out and will /// be reinserted by removeIncomplete(). /// All other types of encoding should have been used rather than arriving here. void TypeStringCache::addIncomplete(const IdentifierInfo *ID, std::string StubEnc) { if (!ID) return; Entry &E = Map[ID]; assert( (E.Str.empty() || E.State == Recursive) && "Incorrect use of addIncomplete"); assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()"); E.Swapped.swap(E.Str); // swap out the Recursive E.Str.swap(StubEnc); E.State = Incomplete; ++IncompleteCount; } /// Once the RecordType has been expanded, the temporary incomplete TypeString /// must be removed from the cache. /// If a Recursive was swapped out by addIncomplete(), it will be replaced. /// Returns true if the RecordType was defined recursively. bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) { if (!ID) return false; auto I = Map.find(ID); assert(I != Map.end() && "Entry not present"); Entry &E = I->second; assert( (E.State == Incomplete || E.State == IncompleteUsed) && "Entry must be an incomplete type"); bool IsRecursive = false; if (E.State == IncompleteUsed) { // We made use of our Incomplete encoding, thus we are recursive. IsRecursive = true; --IncompleteUsedCount; } if (E.Swapped.empty()) Map.erase(I); else { // Swap the Recursive back. E.Swapped.swap(E.Str); E.Swapped.clear(); E.State = Recursive; } --IncompleteCount; return IsRecursive; } /// Add the encoded TypeString to the cache only if it is NonRecursive or /// Recursive (viz: all sub-members were expanded as fully as possible). void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str, bool IsRecursive) { if (!ID || IncompleteUsedCount) return; // No key or it is an incomplete sub-type so don't add. Entry &E = Map[ID]; if (IsRecursive && !E.Str.empty()) { assert(E.State==Recursive && E.Str.size() == Str.size() && "This is not the same Recursive entry"); // The parent container was not recursive after all, so we could have used // this Recursive sub-member entry after all, but we assumed the worst when // we started viz: IncompleteCount!=0.
return; } assert(E.Str.empty() && "Entry already present"); E.Str = Str.str(); E.State = IsRecursive? Recursive : NonRecursive; } /// Return a cached TypeString encoding for the ID. If there isn't one, or we /// are recursively expanding a type (IncompleteCount != 0) and the cached /// encoding is Recursive, return an empty StringRef. StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) { if (!ID) return StringRef(); // We have no key. auto I = Map.find(ID); if (I == Map.end()) return StringRef(); // We have no encoding. Entry &E = I->second; if (E.State == Recursive && IncompleteCount) return StringRef(); // We don't use Recursive encodings for member types. if (E.State == Incomplete) { // The incomplete type is being used to break out of recursion. E.State = IncompleteUsed; ++IncompleteUsedCount; } return E.Str.c_str(); } /// The XCore ABI includes a type information section that communicates symbol /// type information to the linker. The linker uses this information to verify /// safety/correctness of things such as array bounds and pointers et al. /// The ABI only requires C (and XC) language modules to emit TypeStrings. /// This type information (TypeString) is emitted into metadata for all global /// symbols: definitions, declarations, functions & variables. /// /// The TypeString carries type, qualifier, name, size & value details. /// Please see 'Tools Development Guide' section 2.16.2 for format details: /// https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf /// The output is tested by test/CodeGen/xcore-stringtype.c. /// static bool getTypeString(SmallStringEnc &Enc, const Decl *D, CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); /// XCore uses emitTargetMD to emit TypeString metadata for global symbols. void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { SmallStringEnc Enc; if (getTypeString(Enc, D, CGM, TSC)) { llvm::LLVMContext &Ctx = CGM.getModule().getContext(); llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Enc.str())}; llvm::NamedMDNode *MD = CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings"); MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } } //===----------------------------------------------------------------------===// // SPIR ABI Implementation //===----------------------------------------------------------------------===// namespace { class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; }; } // End anonymous namespace. /// Emit SPIR specific metadata: OpenCL and SPIR version. void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { llvm::LLVMContext &Ctx = CGM.getModule().getContext(); llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); llvm::Module &M = CGM.getModule(); // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the // opencl.spir.version named metadata.
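// Illustrative output (not in the original source): the code below leaves
// the module with named metadata of the form
//   !opencl.spir.version = !{!0}
//   !0 = !{i32 2, i32 0}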
llvm::Metadata *SPIRVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 2)), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 0))}; llvm::NamedMDNode *SPIRVerMD = M.getOrInsertNamedMetadata("opencl.spir.version"); SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the // opencl.ocl.version named metadata node. llvm::Metadata *OCLVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))}; llvm::NamedMDNode *OCLVerMD = M.getOrInsertNamedMetadata("opencl.ocl.version"); OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); } unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::SPIR_KERNEL; } static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); /// Helper function for appendRecordType(). /// Builds a SmallVector containing the encoded field types in declaration /// order. static bool extractFieldType(SmallVectorImpl<FieldEncoding> &FE, const RecordDecl *RD, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { for (const auto *Field : RD->fields()) { SmallStringEnc Enc; Enc += "m("; Enc += Field->getName(); Enc += "){"; if (Field->isBitField()) { Enc += "b("; llvm::raw_svector_ostream OS(Enc); OS << Field->getBitWidthValue(CGM.getContext()); Enc += ':'; } if (!appendType(Enc, Field->getType(), CGM, TSC)) return false; if (Field->isBitField()) Enc += ')'; Enc += '}'; FE.emplace_back(!Field->getName().empty(), Enc); } return true; } /// Appends structure and union types to Enc and adds encoding to cache. /// Recursively calls appendType (via extractFieldType) for each field. /// Union types have their fields ordered according to the ABI. static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC, const IdentifierInfo *ID) { // Append the cached TypeString if we have one. StringRef TypeString = TSC.lookupStr(ID); if (!TypeString.empty()) { Enc += TypeString; return true; } // Start to emit an incomplete TypeString. size_t Start = Enc.size(); Enc += (RT->isUnionType()? 'u' : 's'); Enc += '('; if (ID) Enc += ID->getName(); Enc += "){"; // We collect all encoded fields and order as necessary. bool IsRecursive = false; const RecordDecl *RD = RT->getDecl()->getDefinition(); if (RD && !RD->field_empty()) { // An incomplete TypeString stub is placed in the cache for this RecordType // so that recursive calls to this RecordType will use it whilst building a // complete TypeString for this RecordType. SmallVector<FieldEncoding, 16> FE; std::string StubEnc(Enc.substr(Start).str()); StubEnc += '}'; // StubEnc now holds a valid incomplete TypeString. TSC.addIncomplete(ID, std::move(StubEnc)); if (!extractFieldType(FE, RD, CGM, TSC)) { (void) TSC.removeIncomplete(ID); return false; } IsRecursive = TSC.removeIncomplete(ID); // The ABI requires unions to be sorted but not structures. // See FieldEncoding::operator< for sort algorithm. if (RT->isUnionType()) std::sort(FE.begin(), FE.end()); // We can now complete the TypeString. unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) Enc += ','; Enc += FE[I].str(); } } Enc += '}'; TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive); return true; } /// Appends enum types to Enc and adds the encoding to the cache.
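/// Illustrative example (not in the original source): 'enum E { A = 1, B = 2 };'
/// encodes as "e(E){m(A){1},m(B){2}}", with the enumerators ordered via
/// FieldEncoding's comparison before being joined with commas.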
static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET, TypeStringCache &TSC, const IdentifierInfo *ID) { // Append the cached TypeString if we have one. StringRef TypeString = TSC.lookupStr(ID); if (!TypeString.empty()) { Enc += TypeString; return true; } size_t Start = Enc.size(); Enc += "e("; if (ID) Enc += ID->getName(); Enc += "){"; // We collect all encoded enumerations and order them alphanumerically. if (const EnumDecl *ED = ET->getDecl()->getDefinition()) { SmallVector<FieldEncoding, 16> FE; for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E; ++I) { SmallStringEnc EnumEnc; EnumEnc += "m("; EnumEnc += I->getName(); EnumEnc += "){"; I->getInitVal().toString(EnumEnc); EnumEnc += '}'; FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc)); } std::sort(FE.begin(), FE.end()); unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) Enc += ','; Enc += FE[I].str(); } } Enc += '}'; TSC.addIfComplete(ID, Enc.substr(Start), false); return true; } /// Appends type's qualifier to Enc. /// This is done prior to appending the type's encoding. static void appendQualifier(SmallStringEnc &Enc, QualType QT) { // Qualifiers are emitted in alphabetical order. static const char *const Table[]={"","c:","r:","cr:","v:","cv:","rv:","crv:"}; int Lookup = 0; if (QT.isConstQualified()) Lookup += 1<<0; if (QT.isRestrictQualified()) Lookup += 1<<1; if (QT.isVolatileQualified()) Lookup += 1<<2; Enc += Table[Lookup]; } /// Appends built-in types to Enc. static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) { const char *EncType; switch (BT->getKind()) { case BuiltinType::Void: EncType = "0"; break; case BuiltinType::Bool: EncType = "b"; break; case BuiltinType::Char_U: EncType = "uc"; break; case BuiltinType::UChar: EncType = "uc"; break; case BuiltinType::SChar: EncType = "sc"; break; case BuiltinType::UShort: EncType = "us"; break; case BuiltinType::Short: EncType = "ss"; break; case BuiltinType::UInt: EncType = "ui"; break; case BuiltinType::Int: EncType = "si"; break; case BuiltinType::ULong: EncType = "ul"; break; case BuiltinType::Long: EncType = "sl"; break; case BuiltinType::ULongLong: EncType = "ull"; break; case BuiltinType::LongLong: EncType = "sll"; break; case BuiltinType::Float: EncType = "ft"; break; case BuiltinType::Double: EncType = "d"; break; case BuiltinType::LongDouble: EncType = "ld"; break; default: return false; } Enc += EncType; return true; } /// Appends a pointer encoding to Enc before calling appendType for the pointee. static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { Enc += "p("; if (!appendType(Enc, PT->getPointeeType(), CGM, TSC)) return false; Enc += ')'; return true; } /// Appends array encoding to Enc before calling appendType for the element. static bool appendArrayType(SmallStringEnc &Enc, QualType QT, const ArrayType *AT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC, StringRef NoSizeEnc) { if (AT->getSizeModifier() != ArrayType::Normal) return false; Enc += "a("; if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) CAT->getSize().toStringUnsigned(Enc); else Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "". Enc += ':'; // The Qualifiers should be attached to the type rather than the array. appendQualifier(Enc, QT); if (!appendType(Enc, AT->getElementType(), CGM, TSC)) return false; Enc += ')'; return true; } /// Appends a function encoding to Enc, calling appendType for the return type /// and the arguments.
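/// Illustrative examples (not in the original source), using the builtin
/// encodings above: 'int f(int)' yields "f{si}(si)", 'int f(void)' yields
/// "f{si}(0)", and a variadic 'int f(int, ...)' yields "f{si}(si,va)".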
static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT,
                               const CodeGen::CodeGenModule &CGM,
                               TypeStringCache &TSC) {
  Enc += "f{";
  if (!appendType(Enc, FT->getReturnType(), CGM, TSC))
    return false;
  Enc += "}(";
  if (const FunctionProtoType *FPT = FT->getAs<FunctionProtoType>()) {
    // N.B. we are only interested in the adjusted param types.
    auto I = FPT->param_type_begin();
    auto E = FPT->param_type_end();
    if (I != E) {
      do {
        if (!appendType(Enc, *I, CGM, TSC))
          return false;
        ++I;
        if (I != E)
          Enc += ',';
      } while (I != E);
      if (FPT->isVariadic())
        Enc += ",va";
    } else {
      if (FPT->isVariadic())
        Enc += "va";
      else
        Enc += '0';
    }
  }
  Enc += ')';
  return true;
}

/// Handles the type's qualifier before dispatching a call to handle specific
/// type encodings.
static bool appendType(SmallStringEnc &Enc, QualType QType,
                       const CodeGen::CodeGenModule &CGM,
                       TypeStringCache &TSC) {

  QualType QT = QType.getCanonicalType();

  if (const ArrayType *AT = QT->getAsArrayTypeUnsafe())
    // The Qualifiers should be attached to the type rather than the array.
    // Thus we don't call appendQualifier() here.
    return appendArrayType(Enc, QT, AT, CGM, TSC, "");

  appendQualifier(Enc, QT);

  if (const BuiltinType *BT = QT->getAs<BuiltinType>())
    return appendBuiltinType(Enc, BT);

  if (const PointerType *PT = QT->getAs<PointerType>())
    return appendPointerType(Enc, PT, CGM, TSC);

  if (const EnumType *ET = QT->getAs<EnumType>())
    return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier());

  if (const RecordType *RT = QT->getAsStructureType())
    return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());

  if (const RecordType *RT = QT->getAsUnionType())
    return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());

  if (const FunctionType *FT = QT->getAs<FunctionType>())
    return appendFunctionType(Enc, FT, CGM, TSC);

  return false;
}

static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
                          CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) {
  if (!D)
    return false;

  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
    if (FD->getLanguageLinkage() != CLanguageLinkage)
      return false;
    return appendType(Enc, FD->getType(), CGM, TSC);
  }

  if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
    if (VD->getLanguageLinkage() != CLanguageLinkage)
      return false;
    QualType QT = VD->getType().getCanonicalType();
    if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) {
      // Global ArrayTypes are given a size of '*' if the size is unknown.
      // The Qualifiers should be attached to the type rather than the array.
      // Thus we don't call appendQualifier() here.
      return appendArrayType(Enc, QT, AT, CGM, TSC, "*");
    }
    return appendType(Enc, QT, CGM, TSC);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// Driver code
//===----------------------------------------------------------------------===//

const llvm::Triple &CodeGenModule::getTriple() const {
  return getTarget().getTriple();
}

bool CodeGenModule::supportsCOMDAT() const {
  return getTriple().supportsCOMDAT();
}

const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
  if (TheTargetCodeGenInfo)
    return *TheTargetCodeGenInfo;

  // Helper to set the unique_ptr while still keeping the return value.
auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & { this->TheTargetCodeGenInfo.reset(P); return *P; }; const llvm::Triple &Triple = getTarget().getTriple(); switch (Triple.getArch()) { default: return SetCGInfo(new DefaultTargetCodeGenInfo(Types)); case llvm::Triple::le32: return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); case llvm::Triple::mips: case llvm::Triple::mipsel: if (Triple.getOS() == llvm::Triple::NaCl) return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true)); case llvm::Triple::mips64: case llvm::Triple::mips64el: return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false)); case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } case llvm::Triple::wasm32: case llvm::Triple::wasm64: return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types)); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { if (Triple.getOS() == llvm::Triple::Win32) { return SetCGInfo( new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP)); } ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; StringRef ABIStr = getTarget().getABI(); if (ABIStr == "apcs-gnu") Kind = ARMABIInfo::APCS; else if (ABIStr == "aapcs16") Kind = ARMABIInfo::AAPCS16_VFP; else if (CodeGenOpts.FloatABI == "hard" || (CodeGenOpts.FloatABI != "soft" && (Triple.getEnvironment() == llvm::Triple::GNUEABIHF || Triple.getEnvironment() == llvm::Triple::MuslEABIHF || Triple.getEnvironment() == llvm::Triple::EABIHF))) Kind = ARMABIInfo::AAPCS_VFP; return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind)); } case llvm::Triple::ppc: return SetCGInfo( new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft")); case llvm::Triple::ppc64: if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; if (getTarget().getABI() == "elfv2") Kind = PPC64_SVR4_ABIInfo::ELFv2; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } else return SetCGInfo(new PPC64TargetCodeGenInfo(Types)); case llvm::Triple::ppc64le: { assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2; if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx") Kind = PPC64_SVR4_ABIInfo::ELFv1; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: return SetCGInfo(new NVPTXTargetCodeGenInfo(Types)); case llvm::Triple::msp430: return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); case llvm::Triple::systemz: { bool HasVector = getTarget().getABI() == "vector"; return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector)); } case llvm::Triple::tce: return SetCGInfo(new TCETargetCodeGenInfo(Types)); case llvm::Triple::x86: { bool IsDarwinVectorABI = Triple.isOSDarwin(); bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, 
        CodeGenOpts);
    bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing();

    if (Triple.getOS() == llvm::Triple::Win32) {
      return SetCGInfo(new WinX86_32TargetCodeGenInfo(
          Types, IsDarwinVectorABI, RetSmallStructInRegABI,
          IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters));
    } else {
      return SetCGInfo(new X86_32TargetCodeGenInfo(
          Types, IsDarwinVectorABI, RetSmallStructInRegABI,
          IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters,
          CodeGenOpts.FloatABI == "soft"));
    }
  }
  case llvm::Triple::x86_64: {
    StringRef ABI = getTarget().getABI();
    X86AVXABILevel AVXLevel = (ABI == "avx512"
                                   ? X86AVXABILevel::AVX512
                                   : ABI == "avx" ? X86AVXABILevel::AVX
                                                  : X86AVXABILevel::None);

    switch (Triple.getOS()) {
    case llvm::Triple::Win32:
      return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel));
    case llvm::Triple::PS4:
      return SetCGInfo(new PS4TargetCodeGenInfo(Types, AVXLevel));
    default:
      return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel));
    }
  }
  case llvm::Triple::hexagon:
    return SetCGInfo(new HexagonTargetCodeGenInfo(Types));
  case llvm::Triple::lanai:
    return SetCGInfo(new LanaiTargetCodeGenInfo(Types));
  case llvm::Triple::r600:
    return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
  case llvm::Triple::amdgcn:
    return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
  case llvm::Triple::sparc:
    return SetCGInfo(new SparcV8TargetCodeGenInfo(Types));
  case llvm::Triple::sparcv9:
    return SetCGInfo(new SparcV9TargetCodeGenInfo(Types));
  case llvm::Triple::xcore:
    return SetCGInfo(new XCoreTargetCodeGenInfo(Types));
  case llvm::Triple::spir:
  case llvm::Triple::spir64:
    return SetCGInfo(new SPIRTargetCodeGenInfo(Types));
  }
}
Index: head/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/Tools.cpp	(revision 309148)
+++ head/contrib/llvm/tools/clang/lib/Driver/Tools.cpp	(revision 309149)
@@ -1,11356 +1,11349 @@
//===--- Tools.cpp - Tools Implementations ----------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "Tools.h"
#include "InputInfo.h"
#include "ToolChains.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/ToolChain.h"
#include "clang/Driver/Util.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetParser.h"

#ifdef LLVM_ON_UNIX
#include <unistd.h> // For getuid().
#endif

using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;

static void handleTargetFeaturesGroup(const ArgList &Args,
                                      std::vector<const char *> &Features,
                                      OptSpecifier Group) {
  for (const Arg *A : Args.filtered(Group)) {
    StringRef Name = A->getOption().getName();
    A->claim();

    // Skip over "-m".
    assert(Name.startswith("m") && "Invalid feature name.");
    Name = Name.substr(1);

    bool IsNegative = Name.startswith("no-");
    if (IsNegative)
      Name = Name.substr(3);
    Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
  }
}

static const char *getSparcAsmModeForCPU(StringRef Name,
                                         const llvm::Triple &Triple) {
  if (Triple.getArch() == llvm::Triple::sparcv9) {
    return llvm::StringSwitch<const char *>(Name)
        .Case("niagara", "-Av9b")
        .Case("niagara2", "-Av9b")
        .Case("niagara3", "-Av9d")
        .Case("niagara4", "-Av9d")
        .Default("-Av9");
  } else {
    return llvm::StringSwitch<const char *>(Name)
        .Case("v8", "-Av8")
        .Case("supersparc", "-Av8")
        .Case("sparclite", "-Asparclite")
        .Case("f934", "-Asparclite")
        .Case("hypersparc", "-Av8")
        .Case("sparclite86x", "-Asparclite")
        .Case("sparclet", "-Asparclet")
        .Case("tsc701", "-Asparclet")
        .Case("v9", "-Av8plus")
        .Case("ultrasparc", "-Av8plus")
        .Case("ultrasparc3", "-Av8plus")
        .Case("niagara", "-Av8plusb")
        .Case("niagara2", "-Av8plusb")
        .Case("niagara3", "-Av8plusd")
        .Case("niagara4", "-Av8plusd")
        .Case("leon2", "-Av8")
        .Case("at697e", "-Av8")
        .Case("at697f", "-Av8")
        .Case("leon3", "-Av8")
        .Case("ut699", "-Av8")
        .Case("gr712rc", "-Av8")
        .Case("leon4", "-Av8")
        .Case("gr740", "-Av8")
        .Default("-Av8");
  }
}

/// CheckPreprocessingOptions - Perform some validation of preprocessing
/// arguments that is shared with gcc.
static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
  if (Arg *A = Args.getLastArg(options::OPT_C, options::OPT_CC)) {
    if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) &&
        !Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) {
      D.Diag(diag::err_drv_argument_only_allowed_with)
          << A->getBaseArg().getAsString(Args)
          << (D.IsCLMode() ? "/E, /P or /EP" : "-E");
    }
  }
}

/// CheckCodeGenerationOptions - Perform some validation of code generation
/// arguments that is shared with gcc.
static void CheckCodeGenerationOptions(const Driver &D, const ArgList &Args) {
  // In gcc, only ARM checks this, but it seems reasonable to check universally.
  if (Args.hasArg(options::OPT_static))
    if (const Arg *A =
            Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic))
      D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
                                                      << "-static";
}

// Add backslashes to escape spaces and other backslashes.
// This is used for the space-separated argument list specified with
// the -dwarf-debug-flags option.
static void EscapeSpacesAndBackslashes(const char *Arg,
                                       SmallVectorImpl<char> &Res) {
  for (; *Arg; ++Arg) {
    switch (*Arg) {
    default:
      break;
    case ' ':
    case '\\':
      Res.push_back('\\');
      break;
    }
    Res.push_back(*Arg);
  }
}

// Quote target names for inclusion in GNU Make dependency files.
// Only the characters '$', '#', ' ', '\t' are quoted.
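// For example (illustrative): QuoteTarget turns "foo bar.o" into
// "foo\ bar.o", "$(OBJ)" into "$$(OBJ)", and "#1.o" into "\#1.o", so GNU
// Make reads the quoted target back as the original file name:
//
//   SmallString<256> Quoted;
//   QuoteTarget("foo bar.o", Quoted); // Quoted == "foo\ bar.o"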
static void QuoteTarget(StringRef Target, SmallVectorImpl &Res) { for (unsigned i = 0, e = Target.size(); i != e; ++i) { switch (Target[i]) { case ' ': case '\t': // Escape the preceding backslashes for (int j = i - 1; j >= 0 && Target[j] == '\\'; --j) Res.push_back('\\'); // Escape the space/tab Res.push_back('\\'); break; case '$': Res.push_back('$'); break; case '#': Res.push_back('\\'); break; default: break; } Res.push_back(Target[i]); } } static void addDirectoryList(const ArgList &Args, ArgStringList &CmdArgs, const char *ArgName, const char *EnvVar) { const char *DirList = ::getenv(EnvVar); bool CombinedArg = false; if (!DirList) return; // Nothing to do. StringRef Name(ArgName); if (Name.equals("-I") || Name.equals("-L")) CombinedArg = true; StringRef Dirs(DirList); if (Dirs.empty()) // Empty string should not add '.'. return; StringRef::size_type Delim; while ((Delim = Dirs.find(llvm::sys::EnvPathSeparator)) != StringRef::npos) { if (Delim == 0) { // Leading colon. if (CombinedArg) { CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + ".")); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back("."); } } else { if (CombinedArg) { CmdArgs.push_back( Args.MakeArgString(std::string(ArgName) + Dirs.substr(0, Delim))); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back(Args.MakeArgString(Dirs.substr(0, Delim))); } } Dirs = Dirs.substr(Delim + 1); } if (Dirs.empty()) { // Trailing colon. if (CombinedArg) { CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + ".")); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back("."); } } else { // Add the last path. if (CombinedArg) { CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + Dirs)); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back(Args.MakeArgString(Dirs)); } } } static void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, const ArgList &Args, ArgStringList &CmdArgs) { const Driver &D = TC.getDriver(); // Add extra linker input arguments which are not treated as inputs // (constructed via -Xarch_). Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input); for (const auto &II : Inputs) { if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType())) // Don't try to pass LLVM inputs unless we have native support. D.Diag(diag::err_drv_no_linker_llvm_support) << TC.getTripleString(); // Add filenames immediately. if (II.isFilename()) { CmdArgs.push_back(II.getFilename()); continue; } // Otherwise, this is a linker input argument. const Arg &A = II.getInputArg(); // Handle reserved library options. if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) TC.AddCXXStdlibLibArgs(Args, CmdArgs); else if (A.getOption().matches(options::OPT_Z_reserved_lib_cckext)) TC.AddCCKextLibArgs(Args, CmdArgs); else if (A.getOption().matches(options::OPT_z)) { // Pass -z prefix for gcc linker compatibility. A.claim(); A.render(Args, CmdArgs); } else { A.renderAsInput(Args, CmdArgs); } } // LIBRARY_PATH - included following the user specified library paths. // and only supported on native toolchains. if (!TC.isCrossCompiling()) addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); } /// \brief Determine whether Objective-C automated reference counting is /// enabled. static bool isObjCAutoRefCount(const ArgList &Args) { return Args.hasFlag(options::OPT_fobjc_arc, options::OPT_fno_objc_arc, false); } /// \brief Determine whether we are linking the ObjC runtime. 
static bool isObjCRuntimeLinked(const ArgList &Args) {
  if (isObjCAutoRefCount(Args)) {
    Args.ClaimAllArgs(options::OPT_fobjc_link_runtime);
    return true;
  }
  return Args.hasArg(options::OPT_fobjc_link_runtime);
}

static bool forwardToGCC(const Option &O) {
  // Don't forward inputs from the original command line. They are added from
  // InputInfoList.
  return O.getKind() != Option::InputClass &&
         !O.hasFlag(options::DriverOption) && !O.hasFlag(options::LinkerInput);
}

/// Add the C++ include args of other offloading toolchains. If this is a host
/// job, the device toolchains are added. If this is a device job, the host
/// toolchains will be added.
static void addExtraOffloadCXXStdlibIncludeArgs(Compilation &C,
                                                const JobAction &JA,
                                                const ArgList &Args,
                                                ArgStringList &CmdArgs) {
  if (JA.isHostOffloading(Action::OFK_Cuda))
    C.getSingleOffloadToolChain<Action::OFK_Cuda>()
        ->AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
  else if (JA.isDeviceOffloading(Action::OFK_Cuda))
    C.getSingleOffloadToolChain<Action::OFK_Host>()
        ->AddClangCXXStdlibIncludeArgs(Args, CmdArgs);

  // TODO: Add support for other programming models here.
}

/// Add the include args that are specific of each offloading programming model.
static void addExtraOffloadSpecificIncludeArgs(Compilation &C,
                                               const JobAction &JA,
                                               const ArgList &Args,
                                               ArgStringList &CmdArgs) {
  if (JA.isHostOffloading(Action::OFK_Cuda))
    C.getSingleOffloadToolChain<Action::OFK_Cuda>()->AddCudaIncludeArgs(
        Args, CmdArgs);
  else if (JA.isDeviceOffloading(Action::OFK_Cuda))
    C.getSingleOffloadToolChain<Action::OFK_Host>()->AddCudaIncludeArgs(
        Args, CmdArgs);

  // TODO: Add support for other programming models here.
}

void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
                                    const Driver &D, const ArgList &Args,
                                    ArgStringList &CmdArgs,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs) const {
  Arg *A;
  const bool IsIAMCU = getToolChain().getTriple().isOSIAMCU();

  CheckPreprocessingOptions(D, Args);

  Args.AddLastArg(CmdArgs, options::OPT_C);
  Args.AddLastArg(CmdArgs, options::OPT_CC);

  // Handle dependency file generation.
  if ((A = Args.getLastArg(options::OPT_M, options::OPT_MM)) ||
      (A = Args.getLastArg(options::OPT_MD)) ||
      (A = Args.getLastArg(options::OPT_MMD))) {
    // Determine the output location.
    const char *DepFile;
    if (Arg *MF = Args.getLastArg(options::OPT_MF)) {
      DepFile = MF->getValue();
      C.addFailureResultFile(DepFile, &JA);
    } else if (Output.getType() == types::TY_Dependencies) {
      DepFile = Output.getFilename();
    } else if (A->getOption().matches(options::OPT_M) ||
               A->getOption().matches(options::OPT_MM)) {
      DepFile = "-";
    } else {
      DepFile = getDependencyFileName(Args, Inputs);
      C.addFailureResultFile(DepFile, &JA);
    }
    CmdArgs.push_back("-dependency-file");
    CmdArgs.push_back(DepFile);

    // Add a default target if one wasn't specified.
    if (!Args.hasArg(options::OPT_MT) && !Args.hasArg(options::OPT_MQ)) {
      const char *DepTarget;

      // If user provided -o, that is the dependency target, except
      // when we are only generating a dependency file.
      Arg *OutputOpt = Args.getLastArg(options::OPT_o);
      if (OutputOpt && Output.getType() != types::TY_Dependencies) {
        DepTarget = OutputOpt->getValue();
      } else {
        // Otherwise derive from the base input.
        //
        // FIXME: This should use the computed output file location.
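        // For example (illustrative): a base input of "src/foo.c" with no -o
        // yields the dependency target "foo.o" -- replace_extension() gives
        // "src/foo.o", and path::filename() then drops the directory part.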
SmallString<128> P(Inputs[0].getBaseInput()); llvm::sys::path::replace_extension(P, "o"); DepTarget = Args.MakeArgString(llvm::sys::path::filename(P)); } CmdArgs.push_back("-MT"); SmallString<128> Quoted; QuoteTarget(DepTarget, Quoted); CmdArgs.push_back(Args.MakeArgString(Quoted)); } if (A->getOption().matches(options::OPT_M) || A->getOption().matches(options::OPT_MD)) CmdArgs.push_back("-sys-header-deps"); if ((isa(JA) && !Args.hasArg(options::OPT_fno_module_file_deps)) || Args.hasArg(options::OPT_fmodule_file_deps)) CmdArgs.push_back("-module-file-deps"); } if (Args.hasArg(options::OPT_MG)) { if (!A || A->getOption().matches(options::OPT_MD) || A->getOption().matches(options::OPT_MMD)) D.Diag(diag::err_drv_mg_requires_m_or_mm); CmdArgs.push_back("-MG"); } Args.AddLastArg(CmdArgs, options::OPT_MP); Args.AddLastArg(CmdArgs, options::OPT_MV); // Convert all -MQ args to -MT for (const Arg *A : Args.filtered(options::OPT_MT, options::OPT_MQ)) { A->claim(); if (A->getOption().matches(options::OPT_MQ)) { CmdArgs.push_back("-MT"); SmallString<128> Quoted; QuoteTarget(A->getValue(), Quoted); CmdArgs.push_back(Args.MakeArgString(Quoted)); // -MT flag - no change } else { A->render(Args, CmdArgs); } } // Add -i* options, and automatically translate to // -include-pch/-include-pth for transparent PCH support. It's // wonky, but we include looking for .gch so we can support seamless // replacement into a build system already set up to be generating // .gch files. int YcIndex = -1, YuIndex = -1; { int AI = -1; const Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc); const Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu); for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) { // Walk the whole i_Group and skip non "-include" flags so that the index // here matches the index in the next loop below. ++AI; if (!A->getOption().matches(options::OPT_include)) continue; if (YcArg && strcmp(A->getValue(), YcArg->getValue()) == 0) YcIndex = AI; if (YuArg && strcmp(A->getValue(), YuArg->getValue()) == 0) YuIndex = AI; } } if (isa(JA) && YcIndex != -1) { Driver::InputList Inputs; D.BuildInputs(getToolChain(), C.getArgs(), Inputs); assert(Inputs.size() == 1 && "Need one input when building pch"); CmdArgs.push_back(Args.MakeArgString(Twine("-find-pch-source=") + Inputs[0].second->getValue())); } bool RenderedImplicitInclude = false; int AI = -1; for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) { ++AI; if (getToolChain().getDriver().IsCLMode() && A->getOption().matches(options::OPT_include)) { // In clang-cl mode, /Ycfoo.h means that all code up to a foo.h // include is compiled into foo.h, and everything after goes into // the .obj file. /Yufoo.h means that all includes prior to and including // foo.h are completely skipped and replaced with a use of the pch file // for foo.h. (Each flag can have at most one value, multiple /Yc flags // just mean that the last one wins.) If /Yc and /Yu are both present // and refer to the same file, /Yc wins. // Note that OPT__SLASH_FI gets mapped to OPT_include. // FIXME: The code here assumes that /Yc and /Yu refer to the same file. // cl.exe seems to support both flags with different values, but that // seems strange (which flag does /Fp now refer to?), so don't implement // that until someone needs it. int PchIndex = YcIndex != -1 ? YcIndex : YuIndex; if (PchIndex != -1) { if (isa(JA)) { // When building the pch, skip all includes after the pch. 
assert(YcIndex != -1 && PchIndex == YcIndex); if (AI >= YcIndex) continue; } else { // When using the pch, skip all includes prior to the pch. if (AI < PchIndex) { A->claim(); continue; } if (AI == PchIndex) { A->claim(); CmdArgs.push_back("-include-pch"); CmdArgs.push_back( Args.MakeArgString(D.GetClPchPath(C, A->getValue()))); continue; } } } } else if (A->getOption().matches(options::OPT_include)) { // Handling of gcc-style gch precompiled headers. bool IsFirstImplicitInclude = !RenderedImplicitInclude; RenderedImplicitInclude = true; // Use PCH if the user requested it. bool UsePCH = D.CCCUsePCH; bool FoundPTH = false; bool FoundPCH = false; SmallString<128> P(A->getValue()); // We want the files to have a name like foo.h.pch. Add a dummy extension // so that replace_extension does the right thing. P += ".dummy"; if (UsePCH) { llvm::sys::path::replace_extension(P, "pch"); if (llvm::sys::fs::exists(P)) FoundPCH = true; } if (!FoundPCH) { llvm::sys::path::replace_extension(P, "pth"); if (llvm::sys::fs::exists(P)) FoundPTH = true; } if (!FoundPCH && !FoundPTH) { llvm::sys::path::replace_extension(P, "gch"); if (llvm::sys::fs::exists(P)) { FoundPCH = UsePCH; FoundPTH = !UsePCH; } } if (FoundPCH || FoundPTH) { if (IsFirstImplicitInclude) { A->claim(); if (UsePCH) CmdArgs.push_back("-include-pch"); else CmdArgs.push_back("-include-pth"); CmdArgs.push_back(Args.MakeArgString(P)); continue; } else { // Ignore the PCH if not first on command line and emit warning. D.Diag(diag::warn_drv_pch_not_first_include) << P << A->getAsString(Args); } } } else if (A->getOption().matches(options::OPT_isystem_after)) { // Handling of paths which must come late. These entries are handled by // the toolchain itself after the resource dir is inserted in the right // search order. // Do not claim the argument so that the use of the argument does not // silently go unnoticed on toolchains which do not honour the option. continue; } // Not translated, render as usual. A->claim(); A->render(Args, CmdArgs); } Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I_Group, options::OPT_F, options::OPT_index_header_map}); // Add -Wp, and -Xpreprocessor if using the preprocessor. // FIXME: There is a very unfortunate problem here, some troubled // souls abuse -Wp, to pass preprocessor options in gcc syntax. To // really support that we would have to parse and then translate // those options. :( Args.AddAllArgValues(CmdArgs, options::OPT_Wp_COMMA, options::OPT_Xpreprocessor); // -I- is a deprecated GCC feature, reject it. if (Arg *A = Args.getLastArg(options::OPT_I_)) D.Diag(diag::err_drv_I_dash_not_supported) << A->getAsString(Args); // If we have a --sysroot, and don't have an explicit -isysroot flag, add an // -isysroot to the CC1 invocation. StringRef sysroot = C.getSysRoot(); if (sysroot != "") { if (!Args.hasArg(options::OPT_isysroot)) { CmdArgs.push_back("-isysroot"); CmdArgs.push_back(C.getArgs().MakeArgString(sysroot)); } } // Parse additional include paths from environment variables. // FIXME: We should probably sink the logic for handling these from the // frontend into the driver. It will allow deleting 4 otherwise unused flags. // CPATH - included following the user specified includes (but prior to // builtin and standard includes). addDirectoryList(Args, CmdArgs, "-I", "CPATH"); // C_INCLUDE_PATH - system includes enabled when compiling C. addDirectoryList(Args, CmdArgs, "-c-isystem", "C_INCLUDE_PATH"); // CPLUS_INCLUDE_PATH - system includes enabled when compiling C++. 
addDirectoryList(Args, CmdArgs, "-cxx-isystem", "CPLUS_INCLUDE_PATH"); // OBJC_INCLUDE_PATH - system includes enabled when compiling ObjC. addDirectoryList(Args, CmdArgs, "-objc-isystem", "OBJC_INCLUDE_PATH"); // OBJCPLUS_INCLUDE_PATH - system includes enabled when compiling ObjC++. addDirectoryList(Args, CmdArgs, "-objcxx-isystem", "OBJCPLUS_INCLUDE_PATH"); // While adding the include arguments, we also attempt to retrieve the // arguments of related offloading toolchains or arguments that are specific // of an offloading programming model. // Add C++ include arguments, if needed. if (types::isCXX(Inputs[0].getType())) { getToolChain().AddClangCXXStdlibIncludeArgs(Args, CmdArgs); addExtraOffloadCXXStdlibIncludeArgs(C, JA, Args, CmdArgs); } // Add system include arguments for all targets but IAMCU. if (!IsIAMCU) { getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs); addExtraOffloadCXXStdlibIncludeArgs(C, JA, Args, CmdArgs); } else { // For IAMCU add special include arguments. getToolChain().AddIAMCUIncludeArgs(Args, CmdArgs); } // Add offload include arguments, if needed. addExtraOffloadSpecificIncludeArgs(C, JA, Args, CmdArgs); } // FIXME: Move to target hook. static bool isSignedCharDefault(const llvm::Triple &Triple) { switch (Triple.getArch()) { default: return true; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: if (Triple.isOSDarwin() || Triple.isOSWindows()) return true; return false; case llvm::Triple::ppc: case llvm::Triple::ppc64: if (Triple.isOSDarwin()) return true; return false; case llvm::Triple::hexagon: case llvm::Triple::ppc64le: case llvm::Triple::systemz: case llvm::Triple::xcore: return false; } } static bool isNoCommonDefault(const llvm::Triple &Triple) { switch (Triple.getArch()) { default: return false; case llvm::Triple::xcore: case llvm::Triple::wasm32: case llvm::Triple::wasm64: return true; } } // ARM tools start. // Get SubArch (vN). static int getARMSubArchVersionNumber(const llvm::Triple &Triple) { llvm::StringRef Arch = Triple.getArchName(); return llvm::ARM::parseArchVersion(Arch); } // True if M-profile. static bool isARMMProfile(const llvm::Triple &Triple) { llvm::StringRef Arch = Triple.getArchName(); unsigned Profile = llvm::ARM::parseArchProfile(Arch); return Profile == llvm::ARM::PK_M; } // Get Arch/CPU from args. static void getARMArchCPUFromArgs(const ArgList &Args, llvm::StringRef &Arch, llvm::StringRef &CPU, bool FromAs = false) { if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) CPU = A->getValue(); if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) Arch = A->getValue(); if (!FromAs) return; for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { StringRef Value = A->getValue(); if (Value.startswith("-mcpu=")) CPU = Value.substr(6); if (Value.startswith("-march=")) Arch = Value.substr(7); } } // Handle -mhwdiv=. // FIXME: Use ARMTargetParser. static void getARMHWDivFeatures(const Driver &D, const Arg *A, const ArgList &Args, StringRef HWDiv, std::vector &Features) { unsigned HWDivID = llvm::ARM::parseHWDiv(HWDiv); if (!llvm::ARM::getHWDivFeatures(HWDivID, Features)) D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); } // Handle -mfpu=. 
static void getARMFPUFeatures(const Driver &D, const Arg *A,
                              const ArgList &Args, StringRef FPU,
                              std::vector<const char *> &Features) {
  unsigned FPUID = llvm::ARM::parseFPU(FPU);
  if (!llvm::ARM::getFPUFeatures(FPUID, Features))
    D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}

// Decode ARM features from string like +[no]featureA+[no]featureB+...
static bool DecodeARMFeatures(const Driver &D, StringRef text,
                              std::vector<const char *> &Features) {
  SmallVector<StringRef, 8> Split;
  text.split(Split, StringRef("+"), -1, false);

  for (StringRef Feature : Split) {
    const char *FeatureName = llvm::ARM::getArchExtFeature(Feature);
    if (FeatureName)
      Features.push_back(FeatureName);
    else
      return false;
  }
  return true;
}

// Check if -march is valid by checking if it can be canonicalised and parsed.
// getARMArch is used here instead of just checking the -march value in order
// to handle -march=native correctly.
static void checkARMArchName(const Driver &D, const Arg *A, const ArgList &Args,
                             llvm::StringRef ArchName,
                             std::vector<const char *> &Features,
                             const llvm::Triple &Triple) {
  std::pair<StringRef, StringRef> Split = ArchName.split("+");

  std::string MArch = arm::getARMArch(ArchName, Triple);
  if (llvm::ARM::parseArch(MArch) == llvm::ARM::AK_INVALID ||
      (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features)))
    D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}

// Check -mcpu=. Needs ArchName to handle -mcpu=generic.
static void checkARMCPUName(const Driver &D, const Arg *A, const ArgList &Args,
                            llvm::StringRef CPUName, llvm::StringRef ArchName,
                            std::vector<const char *> &Features,
                            const llvm::Triple &Triple) {
  std::pair<StringRef, StringRef> Split = CPUName.split("+");

  std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple);
  if (arm::getLLVMArchSuffixForARM(CPU, ArchName, Triple).empty() ||
      (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features)))
    D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}

static bool useAAPCSForMachO(const llvm::Triple &T) {
  // The backend is hardwired to assume AAPCS for M-class processors, ensure
  // the frontend matches that.
  return T.getEnvironment() == llvm::Triple::EABI ||
         T.getOS() == llvm::Triple::UnknownOS || isARMMProfile(T);
}

// Select the float ABI as determined by -msoft-float, -mhard-float, and
// -mfloat-abi=.
arm::FloatABI arm::getARMFloatABI(const ToolChain &TC, const ArgList &Args) {
  const Driver &D = TC.getDriver();
  const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(Args));
  auto SubArch = getARMSubArchVersionNumber(Triple);
  arm::FloatABI ABI = FloatABI::Invalid;
  if (Arg *A =
          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
                          options::OPT_mfloat_abi_EQ)) {
    if (A->getOption().matches(options::OPT_msoft_float)) {
      ABI = FloatABI::Soft;
    } else if (A->getOption().matches(options::OPT_mhard_float)) {
      ABI = FloatABI::Hard;
    } else {
      ABI = llvm::StringSwitch<arm::FloatABI>(A->getValue())
                .Case("soft", FloatABI::Soft)
                .Case("softfp", FloatABI::SoftFP)
                .Case("hard", FloatABI::Hard)
                .Default(FloatABI::Invalid);
      if (ABI == FloatABI::Invalid && !StringRef(A->getValue()).empty()) {
        D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
        ABI = FloatABI::Soft;
      }
    }

    // It is incorrect to select hard float ABI on MachO platforms if the ABI is
    // "apcs-gnu".
    if (Triple.isOSBinFormatMachO() && !useAAPCSForMachO(Triple) &&
        ABI == FloatABI::Hard) {
      D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args)
                                                       << Triple.getArchName();
    }
  }

  // If unspecified, choose the default based on the platform.
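  // For example (illustrative): armv7-unknown-linux-gnueabihf defaults to
  // FloatABI::Hard, armv6-unknown-freebsd to FloatABI::Soft, and a bare
  // armv7-none-eabi to FloatABI::SoftFP, matching the switch below.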
if (ABI == FloatABI::Invalid) { switch (Triple.getOS()) { case llvm::Triple::Darwin: case llvm::Triple::MacOSX: case llvm::Triple::IOS: case llvm::Triple::TvOS: { // Darwin defaults to "softfp" for v6 and v7. ABI = (SubArch == 6 || SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft; ABI = Triple.isWatchABI() ? FloatABI::Hard : ABI; break; } case llvm::Triple::WatchOS: ABI = FloatABI::Hard; break; // FIXME: this is invalid for WindowsCE case llvm::Triple::Win32: ABI = FloatABI::Hard; break; case llvm::Triple::FreeBSD: switch (Triple.getEnvironment()) { case llvm::Triple::GNUEABIHF: ABI = FloatABI::Hard; break; default: // FreeBSD defaults to soft float ABI = FloatABI::Soft; break; } break; default: switch (Triple.getEnvironment()) { case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABIHF: case llvm::Triple::EABIHF: ABI = FloatABI::Hard; break; case llvm::Triple::GNUEABI: case llvm::Triple::MuslEABI: case llvm::Triple::EABI: // EABI is always AAPCS, and if it was not marked 'hard', it's softfp ABI = FloatABI::SoftFP; break; case llvm::Triple::Android: ABI = (SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft; break; default: // Assume "soft", but warn the user we are guessing. if (Triple.isOSBinFormatMachO() && Triple.getSubArch() == llvm::Triple::ARMSubArch_v7em) ABI = FloatABI::Hard; else ABI = FloatABI::Soft; if (Triple.getOS() != llvm::Triple::UnknownOS || !Triple.isOSBinFormatMachO()) D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft"; break; } } } assert(ABI != FloatABI::Invalid && "must select an ABI"); return ABI; } static void getARMTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple, const ArgList &Args, std::vector &Features, bool ForAS) { const Driver &D = TC.getDriver(); bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); arm::FloatABI ABI = arm::getARMFloatABI(TC, Args); const Arg *WaCPU = nullptr, *WaFPU = nullptr; const Arg *WaHDiv = nullptr, *WaArch = nullptr; if (!ForAS) { // FIXME: Note, this is a hack, the LLVM backend doesn't actually use these // yet (it uses the -mfloat-abi and -msoft-float options), and it is // stripped out by the ARM target. We should probably pass this a new // -target-option, which is handled by the -cc1/-cc1as invocation. // // FIXME2: For consistency, it would be ideal if we set up the target // machine state the same when using the frontend or the assembler. We don't // currently do that for the assembler, we pass the options directly to the // backend and never even instantiate the frontend TargetInfo. If we did, // and used its handleTargetFeatures hook, then we could ensure the // assembler and the frontend behave the same. // Use software floating point operations? if (ABI == arm::FloatABI::Soft) Features.push_back("+soft-float"); // Use software floating point argument passing? if (ABI != arm::FloatABI::Hard) Features.push_back("+soft-float-abi"); } else { // Here, we make sure that -Wa,-mfpu/cpu/arch/hwdiv will be passed down // to the assembler correctly. for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { StringRef Value = A->getValue(); if (Value.startswith("-mfpu=")) { WaFPU = A; } else if (Value.startswith("-mcpu=")) { WaCPU = A; } else if (Value.startswith("-mhwdiv=")) { WaHDiv = A; } else if (Value.startswith("-march=")) { WaArch = A; } } } // Check -march. ClangAs gives preference to -Wa,-march=. 
const Arg *ArchArg = Args.getLastArg(options::OPT_march_EQ); StringRef ArchName; if (WaArch) { if (ArchArg) D.Diag(clang::diag::warn_drv_unused_argument) << ArchArg->getAsString(Args); ArchName = StringRef(WaArch->getValue()).substr(7); checkARMArchName(D, WaArch, Args, ArchName, Features, Triple); // FIXME: Set Arch. D.Diag(clang::diag::warn_drv_unused_argument) << WaArch->getAsString(Args); } else if (ArchArg) { ArchName = ArchArg->getValue(); checkARMArchName(D, ArchArg, Args, ArchName, Features, Triple); } // Check -mcpu. ClangAs gives preference to -Wa,-mcpu=. const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ); StringRef CPUName; if (WaCPU) { if (CPUArg) D.Diag(clang::diag::warn_drv_unused_argument) << CPUArg->getAsString(Args); CPUName = StringRef(WaCPU->getValue()).substr(6); checkARMCPUName(D, WaCPU, Args, CPUName, ArchName, Features, Triple); } else if (CPUArg) { CPUName = CPUArg->getValue(); checkARMCPUName(D, CPUArg, Args, CPUName, ArchName, Features, Triple); } // Add CPU features for generic CPUs if (CPUName == "native") { llvm::StringMap HostFeatures; if (llvm::sys::getHostCPUFeatures(HostFeatures)) for (auto &F : HostFeatures) Features.push_back( Args.MakeArgString((F.second ? "+" : "-") + F.first())); } // Honor -mfpu=. ClangAs gives preference to -Wa,-mfpu=. const Arg *FPUArg = Args.getLastArg(options::OPT_mfpu_EQ); if (WaFPU) { if (FPUArg) D.Diag(clang::diag::warn_drv_unused_argument) << FPUArg->getAsString(Args); getARMFPUFeatures(D, WaFPU, Args, StringRef(WaFPU->getValue()).substr(6), Features); } else if (FPUArg) { getARMFPUFeatures(D, FPUArg, Args, FPUArg->getValue(), Features); } // Honor -mhwdiv=. ClangAs gives preference to -Wa,-mhwdiv=. const Arg *HDivArg = Args.getLastArg(options::OPT_mhwdiv_EQ); if (WaHDiv) { if (HDivArg) D.Diag(clang::diag::warn_drv_unused_argument) << HDivArg->getAsString(Args); getARMHWDivFeatures(D, WaHDiv, Args, StringRef(WaHDiv->getValue()).substr(8), Features); } else if (HDivArg) getARMHWDivFeatures(D, HDivArg, Args, HDivArg->getValue(), Features); // Setting -msoft-float effectively disables NEON because of the GCC // implementation, although the same isn't true of VFP or VFP3. if (ABI == arm::FloatABI::Soft) { Features.push_back("-neon"); // Also need to explicitly disable features which imply NEON. Features.push_back("-crypto"); } // En/disable crc code generation. if (Arg *A = Args.getLastArg(options::OPT_mcrc, options::OPT_mnocrc)) { if (A->getOption().matches(options::OPT_mcrc)) Features.push_back("+crc"); else Features.push_back("-crc"); } // Look for the last occurrence of -mlong-calls or -mno-long-calls. If // neither options are specified, see if we are compiling for kernel/kext and // decide whether to pass "+long-calls" based on the OS and its version. if (Arg *A = Args.getLastArg(options::OPT_mlong_calls, options::OPT_mno_long_calls)) { if (A->getOption().matches(options::OPT_mlong_calls)) Features.push_back("+long-calls"); } else if (KernelOrKext && (!Triple.isiOS() || Triple.isOSVersionLT(6)) && !Triple.isWatchOS()) { Features.push_back("+long-calls"); } // Kernel code has more strict alignment requirements. if (KernelOrKext) Features.push_back("+strict-align"); else if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, options::OPT_munaligned_access)) { if (A->getOption().matches(options::OPT_munaligned_access)) { // No v6M core supports unaligned memory access (v6M ARM ARM A3.2). 
if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) D.Diag(diag::err_target_unsupported_unaligned) << "v6m"; // v8M Baseline follows on from v6M, so doesn't support unaligned memory // access either. else if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8m_baseline) D.Diag(diag::err_target_unsupported_unaligned) << "v8m.base"; } else Features.push_back("+strict-align"); } else { // Assume pre-ARMv6 doesn't support unaligned accesses. // // ARMv6 may or may not support unaligned accesses depending on the // SCTLR.U bit, which is architecture-specific. We assume ARMv6 // Darwin and NetBSD targets support unaligned accesses, and others don't. // // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit // which raises an alignment fault on unaligned accesses. Linux // defaults this bit to 0 and handles it as a system-wide (not // per-process) setting. It is therefore safe to assume that ARMv7+ // Linux targets support unaligned accesses. The same goes for NaCl. // // The above behavior is consistent with GCC. int VersionNum = getARMSubArchVersionNumber(Triple); if (Triple.isOSDarwin() || Triple.isOSNetBSD()) { if (VersionNum < 6 || Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) Features.push_back("+strict-align"); } else if (Triple.isOSLinux() || Triple.isOSNaCl()) { if (VersionNum < 7) Features.push_back("+strict-align"); } else Features.push_back("+strict-align"); } // llvm does not support reserving registers in general. There is support // for reserving r9 on ARM though (defined as a platform-specific register // in ARM EABI). if (Args.hasArg(options::OPT_ffixed_r9)) Features.push_back("+reserve-r9"); // The kext linker doesn't know how to deal with movw/movt. if (KernelOrKext || Args.hasArg(options::OPT_mno_movt)) Features.push_back("+no-movt"); } void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args, ArgStringList &CmdArgs, bool KernelOrKext) const { // Select the ABI to use. // FIXME: Support -meabi. // FIXME: Parts of this are duplicated in the backend, unify this somehow. const char *ABIName = nullptr; if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { ABIName = A->getValue(); } else if (Triple.isOSBinFormatMachO()) { if (useAAPCSForMachO(Triple)) { ABIName = "aapcs"; } else if (Triple.isWatchABI()) { ABIName = "aapcs16"; } else { ABIName = "apcs-gnu"; } } else if (Triple.isOSWindows()) { // FIXME: this is invalid for WindowsCE ABIName = "aapcs"; } else { // Select the default based on the platform. switch (Triple.getEnvironment()) { case llvm::Triple::Android: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABI: case llvm::Triple::MuslEABIHF: ABIName = "aapcs-linux"; break; case llvm::Triple::EABIHF: case llvm::Triple::EABI: ABIName = "aapcs"; break; default: if (Triple.getOS() == llvm::Triple::NetBSD) ABIName = "apcs-gnu"; else ABIName = "aapcs"; break; } } CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName); // Determine floating point ABI from the options & target defaults. arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args); if (ABI == arm::FloatABI::Soft) { // Floating point operations and argument passing are soft. // FIXME: This changes CPP defines, we need -target-soft-float. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else if (ABI == arm::FloatABI::SoftFP) { // Floating point operations are hard, but argument passing is soft. 
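    // In other words (illustrative): with "softfp", code such as
    //   double mul(double a, double b) { return a * b; }
    // may use VFP instructions for the multiply, but a and b still arrive
    // in core registers under the soft-float calling convention, so the
    // result links cleanly against soft-float libraries.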
CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(ABI == arm::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } // Forward the -mglobal-merge option for explicit control over the pass. if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge, options::OPT_mno_global_merge)) { CmdArgs.push_back("-backend-option"); if (A->getOption().matches(options::OPT_mno_global_merge)) CmdArgs.push_back("-arm-global-merge=false"); else CmdArgs.push_back("-arm-global-merge=true"); } if (!Args.hasFlag(options::OPT_mimplicit_float, options::OPT_mno_implicit_float, true)) CmdArgs.push_back("-no-implicit-float"); } // ARM tools end. /// getAArch64TargetCPU - Get the (LLVM) name of the AArch64 cpu we are /// targeting. static std::string getAArch64TargetCPU(const ArgList &Args) { Arg *A; std::string CPU; // If we have -mtune or -mcpu, use that. if ((A = Args.getLastArg(options::OPT_mtune_EQ))) { CPU = StringRef(A->getValue()).lower(); } else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) { StringRef Mcpu = A->getValue(); CPU = Mcpu.split("+").first.lower(); } // Handle CPU name is 'native'. if (CPU == "native") return llvm::sys::getHostCPUName(); else if (CPU.size()) return CPU; // Make sure we pick "cyclone" if -arch is used. // FIXME: Should this be picked by checking the target triple instead? if (Args.getLastArg(options::OPT_arch)) return "cyclone"; return "generic"; } void Clang::AddAArch64TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args); llvm::Triple Triple(TripleStr); if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) || Args.hasArg(options::OPT_mkernel) || Args.hasArg(options::OPT_fapple_kext)) CmdArgs.push_back("-disable-red-zone"); if (!Args.hasFlag(options::OPT_mimplicit_float, options::OPT_mno_implicit_float, true)) CmdArgs.push_back("-no-implicit-float"); const char *ABIName = nullptr; if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) ABIName = A->getValue(); else if (Triple.isOSDarwin()) ABIName = "darwinpcs"; else ABIName = "aapcs"; CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName); if (Arg *A = Args.getLastArg(options::OPT_mfix_cortex_a53_835769, options::OPT_mno_fix_cortex_a53_835769)) { CmdArgs.push_back("-backend-option"); if (A->getOption().matches(options::OPT_mfix_cortex_a53_835769)) CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1"); else CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=0"); } else if (Triple.isAndroid()) { // Enabled A53 errata (835769) workaround by default on android CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1"); } // Forward the -mglobal-merge option for explicit control over the pass. if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge, options::OPT_mno_global_merge)) { CmdArgs.push_back("-backend-option"); if (A->getOption().matches(options::OPT_mno_global_merge)) CmdArgs.push_back("-aarch64-global-merge=false"); else CmdArgs.push_back("-aarch64-global-merge=true"); } } // Get CPU and ABI names. They are not independent // so we have to calculate them together. 
void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, StringRef &CPUName, StringRef &ABIName) { const char *DefMips32CPU = "mips32r2"; const char *DefMips64CPU = "mips64r2"; // MIPS32r6 is the default for mips(el)?-img-linux-gnu and MIPS64r6 is the // default for mips64(el)?-img-linux-gnu. if (Triple.getVendor() == llvm::Triple::ImaginationTechnologies && Triple.getEnvironment() == llvm::Triple::GNU) { DefMips32CPU = "mips32r6"; DefMips64CPU = "mips64r6"; } // MIPS64r6 is the default for Android MIPS64 (mips64el-linux-android). if (Triple.isAndroid()) { DefMips32CPU = "mips32"; DefMips64CPU = "mips64r6"; } // MIPS3 is the default for mips64*-unknown-openbsd. if (Triple.getOS() == llvm::Triple::OpenBSD) DefMips64CPU = "mips3"; if (Arg *A = Args.getLastArg(options::OPT_march_EQ, options::OPT_mcpu_EQ)) CPUName = A->getValue(); if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { ABIName = A->getValue(); // Convert a GNU style Mips ABI name to the name // accepted by LLVM Mips backend. ABIName = llvm::StringSwitch(ABIName) .Case("32", "o32") .Case("64", "n64") .Default(ABIName); } // Setup default CPU and ABI names. if (CPUName.empty() && ABIName.empty()) { switch (Triple.getArch()) { default: llvm_unreachable("Unexpected triple arch name"); case llvm::Triple::mips: case llvm::Triple::mipsel: CPUName = DefMips32CPU; break; case llvm::Triple::mips64: case llvm::Triple::mips64el: CPUName = DefMips64CPU; break; } } if (ABIName.empty() && (Triple.getVendor() == llvm::Triple::MipsTechnologies || Triple.getVendor() == llvm::Triple::ImaginationTechnologies)) { ABIName = llvm::StringSwitch(CPUName) .Case("mips1", "o32") .Case("mips2", "o32") .Case("mips3", "n64") .Case("mips4", "n64") .Case("mips5", "n64") .Case("mips32", "o32") .Case("mips32r2", "o32") .Case("mips32r3", "o32") .Case("mips32r5", "o32") .Case("mips32r6", "o32") .Case("mips64", "n64") .Case("mips64r2", "n64") .Case("mips64r3", "n64") .Case("mips64r5", "n64") .Case("mips64r6", "n64") .Case("octeon", "n64") .Case("p5600", "o32") .Default(""); } if (ABIName.empty()) { // Deduce ABI name from the target triple. if (Triple.getArch() == llvm::Triple::mips || Triple.getArch() == llvm::Triple::mipsel) ABIName = "o32"; else ABIName = "n64"; } if (CPUName.empty()) { // Deduce CPU name from ABI name. CPUName = llvm::StringSwitch(ABIName) .Case("o32", DefMips32CPU) .Cases("n32", "n64", DefMips64CPU) .Default(""); } // FIXME: Warn on inconsistent use of -march and -mabi. } std::string mips::getMipsABILibSuffix(const ArgList &Args, const llvm::Triple &Triple) { StringRef CPUName, ABIName; tools::mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); return llvm::StringSwitch(ABIName) .Case("o32", "") .Case("n32", "32") .Case("n64", "64"); } // Convert ABI name to the GNU tools acceptable variant. static StringRef getGnuCompatibleMipsABIName(StringRef ABI) { return llvm::StringSwitch(ABI) .Case("o32", "32") .Case("n64", "64") .Default(ABI); } // Select the MIPS float ABI as determined by -msoft-float, -mhard-float, // and -mfloat-abi=. 
static mips::FloatABI getMipsFloatABI(const Driver &D, const ArgList &Args) {
  mips::FloatABI ABI = mips::FloatABI::Invalid;
  if (Arg *A =
          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
                          options::OPT_mfloat_abi_EQ)) {
    if (A->getOption().matches(options::OPT_msoft_float))
      ABI = mips::FloatABI::Soft;
    else if (A->getOption().matches(options::OPT_mhard_float))
      ABI = mips::FloatABI::Hard;
    else {
      ABI = llvm::StringSwitch<mips::FloatABI>(A->getValue())
                .Case("soft", mips::FloatABI::Soft)
                .Case("hard", mips::FloatABI::Hard)
                .Default(mips::FloatABI::Invalid);
      if (ABI == mips::FloatABI::Invalid &&
          !StringRef(A->getValue()).empty()) {
        D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
        ABI = mips::FloatABI::Hard;
      }
    }
  }

  // If unspecified, choose the default based on the platform.
  if (ABI == mips::FloatABI::Invalid) {
    // Assume "hard", because it's a default value used by gcc.
    // When we start to recognize specific target MIPS processors,
    // we will be able to select the default more correctly.
    ABI = mips::FloatABI::Hard;
  }

  assert(ABI != mips::FloatABI::Invalid && "must select an ABI");
  return ABI;
}

static void AddTargetFeature(const ArgList &Args,
                             std::vector<const char *> &Features,
                             OptSpecifier OnOpt, OptSpecifier OffOpt,
                             StringRef FeatureName) {
  if (Arg *A = Args.getLastArg(OnOpt, OffOpt)) {
    if (A->getOption().matches(OnOpt))
      Features.push_back(Args.MakeArgString("+" + FeatureName));
    else
      Features.push_back(Args.MakeArgString("-" + FeatureName));
  }
}

static void getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                  const ArgList &Args,
                                  std::vector<const char *> &Features) {
  StringRef CPUName;
  StringRef ABIName;
  mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
  ABIName = getGnuCompatibleMipsABIName(ABIName);

  AddTargetFeature(Args, Features, options::OPT_mno_abicalls,
                   options::OPT_mabicalls, "noabicalls");

  mips::FloatABI FloatABI = getMipsFloatABI(D, Args);
  if (FloatABI == mips::FloatABI::Soft) {
    // FIXME: Note, this is a hack. We need to pass the selected float
    // mode to the MipsTargetInfoBase to define appropriate macros there.
    // Now it is the only method.
    Features.push_back("+soft-float");
  }

  if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) {
    StringRef Val = StringRef(A->getValue());
    if (Val == "2008") {
      if (mips::getSupportedNanEncoding(CPUName) & mips::Nan2008)
        Features.push_back("+nan2008");
      else {
        Features.push_back("-nan2008");
        D.Diag(diag::warn_target_unsupported_nan2008) << CPUName;
      }
    } else if (Val == "legacy") {
      if (mips::getSupportedNanEncoding(CPUName) & mips::NanLegacy)
        Features.push_back("-nan2008");
      else {
        Features.push_back("+nan2008");
        D.Diag(diag::warn_target_unsupported_nanlegacy) << CPUName;
      }
    } else
      D.Diag(diag::err_drv_unsupported_option_argument)
          << A->getOption().getName() << Val;
  }

  AddTargetFeature(Args, Features, options::OPT_msingle_float,
                   options::OPT_mdouble_float, "single-float");
  AddTargetFeature(Args, Features, options::OPT_mips16, options::OPT_mno_mips16,
                   "mips16");
  AddTargetFeature(Args, Features, options::OPT_mmicromips,
                   options::OPT_mno_micromips, "micromips");
  AddTargetFeature(Args, Features, options::OPT_mdsp, options::OPT_mno_dsp,
                   "dsp");
  AddTargetFeature(Args, Features, options::OPT_mdspr2, options::OPT_mno_dspr2,
                   "dspr2");
  AddTargetFeature(Args, Features, options::OPT_mmsa, options::OPT_mno_msa,
                   "msa");

  // Add the last -mfp32/-mfpxx/-mfp64, if none are given and the ABI is O32
  // pass -mfpxx, or if none are given and fp64a is default, pass fp64 and
  // nooddspreg.
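  // For example (illustrative): "-target mips-linux-gnu -mfpxx" yields the
  // features {"+fpxx", "+nooddspreg"}, i.e. FP-register-mode-agnostic code
  // that can run with either 32-bit or 64-bit FP registers.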
if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx, options::OPT_mfp64)) { if (A->getOption().matches(options::OPT_mfp32)) Features.push_back(Args.MakeArgString("-fp64")); else if (A->getOption().matches(options::OPT_mfpxx)) { Features.push_back(Args.MakeArgString("+fpxx")); Features.push_back(Args.MakeArgString("+nooddspreg")); } else Features.push_back(Args.MakeArgString("+fp64")); } else if (mips::shouldUseFPXX(Args, Triple, CPUName, ABIName, FloatABI)) { Features.push_back(Args.MakeArgString("+fpxx")); Features.push_back(Args.MakeArgString("+nooddspreg")); } else if (mips::isFP64ADefault(Triple, CPUName)) { Features.push_back(Args.MakeArgString("+fp64")); Features.push_back(Args.MakeArgString("+nooddspreg")); } AddTargetFeature(Args, Features, options::OPT_mno_odd_spreg, options::OPT_modd_spreg, "nooddspreg"); } void Clang::AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { const Driver &D = getToolChain().getDriver(); StringRef CPUName; StringRef ABIName; const llvm::Triple &Triple = getToolChain().getTriple(); mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName.data()); mips::FloatABI ABI = getMipsFloatABI(D, Args); if (ABI == mips::FloatABI::Soft) { // Floating point operations and argument passing are soft. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(ABI == mips::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } if (Arg *A = Args.getLastArg(options::OPT_mxgot, options::OPT_mno_xgot)) { if (A->getOption().matches(options::OPT_mxgot)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-mxgot"); } } if (Arg *A = Args.getLastArg(options::OPT_mldc1_sdc1, options::OPT_mno_ldc1_sdc1)) { if (A->getOption().matches(options::OPT_mno_ldc1_sdc1)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-mno-ldc1-sdc1"); } } if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, options::OPT_mno_check_zero_division)) { if (A->getOption().matches(options::OPT_mno_check_zero_division)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-mno-check-zero-division"); } } if (Arg *A = Args.getLastArg(options::OPT_G)) { StringRef v = A->getValue(); CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-mips-ssection-threshold=" + v)); A->claim(); } if (Arg *A = Args.getLastArg(options::OPT_mcompact_branches_EQ)) { StringRef Val = StringRef(A->getValue()); if (mips::hasCompactBranches(CPUName)) { if (Val == "never" || Val == "always" || Val == "optimal") { CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-mips-compact-branches=" + Val)); } else D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Val; } else D.Diag(diag::warn_target_unsupported_compact_branches) << CPUName; } } /// getPPCTargetCPU - Get the (LLVM) name of the PowerPC cpu we are targeting. 
static std::string getPPCTargetCPU(const ArgList &Args) {
  if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
    StringRef CPUName = A->getValue();

    if (CPUName == "native") {
      std::string CPU = llvm::sys::getHostCPUName();
      if (!CPU.empty() && CPU != "generic")
        return CPU;
      else
        return "";
    }

    return llvm::StringSwitch<const char *>(CPUName)
        .Case("common", "generic")
        .Case("440", "440")
        .Case("440fp", "440")
        .Case("450", "450")
        .Case("601", "601")
        .Case("602", "602")
        .Case("603", "603")
        .Case("603e", "603e")
        .Case("603ev", "603ev")
        .Case("604", "604")
        .Case("604e", "604e")
        .Case("620", "620")
        .Case("630", "pwr3")
        .Case("G3", "g3")
        .Case("7400", "7400")
        .Case("G4", "g4")
        .Case("7450", "7450")
        .Case("G4+", "g4+")
        .Case("750", "750")
        .Case("970", "970")
        .Case("G5", "g5")
        .Case("a2", "a2")
        .Case("a2q", "a2q")
        .Case("e500mc", "e500mc")
        .Case("e5500", "e5500")
        .Case("power3", "pwr3")
        .Case("power4", "pwr4")
        .Case("power5", "pwr5")
        .Case("power5x", "pwr5x")
        .Case("power6", "pwr6")
        .Case("power6x", "pwr6x")
        .Case("power7", "pwr7")
        .Case("power8", "pwr8")
        .Case("power9", "pwr9")
        .Case("pwr3", "pwr3")
        .Case("pwr4", "pwr4")
        .Case("pwr5", "pwr5")
        .Case("pwr5x", "pwr5x")
        .Case("pwr6", "pwr6")
        .Case("pwr6x", "pwr6x")
        .Case("pwr7", "pwr7")
        .Case("pwr8", "pwr8")
        .Case("pwr9", "pwr9")
        .Case("powerpc", "ppc")
        .Case("powerpc64", "ppc64")
        .Case("powerpc64le", "ppc64le")
        .Default("");
  }

  return "";
}

static void getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                 const ArgList &Args,
                                 std::vector<const char *> &Features) {
  handleTargetFeaturesGroup(Args, Features, options::OPT_m_ppc_Features_Group);

  ppc::FloatABI FloatABI = ppc::getPPCFloatABI(D, Args);
-  if (FloatABI == ppc::FloatABI::Soft &&
-      !(Triple.getArch() == llvm::Triple::ppc64 ||
-        Triple.getArch() == llvm::Triple::ppc64le))
-    Features.push_back("+soft-float");
-  else if (FloatABI == ppc::FloatABI::Soft &&
-           (Triple.getArch() == llvm::Triple::ppc64 ||
-            Triple.getArch() == llvm::Triple::ppc64le))
-    D.Diag(diag::err_drv_invalid_mfloat_abi)
-        << "soft float is not supported for ppc64";
+  if (FloatABI == ppc::FloatABI::Soft)
+    Features.push_back("-hard-float");

  // Altivec is a bit weird, allow overriding of the Altivec feature here.
  AddTargetFeature(Args, Features, options::OPT_faltivec,
                   options::OPT_fno_altivec, "altivec");
}

ppc::FloatABI ppc::getPPCFloatABI(const Driver &D, const ArgList &Args) {
  ppc::FloatABI ABI = ppc::FloatABI::Invalid;
  if (Arg *A =
          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
                          options::OPT_mfloat_abi_EQ)) {
    if (A->getOption().matches(options::OPT_msoft_float))
      ABI = ppc::FloatABI::Soft;
    else if (A->getOption().matches(options::OPT_mhard_float))
      ABI = ppc::FloatABI::Hard;
    else {
      ABI = llvm::StringSwitch<ppc::FloatABI>(A->getValue())
                .Case("soft", ppc::FloatABI::Soft)
                .Case("hard", ppc::FloatABI::Hard)
                .Default(ppc::FloatABI::Invalid);
      if (ABI == ppc::FloatABI::Invalid && !StringRef(A->getValue()).empty()) {
        D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
        ABI = ppc::FloatABI::Hard;
      }
    }
  }

  // If unspecified, choose the default based on the platform.
  if (ABI == ppc::FloatABI::Invalid) {
    ABI = ppc::FloatABI::Hard;
  }

  return ABI;
}

void Clang::AddPPCTargetArgs(const ArgList &Args,
                             ArgStringList &CmdArgs) const {
  // Select the ABI to use.
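  // For example (illustrative): "-target powerpc64-linux-gnu -mcpu=a2q"
  // selects ABIName "elfv1-qpx" below (unless -mno-qpx overrides it), while
  // a powerpc64le target defaults to "elfv2".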
const char *ABIName = nullptr; if (getToolChain().getTriple().isOSLinux()) switch (getToolChain().getArch()) { case llvm::Triple::ppc64: { // When targeting a processor that supports QPX, or if QPX is // specifically enabled, default to using the ABI that supports QPX (so // long as it is not specifically disabled). bool HasQPX = false; if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) HasQPX = A->getValue() == StringRef("a2q"); HasQPX = Args.hasFlag(options::OPT_mqpx, options::OPT_mno_qpx, HasQPX); if (HasQPX) { ABIName = "elfv1-qpx"; break; } ABIName = "elfv1"; break; } case llvm::Triple::ppc64le: ABIName = "elfv2"; break; default: break; } if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) // The ppc64 linux abis are all "altivec" abis by default. Accept and ignore // the option if given as we don't have backend support for any targets // that don't use the altivec abi. if (StringRef(A->getValue()) != "altivec") ABIName = A->getValue(); ppc::FloatABI FloatABI = ppc::getPPCFloatABI(getToolChain().getDriver(), Args); if (FloatABI == ppc::FloatABI::Soft) { // Floating point operations and argument passing are soft. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(FloatABI == ppc::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } if (ABIName) { CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName); } } bool ppc::hasPPCAbiArg(const ArgList &Args, const char *Value) { Arg *A = Args.getLastArg(options::OPT_mabi_EQ); return A && (A->getValue() == StringRef(Value)); } /// Get the (LLVM) name of the R600 gpu we are targeting. static std::string getR600TargetGPU(const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { const char *GPUName = A->getValue(); return llvm::StringSwitch(GPUName) .Cases("rv630", "rv635", "r600") .Cases("rv610", "rv620", "rs780", "rs880") .Case("rv740", "rv770") .Case("palm", "cedar") .Cases("sumo", "sumo2", "sumo") .Case("hemlock", "cypress") .Case("aruba", "cayman") .Default(GPUName); } return ""; } static std::string getLanaiTargetCPU(const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { return A->getValue(); } return ""; } sparc::FloatABI sparc::getSparcFloatABI(const Driver &D, const ArgList &Args) { sparc::FloatABI ABI = sparc::FloatABI::Invalid; if (Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, options::OPT_mfloat_abi_EQ)) { if (A->getOption().matches(options::OPT_msoft_float)) ABI = sparc::FloatABI::Soft; else if (A->getOption().matches(options::OPT_mhard_float)) ABI = sparc::FloatABI::Hard; else { ABI = llvm::StringSwitch(A->getValue()) .Case("soft", sparc::FloatABI::Soft) .Case("hard", sparc::FloatABI::Hard) .Default(sparc::FloatABI::Invalid); if (ABI == sparc::FloatABI::Invalid && !StringRef(A->getValue()).empty()) { D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); ABI = sparc::FloatABI::Hard; } } } // If unspecified, choose the default based on the platform. // Only the hard-float ABI on Sparc is standardized, and it is the // default. GCC also supports a nonstandard soft-float ABI mode, also // implemented in LLVM. However as this is not standard we set the default // to be hard-float. 
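  // For illustration (hypothetical invocations): -msoft-float and
  // -mfloat-abi=soft both select FloatABI::Soft, which is lowered to the
  // "+soft-float" target feature below; with no flag given, the
  // standardized hard-float ABI is chosen.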
if (ABI == sparc::FloatABI::Invalid) { ABI = sparc::FloatABI::Hard; } return ABI; } static void getSparcTargetFeatures(const Driver &D, const ArgList &Args, std::vector &Features) { sparc::FloatABI FloatABI = sparc::getSparcFloatABI(D, Args); if (FloatABI == sparc::FloatABI::Soft) Features.push_back("+soft-float"); } void Clang::AddSparcTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { sparc::FloatABI FloatABI = sparc::getSparcFloatABI(getToolChain().getDriver(), Args); if (FloatABI == sparc::FloatABI::Soft) { // Floating point operations and argument passing are soft. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(FloatABI == sparc::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } } void Clang::AddSystemZTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (Args.hasFlag(options::OPT_mbackchain, options::OPT_mno_backchain, false)) CmdArgs.push_back("-mbackchain"); } static const char *getSystemZTargetCPU(const ArgList &Args) { if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) return A->getValue(); return "z10"; } static void getSystemZTargetFeatures(const ArgList &Args, std::vector &Features) { // -m(no-)htm overrides use of the transactional-execution facility. if (Arg *A = Args.getLastArg(options::OPT_mhtm, options::OPT_mno_htm)) { if (A->getOption().matches(options::OPT_mhtm)) Features.push_back("+transactional-execution"); else Features.push_back("-transactional-execution"); } // -m(no-)vx overrides use of the vector facility. if (Arg *A = Args.getLastArg(options::OPT_mvx, options::OPT_mno_vx)) { if (A->getOption().matches(options::OPT_mvx)) Features.push_back("+vector"); else Features.push_back("-vector"); } } static const char *getX86TargetCPU(const ArgList &Args, const llvm::Triple &Triple) { if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { if (StringRef(A->getValue()) != "native") { if (Triple.isOSDarwin() && Triple.getArchName() == "x86_64h") return "core-avx2"; return A->getValue(); } // FIXME: Reject attempts to use -march=native unless the target matches // the host. // // FIXME: We should also incorporate the detected target features for use // with -native. std::string CPU = llvm::sys::getHostCPUName(); if (!CPU.empty() && CPU != "generic") return Args.MakeArgString(CPU); } if (const Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) { // Mapping built by referring to X86TargetInfo::getDefaultFeatures(). StringRef Arch = A->getValue(); const char *CPU; if (Triple.getArch() == llvm::Triple::x86) { CPU = llvm::StringSwitch(Arch) .Case("IA32", "i386") .Case("SSE", "pentium3") .Case("SSE2", "pentium4") .Case("AVX", "sandybridge") .Case("AVX2", "haswell") .Default(nullptr); } else { CPU = llvm::StringSwitch(Arch) .Case("AVX", "sandybridge") .Case("AVX2", "haswell") .Default(nullptr); } if (CPU) return CPU; } // Select the default CPU if none was given (or detection failed). if (Triple.getArch() != llvm::Triple::x86_64 && Triple.getArch() != llvm::Triple::x86) return nullptr; // This routine is only handling x86 targets. bool Is64Bit = Triple.getArch() == llvm::Triple::x86_64; // FIXME: Need target hooks. if (Triple.isOSDarwin()) { if (Triple.getArchName() == "x86_64h") return "core-avx2"; return Is64Bit ? "core2" : "yonah"; } // Set up default CPU name for PS4 compilers. 
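  // (e.g. an x86_64-scei-ps4 compile with no -march= should fall through
  // to the "btver2" default below; an illustrative note, not new
  // behavior.)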
if (Triple.isPS4CPU()) return "btver2"; // On Android use targets compatible with gcc if (Triple.isAndroid()) return Is64Bit ? "x86-64" : "i686"; // Everything else goes to x86-64 in 64-bit mode. if (Is64Bit) return "x86-64"; switch (Triple.getOS()) { case llvm::Triple::FreeBSD: case llvm::Triple::NetBSD: case llvm::Triple::OpenBSD: return "i486"; case llvm::Triple::Haiku: return "i586"; case llvm::Triple::Bitrig: return "i686"; default: // Fallback to p4. return "pentium4"; } } /// Get the (LLVM) name of the WebAssembly cpu we are targeting. static StringRef getWebAssemblyTargetCPU(const ArgList &Args) { // If we have -mcpu=, use that. if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { StringRef CPU = A->getValue(); #ifdef __wasm__ // Handle "native" by examining the host. "native" isn't meaningful when // cross compiling, so only support this when the host is also WebAssembly. if (CPU == "native") return llvm::sys::getHostCPUName(); #endif return CPU; } return "generic"; } static std::string getCPUName(const ArgList &Args, const llvm::Triple &T, bool FromAs = false) { switch (T.getArch()) { default: return ""; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: return getAArch64TargetCPU(Args); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { StringRef MArch, MCPU; getARMArchCPUFromArgs(Args, MArch, MCPU, FromAs); return arm::getARMTargetCPU(MCPU, MArch, T); } case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, T, CPUName, ABIName); return CPUName; } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) return A->getValue(); return ""; case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: { std::string TargetCPUName = getPPCTargetCPU(Args); // LLVM may default to generating code for the native CPU, // but, like gcc, we default to a more generic option for // each architecture. (except on Darwin) if (TargetCPUName.empty() && !T.isOSDarwin()) { if (T.getArch() == llvm::Triple::ppc64) TargetCPUName = "ppc64"; else if (T.getArch() == llvm::Triple::ppc64le) TargetCPUName = "ppc64le"; else TargetCPUName = "ppc"; } return TargetCPUName; } case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) return A->getValue(); return ""; case llvm::Triple::x86: case llvm::Triple::x86_64: return getX86TargetCPU(Args, T); case llvm::Triple::hexagon: return "hexagon" + toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); case llvm::Triple::lanai: return getLanaiTargetCPU(Args); case llvm::Triple::systemz: return getSystemZTargetCPU(Args); case llvm::Triple::r600: case llvm::Triple::amdgcn: return getR600TargetGPU(Args); case llvm::Triple::wasm32: case llvm::Triple::wasm64: return getWebAssemblyTargetCPU(Args); } } static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args, ArgStringList &CmdArgs, bool IsThinLTO) { // Tell the linker to load the plugin. This has to come before AddLinkerInputs // as gold requires -plugin to come before any -plugin-opt that -Wl might // forward. CmdArgs.push_back("-plugin"); std::string Plugin = ToolChain.getDriver().Dir + "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold.so"; CmdArgs.push_back(Args.MakeArgString(Plugin)); // Try to pass driver level flags relevant to LTO code generation down to // the plugin. 
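  // For illustration (hypothetical invocation): "clang -flto -O2
  // -mcpu=pwr8" should reach gold as "-plugin-opt=mcpu=pwr8
  // -plugin-opt=O2"; -O4 and -Ofast collapse to O3, and ThinLTO adds
  // "-plugin-opt=thinlto".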
// Handle flags for selecting CPU variants. std::string CPU = getCPUName(Args, ToolChain.getTriple()); if (!CPU.empty()) CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU)); if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { StringRef OOpt; if (A->getOption().matches(options::OPT_O4) || A->getOption().matches(options::OPT_Ofast)) OOpt = "3"; else if (A->getOption().matches(options::OPT_O)) OOpt = A->getValue(); else if (A->getOption().matches(options::OPT_O0)) OOpt = "0"; if (!OOpt.empty()) CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=O") + OOpt)); } if (IsThinLTO) CmdArgs.push_back("-plugin-opt=thinlto"); // If an explicit debugger tuning argument appeared, pass it along. if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, options::OPT_ggdbN_Group)) { if (A->getOption().matches(options::OPT_glldb)) CmdArgs.push_back("-plugin-opt=-debugger-tune=lldb"); else if (A->getOption().matches(options::OPT_gsce)) CmdArgs.push_back("-plugin-opt=-debugger-tune=sce"); else CmdArgs.push_back("-plugin-opt=-debugger-tune=gdb"); } } /// This is a helper function for validating the optional refinement step /// parameter in reciprocal argument strings. Return false if there is an error /// parsing the refinement step. Otherwise, return true and set the Position /// of the refinement step in the input string. static bool getRefinementStep(StringRef In, const Driver &D, const Arg &A, size_t &Position) { const char RefinementStepToken = ':'; Position = In.find(RefinementStepToken); if (Position != StringRef::npos) { StringRef Option = A.getOption().getName(); StringRef RefStep = In.substr(Position + 1); // Allow exactly one numeric character for the additional refinement // step parameter. This is reasonable for all currently-supported // operations and architectures because we would expect that a larger value // of refinement steps would cause the estimate "optimization" to // under-perform the native operation. Also, if the estimate does not // converge quickly, it probably will not ever converge, so further // refinement steps will not produce a better answer. if (RefStep.size() != 1) { D.Diag(diag::err_drv_invalid_value) << Option << RefStep; return false; } char RefStepChar = RefStep[0]; if (RefStepChar < '0' || RefStepChar > '9') { D.Diag(diag::err_drv_invalid_value) << Option << RefStep; return false; } } return true; } /// The -mrecip flag requires processing of many optional parameters. static void ParseMRecip(const Driver &D, const ArgList &Args, ArgStringList &OutStrings) { StringRef DisabledPrefixIn = "!"; StringRef DisabledPrefixOut = "!"; StringRef EnabledPrefixOut = ""; StringRef Out = "-mrecip="; Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ); if (!A) return; unsigned NumOptions = A->getNumValues(); if (NumOptions == 0) { // No option is the same as "all". OutStrings.push_back(Args.MakeArgString(Out + "all")); return; } // Pass through "all", "none", or "default" with an optional refinement step. if (NumOptions == 1) { StringRef Val = A->getValue(0); size_t RefStepLoc; if (!getRefinementStep(Val, D, *A, RefStepLoc)) return; StringRef ValBase = Val.slice(0, RefStepLoc); if (ValBase == "all" || ValBase == "none" || ValBase == "default") { OutStrings.push_back(Args.MakeArgString(Out + Val)); return; } } // Each reciprocal type may be enabled or disabled individually. // Check each input value for validity, concatenate them all back together, // and pass through. 
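  // For illustration (hypothetical values): "-mrecip=vec-sqrtd:2,!divf"
  // validates and is forwarded unchanged, while the unsuffixed
  // "-mrecip=sqrt" marks both sqrtf and sqrtd as specified, so a later
  // duplicate such as "sqrtd" is rejected with err_drv_invalid_value.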
llvm::StringMap OptionStrings; OptionStrings.insert(std::make_pair("divd", false)); OptionStrings.insert(std::make_pair("divf", false)); OptionStrings.insert(std::make_pair("vec-divd", false)); OptionStrings.insert(std::make_pair("vec-divf", false)); OptionStrings.insert(std::make_pair("sqrtd", false)); OptionStrings.insert(std::make_pair("sqrtf", false)); OptionStrings.insert(std::make_pair("vec-sqrtd", false)); OptionStrings.insert(std::make_pair("vec-sqrtf", false)); for (unsigned i = 0; i != NumOptions; ++i) { StringRef Val = A->getValue(i); bool IsDisabled = Val.startswith(DisabledPrefixIn); // Ignore the disablement token for string matching. if (IsDisabled) Val = Val.substr(1); size_t RefStep; if (!getRefinementStep(Val, D, *A, RefStep)) return; StringRef ValBase = Val.slice(0, RefStep); llvm::StringMap::iterator OptionIter = OptionStrings.find(ValBase); if (OptionIter == OptionStrings.end()) { // Try again specifying float suffix. OptionIter = OptionStrings.find(ValBase.str() + 'f'); if (OptionIter == OptionStrings.end()) { // The input name did not match any known option string. D.Diag(diag::err_drv_unknown_argument) << Val; return; } // The option was specified without a float or double suffix. // Make sure that the double entry was not already specified. // The float entry will be checked below. if (OptionStrings[ValBase.str() + 'd']) { D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val; return; } } if (OptionIter->second == true) { // Duplicate option specified. D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val; return; } // Mark the matched option as found. Do not allow duplicate specifiers. OptionIter->second = true; // If the precision was not specified, also mark the double entry as found. if (ValBase.back() != 'f' && ValBase.back() != 'd') OptionStrings[ValBase.str() + 'd'] = true; // Build the output string. StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut; Out = Args.MakeArgString(Out + Prefix + Val); if (i != NumOptions - 1) Out = Args.MakeArgString(Out + ","); } OutStrings.push_back(Args.MakeArgString(Out)); } static void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, std::vector &Features) { // If -march=native, autodetect the feature list. if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { if (StringRef(A->getValue()) == "native") { llvm::StringMap HostFeatures; if (llvm::sys::getHostCPUFeatures(HostFeatures)) for (auto &F : HostFeatures) Features.push_back( Args.MakeArgString((F.second ? "+" : "-") + F.first())); } } if (Triple.getArchName() == "x86_64h") { // x86_64h implies quite a few of the more modern subtarget features // for Haswell class CPUs, but not all of them. Opt-out of a few. Features.push_back("-rdrnd"); Features.push_back("-aes"); Features.push_back("-pclmul"); Features.push_back("-rtm"); Features.push_back("-hle"); Features.push_back("-fsgsbase"); } const llvm::Triple::ArchType ArchType = Triple.getArch(); // Add features to be compatible with gcc for Android. if (Triple.isAndroid()) { if (ArchType == llvm::Triple::x86_64) { Features.push_back("+sse4.2"); Features.push_back("+popcnt"); } else Features.push_back("+ssse3"); } // Set features according to the -arch flag on MSVC. if (Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) { StringRef Arch = A->getValue(); bool ArchUsed = false; // First, look for flags that are shared in x86 and x86-64. 
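    // (e.g. the MSVC-style "/arch:AVX2" should yield the "+avx2" feature
    // on both x86 and x86-64, while "/arch:SSE2" is x86-only and
    // "/arch:IA32" adds no extra feature.)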
if (ArchType == llvm::Triple::x86_64 || ArchType == llvm::Triple::x86) { if (Arch == "AVX" || Arch == "AVX2") { ArchUsed = true; Features.push_back(Args.MakeArgString("+" + Arch.lower())); } } // Then, look for x86-specific flags. if (ArchType == llvm::Triple::x86) { if (Arch == "IA32") { ArchUsed = true; } else if (Arch == "SSE" || Arch == "SSE2") { ArchUsed = true; Features.push_back(Args.MakeArgString("+" + Arch.lower())); } } if (!ArchUsed) D.Diag(clang::diag::warn_drv_unused_argument) << A->getAsString(Args); } // Now add any that the user explicitly requested on the command line, // which may override the defaults. handleTargetFeaturesGroup(Args, Features, options::OPT_m_x86_Features_Group); } void Clang::AddX86TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) || Args.hasArg(options::OPT_mkernel) || Args.hasArg(options::OPT_fapple_kext)) CmdArgs.push_back("-disable-red-zone"); // Default to avoid implicit floating-point for kernel/kext code, but allow // that to be overridden with -mno-soft-float. bool NoImplicitFloat = (Args.hasArg(options::OPT_mkernel) || Args.hasArg(options::OPT_fapple_kext)); if (Arg *A = Args.getLastArg( options::OPT_msoft_float, options::OPT_mno_soft_float, options::OPT_mimplicit_float, options::OPT_mno_implicit_float)) { const Option &O = A->getOption(); NoImplicitFloat = (O.matches(options::OPT_mno_implicit_float) || O.matches(options::OPT_msoft_float)); } if (NoImplicitFloat) CmdArgs.push_back("-no-implicit-float"); if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) { StringRef Value = A->getValue(); if (Value == "intel" || Value == "att") { CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value)); } else { getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } // Set flags to support MCU ABI. if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) { CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); CmdArgs.push_back("-mstack-alignment=4"); } } void Clang::AddHexagonTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { CmdArgs.push_back("-mqdsp6-compat"); CmdArgs.push_back("-Wreturn-type"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { std::string N = llvm::utostr(G.getValue()); std::string Opt = std::string("-hexagon-small-data-threshold=") + N; CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString(Opt)); } if (!Args.hasArg(options::OPT_fno_short_enums)) CmdArgs.push_back("-fshort-enums"); if (Args.getLastArg(options::OPT_mieee_rnd_near)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-enable-hexagon-ieee-rnd-near"); } CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-machine-sink-split=0"); } void Clang::AddLanaiTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { StringRef CPUName = A->getValue(); CmdArgs.push_back("-target-cpu"); CmdArgs.push_back(Args.MakeArgString(CPUName)); } if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) { StringRef Value = A->getValue(); // Only support mregparm=4 to support old usage. Report error for all other // cases. 
int Mregparm; if (Value.getAsInteger(10, Mregparm)) { if (Mregparm != 4) { getToolChain().getDriver().Diag( diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } } } void Clang::AddWebAssemblyTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { // Default to "hidden" visibility. if (!Args.hasArg(options::OPT_fvisibility_EQ, options::OPT_fvisibility_ms_compat)) { CmdArgs.push_back("-fvisibility"); CmdArgs.push_back("hidden"); } } // Decode AArch64 features from string like +[no]featureA+[no]featureB+... static bool DecodeAArch64Features(const Driver &D, StringRef text, std::vector &Features) { SmallVector Split; text.split(Split, StringRef("+"), -1, false); for (StringRef Feature : Split) { const char *result = llvm::StringSwitch(Feature) .Case("fp", "+fp-armv8") .Case("simd", "+neon") .Case("crc", "+crc") .Case("crypto", "+crypto") .Case("fp16", "+fullfp16") .Case("profile", "+spe") .Case("ras", "+ras") .Case("nofp", "-fp-armv8") .Case("nosimd", "-neon") .Case("nocrc", "-crc") .Case("nocrypto", "-crypto") .Case("nofp16", "-fullfp16") .Case("noprofile", "-spe") .Case("noras", "-ras") .Default(nullptr); if (result) Features.push_back(result); else if (Feature == "neon" || Feature == "noneon") D.Diag(diag::err_drv_no_neon_modifier); else return false; } return true; } // Check if the CPU name and feature modifiers in -mcpu are legal. If yes, // decode CPU and feature. static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU, std::vector &Features) { std::pair Split = Mcpu.split("+"); CPU = Split.first; if (CPU == "cortex-a53" || CPU == "cortex-a57" || CPU == "cortex-a72" || CPU == "cortex-a35" || CPU == "exynos-m1" || CPU == "kryo" || CPU == "cortex-a73" || CPU == "vulcan") { Features.push_back("+neon"); Features.push_back("+crc"); Features.push_back("+crypto"); } else if (CPU == "cyclone") { Features.push_back("+neon"); Features.push_back("+crypto"); } else if (CPU == "generic") { Features.push_back("+neon"); } else { return false; } if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features)) return false; return true; } static bool getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March, const ArgList &Args, std::vector &Features) { std::string MarchLowerCase = March.lower(); std::pair Split = StringRef(MarchLowerCase).split("+"); if (Split.first == "armv8-a" || Split.first == "armv8a") { // ok, no additional features. } else if (Split.first == "armv8.1-a" || Split.first == "armv8.1a") { Features.push_back("+v8.1a"); } else if (Split.first == "armv8.2-a" || Split.first == "armv8.2a" ) { Features.push_back("+v8.2a"); } else { return false; } if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features)) return false; return true; } static bool getAArch64ArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu, const ArgList &Args, std::vector &Features) { StringRef CPU; std::string McpuLowerCase = Mcpu.lower(); if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, Features)) return false; return true; } static bool getAArch64MicroArchFeaturesFromMtune(const Driver &D, StringRef Mtune, const ArgList &Args, std::vector &Features) { std::string MtuneLowerCase = Mtune.lower(); // Handle CPU name is 'native'. 
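  // (e.g. -mtune=native on a host that reports "cyclone" picks up the
  // +zcm/+zcz zero-cycle move and zeroing features below; illustrative
  // only, assuming that host CPU name.)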
if (MtuneLowerCase == "native") MtuneLowerCase = llvm::sys::getHostCPUName(); if (MtuneLowerCase == "cyclone") { Features.push_back("+zcm"); Features.push_back("+zcz"); } return true; } static bool getAArch64MicroArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu, const ArgList &Args, std::vector &Features) { StringRef CPU; std::vector DecodedFeature; std::string McpuLowerCase = Mcpu.lower(); if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, DecodedFeature)) return false; return getAArch64MicroArchFeaturesFromMtune(D, CPU, Args, Features); } static void getAArch64TargetFeatures(const Driver &D, const ArgList &Args, std::vector &Features) { Arg *A; bool success = true; // Enable NEON by default. Features.push_back("+neon"); if ((A = Args.getLastArg(options::OPT_march_EQ))) success = getAArch64ArchFeaturesFromMarch(D, A->getValue(), Args, Features); else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) success = getAArch64ArchFeaturesFromMcpu(D, A->getValue(), Args, Features); else if (Args.hasArg(options::OPT_arch)) success = getAArch64ArchFeaturesFromMcpu(D, getAArch64TargetCPU(Args), Args, Features); if (success && (A = Args.getLastArg(options::OPT_mtune_EQ))) success = getAArch64MicroArchFeaturesFromMtune(D, A->getValue(), Args, Features); else if (success && (A = Args.getLastArg(options::OPT_mcpu_EQ))) success = getAArch64MicroArchFeaturesFromMcpu(D, A->getValue(), Args, Features); else if (Args.hasArg(options::OPT_arch)) success = getAArch64MicroArchFeaturesFromMcpu(D, getAArch64TargetCPU(Args), Args, Features); if (!success) D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); if (Args.getLastArg(options::OPT_mgeneral_regs_only)) { Features.push_back("-fp-armv8"); Features.push_back("-crypto"); Features.push_back("-neon"); } // En/disable crc if (Arg *A = Args.getLastArg(options::OPT_mcrc, options::OPT_mnocrc)) { if (A->getOption().matches(options::OPT_mcrc)) Features.push_back("+crc"); else Features.push_back("-crc"); } if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, options::OPT_munaligned_access)) if (A->getOption().matches(options::OPT_mno_unaligned_access)) Features.push_back("+strict-align"); if (Args.hasArg(options::OPT_ffixed_x18)) Features.push_back("+reserve-x18"); } static void getHexagonTargetFeatures(const ArgList &Args, std::vector &Features) { bool HasHVX = false, HasHVXD = false; // FIXME: This should be able to use handleTargetFeaturesGroup except it is // doing dependent option handling here rather than in initFeatureMap or a // similar handler. for (auto &A : Args) { auto &Opt = A->getOption(); if (Opt.matches(options::OPT_mhexagon_hvx)) HasHVX = true; else if (Opt.matches(options::OPT_mno_hexagon_hvx)) HasHVXD = HasHVX = false; else if (Opt.matches(options::OPT_mhexagon_hvx_double)) HasHVXD = HasHVX = true; else if (Opt.matches(options::OPT_mno_hexagon_hvx_double)) HasHVXD = false; else continue; A->claim(); } Features.push_back(HasHVX ? "+hvx" : "-hvx"); Features.push_back(HasHVXD ? 
"+hvx-double" : "-hvx-double"); } static void getWebAssemblyTargetFeatures(const ArgList &Args, std::vector &Features) { handleTargetFeaturesGroup(Args, Features, options::OPT_m_wasm_Features_Group); } static void getAMDGPUTargetFeatures(const Driver &D, const ArgList &Args, std::vector &Features) { if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) { StringRef value = dAbi->getValue(); if (value == "1.0") { Features.push_back("+amdgpu-debugger-insert-nops"); Features.push_back("+amdgpu-debugger-reserve-regs"); Features.push_back("+amdgpu-debugger-emit-prologue"); } else { D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args); } } handleTargetFeaturesGroup( Args, Features, options::OPT_m_amdgpu_Features_Group); } static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple, const ArgList &Args, ArgStringList &CmdArgs, bool ForAS) { const Driver &D = TC.getDriver(); std::vector Features; switch (Triple.getArch()) { default: break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: getMIPSTargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: getARMTargetFeatures(TC, Triple, Args, Features, ForAS); break; case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: getPPCTargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::systemz: getSystemZTargetFeatures(Args, Features); break; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: getAArch64TargetFeatures(D, Args, Features); break; case llvm::Triple::x86: case llvm::Triple::x86_64: getX86TargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::hexagon: getHexagonTargetFeatures(Args, Features); break; case llvm::Triple::wasm32: case llvm::Triple::wasm64: getWebAssemblyTargetFeatures(Args, Features); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: getSparcTargetFeatures(D, Args, Features); break; case llvm::Triple::r600: case llvm::Triple::amdgcn: getAMDGPUTargetFeatures(D, Args, Features); break; } // Find the last of each feature. llvm::StringMap LastOpt; for (unsigned I = 0, N = Features.size(); I < N; ++I) { const char *Name = Features[I]; assert(Name[0] == '-' || Name[0] == '+'); LastOpt[Name + 1] = I; } for (unsigned I = 0, N = Features.size(); I < N; ++I) { // If this feature was overridden, ignore it. const char *Name = Features[I]; llvm::StringMap::iterator LastI = LastOpt.find(Name + 1); assert(LastI != LastOpt.end()); unsigned Last = LastI->second; if (Last != I) continue; CmdArgs.push_back("-target-feature"); CmdArgs.push_back(Name); } } static bool shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime, const llvm::Triple &Triple) { // We use the zero-cost exception tables for Objective-C if the non-fragile // ABI is enabled or when compiling for x86_64 and ARM on Snow Leopard and // later. if (runtime.isNonFragile()) return true; if (!Triple.isMacOSX()) return false; return (!Triple.isMacOSXVersionLT(10, 5) && (Triple.getArch() == llvm::Triple::x86_64 || Triple.getArch() == llvm::Triple::arm)); } /// Adds exception related arguments to the driver command arguments. There's a /// master flag, -fexceptions and also language specific flags to enable/disable /// C++ and Objective-C exceptions. This makes it possible to for example /// disable C++ exceptions but enable Objective-C exceptions. 
static void addExceptionArgs(const ArgList &Args, types::ID InputType, const ToolChain &TC, bool KernelOrKext, const ObjCRuntime &objcRuntime, ArgStringList &CmdArgs) { const Driver &D = TC.getDriver(); const llvm::Triple &Triple = TC.getTriple(); if (KernelOrKext) { // -mkernel and -fapple-kext imply no exceptions, so claim exception related // arguments now to avoid warnings about unused arguments. Args.ClaimAllArgs(options::OPT_fexceptions); Args.ClaimAllArgs(options::OPT_fno_exceptions); Args.ClaimAllArgs(options::OPT_fobjc_exceptions); Args.ClaimAllArgs(options::OPT_fno_objc_exceptions); Args.ClaimAllArgs(options::OPT_fcxx_exceptions); Args.ClaimAllArgs(options::OPT_fno_cxx_exceptions); return; } // See if the user explicitly enabled exceptions. bool EH = Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, false); // Obj-C exceptions are enabled by default, regardless of -fexceptions. This // is not necessarily sensible, but follows GCC. if (types::isObjC(InputType) && Args.hasFlag(options::OPT_fobjc_exceptions, options::OPT_fno_objc_exceptions, true)) { CmdArgs.push_back("-fobjc-exceptions"); EH |= shouldUseExceptionTablesForObjCExceptions(objcRuntime, Triple); } if (types::isCXX(InputType)) { // Disable C++ EH by default on XCore and PS4. bool CXXExceptionsEnabled = Triple.getArch() != llvm::Triple::xcore && !Triple.isPS4CPU(); Arg *ExceptionArg = Args.getLastArg( options::OPT_fcxx_exceptions, options::OPT_fno_cxx_exceptions, options::OPT_fexceptions, options::OPT_fno_exceptions); if (ExceptionArg) CXXExceptionsEnabled = ExceptionArg->getOption().matches(options::OPT_fcxx_exceptions) || ExceptionArg->getOption().matches(options::OPT_fexceptions); if (CXXExceptionsEnabled) { if (Triple.isPS4CPU()) { ToolChain::RTTIMode RTTIMode = TC.getRTTIMode(); assert(ExceptionArg && "On the PS4 exceptions should only be enabled if passing " "an argument"); if (RTTIMode == ToolChain::RM_DisabledExplicitly) { const Arg *RTTIArg = TC.getRTTIArg(); assert(RTTIArg && "RTTI disabled explicitly but no RTTIArg!"); D.Diag(diag::err_drv_argument_not_allowed_with) << RTTIArg->getAsString(Args) << ExceptionArg->getAsString(Args); } else if (RTTIMode == ToolChain::RM_EnabledImplicitly) D.Diag(diag::warn_drv_enabling_rtti_with_exceptions); } else assert(TC.getRTTIMode() != ToolChain::RM_DisabledImplicitly); CmdArgs.push_back("-fcxx-exceptions"); EH = true; } } if (EH) CmdArgs.push_back("-fexceptions"); } static bool ShouldDisableAutolink(const ArgList &Args, const ToolChain &TC) { bool Default = true; if (TC.getTriple().isOSDarwin()) { // The native darwin assembler doesn't support the linker_option directives, // so we disable them if we think the .s file will be passed to it. Default = TC.useIntegratedAs(); } return !Args.hasFlag(options::OPT_fautolink, options::OPT_fno_autolink, Default); } static bool ShouldDisableDwarfDirectory(const ArgList &Args, const ToolChain &TC) { bool UseDwarfDirectory = Args.hasFlag(options::OPT_fdwarf_directory_asm, options::OPT_fno_dwarf_directory_asm, TC.useIntegratedAs()); return !UseDwarfDirectory; } /// \brief Check whether the given input tree contains any compilation actions. static bool ContainsCompileAction(const Action *A) { if (isa(A) || isa(A)) return true; for (const auto &AI : A->inputs()) if (ContainsCompileAction(AI)) return true; return false; } /// \brief Check if -relax-all should be passed to the internal assembler. /// This is done by default when compiling non-assembler source with -O0. 
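/// (e.g. "clang -O0 -c foo.c" should pass -mrelax-all to cc1, while an
/// -O2 compile, or assembling a plain .s file with no compile action,
/// should not unless -mrelax-all is given explicitly.)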
static bool UseRelaxAll(Compilation &C, const ArgList &Args) { bool RelaxDefault = true; if (Arg *A = Args.getLastArg(options::OPT_O_Group)) RelaxDefault = A->getOption().matches(options::OPT_O0); if (RelaxDefault) { RelaxDefault = false; for (const auto &Act : C.getActions()) { if (ContainsCompileAction(Act)) { RelaxDefault = true; break; } } } return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all, RelaxDefault); } // Convert an arg of the form "-gN" or "-ggdbN" or one of their aliases // to the corresponding DebugInfoKind. static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) { assert(A.getOption().matches(options::OPT_gN_Group) && "Not a -g option that specifies a debug-info level"); if (A.getOption().matches(options::OPT_g0) || A.getOption().matches(options::OPT_ggdb0)) return codegenoptions::NoDebugInfo; if (A.getOption().matches(options::OPT_gline_tables_only) || A.getOption().matches(options::OPT_ggdb1)) return codegenoptions::DebugLineTablesOnly; return codegenoptions::LimitedDebugInfo; } // Extract the integer N from a string spelled "-dwarf-N", returning 0 // on mismatch. The StringRef input (rather than an Arg) allows // for use by the "-Xassembler" option parser. static unsigned DwarfVersionNum(StringRef ArgValue) { return llvm::StringSwitch(ArgValue) .Case("-gdwarf-2", 2) .Case("-gdwarf-3", 3) .Case("-gdwarf-4", 4) .Case("-gdwarf-5", 5) .Default(0); } static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs, codegenoptions::DebugInfoKind DebugInfoKind, unsigned DwarfVersion, llvm::DebuggerKind DebuggerTuning) { switch (DebugInfoKind) { case codegenoptions::DebugLineTablesOnly: CmdArgs.push_back("-debug-info-kind=line-tables-only"); break; case codegenoptions::LimitedDebugInfo: CmdArgs.push_back("-debug-info-kind=limited"); break; case codegenoptions::FullDebugInfo: CmdArgs.push_back("-debug-info-kind=standalone"); break; default: break; } if (DwarfVersion > 0) CmdArgs.push_back( Args.MakeArgString("-dwarf-version=" + Twine(DwarfVersion))); switch (DebuggerTuning) { case llvm::DebuggerKind::GDB: CmdArgs.push_back("-debugger-tuning=gdb"); break; case llvm::DebuggerKind::LLDB: CmdArgs.push_back("-debugger-tuning=lldb"); break; case llvm::DebuggerKind::SCE: CmdArgs.push_back("-debugger-tuning=sce"); break; default: break; } } static void CollectArgsForIntegratedAssembler(Compilation &C, const ArgList &Args, ArgStringList &CmdArgs, const Driver &D) { if (UseRelaxAll(C, Args)) CmdArgs.push_back("-mrelax-all"); // Only default to -mincremental-linker-compatible if we think we are // targeting the MSVC linker. bool DefaultIncrementalLinkerCompatible = C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); if (Args.hasFlag(options::OPT_mincremental_linker_compatible, options::OPT_mno_incremental_linker_compatible, DefaultIncrementalLinkerCompatible)) CmdArgs.push_back("-mincremental-linker-compatible"); // When passing -I arguments to the assembler we sometimes need to // unconditionally take the next argument. For example, when parsing // '-Wa,-I -Wa,foo' we need to accept the -Wa,foo arg after seeing the // -Wa,-I arg and when parsing '-Wa,-I,foo' we need to accept the 'foo' // arg after parsing the '-I' arg. bool TakeNextArg = false; // When using an integrated assembler, translate -Wa, and -Xassembler // options. 
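  // For illustration (hypothetical values): "-Wa,--noexecstack" becomes
  // cc1as's -mnoexecstack, "-Wa,-mips32r2" becomes the "+mips32r2" target
  // feature on MIPS targets, and "-Wa,-gdwarf-3" is routed through
  // RenderDebugEnablingArgs instead of being passed through verbatim.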
bool CompressDebugSections = false; bool UseRelaxRelocations = ENABLE_X86_RELAX_RELOCATIONS; const char *MipsTargetFeature = nullptr; for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { A->claim(); for (StringRef Value : A->getValues()) { if (TakeNextArg) { CmdArgs.push_back(Value.data()); TakeNextArg = false; continue; } switch (C.getDefaultToolChain().getArch()) { default: break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: if (Value == "--trap") { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("+use-tcc-in-div"); continue; } if (Value == "--break") { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("-use-tcc-in-div"); continue; } if (Value.startswith("-msoft-float")) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("+soft-float"); continue; } if (Value.startswith("-mhard-float")) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("-soft-float"); continue; } MipsTargetFeature = llvm::StringSwitch(Value) .Case("-mips1", "+mips1") .Case("-mips2", "+mips2") .Case("-mips3", "+mips3") .Case("-mips4", "+mips4") .Case("-mips5", "+mips5") .Case("-mips32", "+mips32") .Case("-mips32r2", "+mips32r2") .Case("-mips32r3", "+mips32r3") .Case("-mips32r5", "+mips32r5") .Case("-mips32r6", "+mips32r6") .Case("-mips64", "+mips64") .Case("-mips64r2", "+mips64r2") .Case("-mips64r3", "+mips64r3") .Case("-mips64r5", "+mips64r5") .Case("-mips64r6", "+mips64r6") .Default(nullptr); if (MipsTargetFeature) continue; } if (Value == "-force_cpusubtype_ALL") { // Do nothing, this is the default and we don't support anything else. } else if (Value == "-L") { CmdArgs.push_back("-msave-temp-labels"); } else if (Value == "--fatal-warnings") { CmdArgs.push_back("-massembler-fatal-warnings"); } else if (Value == "--noexecstack") { CmdArgs.push_back("-mnoexecstack"); } else if (Value == "-compress-debug-sections" || Value == "--compress-debug-sections") { CompressDebugSections = true; } else if (Value == "-nocompress-debug-sections" || Value == "--nocompress-debug-sections") { CompressDebugSections = false; } else if (Value == "-mrelax-relocations=yes" || Value == "--mrelax-relocations=yes") { UseRelaxRelocations = true; } else if (Value == "-mrelax-relocations=no" || Value == "--mrelax-relocations=no") { UseRelaxRelocations = false; } else if (Value.startswith("-I")) { CmdArgs.push_back(Value.data()); // We need to consume the next argument if the current arg is a plain // -I. The next arg will be the include directory. if (Value == "-I") TakeNextArg = true; } else if (Value.startswith("-gdwarf-")) { // "-gdwarf-N" options are not cc1as options. unsigned DwarfVersion = DwarfVersionNum(Value); if (DwarfVersion == 0) { // Send it onward, and let cc1as complain. CmdArgs.push_back(Value.data()); } else { RenderDebugEnablingArgs(Args, CmdArgs, codegenoptions::LimitedDebugInfo, DwarfVersion, llvm::DebuggerKind::Default); } } else if (Value.startswith("-mcpu") || Value.startswith("-mfpu") || Value.startswith("-mhwdiv") || Value.startswith("-march")) { // Do nothing, we'll validate it later. 
} else { D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } } if (CompressDebugSections) { if (llvm::zlib::isAvailable()) CmdArgs.push_back("-compress-debug-sections"); else D.Diag(diag::warn_debug_compression_unavailable); } if (UseRelaxRelocations) CmdArgs.push_back("--mrelax-relocations"); if (MipsTargetFeature != nullptr) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back(MipsTargetFeature); } } // This adds the static libclang_rt.builtins-arch.a directly to the command line // FIXME: Make sure we can also emit shared objects if they're requested // and available, check for possible errors, etc. static void addClangRT(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins")); } namespace { enum OpenMPRuntimeKind { /// An unknown OpenMP runtime. We can't generate effective OpenMP code /// without knowing what runtime to target. OMPRT_Unknown, /// The LLVM OpenMP runtime. When completed and integrated, this will become /// the default for Clang. OMPRT_OMP, /// The GNU OpenMP runtime. Clang doesn't support generating OpenMP code for /// this runtime but can swallow the pragmas, and find and link against the /// runtime library itself. OMPRT_GOMP, /// The legacy name for the LLVM OpenMP runtime from when it was the Intel /// OpenMP runtime. We support this mode for users with existing dependencies /// on this runtime library name. OMPRT_IOMP5 }; } /// Compute the desired OpenMP runtime from the flag provided. static OpenMPRuntimeKind getOpenMPRuntime(const ToolChain &TC, const ArgList &Args) { StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME); const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ); if (A) RuntimeName = A->getValue(); auto RT = llvm::StringSwitch(RuntimeName) .Case("libomp", OMPRT_OMP) .Case("libgomp", OMPRT_GOMP) .Case("libiomp5", OMPRT_IOMP5) .Default(OMPRT_Unknown); if (RT == OMPRT_Unknown) { if (A) TC.getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << A->getValue(); else // FIXME: We could use a nicer diagnostic here. TC.getDriver().Diag(diag::err_drv_unsupported_opt) << "-fopenmp"; } return RT; } static void addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, const ArgList &Args) { if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false)) return; switch (getOpenMPRuntime(TC, Args)) { case OMPRT_OMP: CmdArgs.push_back("-lomp"); break; case OMPRT_GOMP: CmdArgs.push_back("-lgomp"); break; case OMPRT_IOMP5: CmdArgs.push_back("-liomp5"); break; case OMPRT_Unknown: // Already diagnosed. break; } } static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs, StringRef Sanitizer, bool IsShared, bool IsWhole) { // Wrap any static runtimes that must be forced into executable in // whole-archive. if (IsWhole) CmdArgs.push_back("-whole-archive"); CmdArgs.push_back(TC.getCompilerRTArgString(Args, Sanitizer, IsShared)); if (IsWhole) CmdArgs.push_back("-no-whole-archive"); } // Tries to use a file with the list of dynamic symbols that need to be exported // from the runtime library. Returns true if the file was found. 
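// (e.g. when libclang_rt.asan-x86_64.a is accompanied by a
// libclang_rt.asan-x86_64.a.syms file, the linker is handed
// "--dynamic-list=<that path>"; otherwise the caller falls back to
// -export-dynamic. Illustrative file names, assuming an x86_64 target.)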
static bool addSanitizerDynamicList(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs, StringRef Sanitizer) { SmallString<128> SanRT(TC.getCompilerRT(Args, Sanitizer)); if (llvm::sys::fs::exists(SanRT + ".syms")) { CmdArgs.push_back(Args.MakeArgString("--dynamic-list=" + SanRT + ".syms")); return true; } return false; } static void linkSanitizerRuntimeDeps(const ToolChain &TC, ArgStringList &CmdArgs) { // Force linking against the system libraries sanitizers depends on // (see PR15823 why this is necessary). CmdArgs.push_back("--no-as-needed"); CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lrt"); CmdArgs.push_back("-lm"); // There's no libdl on FreeBSD. if (TC.getTriple().getOS() != llvm::Triple::FreeBSD) CmdArgs.push_back("-ldl"); } static void collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, SmallVectorImpl &SharedRuntimes, SmallVectorImpl &StaticRuntimes, SmallVectorImpl &NonWholeStaticRuntimes, SmallVectorImpl &HelperStaticRuntimes, SmallVectorImpl &RequiredSymbols) { const SanitizerArgs &SanArgs = TC.getSanitizerArgs(); // Collect shared runtimes. if (SanArgs.needsAsanRt() && SanArgs.needsSharedAsanRt()) { SharedRuntimes.push_back("asan"); } // The stats_client library is also statically linked into DSOs. if (SanArgs.needsStatsRt()) StaticRuntimes.push_back("stats_client"); // Collect static runtimes. if (Args.hasArg(options::OPT_shared) || TC.getTriple().isAndroid()) { // Don't link static runtimes into DSOs or if compiling for Android. return; } if (SanArgs.needsAsanRt()) { if (SanArgs.needsSharedAsanRt()) { HelperStaticRuntimes.push_back("asan-preinit"); } else { StaticRuntimes.push_back("asan"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("asan_cxx"); } } if (SanArgs.needsDfsanRt()) StaticRuntimes.push_back("dfsan"); if (SanArgs.needsLsanRt()) StaticRuntimes.push_back("lsan"); if (SanArgs.needsMsanRt()) { StaticRuntimes.push_back("msan"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("msan_cxx"); } if (SanArgs.needsTsanRt()) { StaticRuntimes.push_back("tsan"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("tsan_cxx"); } if (SanArgs.needsUbsanRt()) { StaticRuntimes.push_back("ubsan_standalone"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("ubsan_standalone_cxx"); } if (SanArgs.needsSafeStackRt()) StaticRuntimes.push_back("safestack"); if (SanArgs.needsCfiRt()) StaticRuntimes.push_back("cfi"); if (SanArgs.needsCfiDiagRt()) { StaticRuntimes.push_back("cfi_diag"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("ubsan_standalone_cxx"); } if (SanArgs.needsStatsRt()) { NonWholeStaticRuntimes.push_back("stats"); RequiredSymbols.push_back("__sanitizer_stats_register"); } if (SanArgs.needsEsanRt()) StaticRuntimes.push_back("esan"); } // Should be called before we add system libraries (C++ ABI, libstdc++/libc++, // C runtime, etc). Returns true if sanitizer system deps need to be linked in. 
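// For illustration (assuming a static link of -fsanitize=address on an
// ELF target): the asan runtime is wrapped in -whole-archive /
// -no-whole-archive so its initializers are kept, and the true return
// value tells the caller to also run linkSanitizerRuntimeDeps for
// -lpthread, -lrt, -lm and (outside FreeBSD) -ldl.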
static bool addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { SmallVector SharedRuntimes, StaticRuntimes, NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols; collectSanitizerRuntimes(TC, Args, SharedRuntimes, StaticRuntimes, NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols); for (auto RT : SharedRuntimes) addSanitizerRuntime(TC, Args, CmdArgs, RT, true, false); for (auto RT : HelperStaticRuntimes) addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true); bool AddExportDynamic = false; for (auto RT : StaticRuntimes) { addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true); AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT); } for (auto RT : NonWholeStaticRuntimes) { addSanitizerRuntime(TC, Args, CmdArgs, RT, false, false); AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT); } for (auto S : RequiredSymbols) { CmdArgs.push_back("-u"); CmdArgs.push_back(Args.MakeArgString(S)); } // If there is a static runtime with no dynamic list, force all the symbols // to be dynamic to be sure we export sanitizer interface functions. if (AddExportDynamic) CmdArgs.push_back("-export-dynamic"); return !StaticRuntimes.empty(); } static bool addXRayRuntime(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { if (Args.hasFlag(options::OPT_fxray_instrument, options::OPT_fnoxray_instrument, false)) { CmdArgs.push_back("-whole-archive"); CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray", false)); CmdArgs.push_back("-no-whole-archive"); return true; } return false; } static void linkXRayRuntimeDeps(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { CmdArgs.push_back("--no-as-needed"); CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lrt"); CmdArgs.push_back("-lm"); CmdArgs.push_back("-latomic"); if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) CmdArgs.push_back("-lc++"); else CmdArgs.push_back("-lstdc++"); if (TC.getTriple().getOS() != llvm::Triple::FreeBSD) CmdArgs.push_back("-ldl"); } static bool areOptimizationsEnabled(const ArgList &Args) { // Find the last -O arg and see if it is non-zero. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) return !A->getOption().matches(options::OPT_O0); // Defaults to -O0. return false; } static bool shouldUseFramePointerForTarget(const ArgList &Args, const llvm::Triple &Triple) { switch (Triple.getArch()) { case llvm::Triple::xcore: case llvm::Triple::wasm32: case llvm::Triple::wasm64: // XCore never wants frame pointers, regardless of OS. // WebAssembly never wants frame pointers. return false; default: break; } if (Triple.isOSLinux() || Triple.getOS() == llvm::Triple::CloudABI) { switch (Triple.getArch()) { // Don't use a frame pointer on linux if optimizing for certain targets. case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::systemz: case llvm::Triple::x86: case llvm::Triple::x86_64: return !areOptimizationsEnabled(Args); default: return true; } } if (Triple.isOSWindows()) { switch (Triple.getArch()) { case llvm::Triple::x86: return !areOptimizationsEnabled(Args); case llvm::Triple::x86_64: return Triple.isOSBinFormatMachO(); case llvm::Triple::arm: case llvm::Triple::thumb: // Windows on ARM builds with FPO disabled to aid fast stack walking return true; default: // All other supported Windows ISAs use xdata unwind information, so frame // pointers are not generally useful. 
return false; } } return true; } static bool shouldUseFramePointer(const ArgList &Args, const llvm::Triple &Triple) { if (Arg *A = Args.getLastArg(options::OPT_fno_omit_frame_pointer, options::OPT_fomit_frame_pointer)) return A->getOption().matches(options::OPT_fno_omit_frame_pointer); if (Args.hasArg(options::OPT_pg)) return true; return shouldUseFramePointerForTarget(Args, Triple); } static bool shouldUseLeafFramePointer(const ArgList &Args, const llvm::Triple &Triple) { if (Arg *A = Args.getLastArg(options::OPT_mno_omit_leaf_frame_pointer, options::OPT_momit_leaf_frame_pointer)) return A->getOption().matches(options::OPT_mno_omit_leaf_frame_pointer); if (Args.hasArg(options::OPT_pg)) return true; if (Triple.isPS4CPU()) return false; return shouldUseFramePointerForTarget(Args, Triple); } /// Add a CC1 option to specify the debug compilation directory. static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs) { SmallString<128> cwd; if (!llvm::sys::fs::current_path(cwd)) { CmdArgs.push_back("-fdebug-compilation-dir"); CmdArgs.push_back(Args.MakeArgString(cwd)); } } static const char *SplitDebugName(const ArgList &Args, const InputInfo &Input) { Arg *FinalOutput = Args.getLastArg(options::OPT_o); if (FinalOutput && Args.hasArg(options::OPT_c)) { SmallString<128> T(FinalOutput->getValue()); llvm::sys::path::replace_extension(T, "dwo"); return Args.MakeArgString(T); } else { // Use the compilation dir. SmallString<128> T( Args.getLastArgValue(options::OPT_fdebug_compilation_dir)); SmallString<128> F(llvm::sys::path::stem(Input.getBaseInput())); llvm::sys::path::replace_extension(F, "dwo"); T += F; return Args.MakeArgString(F); } } static void SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T, const JobAction &JA, const ArgList &Args, const InputInfo &Output, const char *OutFile) { ArgStringList ExtractArgs; ExtractArgs.push_back("--extract-dwo"); ArgStringList StripArgs; StripArgs.push_back("--strip-dwo"); // Grabbing the output of the earlier compile step. StripArgs.push_back(Output.getFilename()); ExtractArgs.push_back(Output.getFilename()); ExtractArgs.push_back(OutFile); const char *Exec = Args.MakeArgString(TC.GetProgramPath("objcopy")); InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename()); // First extract the dwo sections. C.addCommand(llvm::make_unique(JA, T, Exec, ExtractArgs, II)); // Then remove them from the original .o file. C.addCommand(llvm::make_unique(JA, T, Exec, StripArgs, II)); } /// \brief Vectorize at all optimization levels greater than 1 except for -Oz. /// For -Oz the loop vectorizer is disable, while the slp vectorizer is enabled. static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O4) || A->getOption().matches(options::OPT_Ofast)) return true; if (A->getOption().matches(options::OPT_O0)) return false; assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); // Vectorize -Os. StringRef S(A->getValue()); if (S == "s") return true; // Don't vectorize -Oz, unless it's the slp vectorizer. if (S == "z") return isSlpVec; unsigned OptLevel = 0; if (S.getAsInteger(10, OptLevel)) return false; return OptLevel > 1; } return false; } /// Add -x lang to \p CmdArgs for \p Input. 
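/// (e.g. preprocessed C input is announced as "-x cpp-output", and
/// -rewrite-objc forces "-x objective-c++-cpp-output"; under -verify-pch
/// a PCH input deliberately receives no -x at all. Type names assumed
/// from Types.def.)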
static void addDashXForInput(const ArgList &Args, const InputInfo &Input, ArgStringList &CmdArgs) { // When using -verify-pch, we don't want to provide the type // 'precompiled-header' if it was inferred from the file extension if (Args.hasArg(options::OPT_verify_pch) && Input.getType() == types::TY_PCH) return; CmdArgs.push_back("-x"); if (Args.hasArg(options::OPT_rewrite_objc)) CmdArgs.push_back(types::getTypeName(types::TY_PP_ObjCXX)); else CmdArgs.push_back(types::getTypeName(Input.getType())); } static VersionTuple getMSCompatibilityVersion(unsigned Version) { if (Version < 100) return VersionTuple(Version); if (Version < 10000) return VersionTuple(Version / 100, Version % 100); unsigned Build = 0, Factor = 1; for (; Version > 10000; Version = Version / 10, Factor = Factor * 10) Build = Build + (Version % 10) * Factor; return VersionTuple(Version / 100, Version % 100, Build); } // Claim options we don't want to warn if they are unused. We do this for // options that build systems might add but are unused when assembling or only // running the preprocessor for example. static void claimNoWarnArgs(const ArgList &Args) { // Don't warn about unused -f(no-)?lto. This can happen when we're // preprocessing, precompiling or assembling. Args.ClaimAllArgs(options::OPT_flto_EQ); Args.ClaimAllArgs(options::OPT_flto); Args.ClaimAllArgs(options::OPT_fno_lto); } static void appendUserToPath(SmallVectorImpl &Result) { #ifdef LLVM_ON_UNIX const char *Username = getenv("LOGNAME"); #else const char *Username = getenv("USERNAME"); #endif if (Username) { // Validate that LoginName can be used in a path, and get its length. size_t Len = 0; for (const char *P = Username; *P; ++P, ++Len) { if (!isAlphanumeric(*P) && *P != '_') { Username = nullptr; break; } } if (Username && Len > 0) { Result.append(Username, Username + Len); return; } } // Fallback to user id. #ifdef LLVM_ON_UNIX std::string UID = llvm::utostr(getuid()); #else // FIXME: Windows seems to have an 'SID' that might work. 
std::string UID = "9999"; #endif Result.append(UID.begin(), UID.end()); } VersionTuple visualstudio::getMSVCVersion(const Driver *D, const ToolChain &TC, const llvm::Triple &Triple, const llvm::opt::ArgList &Args, bool IsWindowsMSVC) { if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions, IsWindowsMSVC) || Args.hasArg(options::OPT_fmsc_version) || Args.hasArg(options::OPT_fms_compatibility_version)) { const Arg *MSCVersion = Args.getLastArg(options::OPT_fmsc_version); const Arg *MSCompatibilityVersion = Args.getLastArg(options::OPT_fms_compatibility_version); if (MSCVersion && MSCompatibilityVersion) { if (D) D->Diag(diag::err_drv_argument_not_allowed_with) << MSCVersion->getAsString(Args) << MSCompatibilityVersion->getAsString(Args); return VersionTuple(); } if (MSCompatibilityVersion) { VersionTuple MSVT; if (MSVT.tryParse(MSCompatibilityVersion->getValue()) && D) D->Diag(diag::err_drv_invalid_value) << MSCompatibilityVersion->getAsString(Args) << MSCompatibilityVersion->getValue(); return MSVT; } if (MSCVersion) { unsigned Version = 0; if (StringRef(MSCVersion->getValue()).getAsInteger(10, Version) && D) D->Diag(diag::err_drv_invalid_value) << MSCVersion->getAsString(Args) << MSCVersion->getValue(); return getMSCompatibilityVersion(Version); } unsigned Major, Minor, Micro; Triple.getEnvironmentVersion(Major, Minor, Micro); if (Major || Minor || Micro) return VersionTuple(Major, Minor, Micro); if (IsWindowsMSVC) { VersionTuple MSVT = TC.getMSVCVersionFromExe(); if (!MSVT.empty()) return MSVT; // FIXME: Consider bumping this to 19 (MSVC2015) soon. return VersionTuple(18); } } return VersionTuple(); } static void addPGOAndCoverageFlags(Compilation &C, const Driver &D, const InputInfo &Output, const ArgList &Args, ArgStringList &CmdArgs) { auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ, options::OPT_fno_profile_generate); if (PGOGenerateArg && PGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate)) PGOGenerateArg = nullptr; auto *ProfileGenerateArg = Args.getLastArg( options::OPT_fprofile_instr_generate, options::OPT_fprofile_instr_generate_EQ, options::OPT_fno_profile_instr_generate); if (ProfileGenerateArg && ProfileGenerateArg->getOption().matches( options::OPT_fno_profile_instr_generate)) ProfileGenerateArg = nullptr; if (PGOGenerateArg && ProfileGenerateArg) D.Diag(diag::err_drv_argument_not_allowed_with) << PGOGenerateArg->getSpelling() << ProfileGenerateArg->getSpelling(); auto *ProfileUseArg = Args.getLastArg( options::OPT_fprofile_instr_use, options::OPT_fprofile_instr_use_EQ, options::OPT_fprofile_use, options::OPT_fprofile_use_EQ, options::OPT_fno_profile_instr_use); if (ProfileUseArg && ProfileUseArg->getOption().matches(options::OPT_fno_profile_instr_use)) ProfileUseArg = nullptr; if (PGOGenerateArg && ProfileUseArg) D.Diag(diag::err_drv_argument_not_allowed_with) << ProfileUseArg->getSpelling() << PGOGenerateArg->getSpelling(); if (ProfileGenerateArg && ProfileUseArg) D.Diag(diag::err_drv_argument_not_allowed_with) << ProfileGenerateArg->getSpelling() << ProfileUseArg->getSpelling(); if (ProfileGenerateArg) { if (ProfileGenerateArg->getOption().matches( options::OPT_fprofile_instr_generate_EQ)) CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-instrument-path=") + ProfileGenerateArg->getValue())); // The default is to use Clang Instrumentation. 
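    // (e.g. -fprofile-instr-generate=/tmp/foo.profraw emits the
    // "-fprofile-instrument-path=/tmp/foo.profraw" argument above plus
    // "-fprofile-instrument=clang" below, whereas -fprofile-generate=<dir>
    // selects IR instrumentation and appends "default.profraw" to the
    // directory; hypothetical paths for illustration.)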
CmdArgs.push_back("-fprofile-instrument=clang"); } if (PGOGenerateArg) { CmdArgs.push_back("-fprofile-instrument=llvm"); if (PGOGenerateArg->getOption().matches( options::OPT_fprofile_generate_EQ)) { SmallString<128> Path(PGOGenerateArg->getValue()); llvm::sys::path::append(Path, "default.profraw"); CmdArgs.push_back( Args.MakeArgString(Twine("-fprofile-instrument-path=") + Path)); } } if (ProfileUseArg) { if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ)) CmdArgs.push_back(Args.MakeArgString( Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue())); else if ((ProfileUseArg->getOption().matches( options::OPT_fprofile_use_EQ) || ProfileUseArg->getOption().matches( options::OPT_fprofile_instr_use))) { SmallString<128> Path( ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue()); if (Path.empty() || llvm::sys::fs::is_directory(Path)) llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back( Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path)); } } if (Args.hasArg(options::OPT_ftest_coverage) || Args.hasArg(options::OPT_coverage)) CmdArgs.push_back("-femit-coverage-notes"); if (Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, false) || Args.hasArg(options::OPT_coverage)) CmdArgs.push_back("-femit-coverage-data"); if (Args.hasFlag(options::OPT_fcoverage_mapping, options::OPT_fno_coverage_mapping, false) && !ProfileGenerateArg) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fcoverage-mapping" << "-fprofile-instr-generate"; if (Args.hasFlag(options::OPT_fcoverage_mapping, options::OPT_fno_coverage_mapping, false)) CmdArgs.push_back("-fcoverage-mapping"); if (C.getArgs().hasArg(options::OPT_c) || C.getArgs().hasArg(options::OPT_S)) { if (Output.isFilename()) { CmdArgs.push_back("-coverage-file"); SmallString<128> CoverageFilename; if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) { CoverageFilename = FinalOutput->getValue(); } else { CoverageFilename = llvm::sys::path::filename(Output.getBaseInput()); } if (llvm::sys::path::is_relative(CoverageFilename)) { SmallString<128> Pwd; if (!llvm::sys::fs::current_path(Pwd)) { llvm::sys::path::append(Pwd, CoverageFilename); CoverageFilename.swap(Pwd); } } CmdArgs.push_back(Args.MakeArgString(CoverageFilename)); } } } static void addPS4ProfileRTArgs(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { if ((Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, false) || Args.hasFlag(options::OPT_fprofile_generate, options::OPT_fno_profile_instr_generate, false) || Args.hasFlag(options::OPT_fprofile_generate_EQ, options::OPT_fno_profile_instr_generate, false) || Args.hasFlag(options::OPT_fprofile_instr_generate, options::OPT_fno_profile_instr_generate, false) || Args.hasFlag(options::OPT_fprofile_instr_generate_EQ, options::OPT_fno_profile_instr_generate, false) || Args.hasArg(options::OPT_fcreate_profile) || Args.hasArg(options::OPT_coverage))) CmdArgs.push_back("--dependent-lib=libclang_rt.profile-x86_64.a"); } /// Parses the various -fpic/-fPIC/-fpie/-fPIE arguments. Then, /// smooshes them together with platform defaults, to decide whether /// this compile should be using PIC mode or not. Returns a tuple of /// (RelocationModel, PICLevel, IsPIE). static std::tuple ParsePICArgs(const ToolChain &ToolChain, const llvm::Triple &Triple, const ArgList &Args) { // FIXME: why does this code...and so much everywhere else, use both // ToolChain.getTriple() and Triple? 
bool PIE = ToolChain.isPIEDefault(); bool PIC = PIE || ToolChain.isPICDefault(); // The Darwin/MachO default to use PIC does not apply when using -static. if (ToolChain.getTriple().isOSBinFormatMachO() && Args.hasArg(options::OPT_static)) PIE = PIC = false; bool IsPICLevelTwo = PIC; bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); // Android-specific defaults for PIC/PIE if (ToolChain.getTriple().isAndroid()) { switch (ToolChain.getArch()) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: case llvm::Triple::aarch64: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: PIC = true; // "-fpic" break; case llvm::Triple::x86: case llvm::Triple::x86_64: PIC = true; // "-fPIC" IsPICLevelTwo = true; break; default: break; } } // OpenBSD-specific defaults for PIE if (ToolChain.getTriple().getOS() == llvm::Triple::OpenBSD) { switch (ToolChain.getArch()) { case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::sparcel: case llvm::Triple::x86: case llvm::Triple::x86_64: IsPICLevelTwo = false; // "-fpie" break; case llvm::Triple::ppc: case llvm::Triple::sparc: case llvm::Triple::sparcv9: IsPICLevelTwo = true; // "-fPIE" break; default: break; } } // The last argument relating to either PIC or PIE wins, and no // other argument is used. If the last argument is any flavor of the // '-fno-...' arguments, both PIC and PIE are disabled. Any PIE // option implicitly enables PIC at the same level. Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, options::OPT_fpic, options::OPT_fno_pic, options::OPT_fPIE, options::OPT_fno_PIE, options::OPT_fpie, options::OPT_fno_pie); // Check whether the tool chain trumps the PIC-ness decision. If the PIC-ness // is forced, then neither PIC nor PIE flags will have any effect. if (!ToolChain.isPICDefaultForced()) { if (LastPICArg) { Option O = LastPICArg->getOption(); if (O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic) || O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) { PIE = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie); PIC = PIE || O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic); IsPICLevelTwo = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fPIC); } else { PIE = PIC = false; if (Triple.isPS4CPU()) { Arg *ModelArg = Args.getLastArg(options::OPT_mcmodel_EQ); StringRef Model = ModelArg ? ModelArg->getValue() : ""; if (Model != "kernel") { PIC = true; ToolChain.getDriver().Diag(diag::warn_drv_ps4_force_pic) << LastPICArg->getSpelling(); } } } } } // Introduce a Darwin and PS4-specific hack. If the default is PIC, but the // PIC level would've been set to level 1, force it back to level 2 PIC // instead. if (PIC && (ToolChain.getTriple().isOSDarwin() || Triple.isPS4CPU())) IsPICLevelTwo |= ToolChain.isPICDefault(); // These kernel flags are a trump card: they will disable PIC/PIE // generation, independent of the argument order. if (KernelOrKext && ((!Triple.isiOS() || Triple.isOSVersionLT(6)) && !Triple.isWatchOS())) PIC = PIE = false; if (Arg *A = Args.getLastArg(options::OPT_mdynamic_no_pic)) { // This is a very special mode. It trumps the other modes, almost no one // uses it, and it isn't even valid on any OS but Darwin.
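// (Illustration: on Darwin this maps to the DynamicNoPIC relocation model
// returned below; on any other target the driver diagnoses the flag as
// unsupported.)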
if (!ToolChain.getTriple().isOSDarwin()) ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << ToolChain.getTriple().str(); // FIXME: Warn when this flag trumps some other PIC or PIE flag. // Only a forced PIC mode can cause the actual compile to have PIC defines // etc., no flags are sufficient. This behavior was selected to closely // match that of llvm-gcc and Apple GCC before that. PIC = ToolChain.isPICDefault() && ToolChain.isPICDefaultForced(); return std::make_tuple(llvm::Reloc::DynamicNoPIC, PIC ? 2 : 0, false); } if (PIC) return std::make_tuple(llvm::Reloc::PIC_, IsPICLevelTwo ? 2 : 1, PIE); return std::make_tuple(llvm::Reloc::Static, 0, false); } static const char *RelocationModelName(llvm::Reloc::Model Model) { switch (Model) { case llvm::Reloc::Static: return "static"; case llvm::Reloc::PIC_: return "pic"; case llvm::Reloc::DynamicNoPIC: return "dynamic-no-pic"; } llvm_unreachable("Unknown Reloc::Model kind"); } static void AddAssemblerKPIC(const ToolChain &ToolChain, const ArgList &Args, ArgStringList &CmdArgs) { llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(ToolChain, ToolChain.getTriple(), Args); if (RelocationModel != llvm::Reloc::Static) CmdArgs.push_back("-KPIC"); } void Clang::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args); const llvm::Triple Triple(TripleStr); bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; bool IsWindowsGNU = getToolChain().getTriple().isWindowsGNUEnvironment(); bool IsWindowsCygnus = getToolChain().getTriple().isWindowsCygwinEnvironment(); bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment(); bool IsPS4CPU = getToolChain().getTriple().isPS4CPU(); bool IsIAMCU = getToolChain().getTriple().isOSIAMCU(); // Check number of inputs for sanity. We need at least one input. assert(Inputs.size() >= 1 && "Must have at least one input."); const InputInfo &Input = Inputs[0]; // CUDA compilation may have multiple inputs (source file + results of // device-side compilations). All other jobs are expected to have exactly one // input. bool IsCuda = JA.isOffloading(Action::OFK_Cuda); assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple inputs."); // C++ is not supported for IAMCU. if (IsIAMCU && types::isCXX(Input.getType())) D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU"; // Invoke ourselves in -cc1 mode. // // FIXME: Implement custom jobs for internal actions. CmdArgs.push_back("-cc1"); // Add the "effective" target triple. CmdArgs.push_back("-triple"); CmdArgs.push_back(Args.MakeArgString(TripleStr)); if (IsCuda) { // We have to pass the triple of the host if compiling for a CUDA device and // vice versa. std::string NormalizedTriple; if (JA.isDeviceOffloading(Action::OFK_Cuda)) NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Host>() ->getTriple() .normalize(); else NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Cuda>() ->getTriple() .normalize(); CmdArgs.push_back("-aux-triple"); CmdArgs.push_back(Args.MakeArgString(NormalizedTriple)); } if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || Triple.getArch() == llvm::Triple::thumb)) { unsigned Offset = Triple.getArch() == llvm::Triple::arm ?
4 : 6; unsigned Version; Triple.getArchName().substr(Offset).getAsInteger(10, Version); if (Version < 7) D.Diag(diag::err_target_unsupported_arch) << Triple.getArchName() << TripleStr; } // Push all default warning arguments that are specific to // the given target. These come before any user-provided warning options. getToolChain().addClangWarningOptions(CmdArgs); // Select the appropriate action. RewriteKind rewriteKind = RK_None; if (isa<AnalyzeJobAction>(JA)) { assert(JA.getType() == types::TY_Plist && "Invalid output type."); CmdArgs.push_back("-analyze"); } else if (isa<MigrateJobAction>(JA)) { CmdArgs.push_back("-migrate"); } else if (isa<PreprocessJobAction>(JA)) { if (Output.getType() == types::TY_Dependencies) CmdArgs.push_back("-Eonly"); else { CmdArgs.push_back("-E"); if (Args.hasArg(options::OPT_rewrite_objc) && !Args.hasArg(options::OPT_g_Group)) CmdArgs.push_back("-P"); } } else if (isa<AssembleJobAction>(JA)) { CmdArgs.push_back("-emit-obj"); CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D); // Also ignore the explicit -force_cpusubtype_ALL option. (void)Args.hasArg(options::OPT_force__cpusubtype__ALL); } else if (isa<PrecompileJobAction>(JA)) { // Use PCH if the user requested it. bool UsePCH = D.CCCUsePCH; if (JA.getType() == types::TY_Nothing) CmdArgs.push_back("-fsyntax-only"); else if (UsePCH) CmdArgs.push_back("-emit-pch"); else CmdArgs.push_back("-emit-pth"); } else if (isa<VerifyPCHJobAction>(JA)) { CmdArgs.push_back("-verify-pch"); } else { assert((isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) && "Invalid action for clang tool."); if (JA.getType() == types::TY_Nothing) { CmdArgs.push_back("-fsyntax-only"); } else if (JA.getType() == types::TY_LLVM_IR || JA.getType() == types::TY_LTO_IR) { CmdArgs.push_back("-emit-llvm"); } else if (JA.getType() == types::TY_LLVM_BC || JA.getType() == types::TY_LTO_BC) { CmdArgs.push_back("-emit-llvm-bc"); } else if (JA.getType() == types::TY_PP_Asm) { CmdArgs.push_back("-S"); } else if (JA.getType() == types::TY_AST) { CmdArgs.push_back("-emit-pch"); } else if (JA.getType() == types::TY_ModuleFile) { CmdArgs.push_back("-module-file-info"); } else if (JA.getType() == types::TY_RewrittenObjC) { CmdArgs.push_back("-rewrite-objc"); rewriteKind = RK_NonFragile; } else if (JA.getType() == types::TY_RewrittenLegacyObjC) { CmdArgs.push_back("-rewrite-objc"); rewriteKind = RK_Fragile; } else { assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!"); } // Preserve use-list order by default when emitting bitcode, so that // loading the bitcode up in 'opt' or 'llc' and running passes gives the // same result as running passes here. For LTO, we don't need to preserve // the use-list order, since serialization to bitcode is part of the flow. if (JA.getType() == types::TY_LLVM_BC) CmdArgs.push_back("-emit-llvm-uselists"); if (D.isUsingLTO()) Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ); } if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) { if (!types::isLLVMIR(Input.getType())) D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) << "-x ir"; Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ); } // Embed-bitcode option. if (C.getDriver().embedBitcodeEnabled() && (isa<BackendJobAction>(JA) || isa<AssembleJobAction>(JA))) { // Add flags implied by -fembed-bitcode. Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ); // Disable all llvm IR level optimizations.
CmdArgs.push_back("-disable-llvm-optzns"); } if (C.getDriver().embedBitcodeMarkerOnly()) CmdArgs.push_back("-fembed-bitcode=marker"); // We normally speed up the clang process a bit by skipping destructors at // exit, but when we're generating diagnostics we can rely on some of the // cleanup. if (!C.isForDiagnostics()) CmdArgs.push_back("-disable-free"); // Disable the verification pass in -asserts builds. #ifdef NDEBUG CmdArgs.push_back("-disable-llvm-verifier"); // Discard LLVM value names in -asserts builds. CmdArgs.push_back("-discard-value-names"); #endif // Set the main file name, so that debug info works even with // -save-temps. CmdArgs.push_back("-main-file-name"); CmdArgs.push_back(getBaseInputName(Args, Input)); // Some flags which affect the language (via preprocessor // defines). if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("-static-define"); if (isa(JA)) { // Enable region store model by default. CmdArgs.push_back("-analyzer-store=region"); // Treat blocks as analysis entry points. CmdArgs.push_back("-analyzer-opt-analyze-nested-blocks"); CmdArgs.push_back("-analyzer-eagerly-assume"); // Add default argument set. if (!Args.hasArg(options::OPT__analyzer_no_default_checks)) { CmdArgs.push_back("-analyzer-checker=core"); if (!IsWindowsMSVC) { CmdArgs.push_back("-analyzer-checker=unix"); } else { // Enable "unix" checkers that also work on Windows. CmdArgs.push_back("-analyzer-checker=unix.API"); CmdArgs.push_back("-analyzer-checker=unix.Malloc"); CmdArgs.push_back("-analyzer-checker=unix.MallocSizeof"); CmdArgs.push_back("-analyzer-checker=unix.MismatchedDeallocator"); CmdArgs.push_back("-analyzer-checker=unix.cstring.BadSizeArg"); CmdArgs.push_back("-analyzer-checker=unix.cstring.NullArg"); } // Disable some unix checkers for PS4. if (IsPS4CPU) { CmdArgs.push_back("-analyzer-disable-checker=unix.API"); CmdArgs.push_back("-analyzer-disable-checker=unix.Vfork"); } if (getToolChain().getTriple().getVendor() == llvm::Triple::Apple) CmdArgs.push_back("-analyzer-checker=osx"); CmdArgs.push_back("-analyzer-checker=deadcode"); if (types::isCXX(Input.getType())) CmdArgs.push_back("-analyzer-checker=cplusplus"); if (!IsPS4CPU) { CmdArgs.push_back( "-analyzer-checker=security.insecureAPI.UncheckedReturn"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mkstemp"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.vfork"); } // Default nullability checks. CmdArgs.push_back("-analyzer-checker=nullability.NullPassedToNonnull"); CmdArgs.push_back( "-analyzer-checker=nullability.NullReturnedFromNonnull"); } // Set the output format. The default is plist, for (lame) historical // reasons. CmdArgs.push_back("-analyzer-output"); if (Arg *A = Args.getLastArg(options::OPT__analyzer_output)) CmdArgs.push_back(A->getValue()); else CmdArgs.push_back("plist"); // Disable the presentation of standard compiler warnings when // using --analyze. We only want to show static analyzer diagnostics // or frontend errors. CmdArgs.push_back("-w"); // Add -Xanalyzer arguments when running as analyzer. 
Args.AddAllArgValues(CmdArgs, options::OPT_Xanalyzer); } CheckCodeGenerationOptions(D, Args); llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(getToolChain(), Triple, Args); const char *RMName = RelocationModelName(RelocationModel); if (RMName) { CmdArgs.push_back("-mrelocation-model"); CmdArgs.push_back(RMName); } if (PICLevel > 0) { CmdArgs.push_back("-pic-level"); CmdArgs.push_back(PICLevel == 1 ? "1" : "2"); if (IsPIE) CmdArgs.push_back("-pic-is-pie"); } if (Arg *A = Args.getLastArg(options::OPT_meabi)) { CmdArgs.push_back("-meabi"); CmdArgs.push_back(A->getValue()); } CmdArgs.push_back("-mthread-model"); if (Arg *A = Args.getLastArg(options::OPT_mthread_model)) CmdArgs.push_back(A->getValue()); else CmdArgs.push_back(Args.MakeArgString(getToolChain().getThreadModel())); Args.AddLastArg(CmdArgs, options::OPT_fveclib); if (!Args.hasFlag(options::OPT_fmerge_all_constants, options::OPT_fno_merge_all_constants)) CmdArgs.push_back("-fno-merge-all-constants"); // LLVM Code Generator Options. if (Args.hasArg(options::OPT_frewrite_map_file) || Args.hasArg(options::OPT_frewrite_map_file_EQ)) { for (const Arg *A : Args.filtered(options::OPT_frewrite_map_file, options::OPT_frewrite_map_file_EQ)) { CmdArgs.push_back("-frewrite-map-file"); CmdArgs.push_back(A->getValue()); A->claim(); } } if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { StringRef v = A->getValue(); CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); A->claim(); } if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables, true)) CmdArgs.push_back("-fno-jump-tables"); if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) { CmdArgs.push_back("-mregparm"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fpcc_struct_return, options::OPT_freg_struct_return)) { if (getToolChain().getArch() != llvm::Triple::x86) { D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << getToolChain().getTriple().str(); } else if (A->getOption().matches(options::OPT_fpcc_struct_return)) { CmdArgs.push_back("-fpcc-struct-return"); } else { assert(A->getOption().matches(options::OPT_freg_struct_return)); CmdArgs.push_back("-freg-struct-return"); } } if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false)) CmdArgs.push_back("-fdefault-calling-conv=stdcall"); if (shouldUseFramePointer(Args, getToolChain().getTriple())) CmdArgs.push_back("-mdisable-fp-elim"); if (!Args.hasFlag(options::OPT_fzero_initialized_in_bss, options::OPT_fno_zero_initialized_in_bss)) CmdArgs.push_back("-mno-zero-initialized-in-bss"); bool OFastEnabled = isOptimizationLevelFast(Args); // If -Ofast is the optimization level, then -fstrict-aliasing should be // enabled. This alias option is being used to simplify the hasFlag logic. OptSpecifier StrictAliasingAliasOption = OFastEnabled ? options::OPT_Ofast : options::OPT_fstrict_aliasing; // We turn strict aliasing off by default if we're in CL mode, since MSVC // doesn't do any TBAA. 
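// (Consequence: a plain `clang-cl /c file.c` build gets `-relaxed-aliasing`
// below unless strict aliasing is explicitly requested.)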
bool TBAAOnByDefault = !getToolChain().getDriver().IsCLMode(); if (!Args.hasFlag(options::OPT_fstrict_aliasing, StrictAliasingAliasOption, options::OPT_fno_strict_aliasing, TBAAOnByDefault)) CmdArgs.push_back("-relaxed-aliasing"); if (!Args.hasFlag(options::OPT_fstruct_path_tbaa, options::OPT_fno_struct_path_tbaa)) CmdArgs.push_back("-no-struct-path-tbaa"); if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums, false)) CmdArgs.push_back("-fstrict-enums"); if (Args.hasFlag(options::OPT_fstrict_vtable_pointers, options::OPT_fno_strict_vtable_pointers, false)) CmdArgs.push_back("-fstrict-vtable-pointers"); if (!Args.hasFlag(options::OPT_foptimize_sibling_calls, options::OPT_fno_optimize_sibling_calls)) CmdArgs.push_back("-mdisable-tail-calls"); // Handle segmented stacks. if (Args.hasArg(options::OPT_fsplit_stack)) CmdArgs.push_back("-split-stacks"); // If -Ofast is the optimization level, then -ffast-math should be enabled. // This alias option is being used to simplify the getLastArg logic. OptSpecifier FastMathAliasOption = OFastEnabled ? options::OPT_Ofast : options::OPT_ffast_math; // Handle various floating point optimization flags, mapping them to the // appropriate LLVM code generation flags. The pattern for all of these is to // default off the codegen optimizations, and if any flag enables them and no // flag disables them after the flag enabling them, enable the codegen // optimization. This is complicated by several "umbrella" flags. if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, options::OPT_fhonor_infinities, options::OPT_fno_honor_infinities)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_finite_math_only && A->getOption().getID() != options::OPT_fhonor_infinities) CmdArgs.push_back("-menable-no-infs"); if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, options::OPT_fhonor_nans, options::OPT_fno_honor_nans)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_finite_math_only && A->getOption().getID() != options::OPT_fhonor_nans) CmdArgs.push_back("-menable-no-nans"); // -fmath-errno is the default on some platforms, e.g. BSD-derived OSes. bool MathErrno = getToolChain().IsMathErrnoDefault(); if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_fmath_errno, options::OPT_fno_math_errno)) { // Turning on -ffast-math (with either flag) removes the need for MathErrno. // However, turning *off* -ffast-math merely restores the toolchain default // (which may be false). if (A->getOption().getID() == options::OPT_fno_math_errno || A->getOption().getID() == options::OPT_ffast_math || A->getOption().getID() == options::OPT_Ofast) MathErrno = false; else if (A->getOption().getID() == options::OPT_fmath_errno) MathErrno = true; } if (MathErrno) CmdArgs.push_back("-fmath-errno"); // There are several flags which require disabling very specific // optimizations. Any of these being disabled forces us to turn off the // entire set of LLVM optimizations, so collect them through all the flag // madness.
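// Worked example (hypothetical flags): `-ffast-math -fno-associative-math`
// leaves AssociativeMath false below, which alone is enough to keep
// -menable-unsafe-fp-math off even though the other fast-math bits are set.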
bool AssociativeMath = false; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_fassociative_math, options::OPT_fno_associative_math)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_fno_associative_math) AssociativeMath = true; bool ReciprocalMath = false; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_freciprocal_math, options::OPT_fno_reciprocal_math)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_fno_reciprocal_math) ReciprocalMath = true; bool SignedZeros = true; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_fsigned_zeros, options::OPT_fno_signed_zeros)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_fsigned_zeros) SignedZeros = false; bool TrappingMath = true; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_ftrapping_math, options::OPT_fno_trapping_math)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_ftrapping_math) TrappingMath = false; if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros && !TrappingMath) CmdArgs.push_back("-menable-unsafe-fp-math"); if (!SignedZeros) CmdArgs.push_back("-fno-signed-zeros"); if (ReciprocalMath) CmdArgs.push_back("-freciprocal-math"); // Validate and pass through -fp-contract option. if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_ffp_contract)) { if (A->getOption().getID() == options::OPT_ffp_contract) { StringRef Val = A->getValue(); if (Val == "fast" || Val == "on" || Val == "off") { CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + Val)); } else { D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Val; } } else if (A->getOption().matches(options::OPT_ffast_math) || (OFastEnabled && A->getOption().matches(options::OPT_Ofast))) { // If fast-math is set then set the fp-contract mode to fast. CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast")); } } ParseMRecip(getToolChain().getDriver(), Args, CmdArgs); // We separately look for the '-ffast-math' and '-ffinite-math-only' flags, // and if we find them, tell the frontend to provide the appropriate // preprocessor macros. This is distinct from enabling any optimizations as // these options induce language changes which must survive serialization // and deserialization, etc. 
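// (For instance, `-ffast-math` must reach cc1 itself so that predefines
// such as __FAST_MATH__ are set; the macro is cited here as a known example
// of such a language-level effect.)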
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math)) if (!A->getOption().matches(options::OPT_fno_fast_math)) CmdArgs.push_back("-ffast-math"); if (Arg *A = Args.getLastArg(options::OPT_ffinite_math_only, options::OPT_fno_fast_math)) if (A->getOption().matches(options::OPT_ffinite_math_only)) CmdArgs.push_back("-ffinite-math-only"); // Decide whether to use verbose asm. Verbose assembly is the default on // toolchains which have the integrated assembler on by default. bool IsIntegratedAssemblerDefault = getToolChain().IsIntegratedAssemblerDefault(); if (Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm, IsIntegratedAssemblerDefault) || Args.hasArg(options::OPT_dA)) CmdArgs.push_back("-masm-verbose"); if (!Args.hasFlag(options::OPT_fintegrated_as, options::OPT_fno_integrated_as, IsIntegratedAssemblerDefault)) CmdArgs.push_back("-no-integrated-as"); if (Args.hasArg(options::OPT_fdebug_pass_structure)) { CmdArgs.push_back("-mdebug-pass"); CmdArgs.push_back("Structure"); } if (Args.hasArg(options::OPT_fdebug_pass_arguments)) { CmdArgs.push_back("-mdebug-pass"); CmdArgs.push_back("Arguments"); } // Enable -mconstructor-aliases except on darwin, where we have to work around // a linker bug (see ), and CUDA device code, where // aliases aren't supported. if (!getToolChain().getTriple().isOSDarwin() && !getToolChain().getTriple().isNVPTX()) CmdArgs.push_back("-mconstructor-aliases"); // Darwin's kernel doesn't support guard variables; just die if we // try to use them. if (KernelOrKext && getToolChain().getTriple().isOSDarwin()) CmdArgs.push_back("-fforbid-guard-variables"); if (Args.hasFlag(options::OPT_mms_bitfields, options::OPT_mno_ms_bitfields, false)) { CmdArgs.push_back("-mms-bitfields"); } // This is a coarse approximation of what llvm-gcc actually does, both // -fasynchronous-unwind-tables and -fnon-call-exceptions interact in more // complicated ways. bool AsynchronousUnwindTables = Args.hasFlag(options::OPT_fasynchronous_unwind_tables, options::OPT_fno_asynchronous_unwind_tables, (getToolChain().IsUnwindTablesDefault() || getToolChain().getSanitizerArgs().needsUnwindTables()) && !KernelOrKext); if (Args.hasFlag(options::OPT_funwind_tables, options::OPT_fno_unwind_tables, AsynchronousUnwindTables)) CmdArgs.push_back("-munwind-tables"); getToolChain().addClangTargetOptions(Args, CmdArgs); if (Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) { CmdArgs.push_back("-mlimit-float-precision"); CmdArgs.push_back(A->getValue()); } // FIXME: Handle -mtune=. (void)Args.hasArg(options::OPT_mtune_EQ); if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) { CmdArgs.push_back("-mcode-model"); CmdArgs.push_back(A->getValue()); } // Add the target cpu std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false); if (!CPU.empty()) { CmdArgs.push_back("-target-cpu"); CmdArgs.push_back(Args.MakeArgString(CPU)); } if (const Arg *A = Args.getLastArg(options::OPT_mfpmath_EQ)) { CmdArgs.push_back("-mfpmath"); CmdArgs.push_back(A->getValue()); } // Add the target features getTargetFeatures(getToolChain(), Triple, Args, CmdArgs, false); // Add target specific flags. switch (getToolChain().getArch()) { default: break; case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: // Use the effective triple, which takes into account the deployment target. 
AddARMTargetArgs(Triple, Args, CmdArgs, KernelOrKext); break; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: AddAArch64TargetArgs(Args, CmdArgs); break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: AddMIPSTargetArgs(Args, CmdArgs); break; case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: AddPPCTargetArgs(Args, CmdArgs); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: AddSparcTargetArgs(Args, CmdArgs); break; case llvm::Triple::systemz: AddSystemZTargetArgs(Args, CmdArgs); break; case llvm::Triple::x86: case llvm::Triple::x86_64: AddX86TargetArgs(Args, CmdArgs); break; case llvm::Triple::lanai: AddLanaiTargetArgs(Args, CmdArgs); break; case llvm::Triple::hexagon: AddHexagonTargetArgs(Args, CmdArgs); break; case llvm::Triple::wasm32: case llvm::Triple::wasm64: AddWebAssemblyTargetArgs(Args, CmdArgs); break; } // The 'g' group options involve a somewhat intricate sequence of decisions // about what to pass from the driver to the frontend, but by the time they // reach cc1 they've been factored into three well-defined orthogonal choices: // * what level of debug info to generate // * what dwarf version to write // * what debugger tuning to use // This avoids having to monkey around further in cc1 other than to disable // codeview if not running in a Windows environment. Perhaps even that // decision should be made in the driver as well though. unsigned DwarfVersion = 0; llvm::DebuggerKind DebuggerTuning = getToolChain().getDefaultDebuggerTuning(); // These two are potentially updated by AddClangCLArgs. codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo; bool EmitCodeView = false; // Add clang-cl arguments. types::ID InputType = Input.getType(); if (getToolChain().getDriver().IsCLMode()) AddClangCLArgs(Args, InputType, CmdArgs, &DebugInfoKind, &EmitCodeView); // Pass the linker version in use. if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) { CmdArgs.push_back("-target-linker-version"); CmdArgs.push_back(A->getValue()); } if (!shouldUseLeafFramePointer(Args, getToolChain().getTriple())) CmdArgs.push_back("-momit-leaf-frame-pointer"); // Explicitly error on some things we know we don't support and can't just // ignore. if (!Args.hasArg(options::OPT_fallow_unsupported)) { Arg *Unsupported; if (types::isCXX(InputType) && getToolChain().getTriple().isOSDarwin() && getToolChain().getArch() == llvm::Triple::x86) { if ((Unsupported = Args.getLastArg(options::OPT_fapple_kext)) || (Unsupported = Args.getLastArg(options::OPT_mkernel))) D.Diag(diag::err_drv_clang_unsupported_opt_cxx_darwin_i386) << Unsupported->getOption().getName(); } } Args.AddAllArgs(CmdArgs, options::OPT_v); Args.AddLastArg(CmdArgs, options::OPT_H); if (D.CCPrintHeaders && !D.CCGenDiagnostics) { CmdArgs.push_back("-header-include-file"); CmdArgs.push_back(D.CCPrintHeadersFilename ? D.CCPrintHeadersFilename : "-"); } Args.AddLastArg(CmdArgs, options::OPT_P); Args.AddLastArg(CmdArgs, options::OPT_print_ivar_layout); if (D.CCLogDiagnostics && !D.CCGenDiagnostics) { CmdArgs.push_back("-diagnostic-log-file"); CmdArgs.push_back(D.CCLogDiagnosticsFilename ? D.CCLogDiagnosticsFilename : "-"); } Args.ClaimAllArgs(options::OPT_g_Group); Arg *SplitDwarfArg = Args.getLastArg(options::OPT_gsplit_dwarf); if (Arg *A = Args.getLastArg(options::OPT_g_Group)) { // If the last option explicitly specified a debug-info level, use it.
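// (e.g. a hypothetical `-g -g0` command line ends with no debug info,
// since only the last g-group argument is inspected here.)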
if (A->getOption().matches(options::OPT_gN_Group)) { DebugInfoKind = DebugLevelToInfoKind(*A); // If you say "-gsplit-dwarf -gline-tables-only", -gsplit-dwarf loses. // But -gsplit-dwarf is not a g_group option, hence we have to check the // order explicitly. (If -gsplit-dwarf wins, we fix DebugInfoKind later.) if (SplitDwarfArg && DebugInfoKind < codegenoptions::LimitedDebugInfo && A->getIndex() > SplitDwarfArg->getIndex()) SplitDwarfArg = nullptr; } else // For any other 'g' option, use Limited. DebugInfoKind = codegenoptions::LimitedDebugInfo; } // If a debugger tuning argument appeared, remember it. if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, options::OPT_ggdbN_Group)) { if (A->getOption().matches(options::OPT_glldb)) DebuggerTuning = llvm::DebuggerKind::LLDB; else if (A->getOption().matches(options::OPT_gsce)) DebuggerTuning = llvm::DebuggerKind::SCE; else DebuggerTuning = llvm::DebuggerKind::GDB; } // If a -gdwarf argument appeared, remember it. if (Arg *A = Args.getLastArg(options::OPT_gdwarf_2, options::OPT_gdwarf_3, options::OPT_gdwarf_4, options::OPT_gdwarf_5)) DwarfVersion = DwarfVersionNum(A->getSpelling()); // Forward -gcodeview. EmitCodeView might have been set by CL-compatibility // argument parsing. if (Args.hasArg(options::OPT_gcodeview) || EmitCodeView) { // DwarfVersion remains at 0 if no explicit choice was made. CmdArgs.push_back("-gcodeview"); } else if (DwarfVersion == 0 && DebugInfoKind != codegenoptions::NoDebugInfo) { DwarfVersion = getToolChain().GetDefaultDwarfVersion(); } // We ignore flags -gstrict-dwarf and -grecord-gcc-switches for now. Args.ClaimAllArgs(options::OPT_g_flags_Group); // PS4 defaults to no column info if (Args.hasFlag(options::OPT_gcolumn_info, options::OPT_gno_column_info, /*Default=*/ !IsPS4CPU)) CmdArgs.push_back("-dwarf-column-info"); // FIXME: Move backend command line options to the module. if (Args.hasArg(options::OPT_gmodules)) { DebugInfoKind = codegenoptions::LimitedDebugInfo; CmdArgs.push_back("-dwarf-ext-refs"); CmdArgs.push_back("-fmodule-format=obj"); } // -gsplit-dwarf should turn on -g and enable the backend dwarf // splitting and extraction. // FIXME: Currently only works on Linux. if (getToolChain().getTriple().isOSLinux() && SplitDwarfArg) { DebugInfoKind = codegenoptions::LimitedDebugInfo; CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-split-dwarf=Enable"); } // After we've dealt with all combinations of things that could // make DebugInfoKind be other than None or DebugLineTablesOnly, // figure out if we need to "upgrade" it to standalone debug info. // We parse these two '-f' options whether or not they will be used, // to claim them even if you wrote "-fstandalone-debug -gline-tables-only" bool NeedFullDebug = Args.hasFlag(options::OPT_fstandalone_debug, options::OPT_fno_standalone_debug, getToolChain().GetDefaultStandaloneDebug()); if (DebugInfoKind == codegenoptions::LimitedDebugInfo && NeedFullDebug) DebugInfoKind = codegenoptions::FullDebugInfo; RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion, DebuggerTuning); // -ggnu-pubnames turns on gnu style pubnames in the backend. if (Args.hasArg(options::OPT_ggnu_pubnames)) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-generate-gnu-dwarf-pub-sections"); } // -gdwarf-aranges turns on the emission of the aranges section in the // backend. // Always enabled on the PS4. 
if (Args.hasArg(options::OPT_gdwarf_aranges) || IsPS4CPU) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-generate-arange-section"); } if (Args.hasFlag(options::OPT_fdebug_types_section, options::OPT_fno_debug_types_section, false)) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-generate-type-units"); } // CloudABI and WebAssembly use -ffunction-sections and -fdata-sections by // default. bool UseSeparateSections = Triple.getOS() == llvm::Triple::CloudABI || Triple.getArch() == llvm::Triple::wasm32 || Triple.getArch() == llvm::Triple::wasm64; if (Args.hasFlag(options::OPT_ffunction_sections, options::OPT_fno_function_sections, UseSeparateSections)) { CmdArgs.push_back("-ffunction-sections"); } if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections, UseSeparateSections)) { CmdArgs.push_back("-fdata-sections"); } if (!Args.hasFlag(options::OPT_funique_section_names, options::OPT_fno_unique_section_names, true)) CmdArgs.push_back("-fno-unique-section-names"); Args.AddAllArgs(CmdArgs, options::OPT_finstrument_functions); if (Args.hasFlag(options::OPT_fxray_instrument, options::OPT_fnoxray_instrument, false)) { CmdArgs.push_back("-fxray-instrument"); if (const Arg *A = Args.getLastArg(options::OPT_fxray_instruction_threshold_, options::OPT_fxray_instruction_threshold_EQ)) { CmdArgs.push_back("-fxray-instruction-threshold"); CmdArgs.push_back(A->getValue()); } } addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs); // Add runtime flag for PS4 when PGO or Coverage are enabled. if (getToolChain().getTriple().isPS4CPU()) addPS4ProfileRTArgs(getToolChain(), Args, CmdArgs); // Pass options for controlling the default header search paths. if (Args.hasArg(options::OPT_nostdinc)) { CmdArgs.push_back("-nostdsysteminc"); CmdArgs.push_back("-nobuiltininc"); } else { if (Args.hasArg(options::OPT_nostdlibinc)) CmdArgs.push_back("-nostdsysteminc"); Args.AddLastArg(CmdArgs, options::OPT_nostdincxx); Args.AddLastArg(CmdArgs, options::OPT_nobuiltininc); } // Pass the path to compiler resource files. 
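// (Typically a directory such as <prefix>/lib/clang/<version>, from which
// cc1 resolves its built-in headers and runtime libraries; the exact layout
// is toolchain-dependent.)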
CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); Args.AddLastArg(CmdArgs, options::OPT_working_directory); bool ARCMTEnabled = false; if (!Args.hasArg(options::OPT_fno_objc_arc, options::OPT_fobjc_arc)) { if (const Arg *A = Args.getLastArg(options::OPT_ccc_arcmt_check, options::OPT_ccc_arcmt_modify, options::OPT_ccc_arcmt_migrate)) { ARCMTEnabled = true; switch (A->getOption().getID()) { default: llvm_unreachable("missed a case"); case options::OPT_ccc_arcmt_check: CmdArgs.push_back("-arcmt-check"); break; case options::OPT_ccc_arcmt_modify: CmdArgs.push_back("-arcmt-modify"); break; case options::OPT_ccc_arcmt_migrate: CmdArgs.push_back("-arcmt-migrate"); CmdArgs.push_back("-mt-migrate-directory"); CmdArgs.push_back(A->getValue()); Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_report_output); Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_emit_arc_errors); break; } } } else { Args.ClaimAllArgs(options::OPT_ccc_arcmt_check); Args.ClaimAllArgs(options::OPT_ccc_arcmt_modify); Args.ClaimAllArgs(options::OPT_ccc_arcmt_migrate); } if (const Arg *A = Args.getLastArg(options::OPT_ccc_objcmt_migrate)) { if (ARCMTEnabled) { D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-ccc-arcmt-migrate"; } CmdArgs.push_back("-mt-migrate-directory"); CmdArgs.push_back(A->getValue()); if (!Args.hasArg(options::OPT_objcmt_migrate_literals, options::OPT_objcmt_migrate_subscripting, options::OPT_objcmt_migrate_property)) { // None specified, means enable them all. CmdArgs.push_back("-objcmt-migrate-literals"); CmdArgs.push_back("-objcmt-migrate-subscripting"); CmdArgs.push_back("-objcmt-migrate-property"); } else { Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property); } } else { Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_all); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readonly_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readwrite_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property_dot_syntax); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_annotation); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_instancetype); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_nsmacros); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_protocol_conformance); Args.AddLastArg(CmdArgs, options::OPT_objcmt_atomic_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_designated_init); Args.AddLastArg(CmdArgs, options::OPT_objcmt_whitelist_dir_path); } // Add preprocessing options like -I, -D, etc. if we are using the // preprocessor. // // FIXME: Support -fpreprocessed if (types::getPreprocessedType(InputType) != types::TY_INVALID) AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs); // Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes // that "The compiler can only warn and ignore the option if not recognized". // When building with ccache, it will pass -D options to clang even on // preprocessed inputs and configure concludes that -fPIC is not supported. 
Args.ClaimAllArgs(options::OPT_D); // Manually translate -O4 to -O3; let clang reject others. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O4)) { CmdArgs.push_back("-O3"); D.Diag(diag::warn_O4_is_O3); } else { A->render(Args, CmdArgs); } } // Warn about ignored options to clang. for (const Arg *A : Args.filtered(options::OPT_clang_ignored_gcc_optimization_f_Group)) { D.Diag(diag::warn_ignored_gcc_optimization) << A->getAsString(Args); A->claim(); } claimNoWarnArgs(Args); Args.AddAllArgs(CmdArgs, options::OPT_R_Group); Args.AddAllArgs(CmdArgs, options::OPT_W_Group); if (Args.hasFlag(options::OPT_pedantic, options::OPT_no_pedantic, false)) CmdArgs.push_back("-pedantic"); Args.AddLastArg(CmdArgs, options::OPT_pedantic_errors); Args.AddLastArg(CmdArgs, options::OPT_w); // Handle -{std, ansi, trigraphs} -- take the last of -{std, ansi} // (-ansi is equivalent to -std=c89 or -std=c++98). // // If a std is supplied, only add -trigraphs if it follows the // option. bool ImplyVCPPCXXVer = false; if (Arg *Std = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi)) { if (Std->getOption().matches(options::OPT_ansi)) if (types::isCXX(InputType)) CmdArgs.push_back("-std=c++98"); else CmdArgs.push_back("-std=c89"); else Std->render(Args, CmdArgs); // If -f(no-)trigraphs appears after the language standard flag, honor it. if (Arg *A = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi, options::OPT_ftrigraphs, options::OPT_fno_trigraphs)) if (A != Std) A->render(Args, CmdArgs); } else { // Honor -std-default. // // FIXME: Clang doesn't correctly handle -std= when the input language // doesn't match. For the time being just ignore this for C++ inputs; // eventually we want to do all the standard defaulting here instead of // splitting it between the driver and clang -cc1. if (!types::isCXX(InputType)) Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ, "-std=", /*Joined=*/true); else if (IsWindowsMSVC) ImplyVCPPCXXVer = true; Args.AddLastArg(CmdArgs, options::OPT_ftrigraphs, options::OPT_fno_trigraphs); } // GCC's behavior for -Wwrite-strings is a bit strange: // * In C, this "warning flag" changes the types of string literals from // 'char[N]' to 'const char[N]', and thus triggers an unrelated warning // for the discarded qualifier. // * In C++, this is just a normal warning flag. // // Implementing this warning correctly in C is hard, so we follow GCC's // behavior for now. FIXME: Directly diagnose uses of a string literal as // a non-const char* in C, rather than using this crude hack. if (!types::isCXX(InputType)) { // FIXME: This should behave just like a warning flag, and thus should also // respect -Weverything, -Wno-everything, -Werror=write-strings, and so on. Arg *WriteStrings = Args.getLastArg(options::OPT_Wwrite_strings, options::OPT_Wno_write_strings, options::OPT_w); if (WriteStrings && WriteStrings->getOption().matches(options::OPT_Wwrite_strings)) CmdArgs.push_back("-fconst-strings"); } // GCC provides a macro definition '__DEPRECATED' when -Wdeprecated is active // during C++ compilation, which it is by default. GCC keeps this define even // in the presence of '-w'; match this behavior bug-for-bug. if (types::isCXX(InputType) && Args.hasFlag(options::OPT_Wdeprecated, options::OPT_Wno_deprecated, true)) { CmdArgs.push_back("-fdeprecated-macro"); } // Translate GCC's misnamed '-fasm' arguments to '-fgnu-keywords'.
if (Arg *Asm = Args.getLastArg(options::OPT_fasm, options::OPT_fno_asm)) { if (Asm->getOption().matches(options::OPT_fasm)) CmdArgs.push_back("-fgnu-keywords"); else CmdArgs.push_back("-fno-gnu-keywords"); } if (ShouldDisableDwarfDirectory(Args, getToolChain())) CmdArgs.push_back("-fno-dwarf-directory-asm"); if (ShouldDisableAutolink(Args, getToolChain())) CmdArgs.push_back("-fno-autolink"); // Add in -fdebug-compilation-dir if necessary. addDebugCompDirArg(Args, CmdArgs); for (const Arg *A : Args.filtered(options::OPT_fdebug_prefix_map_EQ)) { StringRef Map = A->getValue(); if (Map.find('=') == StringRef::npos) D.Diag(diag::err_drv_invalid_argument_to_fdebug_prefix_map) << Map; else CmdArgs.push_back(Args.MakeArgString("-fdebug-prefix-map=" + Map)); A->claim(); } if (Arg *A = Args.getLastArg(options::OPT_ftemplate_depth_, options::OPT_ftemplate_depth_EQ)) { CmdArgs.push_back("-ftemplate-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_foperator_arrow_depth_EQ)) { CmdArgs.push_back("-foperator-arrow-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_depth_EQ)) { CmdArgs.push_back("-fconstexpr-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_steps_EQ)) { CmdArgs.push_back("-fconstexpr-steps"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) { CmdArgs.push_back("-fbracket-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_Wlarge_by_value_copy_EQ, options::OPT_Wlarge_by_value_copy_def)) { if (A->getNumValues()) { StringRef bytes = A->getValue(); CmdArgs.push_back(Args.MakeArgString("-Wlarge-by-value-copy=" + bytes)); } else CmdArgs.push_back("-Wlarge-by-value-copy=64"); // default value } if (Args.hasArg(options::OPT_relocatable_pch)) CmdArgs.push_back("-relocatable-pch"); if (Arg *A = Args.getLastArg(options::OPT_fconstant_string_class_EQ)) { CmdArgs.push_back("-fconstant-string-class"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_ftabstop_EQ)) { CmdArgs.push_back("-ftabstop"); CmdArgs.push_back(A->getValue()); } CmdArgs.push_back("-ferror-limit"); if (Arg *A = Args.getLastArg(options::OPT_ferror_limit_EQ)) CmdArgs.push_back(A->getValue()); else CmdArgs.push_back("19"); if (Arg *A = Args.getLastArg(options::OPT_fmacro_backtrace_limit_EQ)) { CmdArgs.push_back("-fmacro-backtrace-limit"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_ftemplate_backtrace_limit_EQ)) { CmdArgs.push_back("-ftemplate-backtrace-limit"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_backtrace_limit_EQ)) { CmdArgs.push_back("-fconstexpr-backtrace-limit"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fspell_checking_limit_EQ)) { CmdArgs.push_back("-fspell-checking-limit"); CmdArgs.push_back(A->getValue()); } // Pass -fmessage-length=. CmdArgs.push_back("-fmessage-length"); if (Arg *A = Args.getLastArg(options::OPT_fmessage_length_EQ)) { CmdArgs.push_back(A->getValue()); } else { // If -fmessage-length=N was not specified, determine whether this is a // terminal and, if so, implicitly define -fmessage-length appropriately. unsigned N = llvm::sys::Process::StandardErrColumns(); CmdArgs.push_back(Args.MakeArgString(Twine(N))); } // -fvisibility= and -fvisibility-ms-compat are of a piece. 
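// (That is, -fvisibility-ms-compat is treated as shorthand for hidden
// symbol visibility combined with default type visibility, as rendered
// below.)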
if (const Arg *A = Args.getLastArg(options::OPT_fvisibility_EQ, options::OPT_fvisibility_ms_compat)) { if (A->getOption().matches(options::OPT_fvisibility_EQ)) { CmdArgs.push_back("-fvisibility"); CmdArgs.push_back(A->getValue()); } else { assert(A->getOption().matches(options::OPT_fvisibility_ms_compat)); CmdArgs.push_back("-fvisibility"); CmdArgs.push_back("hidden"); CmdArgs.push_back("-ftype-visibility"); CmdArgs.push_back("default"); } } Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden); Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ); // -fhosted is default. if (Args.hasFlag(options::OPT_ffreestanding, options::OPT_fhosted, false) || KernelOrKext) CmdArgs.push_back("-ffreestanding"); // Forward -f (flag) options which we can pass directly. Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls); Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions); Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names); // Emulated TLS is enabled by default on Android, and can be enabled manually // with -femulated-tls. bool EmulatedTLSDefault = Triple.isAndroid() || Triple.isWindowsCygwinEnvironment(); if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls, EmulatedTLSDefault)) CmdArgs.push_back("-femulated-tls"); // AltiVec-like language extensions aren't relevant for assembling. if (!isa<PreprocessJobAction>(JA) || Output.getType() != types::TY_PP_Asm) { Args.AddLastArg(CmdArgs, options::OPT_faltivec); Args.AddLastArg(CmdArgs, options::OPT_fzvector); } Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree); Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type); // Forward flags for OpenMP. We don't do this if the current action is a // device offloading action. // // TODO: Allow OpenMP offload actions when they become available. if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false) && JA.isDeviceOffloading(Action::OFK_None)) { switch (getOpenMPRuntime(getToolChain(), Args)) { case OMPRT_OMP: case OMPRT_IOMP5: // Clang can generate useful OpenMP code for these two runtime libraries. CmdArgs.push_back("-fopenmp"); // If no option regarding the use of TLS in OpenMP code generation is // given, decide a default based on the target. Otherwise rely on the // options and pass the right information to the frontend. if (!Args.hasFlag(options::OPT_fopenmp_use_tls, options::OPT_fnoopenmp_use_tls, /*Default=*/true)) CmdArgs.push_back("-fnoopenmp-use-tls"); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ); break; default: // By default, if Clang doesn't know how to generate useful OpenMP code // for a specific runtime library, we just don't pass the '-fopenmp' flag // down to the actual compilation. // FIXME: It would be better to have a mode which *only* omits IR // generation based on the OpenMP support so that we get consistent // semantic analysis, etc. break; } } const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs(); Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType); // Report an error for -faltivec on anything other than PowerPC. if (const Arg *A = Args.getLastArg(options::OPT_faltivec)) { const llvm::Triple::ArchType Arch = getToolChain().getArch(); if (!(Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 || Arch == llvm::Triple::ppc64le)) D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) << "ppc/ppc64/ppc64le"; } // -fzvector is incompatible with -faltivec.
if (Arg *A = Args.getLastArg(options::OPT_fzvector)) if (Args.hasArg(options::OPT_faltivec)) D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-faltivec"; if (getToolChain().SupportsProfiling()) Args.AddLastArg(CmdArgs, options::OPT_pg); // -flax-vector-conversions is default. if (!Args.hasFlag(options::OPT_flax_vector_conversions, options::OPT_fno_lax_vector_conversions)) CmdArgs.push_back("-fno-lax-vector-conversions"); if (Args.getLastArg(options::OPT_fapple_kext) || (Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType))) CmdArgs.push_back("-fapple-kext"); Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits); Args.AddLastArg(CmdArgs, options::OPT_ftime_report); Args.AddLastArg(CmdArgs, options::OPT_ftrapv); if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) { CmdArgs.push_back("-ftrapv-handler"); CmdArgs.push_back(A->getValue()); } Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ); // -fno-strict-overflow implies -fwrapv if it isn't disabled, but // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { if (A->getOption().matches(options::OPT_fwrapv)) CmdArgs.push_back("-fwrapv"); } else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow, options::OPT_fno_strict_overflow)) { if (A->getOption().matches(options::OPT_fno_strict_overflow)) CmdArgs.push_back("-fwrapv"); } if (Arg *A = Args.getLastArg(options::OPT_freroll_loops, options::OPT_fno_reroll_loops)) if (A->getOption().matches(options::OPT_freroll_loops)) CmdArgs.push_back("-freroll-loops"); Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings); Args.AddLastArg(CmdArgs, options::OPT_funroll_loops, options::OPT_fno_unroll_loops); Args.AddLastArg(CmdArgs, options::OPT_pthread); // -stack-protector=0 is default. unsigned StackProtectorLevel = 0; if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector, options::OPT_fstack_protector_all, options::OPT_fstack_protector_strong, options::OPT_fstack_protector)) { if (A->getOption().matches(options::OPT_fstack_protector)) { StackProtectorLevel = std::max<unsigned>( LangOptions::SSPOn, getToolChain().GetDefaultStackProtectorLevel(KernelOrKext)); } else if (A->getOption().matches(options::OPT_fstack_protector_strong)) StackProtectorLevel = LangOptions::SSPStrong; else if (A->getOption().matches(options::OPT_fstack_protector_all)) StackProtectorLevel = LangOptions::SSPReq; } else { StackProtectorLevel = getToolChain().GetDefaultStackProtectorLevel(KernelOrKext); } if (StackProtectorLevel) { CmdArgs.push_back("-stack-protector"); CmdArgs.push_back(Args.MakeArgString(Twine(StackProtectorLevel))); } // --param ssp-buffer-size= for (const Arg *A : Args.filtered(options::OPT__param)) { StringRef Str(A->getValue()); if (Str.startswith("ssp-buffer-size=")) { if (StackProtectorLevel) { CmdArgs.push_back("-stack-protector-buffer-size"); // FIXME: Verify the argument is a valid integer.
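// ("ssp-buffer-size=" is 16 characters, so the drop_front(16) below keeps
// only the value: e.g. `--param ssp-buffer-size=4` becomes
// `-stack-protector-buffer-size 4`.)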
CmdArgs.push_back(Args.MakeArgString(Str.drop_front(16))); } A->claim(); } } // Translate -mstackrealign if (Args.hasFlag(options::OPT_mstackrealign, options::OPT_mno_stackrealign, false)) CmdArgs.push_back(Args.MakeArgString("-mstackrealign")); if (Args.hasArg(options::OPT_mstack_alignment)) { StringRef alignment = Args.getLastArgValue(options::OPT_mstack_alignment); CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment)); } if (Args.hasArg(options::OPT_mstack_probe_size)) { StringRef Size = Args.getLastArgValue(options::OPT_mstack_probe_size); if (!Size.empty()) CmdArgs.push_back(Args.MakeArgString("-mstack-probe-size=" + Size)); else CmdArgs.push_back("-mstack-probe-size=0"); } switch (getToolChain().getArch()) { case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: CmdArgs.push_back("-fallow-half-arguments-and-returns"); break; default: break; } if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it, options::OPT_mno_restrict_it)) { if (A->getOption().matches(options::OPT_mrestrict_it)) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-arm-restrict-it"); } else { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-arm-no-restrict-it"); } } else if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || Triple.getArch() == llvm::Triple::thumb)) { // Windows on ARM expects restricted IT blocks CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-arm-restrict-it"); } // Forward -cl options to -cc1 if (Args.getLastArg(options::OPT_cl_opt_disable)) { CmdArgs.push_back("-cl-opt-disable"); } if (Args.getLastArg(options::OPT_cl_strict_aliasing)) { CmdArgs.push_back("-cl-strict-aliasing"); } if (Args.getLastArg(options::OPT_cl_single_precision_constant)) { CmdArgs.push_back("-cl-single-precision-constant"); } if (Args.getLastArg(options::OPT_cl_finite_math_only)) { CmdArgs.push_back("-cl-finite-math-only"); } if (Args.getLastArg(options::OPT_cl_kernel_arg_info)) { CmdArgs.push_back("-cl-kernel-arg-info"); } if (Args.getLastArg(options::OPT_cl_unsafe_math_optimizations)) { CmdArgs.push_back("-cl-unsafe-math-optimizations"); } if (Args.getLastArg(options::OPT_cl_fast_relaxed_math)) { CmdArgs.push_back("-cl-fast-relaxed-math"); } if (Args.getLastArg(options::OPT_cl_mad_enable)) { CmdArgs.push_back("-cl-mad-enable"); } if (Args.getLastArg(options::OPT_cl_no_signed_zeros)) { CmdArgs.push_back("-cl-no-signed-zeros"); } if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) { std::string CLStdStr = "-cl-std="; CLStdStr += A->getValue(); CmdArgs.push_back(Args.MakeArgString(CLStdStr)); } if (Args.getLastArg(options::OPT_cl_denorms_are_zero)) { CmdArgs.push_back("-cl-denorms-are-zero"); } // Forward -f options with positive and negative forms; we translate // these by hand. if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) { StringRef fname = A->getValue(); if (!llvm::sys::fs::exists(fname)) D.Diag(diag::err_drv_no_such_file) << fname; else A->render(Args, CmdArgs); } // -fbuiltin is default unless -mkernel is used. bool UseBuiltins = Args.hasFlag(options::OPT_fbuiltin, options::OPT_fno_builtin, !Args.hasArg(options::OPT_mkernel)); if (!UseBuiltins) CmdArgs.push_back("-fno-builtin"); // -ffreestanding implies -fno-builtin. if (Args.hasArg(options::OPT_ffreestanding)) UseBuiltins = false; // Process the -fno-builtin-* options. 
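// (For example, a hypothetical `-fno-builtin-memcpy` is forwarded so that
// only memcpy loses builtin treatment; under a blanket -fno-builtin the
// per-function spellings are claimed but not forwarded, as below.)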
for (const auto &Arg : Args) { const Option &O = Arg->getOption(); if (!O.matches(options::OPT_fno_builtin_)) continue; Arg->claim(); // If -fno-builtin is specified, then there's no need to pass the option to // the frontend. if (!UseBuiltins) continue; StringRef FuncName = Arg->getValue(); CmdArgs.push_back(Args.MakeArgString("-fno-builtin-" + FuncName)); } if (!Args.hasFlag(options::OPT_fassume_sane_operator_new, options::OPT_fno_assume_sane_operator_new)) CmdArgs.push_back("-fno-assume-sane-operator-new"); // -fblocks=0 is default. if (Args.hasFlag(options::OPT_fblocks, options::OPT_fno_blocks, getToolChain().IsBlocksDefault()) || (Args.hasArg(options::OPT_fgnu_runtime) && Args.hasArg(options::OPT_fobjc_nonfragile_abi) && !Args.hasArg(options::OPT_fno_blocks))) { CmdArgs.push_back("-fblocks"); if (!Args.hasArg(options::OPT_fgnu_runtime) && !getToolChain().hasBlocksRuntime()) CmdArgs.push_back("-fblocks-runtime-optional"); } // -fmodules enables the use of precompiled modules (off by default). // Users can pass -fno-cxx-modules to turn off modules support for // C++/Objective-C++ programs. bool HaveModules = false; if (Args.hasFlag(options::OPT_fmodules, options::OPT_fno_modules, false)) { bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules, options::OPT_fno_cxx_modules, true); if (AllowedInCXX || !types::isCXX(InputType)) { CmdArgs.push_back("-fmodules"); HaveModules = true; } } // -fmodule-maps enables implicit reading of module map files. By default, // this is enabled if we are using precompiled modules. if (Args.hasFlag(options::OPT_fimplicit_module_maps, options::OPT_fno_implicit_module_maps, HaveModules)) { CmdArgs.push_back("-fimplicit-module-maps"); } // -fmodules-decluse checks that modules used are declared so (off by // default). if (Args.hasFlag(options::OPT_fmodules_decluse, options::OPT_fno_modules_decluse, false)) { CmdArgs.push_back("-fmodules-decluse"); } // -fmodules-strict-decluse is like -fmodule-decluse, but also checks that // all #included headers are part of modules. if (Args.hasFlag(options::OPT_fmodules_strict_decluse, options::OPT_fno_modules_strict_decluse, false)) { CmdArgs.push_back("-fmodules-strict-decluse"); } // -fno-implicit-modules turns off implicitly compiling modules on demand. if (!Args.hasFlag(options::OPT_fimplicit_modules, options::OPT_fno_implicit_modules)) { CmdArgs.push_back("-fno-implicit-modules"); } else if (HaveModules) { // -fmodule-cache-path specifies where our implicitly-built module files // should be written. SmallString<128> Path; if (Arg *A = Args.getLastArg(options::OPT_fmodules_cache_path)) Path = A->getValue(); if (C.isForDiagnostics()) { // When generating crash reports, we want to emit the modules along with // the reproduction sources, so we ignore any provided module path. Path = Output.getFilename(); llvm::sys::path::replace_extension(Path, ".cache"); llvm::sys::path::append(Path, "modules"); } else if (Path.empty()) { // No module path was provided: use the default. llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/false, Path); llvm::sys::path::append(Path, "org.llvm.clang."); appendUserToPath(Path); llvm::sys::path::append(Path, "ModuleCache"); } const char Arg[] = "-fmodules-cache-path="; Path.insert(Path.begin(), Arg, Arg + strlen(Arg)); CmdArgs.push_back(Args.MakeArgString(Path)); } // -fmodule-name specifies the module that is currently being built (or // used for header checking by -fmodule-maps). 
Args.AddLastArg(CmdArgs, options::OPT_fmodule_name_EQ); // -fmodule-map-file can be used to specify files containing module // definitions. Args.AddAllArgs(CmdArgs, options::OPT_fmodule_map_file); // -fmodule-file can be used to specify files containing precompiled modules. if (HaveModules) Args.AddAllArgs(CmdArgs, options::OPT_fmodule_file); else Args.ClaimAllArgs(options::OPT_fmodule_file); // When building modules and generating crashdumps, we need to dump a module // dependency VFS alongside the output. if (HaveModules && C.isForDiagnostics()) { SmallString<128> VFSDir(Output.getFilename()); llvm::sys::path::replace_extension(VFSDir, ".cache"); // Add the cache directory as a temp so the crash diagnostics pick it up. C.addTempFile(Args.MakeArgString(VFSDir)); llvm::sys::path::append(VFSDir, "vfs"); CmdArgs.push_back("-module-dependency-dir"); CmdArgs.push_back(Args.MakeArgString(VFSDir)); } if (HaveModules) Args.AddLastArg(CmdArgs, options::OPT_fmodules_user_build_path); // Pass through all -fmodules-ignore-macro arguments. Args.AddAllArgs(CmdArgs, options::OPT_fmodules_ignore_macro); Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_interval); Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_after); Args.AddLastArg(CmdArgs, options::OPT_fbuild_session_timestamp); if (Arg *A = Args.getLastArg(options::OPT_fbuild_session_file)) { if (Args.hasArg(options::OPT_fbuild_session_timestamp)) D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-fbuild-session-timestamp"; llvm::sys::fs::file_status Status; if (llvm::sys::fs::status(A->getValue(), Status)) D.Diag(diag::err_drv_no_such_file) << A->getValue(); CmdArgs.push_back(Args.MakeArgString( "-fbuild-session-timestamp=" + Twine((uint64_t)Status.getLastModificationTime().toEpochTime()))); } if (Args.getLastArg(options::OPT_fmodules_validate_once_per_build_session)) { if (!Args.getLastArg(options::OPT_fbuild_session_timestamp, options::OPT_fbuild_session_file)) D.Diag(diag::err_drv_modules_validate_once_requires_timestamp); Args.AddLastArg(CmdArgs, options::OPT_fmodules_validate_once_per_build_session); } Args.AddLastArg(CmdArgs, options::OPT_fmodules_validate_system_headers); // -faccess-control is default. if (Args.hasFlag(options::OPT_fno_access_control, options::OPT_faccess_control, false)) CmdArgs.push_back("-fno-access-control"); // -felide-constructors is the default. if (Args.hasFlag(options::OPT_fno_elide_constructors, options::OPT_felide_constructors, false)) CmdArgs.push_back("-fno-elide-constructors"); ToolChain::RTTIMode RTTIMode = getToolChain().getRTTIMode(); if (KernelOrKext || (types::isCXX(InputType) && (RTTIMode == ToolChain::RM_DisabledExplicitly || RTTIMode == ToolChain::RM_DisabledImplicitly))) CmdArgs.push_back("-fno-rtti"); // -fshort-enums=0 is default for all architectures except Hexagon. if (Args.hasFlag(options::OPT_fshort_enums, options::OPT_fno_short_enums, getToolChain().getArch() == llvm::Triple::hexagon)) CmdArgs.push_back("-fshort-enums"); // -fsigned-char is default. if (Arg *A = Args.getLastArg( options::OPT_fsigned_char, options::OPT_fno_signed_char, options::OPT_funsigned_char, options::OPT_fno_unsigned_char)) { if (A->getOption().matches(options::OPT_funsigned_char) || A->getOption().matches(options::OPT_fno_signed_char)) { CmdArgs.push_back("-fno-signed-char"); } } else if (!isSignedCharDefault(getToolChain().getTriple())) { CmdArgs.push_back("-fno-signed-char"); } // -fuse-cxa-atexit is default. 
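// (Concretely, the condition below re-enables atexit-based destructor // registration, i.e. passes -fno-use-cxa-atexit, for Cygwin/MinGW, Solaris, // hexagon, xcore, environment-less MipsTechnologies triples, and for // kernel/kext code. Illustrative summary of the expression that follows.)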
if (!Args.hasFlag( options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit, !IsWindowsCygnus && !IsWindowsGNU && getToolChain().getTriple().getOS() != llvm::Triple::Solaris && getToolChain().getArch() != llvm::Triple::hexagon && getToolChain().getArch() != llvm::Triple::xcore && ((getToolChain().getTriple().getVendor() != llvm::Triple::MipsTechnologies) || getToolChain().getTriple().hasEnvironment())) || KernelOrKext) CmdArgs.push_back("-fno-use-cxa-atexit"); // -fms-extensions=0 is default. if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions, IsWindowsMSVC)) CmdArgs.push_back("-fms-extensions"); // -fno-use-line-directives is default. if (Args.hasFlag(options::OPT_fuse_line_directives, options::OPT_fno_use_line_directives, false)) CmdArgs.push_back("-fuse-line-directives"); // -fms-compatibility=0 is default. if (Args.hasFlag(options::OPT_fms_compatibility, options::OPT_fno_ms_compatibility, (IsWindowsMSVC && Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions, true)))) CmdArgs.push_back("-fms-compatibility"); // -fms-compatibility-version=18.00 is default. VersionTuple MSVT = visualstudio::getMSVCVersion( &D, getToolChain(), getToolChain().getTriple(), Args, IsWindowsMSVC); if (!MSVT.empty()) CmdArgs.push_back( Args.MakeArgString("-fms-compatibility-version=" + MSVT.getAsString())); bool IsMSVC2015Compatible = MSVT.getMajor() >= 19; if (ImplyVCPPCXXVer) { StringRef LanguageStandard; if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) { LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue()) .Case("c++14", "-std=c++14") .Case("c++latest", "-std=c++1z") .Default(""); if (LanguageStandard.empty()) D.Diag(clang::diag::warn_drv_unused_argument) << StdArg->getAsString(Args); } if (LanguageStandard.empty()) { if (IsMSVC2015Compatible) LanguageStandard = "-std=c++14"; else LanguageStandard = "-std=c++11"; } CmdArgs.push_back(LanguageStandard.data()); } // -fno-borland-extensions is default. if (Args.hasFlag(options::OPT_fborland_extensions, options::OPT_fno_borland_extensions, false)) CmdArgs.push_back("-fborland-extensions"); // -fno-declspec is default, except for PS4. if (Args.hasFlag(options::OPT_fdeclspec, options::OPT_fno_declspec, getToolChain().getTriple().isPS4())) CmdArgs.push_back("-fdeclspec"); else if (Args.hasArg(options::OPT_fno_declspec)) CmdArgs.push_back("-fno-declspec"); // Explicitly disabling __declspec. // -fthreadsafe-statics is default, except for MSVC compatibility versions less // than 19. if (!Args.hasFlag(options::OPT_fthreadsafe_statics, options::OPT_fno_threadsafe_statics, !IsWindowsMSVC || IsMSVC2015Compatible)) CmdArgs.push_back("-fno-threadsafe-statics"); // -fno-delayed-template-parsing is default, except for Windows where MSVC STL // needs it. if (Args.hasFlag(options::OPT_fdelayed_template_parsing, options::OPT_fno_delayed_template_parsing, IsWindowsMSVC)) CmdArgs.push_back("-fdelayed-template-parsing"); // -fgnu-keywords default varies depending on language; only pass if // specified.
if (Arg *A = Args.getLastArg(options::OPT_fgnu_keywords, options::OPT_fno_gnu_keywords)) A->render(Args, CmdArgs); if (Args.hasFlag(options::OPT_fgnu89_inline, options::OPT_fno_gnu89_inline, false)) CmdArgs.push_back("-fgnu89-inline"); if (Args.hasArg(options::OPT_fno_inline)) CmdArgs.push_back("-fno-inline"); if (Arg* InlineArg = Args.getLastArg(options::OPT_finline_functions, options::OPT_finline_hint_functions, options::OPT_fno_inline_functions)) InlineArg->render(Args, CmdArgs); ObjCRuntime objcRuntime = AddObjCRuntimeArgs(Args, CmdArgs, rewriteKind); // -fobjc-dispatch-method is only relevant with the nonfragile-abi, and // legacy is the default. Except for deployment target of 10.5, // next runtime is always legacy dispatch and -fno-objc-legacy-dispatch // gets ignored silently. if (objcRuntime.isNonFragile()) { if (!Args.hasFlag(options::OPT_fobjc_legacy_dispatch, options::OPT_fno_objc_legacy_dispatch, objcRuntime.isLegacyDispatchDefaultForArch( getToolChain().getArch()))) { if (getToolChain().UseObjCMixedDispatch()) CmdArgs.push_back("-fobjc-dispatch-method=mixed"); else CmdArgs.push_back("-fobjc-dispatch-method=non-legacy"); } } // When the Objective-C legacy runtime is in effect on MacOSX, // turn on the option to do Array/Dictionary subscripting // by default. if (getToolChain().getArch() == llvm::Triple::x86 && getToolChain().getTriple().isMacOSX() && !getToolChain().getTriple().isMacOSXVersionLT(10, 7) && objcRuntime.getKind() == ObjCRuntime::FragileMacOSX && objcRuntime.isNeXTFamily()) CmdArgs.push_back("-fobjc-subscripting-legacy-runtime"); // -fencode-extended-block-signature=1 is default. if (getToolChain().IsEncodeExtendedBlockSignatureDefault()) { CmdArgs.push_back("-fencode-extended-block-signature"); } // Allow -fno-objc-arr to trump -fobjc-arr/-fobjc-arc. // NOTE: This logic is duplicated in ToolChains.cpp. bool ARC = isObjCAutoRefCount(Args); if (ARC) { getToolChain().CheckObjCARC(); CmdArgs.push_back("-fobjc-arc"); // FIXME: It seems like this entire block, and several around it should be // wrapped in isObjC, but for now we just use it here as this is where it // was being used previously. if (types::isCXX(InputType) && types::isObjC(InputType)) { if (getToolChain().GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) CmdArgs.push_back("-fobjc-arc-cxxlib=libc++"); else CmdArgs.push_back("-fobjc-arc-cxxlib=libstdc++"); } // Allow the user to enable full exceptions code emission. // We define off for Objective-C, on for Objective-C++. if (Args.hasFlag(options::OPT_fobjc_arc_exceptions, options::OPT_fno_objc_arc_exceptions, /*default*/ types::isCXX(InputType))) CmdArgs.push_back("-fobjc-arc-exceptions"); } // -fobjc-infer-related-result-type is the default, except in the Objective-C // rewriter. if (rewriteKind != RK_None) CmdArgs.push_back("-fno-objc-infer-related-result-type"); // Handle -fobjc-gc and -fobjc-gc-only. They are exclusive, and -fobjc-gc-only // takes precedence. const Arg *GCArg = Args.getLastArg(options::OPT_fobjc_gc_only); if (!GCArg) GCArg = Args.getLastArg(options::OPT_fobjc_gc); if (GCArg) { if (ARC) { D.Diag(diag::err_drv_objc_gc_arr) << GCArg->getAsString(Args); } else if (getToolChain().SupportsObjCGC()) { GCArg->render(Args, CmdArgs); } else { // FIXME: We should move this to a hard error. D.Diag(diag::warn_drv_objc_gc_unsupported) << GCArg->getAsString(Args); } } // Pass down -fobjc-weak or -fno-objc-weak if present.
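// (Illustrative: -fobjc-weak is rejected with a diagnostic when it is // combined with -fobjc-gc or when the selected runtime has no weak-reference // support; otherwise the flag is rendered through to cc1 unchanged, as the // block below implements.)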
if (types::isObjC(InputType)) { auto WeakArg = Args.getLastArg(options::OPT_fobjc_weak, options::OPT_fno_objc_weak); if (!WeakArg) { // nothing to do } else if (GCArg) { if (WeakArg->getOption().matches(options::OPT_fobjc_weak)) D.Diag(diag::err_objc_weak_with_gc); } else if (!objcRuntime.allowsWeak()) { if (WeakArg->getOption().matches(options::OPT_fobjc_weak)) D.Diag(diag::err_objc_weak_unsupported); } else { WeakArg->render(Args, CmdArgs); } } if (Args.hasFlag(options::OPT_fapplication_extension, options::OPT_fno_application_extension, false)) CmdArgs.push_back("-fapplication-extension"); // Handle GCC-style exception args. if (!C.getDriver().IsCLMode()) addExceptionArgs(Args, InputType, getToolChain(), KernelOrKext, objcRuntime, CmdArgs); if (Args.hasArg(options::OPT_fsjlj_exceptions) || getToolChain().UseSjLjExceptions(Args)) CmdArgs.push_back("-fsjlj-exceptions"); // C++ "sane" operator new. if (!Args.hasFlag(options::OPT_fassume_sane_operator_new, options::OPT_fno_assume_sane_operator_new)) CmdArgs.push_back("-fno-assume-sane-operator-new"); // -fsized-deallocation is off by default, as it is an ABI-breaking change for // most platforms. if (Args.hasFlag(options::OPT_fsized_deallocation, options::OPT_fno_sized_deallocation, false)) CmdArgs.push_back("-fsized-deallocation"); // -fconstant-cfstrings is default, and may be subject to argument translation // on Darwin. if (!Args.hasFlag(options::OPT_fconstant_cfstrings, options::OPT_fno_constant_cfstrings) || !Args.hasFlag(options::OPT_mconstant_cfstrings, options::OPT_mno_constant_cfstrings)) CmdArgs.push_back("-fno-constant-cfstrings"); // -fshort-wchar default varies depending on platform; only // pass if specified. if (Arg *A = Args.getLastArg(options::OPT_fshort_wchar, options::OPT_fno_short_wchar)) A->render(Args, CmdArgs); // -fno-pascal-strings is default, only pass non-default. if (Args.hasFlag(options::OPT_fpascal_strings, options::OPT_fno_pascal_strings, false)) CmdArgs.push_back("-fpascal-strings"); // Honor -fpack-struct= and -fpack-struct, if given. Note that // -fno-pack-struct doesn't apply to -fpack-struct=. if (Arg *A = Args.getLastArg(options::OPT_fpack_struct_EQ)) { std::string PackStructStr = "-fpack-struct="; PackStructStr += A->getValue(); CmdArgs.push_back(Args.MakeArgString(PackStructStr)); } else if (Args.hasFlag(options::OPT_fpack_struct, options::OPT_fno_pack_struct, false)) { CmdArgs.push_back("-fpack-struct=1"); } // Handle -fmax-type-align=N and -fno-type-align bool SkipMaxTypeAlign = Args.hasArg(options::OPT_fno_max_type_align); if (Arg *A = Args.getLastArg(options::OPT_fmax_type_align_EQ)) { if (!SkipMaxTypeAlign) { std::string MaxTypeAlignStr = "-fmax-type-align="; MaxTypeAlignStr += A->getValue(); CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr)); } } else if (getToolChain().getTriple().isOSDarwin()) { if (!SkipMaxTypeAlign) { std::string MaxTypeAlignStr = "-fmax-type-align=16"; CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr)); } } // -fcommon is the default unless compiling kernel code or the target says so bool NoCommonDefault = KernelOrKext || isNoCommonDefault(getToolChain().getTriple()); if (!Args.hasFlag(options::OPT_fcommon, options::OPT_fno_common, !NoCommonDefault)) CmdArgs.push_back("-fno-common"); // -fsigned-bitfields is default, and clang doesn't yet support // -funsigned-bitfields. 
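// (For instance, "clang -funsigned-bitfields foo.c" is expected to draw // warn_drv_clang_unsupported rather than change bit-field signedness. // Illustrative.)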
if (!Args.hasFlag(options::OPT_fsigned_bitfields, options::OPT_funsigned_bitfields)) D.Diag(diag::warn_drv_clang_unsupported) << Args.getLastArg(options::OPT_funsigned_bitfields)->getAsString(Args); // -ffor-scope is default, and clang doesn't support -fno-for-scope. if (!Args.hasFlag(options::OPT_ffor_scope, options::OPT_fno_for_scope)) D.Diag(diag::err_drv_clang_unsupported) << Args.getLastArg(options::OPT_fno_for_scope)->getAsString(Args); // -finput_charset=UTF-8 is default. Reject others. if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) { StringRef value = inputCharset->getValue(); if (value != "UTF-8") D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args) << value; } // -fexec_charset=UTF-8 is default. Reject others. if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) { StringRef value = execCharset->getValue(); if (value != "UTF-8") D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args) << value; } // -fcaret-diagnostics is default. if (!Args.hasFlag(options::OPT_fcaret_diagnostics, options::OPT_fno_caret_diagnostics, true)) CmdArgs.push_back("-fno-caret-diagnostics"); // -fdiagnostics-fixit-info is default, only pass non-default. if (!Args.hasFlag(options::OPT_fdiagnostics_fixit_info, options::OPT_fno_diagnostics_fixit_info)) CmdArgs.push_back("-fno-diagnostics-fixit-info"); // Enable -fdiagnostics-show-option by default. if (Args.hasFlag(options::OPT_fdiagnostics_show_option, options::OPT_fno_diagnostics_show_option)) CmdArgs.push_back("-fdiagnostics-show-option"); if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) { CmdArgs.push_back("-fdiagnostics-show-category"); CmdArgs.push_back(A->getValue()); } if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) { CmdArgs.push_back("-fdiagnostics-format"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg( options::OPT_fdiagnostics_show_note_include_stack, options::OPT_fno_diagnostics_show_note_include_stack)) { if (A->getOption().matches( options::OPT_fdiagnostics_show_note_include_stack)) CmdArgs.push_back("-fdiagnostics-show-note-include-stack"); else CmdArgs.push_back("-fno-diagnostics-show-note-include-stack"); } // Color diagnostics are parsed by the driver directly from argv // and later re-parsed to construct this job; claim any possible // color diagnostic here to avoid warn_drv_unused_argument and // diagnose bad OPT_fdiagnostics_color_EQ values.
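// (Illustrative: "-fdiagnostics-color=sometimes" is rejected here, while // "always", "never", and "auto" are accepted and merely claimed; whether // -fcolor-diagnostics is re-emitted is decided below from the already-parsed // DiagnosticOptions.)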
for (Arg *A : Args) { const Option &O = A->getOption(); if (!O.matches(options::OPT_fcolor_diagnostics) && !O.matches(options::OPT_fdiagnostics_color) && !O.matches(options::OPT_fno_color_diagnostics) && !O.matches(options::OPT_fno_diagnostics_color) && !O.matches(options::OPT_fdiagnostics_color_EQ)) continue; if (O.matches(options::OPT_fdiagnostics_color_EQ)) { StringRef Value(A->getValue()); if (Value != "always" && Value != "never" && Value != "auto") getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << ("-fdiagnostics-color=" + Value).str(); } A->claim(); } if (D.getDiags().getDiagnosticOptions().ShowColors) CmdArgs.push_back("-fcolor-diagnostics"); if (Args.hasArg(options::OPT_fansi_escape_codes)) CmdArgs.push_back("-fansi-escape-codes"); if (!Args.hasFlag(options::OPT_fshow_source_location, options::OPT_fno_show_source_location)) CmdArgs.push_back("-fno-show-source-location"); if (!Args.hasFlag(options::OPT_fshow_column, options::OPT_fno_show_column, true)) CmdArgs.push_back("-fno-show-column"); if (!Args.hasFlag(options::OPT_fspell_checking, options::OPT_fno_spell_checking)) CmdArgs.push_back("-fno-spell-checking"); // -fno-asm-blocks is default. if (Args.hasFlag(options::OPT_fasm_blocks, options::OPT_fno_asm_blocks, false)) CmdArgs.push_back("-fasm-blocks"); // -fgnu-inline-asm is default. if (!Args.hasFlag(options::OPT_fgnu_inline_asm, options::OPT_fno_gnu_inline_asm, true)) CmdArgs.push_back("-fno-gnu-inline-asm"); // Enable vectorization per default according to the optimization level // selected. For optimization levels that want vectorization we use the alias // option to simplify the hasFlag logic. bool EnableVec = shouldEnableVectorizerAtOLevel(Args, false); OptSpecifier VectorizeAliasOption = EnableVec ? options::OPT_O_Group : options::OPT_fvectorize; if (Args.hasFlag(options::OPT_fvectorize, VectorizeAliasOption, options::OPT_fno_vectorize, EnableVec)) CmdArgs.push_back("-vectorize-loops"); // -fslp-vectorize is enabled based on the optimization level selected. bool EnableSLPVec = shouldEnableVectorizerAtOLevel(Args, true); OptSpecifier SLPVectAliasOption = EnableSLPVec ? options::OPT_O_Group : options::OPT_fslp_vectorize; if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption, options::OPT_fno_slp_vectorize, EnableSLPVec)) CmdArgs.push_back("-vectorize-slp"); // -fno-slp-vectorize-aggressive is default. if (Args.hasFlag(options::OPT_fslp_vectorize_aggressive, options::OPT_fno_slp_vectorize_aggressive, false)) CmdArgs.push_back("-vectorize-slp-aggressive"); if (Arg *A = Args.getLastArg(options::OPT_fshow_overloads_EQ)) A->render(Args, CmdArgs); if (Arg *A = Args.getLastArg( options::OPT_fsanitize_undefined_strip_path_components_EQ)) A->render(Args, CmdArgs); // -fdollars-in-identifiers default varies depending on platform and // language; only pass if specified. if (Arg *A = Args.getLastArg(options::OPT_fdollars_in_identifiers, options::OPT_fno_dollars_in_identifiers)) { if (A->getOption().matches(options::OPT_fdollars_in_identifiers)) CmdArgs.push_back("-fdollars-in-identifiers"); else CmdArgs.push_back("-fno-dollars-in-identifiers"); } // -funit-at-a-time is default, and we don't support -fno-unit-at-a-time for // practical purposes. 
if (Arg *A = Args.getLastArg(options::OPT_funit_at_a_time, options::OPT_fno_unit_at_a_time)) { if (A->getOption().matches(options::OPT_fno_unit_at_a_time)) D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args); } if (Args.hasFlag(options::OPT_fapple_pragma_pack, options::OPT_fno_apple_pragma_pack, false)) CmdArgs.push_back("-fapple-pragma-pack"); // le32-specific flags: // -fno-math-builtin: clang should not convert math builtins to intrinsics // by default. if (getToolChain().getArch() == llvm::Triple::le32) { CmdArgs.push_back("-fno-math-builtin"); } // Default to -fno-builtin-str{cat,cpy} on Darwin for ARM. // // FIXME: Now that PR4941 has been fixed this can be enabled. #if 0 if (getToolChain().getTriple().isOSDarwin() && (getToolChain().getArch() == llvm::Triple::arm || getToolChain().getArch() == llvm::Triple::thumb)) { if (!Args.hasArg(options::OPT_fbuiltin_strcat)) CmdArgs.push_back("-fno-builtin-strcat"); if (!Args.hasArg(options::OPT_fbuiltin_strcpy)) CmdArgs.push_back("-fno-builtin-strcpy"); } #endif // Enable rewrite includes if the user's asked for it or if we're generating // diagnostics. // TODO: Once -module-dependency-dir works with -frewrite-includes it'd be // nice to enable this when doing a crashdump for modules as well. if (Args.hasFlag(options::OPT_frewrite_includes, options::OPT_fno_rewrite_includes, false) || (C.isForDiagnostics() && !HaveModules)) CmdArgs.push_back("-frewrite-includes"); // Only allow -traditional or -traditional-cpp in preprocessing modes. if (Arg *A = Args.getLastArg(options::OPT_traditional, options::OPT_traditional_cpp)) { if (isa<PreprocessJobAction>(JA)) CmdArgs.push_back("-traditional-cpp"); else D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); } Args.AddLastArg(CmdArgs, options::OPT_dM); Args.AddLastArg(CmdArgs, options::OPT_dD); // Handle serialized diagnostics. if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) { CmdArgs.push_back("-serialize-diagnostic-file"); CmdArgs.push_back(Args.MakeArgString(A->getValue())); } if (Args.hasArg(options::OPT_fretain_comments_from_system_headers)) CmdArgs.push_back("-fretain-comments-from-system-headers"); // Forward -fcomment-block-commands to -cc1. Args.AddAllArgs(CmdArgs, options::OPT_fcomment_block_commands); // Forward -fparse-all-comments to -cc1. Args.AddAllArgs(CmdArgs, options::OPT_fparse_all_comments); // Turn -fplugin=name.so into -load name.so for (const Arg *A : Args.filtered(options::OPT_fplugin_EQ)) { CmdArgs.push_back("-load"); CmdArgs.push_back(A->getValue()); A->claim(); } // Forward -Xclang arguments to -cc1, and -mllvm arguments to the LLVM option // parser. Args.AddAllArgValues(CmdArgs, options::OPT_Xclang); for (const Arg *A : Args.filtered(options::OPT_mllvm)) { A->claim(); // We translate this by hand to the -cc1 argument, since nightly test uses // it and developers have been trained to spell it with -mllvm. if (StringRef(A->getValue(0)) == "-disable-llvm-optzns") { CmdArgs.push_back("-disable-llvm-optzns"); } else A->render(Args, CmdArgs); } // With -save-temps, we want to save the unoptimized bitcode output from the // CompileJobAction, use -disable-llvm-passes to get pristine IR generated // by the frontend. // When -fembed-bitcode is enabled, optimized bitcode is emitted because it // has slightly different breakdown between stages. // FIXME: -fembed-bitcode -save-temps will save optimized bitcode instead of // pristine IR generated by the frontend. Ideally, a new compile action should // be added so both IR can be captured.
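// (For example, "clang -save-temps -O2 foo.c" is expected to add // -disable-llvm-passes to the CompileJobAction's cc1 line so that the saved // bitcode is the unoptimized frontend output; with -fembed-bitcode the flag // is deliberately omitted. Illustrative.)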
if (C.getDriver().isSaveTempsEnabled() && !C.getDriver().embedBitcodeEnabled() && isa<CompileJobAction>(JA)) CmdArgs.push_back("-disable-llvm-passes"); if (Output.getType() == types::TY_Dependencies) { // Handled with other dependency code. } else if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } addDashXForInput(Args, Input, CmdArgs); if (Input.isFilename()) CmdArgs.push_back(Input.getFilename()); else Input.getInputArg().renderAsInput(Args, CmdArgs); Args.AddAllArgs(CmdArgs, options::OPT_undef); const char *Exec = getToolChain().getDriver().getClangProgramPath(); // Optionally embed the -cc1 level arguments into the debug info, for build // analysis. if (getToolChain().UseDwarfDebugFlags()) { ArgStringList OriginalArgs; for (const auto &Arg : Args) Arg->render(Args, OriginalArgs); SmallString<256> Flags; Flags += Exec; for (const char *OriginalArg : OriginalArgs) { SmallString<128> EscapedArg; EscapeSpacesAndBackslashes(OriginalArg, EscapedArg); Flags += " "; Flags += EscapedArg; } CmdArgs.push_back("-dwarf-debug-flags"); CmdArgs.push_back(Args.MakeArgString(Flags)); } // Add the split debug info name to the command lines here so we // can propagate it to the backend. bool SplitDwarf = SplitDwarfArg && getToolChain().getTriple().isOSLinux() && (isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)); const char *SplitDwarfOut; if (SplitDwarf) { CmdArgs.push_back("-split-dwarf-file"); SplitDwarfOut = SplitDebugName(Args, Input); CmdArgs.push_back(SplitDwarfOut); } // Host-side cuda compilation receives device-side outputs as Inputs[1...]. // Include them with -fcuda-include-gpubinary. if (IsCuda && Inputs.size() > 1) for (auto I = std::next(Inputs.begin()), E = Inputs.end(); I != E; ++I) { CmdArgs.push_back("-fcuda-include-gpubinary"); CmdArgs.push_back(I->getFilename()); } bool WholeProgramVTables = Args.hasFlag(options::OPT_fwhole_program_vtables, options::OPT_fno_whole_program_vtables, false); if (WholeProgramVTables) { if (!D.isUsingLTO()) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fwhole-program-vtables" << "-flto"; CmdArgs.push_back("-fwhole-program-vtables"); } // Finally add the compile command to the compilation. if (Args.hasArg(options::OPT__SLASH_fallback) && Output.getType() == types::TY_Object && (InputType == types::TY_C || InputType == types::TY_CXX)) { auto CLCommand = getCLFallback()->GetCommand(C, JA, Output, Inputs, Args, LinkingOutput); C.addCommand(llvm::make_unique<FallbackCommand>( JA, *this, Exec, CmdArgs, Inputs, std::move(CLCommand))); } else if (Args.hasArg(options::OPT__SLASH_fallback) && isa<PrecompileJobAction>(JA)) { // In /fallback builds, run the main compilation even if the pch generation // fails, so that the main compilation's fallback to cl.exe runs. C.addCommand(llvm::make_unique<ForceSuccessCommand>(JA, *this, Exec, CmdArgs, Inputs)); } else { C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); } // Handle the debug info splitting at object creation time if we're // creating an object. // TODO: Currently only works on linux with newer objcopy. if (SplitDwarf && Output.getType() == types::TY_Object) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDwarfOut); if (Arg *A = Args.getLastArg(options::OPT_pg)) if (Args.hasArg(options::OPT_fomit_frame_pointer)) D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer" << A->getAsString(Args); // Claim some arguments which clang supports automatically.
// -fpch-preprocess is used with gcc to add a special marker in the output to // include the PCH file. Clang's PTH solution is completely transparent, so we // do not need to deal with it at all. Args.ClaimAllArgs(options::OPT_fpch_preprocess); // Claim some arguments which clang doesn't support, but we don't // care to warn the user about. Args.ClaimAllArgs(options::OPT_clang_ignored_f_Group); Args.ClaimAllArgs(options::OPT_clang_ignored_m_Group); // Disable warnings for clang -E -emit-llvm foo.c Args.ClaimAllArgs(options::OPT_emit_llvm); } /// Add options related to the Objective-C runtime/ABI. /// /// Returns the Objective-C runtime that was selected. ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args, ArgStringList &cmdArgs, RewriteKind rewriteKind) const { // Look for the controlling runtime option. Arg *runtimeArg = args.getLastArg(options::OPT_fnext_runtime, options::OPT_fgnu_runtime, options::OPT_fobjc_runtime_EQ); // Just forward -fobjc-runtime= to the frontend. This supersedes // options about fragility. if (runtimeArg && runtimeArg->getOption().matches(options::OPT_fobjc_runtime_EQ)) { ObjCRuntime runtime; StringRef value = runtimeArg->getValue(); if (runtime.tryParse(value)) { getToolChain().getDriver().Diag(diag::err_drv_unknown_objc_runtime) << value; } runtimeArg->render(args, cmdArgs); return runtime; } // Otherwise, we'll need the ABI "version". Version numbers are // slightly confusing for historical reasons: // 1 - Traditional "fragile" ABI // 2 - Non-fragile ABI, version 1 // 3 - Non-fragile ABI, version 2 unsigned objcABIVersion = 1; // If -fobjc-abi-version= is present, use that to set the version. if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_abi_version_EQ)) { StringRef value = abiArg->getValue(); if (value == "1") objcABIVersion = 1; else if (value == "2") objcABIVersion = 2; else if (value == "3") objcABIVersion = 3; else getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value; } else { // Otherwise, determine if we are using the non-fragile ABI. bool nonFragileABIIsDefault = (rewriteKind == RK_NonFragile || (rewriteKind == RK_None && getToolChain().IsObjCNonFragileABIDefault())); if (args.hasFlag(options::OPT_fobjc_nonfragile_abi, options::OPT_fno_objc_nonfragile_abi, nonFragileABIIsDefault)) { // Determine the non-fragile ABI version to use. #ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO unsigned nonFragileABIVersion = 1; #else unsigned nonFragileABIVersion = 2; #endif if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_nonfragile_abi_version_EQ)) { StringRef value = abiArg->getValue(); if (value == "1") nonFragileABIVersion = 1; else if (value == "2") nonFragileABIVersion = 2; else getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value; } objcABIVersion = 1 + nonFragileABIVersion; } else { objcABIVersion = 1; } } // We don't actually care about the ABI version other than whether // it's non-fragile. bool isNonFragile = objcABIVersion != 1; // If we have no runtime argument, ask the toolchain for its default runtime. // However, the rewriter only really supports the Mac runtime, so assume that.
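// (Illustrative: a spelling such as "-fobjc-runtime=gnustep-1.7" is parsed // by ObjCRuntime::tryParse above; when no runtime argument is present at all, // the switch below picks the toolchain default or, for the rewriter, a Mac // runtime.)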
ObjCRuntime runtime; if (!runtimeArg) { switch (rewriteKind) { case RK_None: runtime = getToolChain().getDefaultObjCRuntime(isNonFragile); break; case RK_Fragile: runtime = ObjCRuntime(ObjCRuntime::FragileMacOSX, VersionTuple()); break; case RK_NonFragile: runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple()); break; } // -fnext-runtime } else if (runtimeArg->getOption().matches(options::OPT_fnext_runtime)) { // On Darwin, make this use the default behavior for the toolchain. if (getToolChain().getTriple().isOSDarwin()) { runtime = getToolChain().getDefaultObjCRuntime(isNonFragile); // Otherwise, build for a generic macosx port. } else { runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple()); } // -fgnu-runtime } else { assert(runtimeArg->getOption().matches(options::OPT_fgnu_runtime)); // Legacy behaviour is to target the gnustep runtime if we are in // non-fragile mode or the GCC runtime in fragile mode. if (isNonFragile) runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple(1, 6)); else runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple()); } cmdArgs.push_back( args.MakeArgString("-fobjc-runtime=" + runtime.getAsString())); return runtime; } static bool maybeConsumeDash(const std::string &EH, size_t &I) { bool HaveDash = (I + 1 < EH.size() && EH[I + 1] == '-'); I += HaveDash; return !HaveDash; } namespace { struct EHFlags { bool Synch = false; bool Asynch = false; bool NoUnwindC = false; }; } // end anonymous namespace /// /EH controls whether to run destructor cleanups when exceptions are /// thrown. There are three modifiers: /// - s: Cleanup after "synchronous" exceptions, aka C++ exceptions. /// - a: Cleanup after "asynchronous" exceptions, aka structured exceptions. /// The 'a' modifier is unimplemented and fundamentally hard in LLVM IR. /// - c: Assume that extern "C" functions are implicitly nounwind. /// The default is /EHs-c-, meaning cleanups are disabled. static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) { EHFlags EH; std::vector<std::string> EHArgs = Args.getAllArgValues(options::OPT__SLASH_EH); for (auto EHVal : EHArgs) { for (size_t I = 0, E = EHVal.size(); I != E; ++I) { switch (EHVal[I]) { case 'a': EH.Asynch = maybeConsumeDash(EHVal, I); if (EH.Asynch) EH.Synch = false; continue; case 'c': EH.NoUnwindC = maybeConsumeDash(EHVal, I); continue; case 's': EH.Synch = maybeConsumeDash(EHVal, I); if (EH.Synch) EH.Asynch = false; continue; default: break; } D.Diag(clang::diag::err_drv_invalid_value) << "/EH" << EHVal; break; } } // The /GX, /GX- flags are only processed if there are no /EH flags. // The default is that /GX is not specified. if (EHArgs.empty() && Args.hasFlag(options::OPT__SLASH_GX, options::OPT__SLASH_GX_, /*default=*/false)) { EH.Synch = true; EH.NoUnwindC = true; } return EH; } void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, ArgStringList &CmdArgs, codegenoptions::DebugInfoKind *DebugInfoKind, bool *EmitCodeView) const { unsigned RTOptionID = options::OPT__SLASH_MT; if (Args.hasArg(options::OPT__SLASH_LDd)) // The /LDd option implies /MTd. The dependent lib part can be overridden, // but defining _DEBUG is sticky.
RTOptionID = options::OPT__SLASH_MTd; if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group)) RTOptionID = A->getOption().getID(); StringRef FlagForCRT; switch (RTOptionID) { case options::OPT__SLASH_MD: if (Args.hasArg(options::OPT__SLASH_LDd)) CmdArgs.push_back("-D_DEBUG"); CmdArgs.push_back("-D_MT"); CmdArgs.push_back("-D_DLL"); FlagForCRT = "--dependent-lib=msvcrt"; break; case options::OPT__SLASH_MDd: CmdArgs.push_back("-D_DEBUG"); CmdArgs.push_back("-D_MT"); CmdArgs.push_back("-D_DLL"); FlagForCRT = "--dependent-lib=msvcrtd"; break; case options::OPT__SLASH_MT: if (Args.hasArg(options::OPT__SLASH_LDd)) CmdArgs.push_back("-D_DEBUG"); CmdArgs.push_back("-D_MT"); CmdArgs.push_back("-flto-visibility-public-std"); FlagForCRT = "--dependent-lib=libcmt"; break; case options::OPT__SLASH_MTd: CmdArgs.push_back("-D_DEBUG"); CmdArgs.push_back("-D_MT"); CmdArgs.push_back("-flto-visibility-public-std"); FlagForCRT = "--dependent-lib=libcmtd"; break; default: llvm_unreachable("Unexpected option ID."); } if (Args.hasArg(options::OPT__SLASH_Zl)) { CmdArgs.push_back("-D_VC_NODEFAULTLIB"); } else { CmdArgs.push_back(FlagForCRT.data()); // This provides POSIX compatibility (maps 'open' to '_open'), which most // users want. The /Za flag to cl.exe turns this off, but it's not // implemented in clang. CmdArgs.push_back("--dependent-lib=oldnames"); } // Both /showIncludes and /E (and /EP) write to stdout. Allowing both // would produce interleaved output, so ignore /showIncludes in such cases. if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_EP)) if (Arg *A = Args.getLastArg(options::OPT_show_includes)) A->render(Args, CmdArgs); // This controls whether or not we emit RTTI data for polymorphic types. if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR, /*default=*/false)) CmdArgs.push_back("-fno-rtti-data"); // This controls whether or not we emit stack-protector instrumentation. // In MSVC, Buffer Security Check (/GS) is on by default. if (Args.hasFlag(options::OPT__SLASH_GS, options::OPT__SLASH_GS_, /*default=*/true)) { CmdArgs.push_back("-stack-protector"); CmdArgs.push_back(Args.MakeArgString(Twine(LangOptions::SSPStrong))); } // Emit CodeView if -Z7, -Zd, or -gline-tables-only are present. if (Arg *DebugInfoArg = Args.getLastArg(options::OPT__SLASH_Z7, options::OPT__SLASH_Zd, options::OPT_gline_tables_only)) { *EmitCodeView = true; if (DebugInfoArg->getOption().matches(options::OPT__SLASH_Z7)) *DebugInfoKind = codegenoptions::LimitedDebugInfo; else *DebugInfoKind = codegenoptions::DebugLineTablesOnly; CmdArgs.push_back("-gcodeview"); } else { *EmitCodeView = false; } const Driver &D = getToolChain().getDriver(); EHFlags EH = parseClangCLEHFlags(D, Args); if (EH.Synch || EH.Asynch) { if (types::isCXX(InputType)) CmdArgs.push_back("-fcxx-exceptions"); CmdArgs.push_back("-fexceptions"); } if (types::isCXX(InputType) && EH.Synch && EH.NoUnwindC) CmdArgs.push_back("-fexternc-nounwind"); // /EP should expand to -E -P. 
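// (Illustrative: "clang-cl /EP foo.c" preprocesses to stdout with #line // markers suppressed, matching a GCC-style "-E -P" invocation.)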
if (Args.hasArg(options::OPT__SLASH_EP)) { CmdArgs.push_back("-E"); CmdArgs.push_back("-P"); } unsigned VolatileOptionID; if (getToolChain().getArch() == llvm::Triple::x86_64 || getToolChain().getArch() == llvm::Triple::x86) VolatileOptionID = options::OPT__SLASH_volatile_ms; else VolatileOptionID = options::OPT__SLASH_volatile_iso; if (Arg *A = Args.getLastArg(options::OPT__SLASH_volatile_Group)) VolatileOptionID = A->getOption().getID(); if (VolatileOptionID == options::OPT__SLASH_volatile_ms) CmdArgs.push_back("-fms-volatile"); Arg *MostGeneralArg = Args.getLastArg(options::OPT__SLASH_vmg); Arg *BestCaseArg = Args.getLastArg(options::OPT__SLASH_vmb); if (MostGeneralArg && BestCaseArg) D.Diag(clang::diag::err_drv_argument_not_allowed_with) << MostGeneralArg->getAsString(Args) << BestCaseArg->getAsString(Args); if (MostGeneralArg) { Arg *SingleArg = Args.getLastArg(options::OPT__SLASH_vms); Arg *MultipleArg = Args.getLastArg(options::OPT__SLASH_vmm); Arg *VirtualArg = Args.getLastArg(options::OPT__SLASH_vmv); Arg *FirstConflict = SingleArg ? SingleArg : MultipleArg; Arg *SecondConflict = VirtualArg ? VirtualArg : MultipleArg; if (FirstConflict && SecondConflict && FirstConflict != SecondConflict) D.Diag(clang::diag::err_drv_argument_not_allowed_with) << FirstConflict->getAsString(Args) << SecondConflict->getAsString(Args); if (SingleArg) CmdArgs.push_back("-fms-memptr-rep=single"); else if (MultipleArg) CmdArgs.push_back("-fms-memptr-rep=multiple"); else CmdArgs.push_back("-fms-memptr-rep=virtual"); } if (Args.getLastArg(options::OPT__SLASH_Gd)) CmdArgs.push_back("-fdefault-calling-conv=cdecl"); else if (Args.getLastArg(options::OPT__SLASH_Gr)) CmdArgs.push_back("-fdefault-calling-conv=fastcall"); else if (Args.getLastArg(options::OPT__SLASH_Gz)) CmdArgs.push_back("-fdefault-calling-conv=stdcall"); else if (Args.getLastArg(options::OPT__SLASH_Gv)) CmdArgs.push_back("-fdefault-calling-conv=vectorcall"); if (Arg *A = Args.getLastArg(options::OPT_vtordisp_mode_EQ)) A->render(Args, CmdArgs); if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) { CmdArgs.push_back("-fdiagnostics-format"); if (Args.hasArg(options::OPT__SLASH_fallback)) CmdArgs.push_back("msvc-fallback"); else CmdArgs.push_back("msvc"); } } visualstudio::Compiler *Clang::getCLFallback() const { if (!CLFallback) CLFallback.reset(new visualstudio::Compiler(getToolChain())); return CLFallback.get(); } void ClangAs::AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { StringRef CPUName; StringRef ABIName; const llvm::Triple &Triple = getToolChain().getTriple(); mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName.data()); } void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; assert(Inputs.size() == 1 && "Unexpected number of inputs."); const InputInfo &Input = Inputs[0]; std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args, Input.getType()); const llvm::Triple Triple(TripleStr); // Don't warn about "clang -w -c foo.s" Args.ClaimAllArgs(options::OPT_w); // and "clang -emit-llvm -c foo.s" Args.ClaimAllArgs(options::OPT_emit_llvm); claimNoWarnArgs(Args); // Invoke ourselves in -cc1as mode. // // FIXME: Implement custom jobs for internal actions. CmdArgs.push_back("-cc1as"); // Add the "effective" target triple. 
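// (Illustrative shape of the resulting job, with an assumed triple: // clang -cc1as -triple x86_64-unknown-freebsd11.0 -filetype obj ... foo.s // where the triple is computed from the toolchain and any -target override.)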
CmdArgs.push_back("-triple"); CmdArgs.push_back(Args.MakeArgString(TripleStr)); // Set the output mode, we currently only expect to be used as a real // assembler. CmdArgs.push_back("-filetype"); CmdArgs.push_back("obj"); // Set the main file name, so that debug info works even with // -save-temps or preprocessed assembly. CmdArgs.push_back("-main-file-name"); CmdArgs.push_back(Clang::getBaseInputName(Args, Input)); // Add the target cpu std::string CPU = getCPUName(Args, Triple, /*FromAs*/ true); if (!CPU.empty()) { CmdArgs.push_back("-target-cpu"); CmdArgs.push_back(Args.MakeArgString(CPU)); } // Add the target features getTargetFeatures(getToolChain(), Triple, Args, CmdArgs, true); // Ignore explicit -force_cpusubtype_ALL option. (void)Args.hasArg(options::OPT_force__cpusubtype__ALL); // Pass along any -I options so we get proper .include search paths. Args.AddAllArgs(CmdArgs, options::OPT_I_Group); // Determine the original source input. const Action *SourceAction = &JA; while (SourceAction->getKind() != Action::InputClass) { assert(!SourceAction->getInputs().empty() && "unexpected root action!"); SourceAction = SourceAction->getInputs()[0]; } // Forward -g and handle debug info related flags, assuming we are dealing // with an actual assembly file. bool WantDebug = false; unsigned DwarfVersion = 0; Args.ClaimAllArgs(options::OPT_g_Group); if (Arg *A = Args.getLastArg(options::OPT_g_Group)) { WantDebug = !A->getOption().matches(options::OPT_g0) && !A->getOption().matches(options::OPT_ggdb0); if (WantDebug) DwarfVersion = DwarfVersionNum(A->getSpelling()); } if (DwarfVersion == 0) DwarfVersion = getToolChain().GetDefaultDwarfVersion(); codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo; if (SourceAction->getType() == types::TY_Asm || SourceAction->getType() == types::TY_PP_Asm) { // You might think that it would be ok to set DebugInfoKind outside of // the guard for source type, however there is a test which asserts // that some assembler invocation receives no -debug-info-kind, // and it's not clear whether that test is just overly restrictive. DebugInfoKind = (WantDebug ? codegenoptions::LimitedDebugInfo : codegenoptions::NoDebugInfo); // Add the -fdebug-compilation-dir flag if needed. addDebugCompDirArg(Args, CmdArgs); // Set the AT_producer to the clang version when using the integrated // assembler on assembly source files. CmdArgs.push_back("-dwarf-debug-producer"); CmdArgs.push_back(Args.MakeArgString(getClangFullVersion())); // And pass along -I options Args.AddAllArgs(CmdArgs, options::OPT_I); } RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion, llvm::DebuggerKind::Default); // Handle -fPIC et al -- the relocation-model affects the assembler // for some targets. llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(getToolChain(), Triple, Args); const char *RMName = RelocationModelName(RelocationModel); if (RMName) { CmdArgs.push_back("-mrelocation-model"); CmdArgs.push_back(RMName); } // Optionally embed the -cc1as level arguments into the debug info, for build // analysis. 
if (getToolChain().UseDwarfDebugFlags()) { ArgStringList OriginalArgs; for (const auto &Arg : Args) Arg->render(Args, OriginalArgs); SmallString<256> Flags; const char *Exec = getToolChain().getDriver().getClangProgramPath(); Flags += Exec; for (const char *OriginalArg : OriginalArgs) { SmallString<128> EscapedArg; EscapeSpacesAndBackslashes(OriginalArg, EscapedArg); Flags += " "; Flags += EscapedArg; } CmdArgs.push_back("-dwarf-debug-flags"); CmdArgs.push_back(Args.MakeArgString(Flags)); } // FIXME: Add -static support, once we have it. // Add target specific flags. switch (getToolChain().getArch()) { default: break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: AddMIPSTargetArgs(Args, CmdArgs); break; } // Consume all the warning flags. Usually this would be handled more // gracefully by -cc1 (warning about unknown warning flags, etc) but -cc1as // doesn't handle that so rather than warning about unused flags that are // actually used, we'll lie by omission instead. // FIXME: Stop lying and consume only the appropriate driver flags. Args.ClaimAllArgs(options::OPT_W_Group); CollectArgsForIntegratedAssembler(C, Args, CmdArgs, getToolChain().getDriver()); Args.AddAllArgs(CmdArgs, options::OPT_mllvm); assert(Output.isFilename() && "Unexpected lipo output."); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); assert(Input.isFilename() && "Invalid input."); CmdArgs.push_back(Input.getFilename()); const char *Exec = getToolChain().getDriver().getClangProgramPath(); C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); // Handle the debug info splitting at object creation time if we're // creating an object. // TODO: Currently only works on linux with newer objcopy. if (Args.hasArg(options::OPT_gsplit_dwarf) && getToolChain().getTriple().isOSLinux()) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDebugName(Args, Input)); } void GnuTool::anchor() {} void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; for (const auto &A : Args) { if (forwardToGCC(A->getOption())) { // It is unfortunate that we have to claim here, as this means // we will basically never report anything interesting for // platforms using a generic gcc, even if we are just using gcc // to get to the assembler. A->claim(); // Don't forward any -g arguments to assembly steps. if (isa<AssembleJobAction>(JA) && A->getOption().matches(options::OPT_g_Group)) continue; // Don't forward any -W arguments to assembly and link steps. if ((isa<AssembleJobAction>(JA) || isa<LinkJobAction>(JA)) && A->getOption().matches(options::OPT_W_Group)) continue; A->render(Args, CmdArgs); } } RenderExtraToolArgs(JA, CmdArgs); // If using a driver driver, force the arch. if (getToolChain().getTriple().isOSDarwin()) { CmdArgs.push_back("-arch"); CmdArgs.push_back( Args.MakeArgString(getToolChain().getDefaultUniversalArchName())); } // Try to force gcc to match the tool chain we want, if we recognize // the arch. // // FIXME: The triple class should directly provide the information we want // here.
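// (Illustrative: x86 and ppc triples are steered with -m32, x86_64 and // ppc64[le] with -m64, and little-endian sparc with -EL, per the switch // below; arches not listed are left to gcc's own defaults.)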
switch (getToolChain().getArch()) { default: break; case llvm::Triple::x86: case llvm::Triple::ppc: CmdArgs.push_back("-m32"); break; case llvm::Triple::x86_64: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: CmdArgs.push_back("-m64"); break; case llvm::Triple::sparcel: CmdArgs.push_back("-EL"); break; } if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Unexpected output"); CmdArgs.push_back("-fsyntax-only"); } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); // Only pass -x if gcc will understand it; otherwise hope gcc // understands the suffix correctly. The main use case this would go // wrong in is for linker inputs if they happened to have an odd // suffix; really the only way to get this to happen is a command // like '-x foobar a.c' which will treat a.c like a linker input. // // FIXME: For the linker case specifically, can we safely convert // inputs into '-Wl,' options? for (const auto &II : Inputs) { // Don't try to pass LLVM or AST inputs to a generic gcc. if (types::isLLVMIR(II.getType())) D.Diag(diag::err_drv_no_linker_llvm_support) << getToolChain().getTripleString(); else if (II.getType() == types::TY_AST) D.Diag(diag::err_drv_no_ast_support) << getToolChain().getTripleString(); else if (II.getType() == types::TY_ModuleFile) D.Diag(diag::err_drv_no_module_support) << getToolChain().getTripleString(); if (types::canTypeBeUserSpecified(II.getType())) { CmdArgs.push_back("-x"); CmdArgs.push_back(types::getTypeName(II.getType())); } if (II.isFilename()) CmdArgs.push_back(II.getFilename()); else { const Arg &A = II.getInputArg(); // Reverse translate some rewritten options. if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) { CmdArgs.push_back("-lstdc++"); continue; } // Don't render as input, we need gcc to do the translations. A.render(Args, CmdArgs); } } const std::string &customGCCName = D.getCCCGenericGCCName(); const char *GCCName; if (!customGCCName.empty()) GCCName = customGCCName.c_str(); else if (D.CCCIsCXX()) { GCCName = "g++"; } else GCCName = "gcc"; const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName)); C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); } void gcc::Preprocessor::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { CmdArgs.push_back("-E"); } void gcc::Compiler::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { const Driver &D = getToolChain().getDriver(); switch (JA.getType()) { // If -flto, etc. are present then make sure not to force assembly output. case types::TY_LLVM_IR: case types::TY_LTO_IR: case types::TY_LLVM_BC: case types::TY_LTO_BC: CmdArgs.push_back("-c"); break; // We assume we've got an "integrated" assembler in that gcc will produce an // object file itself. case types::TY_Object: CmdArgs.push_back("-c"); break; case types::TY_PP_Asm: CmdArgs.push_back("-S"); break; case types::TY_Nothing: CmdArgs.push_back("-fsyntax-only"); break; default: D.Diag(diag::err_drv_invalid_gcc_output_type) << getTypeName(JA.getType()); } } void gcc::Linker::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { // The types are (hopefully) good enough. } // Hexagon tools start.
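// (Overview, illustrative: the assembler tool below builds an invocation of // the form "hexagon-llvm-mc -march=hexagon -mcpu=hexagon<vN> -filetype=obj", // where <vN> is the CPU version reported by GetTargetCPUVersion(Args).)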
void hexagon::Assembler::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { } void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); auto &HTC = static_cast<const toolchains::HexagonToolChain &>(getToolChain()); const Driver &D = HTC.getDriver(); ArgStringList CmdArgs; std::string MArchString = "-march=hexagon"; CmdArgs.push_back(Args.MakeArgString(MArchString)); RenderExtraToolArgs(JA, CmdArgs); std::string AsName = "hexagon-llvm-mc"; std::string MCpuString = "-mcpu=hexagon" + toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); CmdArgs.push_back("-filetype=obj"); CmdArgs.push_back(Args.MakeArgString(MCpuString)); if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Unexpected output"); CmdArgs.push_back("-fsyntax-only"); } if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { std::string N = llvm::utostr(G.getValue()); CmdArgs.push_back(Args.MakeArgString(std::string("-gpsize=") + N)); } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); // Only pass -x if gcc will understand it; otherwise hope gcc // understands the suffix correctly. The main use case this would go // wrong in is for linker inputs if they happened to have an odd // suffix; really the only way to get this to happen is a command // like '-x foobar a.c' which will treat a.c like a linker input. // // FIXME: For the linker case specifically, can we safely convert // inputs into '-Wl,' options? for (const auto &II : Inputs) { // Don't try to pass LLVM or AST inputs to a generic gcc. if (types::isLLVMIR(II.getType())) D.Diag(clang::diag::err_drv_no_linker_llvm_support) << HTC.getTripleString(); else if (II.getType() == types::TY_AST) D.Diag(clang::diag::err_drv_no_ast_support) << HTC.getTripleString(); else if (II.getType() == types::TY_ModuleFile) D.Diag(diag::err_drv_no_module_support) << HTC.getTripleString(); if (II.isFilename()) CmdArgs.push_back(II.getFilename()); else // Don't render as input, we need gcc to do the translations. // FIXME: What is this?
II.getInputArg().render(Args, CmdArgs); } auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName.c_str())); C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); } void hexagon::Linker::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { } static void constructHexagonLinkArgs(Compilation &C, const JobAction &JA, const toolchains::HexagonToolChain &HTC, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, ArgStringList &CmdArgs, const char *LinkingOutput) { const Driver &D = HTC.getDriver(); //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- bool IsStatic = Args.hasArg(options::OPT_static); bool IsShared = Args.hasArg(options::OPT_shared); bool IsPIE = Args.hasArg(options::OPT_pie); bool IncStdLib = !Args.hasArg(options::OPT_nostdlib); bool IncStartFiles = !Args.hasArg(options::OPT_nostartfiles); bool IncDefLibs = !Args.hasArg(options::OPT_nodefaultlibs); bool UseG0 = false; bool UseShared = IsShared && !IsStatic; //---------------------------------------------------------------------------- // Silence warnings for various options //---------------------------------------------------------------------------- Args.ClaimAllArgs(options::OPT_g_Group); Args.ClaimAllArgs(options::OPT_emit_llvm); Args.ClaimAllArgs(options::OPT_w); // Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_static_libgcc); //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); if (Args.hasArg(options::OPT_r)) CmdArgs.push_back("-r"); for (const auto &Opt : HTC.ExtraOpts) CmdArgs.push_back(Opt.c_str()); CmdArgs.push_back("-march=hexagon"); std::string CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); std::string MCpuString = "-mcpu=hexagon" + CpuVer; CmdArgs.push_back(Args.MakeArgString(MCpuString)); if (IsShared) { CmdArgs.push_back("-shared"); // The following should be the default, but doing as hexagon-gcc does.
CmdArgs.push_back("-call_shared"); } if (IsStatic) CmdArgs.push_back("-static"); if (IsPIE && !IsShared) CmdArgs.push_back("-pie"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { std::string N = llvm::utostr(G.getValue()); CmdArgs.push_back(Args.MakeArgString(std::string("-G") + N)); UseG0 = G.getValue() == 0; } //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); //---------------------------------------------------------------------------- // moslib //---------------------------------------------------------------------------- std::vector OsLibs; bool HasStandalone = false; for (const Arg *A : Args.filtered(options::OPT_moslib_EQ)) { A->claim(); OsLibs.emplace_back(A->getValue()); HasStandalone = HasStandalone || (OsLibs.back() == "standalone"); } if (OsLibs.empty()) { OsLibs.push_back("standalone"); HasStandalone = true; } //---------------------------------------------------------------------------- // Start Files //---------------------------------------------------------------------------- const std::string MCpuSuffix = "/" + CpuVer; const std::string MCpuG0Suffix = MCpuSuffix + "/G0"; const std::string RootDir = HTC.getHexagonTargetDir(D.InstalledDir, D.PrefixDirs) + "/"; const std::string StartSubDir = "hexagon/lib" + (UseG0 ? MCpuG0Suffix : MCpuSuffix); auto Find = [&HTC] (const std::string &RootDir, const std::string &SubDir, const char *Name) -> std::string { std::string RelName = SubDir + Name; std::string P = HTC.GetFilePath(RelName.c_str()); if (llvm::sys::fs::exists(P)) return P; return RootDir + RelName; }; if (IncStdLib && IncStartFiles) { if (!IsShared) { if (HasStandalone) { std::string Crt0SA = Find(RootDir, StartSubDir, "/crt0_standalone.o"); CmdArgs.push_back(Args.MakeArgString(Crt0SA)); } std::string Crt0 = Find(RootDir, StartSubDir, "/crt0.o"); CmdArgs.push_back(Args.MakeArgString(Crt0)); } std::string Init = UseShared ? 
  //----------------------------------------------------------------------------
  // Library Search Paths
  //----------------------------------------------------------------------------
  const ToolChain::path_list &LibPaths = HTC.getFilePaths();
  for (const auto &LibPath : LibPaths)
    CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibPath));

  //----------------------------------------------------------------------------
  //
  //----------------------------------------------------------------------------
  Args.AddAllArgs(CmdArgs,
                  {options::OPT_T_Group, options::OPT_e, options::OPT_s,
                   options::OPT_t, options::OPT_u_Group});

  AddLinkerInputs(HTC, Inputs, Args, CmdArgs);

  //----------------------------------------------------------------------------
  // Libraries
  //----------------------------------------------------------------------------
  if (IncStdLib && IncDefLibs) {
    if (D.CCCIsCXX()) {
      HTC.AddCXXStdlibLibArgs(Args, CmdArgs);
      CmdArgs.push_back("-lm");
    }

    CmdArgs.push_back("--start-group");

    if (!IsShared) {
      for (const std::string &Lib : OsLibs)
        CmdArgs.push_back(Args.MakeArgString("-l" + Lib));
      CmdArgs.push_back("-lc");
    }
    CmdArgs.push_back("-lgcc");

    CmdArgs.push_back("--end-group");
  }

  //----------------------------------------------------------------------------
  // End files
  //----------------------------------------------------------------------------
  if (IncStdLib && IncStartFiles) {
    std::string Fini = UseShared
        ? Find(RootDir, StartSubDir + "/pic", "/finiS.o")
        : Find(RootDir, StartSubDir, "/fini.o");
    CmdArgs.push_back(Args.MakeArgString(Fini));
  }
}

void hexagon::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
                                   const ArgList &Args,
                                   const char *LinkingOutput) const {
  auto &HTC = static_cast<const toolchains::HexagonToolChain&>(getToolChain());

  ArgStringList CmdArgs;
  constructHexagonLinkArgs(C, JA, HTC, Output, Inputs, Args, CmdArgs,
                           LinkingOutput);

  std::string Linker = HTC.GetProgramPath("hexagon-link");
  C.addCommand(llvm::make_unique<Command>(JA, *this,
                                          Args.MakeArgString(Linker), CmdArgs,
                                          Inputs));
}
// Hexagon tools end.
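// Putting the hexagon pieces together (sketch, assuming a C link with the
// default -moslib=standalone): "clang --target=hexagon foo.o" reduces to
// roughly
//
//   hexagon-link -march=hexagon -mcpu=hexagonvXX -o a.out \
//       .../crt0_standalone.o .../crt0.o .../init.o foo.o \
//       --start-group -lstandalone -lc -lgcc --end-group .../fini.o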
void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                  const InputInfo &Output,
                                  const InputInfoList &Inputs,
                                  const ArgList &Args,
                                  const char *LinkingOutput) const {
  std::string Linker = getToolChain().GetProgramPath(getShortName());
  ArgStringList CmdArgs;
  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
  CmdArgs.push_back("-shared");
  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());
  C.addCommand(llvm::make_unique<Command>(JA, *this,
                                          Args.MakeArgString(Linker), CmdArgs,
                                          Inputs));
}
// AMDGPU tools end.

wasm::Linker::Linker(const ToolChain &TC)
    : GnuTool("wasm::Linker", "lld", TC) {}

bool wasm::Linker::isLinkJob() const { return true; }

bool wasm::Linker::hasIntegratedCPP() const { return false; }

void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                const InputInfo &Output,
                                const InputInfoList &Inputs,
                                const ArgList &Args,
                                const char *LinkingOutput) const {
  const ToolChain &ToolChain = getToolChain();
  const Driver &D = ToolChain.getDriver();
  const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath());
  ArgStringList CmdArgs;
  CmdArgs.push_back("-flavor");
  CmdArgs.push_back("ld");

  // Enable garbage collection of unused input sections by default, since code
  // size is of particular importance. This is significantly facilitated by
  // the enabling of -ffunction-sections and -fdata-sections in
  // Clang::ConstructJob.
  if (areOptimizationsEnabled(Args))
    CmdArgs.push_back("--gc-sections");

  if (Args.hasArg(options::OPT_rdynamic))
    CmdArgs.push_back("-export-dynamic");
  if (Args.hasArg(options::OPT_s))
    CmdArgs.push_back("--strip-all");
  if (Args.hasArg(options::OPT_shared))
    CmdArgs.push_back("-shared");
  if (Args.hasArg(options::OPT_static))
    CmdArgs.push_back("-Bstatic");

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  ToolChain.AddFilePathLibArgs(Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_shared))
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("rcrt1.o")));
    else if (Args.hasArg(options::OPT_pie))
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("Scrt1.o")));
    else
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o")));

    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
  }

  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (D.CCCIsCXX())
      ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);

    if (Args.hasArg(options::OPT_pthread))
      CmdArgs.push_back("-lpthread");

    CmdArgs.push_back("-lc");
    CmdArgs.push_back("-lcompiler_rt");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  C.addCommand(llvm::make_unique<Command>(JA, *this, Linker, CmdArgs, Inputs));
}

const std::string arm::getARMArch(StringRef Arch, const llvm::Triple &Triple) {
  std::string MArch;
  if (!Arch.empty())
    MArch = Arch;
  else
    MArch = Triple.getArchName();
  MArch = StringRef(MArch).split("+").first.lower();

  // Handle -march=native.
  if (MArch == "native") {
    std::string CPU = llvm::sys::getHostCPUName();
    if (CPU != "generic") {
      // Translate the native cpu into the architecture suffix for that CPU.
      StringRef Suffix = arm::getLLVMArchSuffixForARM(CPU, MArch, Triple);
      // If there is no valid architecture suffix for this CPU we don't know
      // how to handle it, so return no architecture.
      if (Suffix.empty())
        MArch = "";
      else
        MArch = std::string("arm") + Suffix.str();
    }
  }

  return MArch;
}

/// Get the (LLVM) name of the minimum ARM CPU for the arch we are targeting.
StringRef arm::getARMCPUForMArch(StringRef Arch, const llvm::Triple &Triple) {
  std::string MArch = getARMArch(Arch, Triple);
  // getARMCPUForArch defaults to the triple if MArch is empty, but empty MArch
  // here means an -march=native that we can't handle, so instead return no
  // CPU.
  if (MArch.empty())
    return StringRef();

  // We need to return an empty string here on invalid MArch values as the
  // various places that call this function can't cope with a null result.
  return Triple.getARMCPUForArch(MArch);
}

/// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting.
std::string arm::getARMTargetCPU(StringRef CPU, StringRef Arch,
                                 const llvm::Triple &Triple) {
  // FIXME: Warn on inconsistent use of -mcpu and -march.
  // If we have -mcpu=, use that.
  if (!CPU.empty()) {
    std::string MCPU = StringRef(CPU).split("+").first.lower();
    // Handle -mcpu=native.
    if (MCPU == "native")
      return llvm::sys::getHostCPUName();
    else
      return MCPU;
  }

  return getARMCPUForMArch(Arch, Triple);
}
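// Example of the resolution order above (values are illustrative): for
// "-mcpu=cortex-a15+neon" the "+"-suffix is split off and getARMTargetCPU()
// returns "cortex-a15"; with no -mcpu, "-march=armv7-a" falls through to
// getARMCPUForMArch(), which lets the triple pick its default CPU for that
// architecture. "-mcpu=native" and "-march=native" resolve against the host
// via llvm::sys::getHostCPUName().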
/// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular
/// CPU (or Arch, if CPU is generic).
// FIXME: This is redundant with -mcpu, why does LLVM use this.
StringRef arm::getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch,
                                       const llvm::Triple &Triple) {
  unsigned ArchKind;
  if (CPU == "generic") {
    std::string ARMArch = tools::arm::getARMArch(Arch, Triple);
    ArchKind = llvm::ARM::parseArch(ARMArch);
    if (ArchKind == llvm::ARM::AK_INVALID)
      // In case of generic Arch, i.e. "arm",
      // extract arch from default cpu of the Triple
      ArchKind = llvm::ARM::parseCPUArch(Triple.getARMCPUForArch(ARMArch));
  } else {
    // FIXME: horrible hack to get around the fact that Cortex-A7 is only an
    // armv7k triple if it's actually been specified via "-arch armv7k".
    ArchKind = (Arch == "armv7k" || Arch == "thumbv7k")
                   ? (unsigned)llvm::ARM::AK_ARMV7K
                   : llvm::ARM::parseCPUArch(CPU);
  }
  if (ArchKind == llvm::ARM::AK_INVALID)
    return "";
  return llvm::ARM::getSubArch(ArchKind);
}

void arm::appendEBLinkFlags(const ArgList &Args, ArgStringList &CmdArgs,
                            const llvm::Triple &Triple) {
  if (Args.hasArg(options::OPT_r))
    return;

  // ARMv7 (and later) and ARMv6-M do not support BE-32, so instruct the linker
  // to generate BE-8 executables.
  if (getARMSubArchVersionNumber(Triple) >= 7 || isARMMProfile(Triple))
    CmdArgs.push_back("--be8");
}

mips::NanEncoding mips::getSupportedNanEncoding(StringRef &CPU) {
  // Strictly speaking, mips32r2 and mips64r2 are NanLegacy-only since Nan2008
  // was first introduced in Release 3. However, other compilers have
  // traditionally allowed it for Release 2 so we should do the same.
  return (NanEncoding)llvm::StringSwitch<int>(CPU)
      .Case("mips1", NanLegacy)
      .Case("mips2", NanLegacy)
      .Case("mips3", NanLegacy)
      .Case("mips4", NanLegacy)
      .Case("mips5", NanLegacy)
      .Case("mips32", NanLegacy)
      .Case("mips32r2", NanLegacy | Nan2008)
      .Case("mips32r3", NanLegacy | Nan2008)
      .Case("mips32r5", NanLegacy | Nan2008)
      .Case("mips32r6", Nan2008)
      .Case("mips64", NanLegacy)
      .Case("mips64r2", NanLegacy | Nan2008)
      .Case("mips64r3", NanLegacy | Nan2008)
      .Case("mips64r5", NanLegacy | Nan2008)
      .Case("mips64r6", Nan2008)
      .Default(NanLegacy);
}

bool mips::hasCompactBranches(StringRef &CPU) {
  // mips32r6 and mips64r6 have compact branches.
  return llvm::StringSwitch<bool>(CPU)
      .Case("mips32r6", true)
      .Case("mips64r6", true)
      .Default(false);
}

bool mips::hasMipsAbiArg(const ArgList &Args, const char *Value) {
  Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
  return A && (A->getValue() == StringRef(Value));
}

bool mips::isUCLibc(const ArgList &Args) {
  Arg *A = Args.getLastArg(options::OPT_m_libc_Group);
  return A && A->getOption().matches(options::OPT_muclibc);
}

bool mips::isNaN2008(const ArgList &Args, const llvm::Triple &Triple) {
  if (Arg *NaNArg = Args.getLastArg(options::OPT_mnan_EQ))
    return llvm::StringSwitch<bool>(NaNArg->getValue())
        .Case("2008", true)
        .Case("legacy", false)
        .Default(false);

  // NaN2008 is the default for MIPS32r6/MIPS64r6.
  return llvm::StringSwitch<bool>(getCPUName(Args, Triple))
      .Cases("mips32r6", "mips64r6", true)
      .Default(false);

  return false;
}

bool mips::isFP64ADefault(const llvm::Triple &Triple, StringRef CPUName) {
  if (!Triple.isAndroid())
    return false;

  // Android MIPS32R6 defaults to FP64A.
  return llvm::StringSwitch<bool>(CPUName)
      .Case("mips32r6", true)
      .Default(false);
}

bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
                         StringRef ABIName, mips::FloatABI FloatABI) {
  if (Triple.getVendor() != llvm::Triple::ImaginationTechnologies &&
      Triple.getVendor() != llvm::Triple::MipsTechnologies &&
      !Triple.isAndroid())
    return false;

  if (ABIName != "32")
    return false;

  // FPXX shouldn't be used if either -msoft-float or -mfloat-abi=soft is
  // present.
  if (FloatABI == mips::FloatABI::Soft)
    return false;

  return llvm::StringSwitch<bool>(CPUName)
      .Cases("mips2", "mips3", "mips4", "mips5", true)
      .Cases("mips32", "mips32r2", "mips32r3", "mips32r5", true)
      .Cases("mips64", "mips64r2", "mips64r3", "mips64r5", true)
      .Default(false);
}

bool mips::shouldUseFPXX(const ArgList &Args, const llvm::Triple &Triple,
                         StringRef CPUName, StringRef ABIName,
                         mips::FloatABI FloatABI) {
  bool UseFPXX = isFPXXDefault(Triple, CPUName, ABIName, FloatABI);

  // FPXX shouldn't be used if -msingle-float is present.
  if (Arg *A = Args.getLastArg(options::OPT_msingle_float,
                               options::OPT_mdouble_float))
    if (A->getOption().matches(options::OPT_msingle_float))
      UseFPXX = false;

  return UseFPXX;
}
llvm::Triple::ArchType darwin::getArchTypeForMachOArchName(StringRef Str) {
  // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
  // archs which Darwin doesn't use.

  // The matching this routine does is fairly pointless, since it is neither
  // the complete architecture list, nor a reasonable subset. The problem is
  // that historically the driver driver accepts this and also ties its
  // -march= handling to the architecture name, so we need to be careful before
  // removing support for it.

  // This code must be kept in sync with Clang's Darwin specific argument
  // translation.

  return llvm::StringSwitch<llvm::Triple::ArchType>(Str)
      .Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", llvm::Triple::ppc)
      .Cases("ppc750", "ppc7400", "ppc7450", "ppc970", llvm::Triple::ppc)
      .Case("ppc64", llvm::Triple::ppc64)
      .Cases("i386", "i486", "i486SX", "i586", "i686", llvm::Triple::x86)
      .Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
             llvm::Triple::x86)
      .Cases("x86_64", "x86_64h", llvm::Triple::x86_64)
      // This is derived from the driver driver.
      .Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm)
      .Cases("armv7", "armv7em", "armv7k", "armv7m", llvm::Triple::arm)
      .Cases("armv7s", "xscale", llvm::Triple::arm)
      .Case("arm64", llvm::Triple::aarch64)
      .Case("r600", llvm::Triple::r600)
      .Case("amdgcn", llvm::Triple::amdgcn)
      .Case("nvptx", llvm::Triple::nvptx)
      .Case("nvptx64", llvm::Triple::nvptx64)
      .Case("amdil", llvm::Triple::amdil)
      .Case("spir", llvm::Triple::spir)
      .Default(llvm::Triple::UnknownArch);
}

void darwin::setTripleTypeForMachOArchName(llvm::Triple &T, StringRef Str) {
  const llvm::Triple::ArchType Arch = getArchTypeForMachOArchName(Str);
  T.setArch(Arch);

  if (Str == "x86_64h")
    T.setArchName(Str);
  else if (Str == "armv6m" || Str == "armv7m" || Str == "armv7em") {
    T.setOS(llvm::Triple::UnknownOS);
    T.setObjectFormat(llvm::Triple::MachO);
  }
}
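// Sample mappings performed above (illustrative): "ppc7400" -> ppc,
// "pentIIm3" -> x86, "armv7s" -> arm, "arm64" -> aarch64. For "x86_64h" the
// spelling is preserved as the arch name so the Haswell slice keeps its
// identity, and for the embedded "armv6m"/"armv7m"/"armv7em" names the OS is
// cleared while the Mach-O object format is kept.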
const char *Clang::getBaseInputName(const ArgList &Args,
                                    const InputInfo &Input) {
  return Args.MakeArgString(llvm::sys::path::filename(Input.getBaseInput()));
}

const char *Clang::getBaseInputStem(const ArgList &Args,
                                    const InputInfoList &Inputs) {
  const char *Str = getBaseInputName(Args, Inputs[0]);

  if (const char *End = strrchr(Str, '.'))
    return Args.MakeArgString(std::string(Str, End));

  return Str;
}

const char *Clang::getDependencyFileName(const ArgList &Args,
                                         const InputInfoList &Inputs) {
  // FIXME: Think about this more.
  std::string Res;

  if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) {
    std::string Str(OutputOpt->getValue());
    Res = Str.substr(0, Str.rfind('.'));
  } else {
    Res = getBaseInputStem(Args, Inputs);
  }
  return Args.MakeArgString(Res + ".d");
}

void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const ToolChain &ToolChain = getToolChain();
  const Driver &D = ToolChain.getDriver();
  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  // CloudABI only supports static linkage.
  CmdArgs.push_back("-Bstatic");
  CmdArgs.push_back("--no-dynamic-linker");

  // Provide PIE linker flags in case PIE is default for the architecture.
  if (ToolChain.isPIEDefault()) {
    CmdArgs.push_back("-pie");
    CmdArgs.push_back("-zrelro");
  }

  CmdArgs.push_back("--eh-frame-hdr");
  CmdArgs.push_back("--gc-sections");

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtbegin.o")));
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  ToolChain.AddFilePathLibArgs(Args, CmdArgs);
  Args.AddAllArgs(CmdArgs,
                  {options::OPT_T_Group, options::OPT_e, options::OPT_s,
                   options::OPT_t, options::OPT_Z_Flag, options::OPT_r});

  if (D.isUsingLTO())
    AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin);

  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (D.CCCIsCXX())
      ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
    CmdArgs.push_back("-lc");
    CmdArgs.push_back("-lcompiler_rt");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));

  const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
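// Behaviour sketch for getDependencyFileName() (hypothetical inputs):
// "clang -MD -c foo.c -o out/foo.o" derives the dependency file "out/foo.d"
// from the -o value, while "clang -MD -c foo.c" (no -o) falls back to the
// base input stem and produces "foo.d".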
void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                     const InputInfo &Output,
                                     const InputInfoList &Inputs,
                                     const ArgList &Args,
                                     const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  assert(Inputs.size() == 1 && "Unexpected number of inputs.");
  const InputInfo &Input = Inputs[0];

  // Determine the original source input.
  const Action *SourceAction = &JA;
  while (SourceAction->getKind() != Action::InputClass) {
    assert(!SourceAction->getInputs().empty() && "unexpected root action!");
    SourceAction = SourceAction->getInputs()[0];
  }

  // If -fno-integrated-as is used add -Q to the darwin assembler driver to
  // make sure it runs its system assembler not clang's integrated assembler.
  // Applicable to darwin11+ and Xcode 4+. darwin<10 lacked integrated-as.
  // FIXME: at run-time detect assembler capabilities or rely on version
  // information forwarded by -target-assembler-version.
  if (Args.hasArg(options::OPT_fno_integrated_as)) {
    const llvm::Triple &T(getToolChain().getTriple());
    if (!(T.isMacOSX() && T.isMacOSXVersionLT(10, 7)))
      CmdArgs.push_back("-Q");
  }

  // Forward -g, assuming we are dealing with an actual assembly file.
  if (SourceAction->getType() == types::TY_Asm ||
      SourceAction->getType() == types::TY_PP_Asm) {
    if (Args.hasArg(options::OPT_gstabs))
      CmdArgs.push_back("--gstabs");
    else if (Args.hasArg(options::OPT_g_Group))
      CmdArgs.push_back("-g");
  }

  // Derived from asm spec.
  AddMachOArch(Args, CmdArgs);

  // Use -force_cpusubtype_ALL on x86 by default.
  if (getToolChain().getArch() == llvm::Triple::x86 ||
      getToolChain().getArch() == llvm::Triple::x86_64 ||
      Args.hasArg(options::OPT_force__cpusubtype__ALL))
    CmdArgs.push_back("-force_cpusubtype_ALL");

  if (getToolChain().getArch() != llvm::Triple::x86_64 &&
      (((Args.hasArg(options::OPT_mkernel) ||
         Args.hasArg(options::OPT_fapple_kext)) &&
        getMachOToolChain().isKernelStatic()) ||
       Args.hasArg(options::OPT_static)))
    CmdArgs.push_back("-static");

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
                       options::OPT_Xassembler);

  assert(Output.isFilename() && "Unexpected lipo output.");
  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  assert(Input.isFilename() && "Invalid input.");
  CmdArgs.push_back(Input.getFilename());

  // asm_final spec is empty.

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void darwin::MachOTool::anchor() {}

void darwin::MachOTool::AddMachOArch(const ArgList &Args,
                                     ArgStringList &CmdArgs) const {
  StringRef ArchName = getMachOToolChain().getMachOArchName(Args);

  // Derived from darwin_arch spec.
  CmdArgs.push_back("-arch");
  CmdArgs.push_back(Args.MakeArgString(ArchName));

  // FIXME: Is this needed anymore?
  if (ArchName == "arm")
    CmdArgs.push_back("-force_cpusubtype_ALL");
}

bool darwin::Linker::NeedsTempPath(const InputInfoList &Inputs) const {
  // We only need to generate a temp path for LTO if we aren't compiling object
  // files. When compiling source files, we run 'dsymutil' after linking. We
  // don't run 'dsymutil' when compiling object files.
  for (const auto &Input : Inputs)
    if (Input.getType() != types::TY_Object)
      return true;

  return false;
}
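// Reading of NeedsTempPath() (illustrative): when the user links prebuilt .o
// files directly, every input is TY_Object and no temp path is needed; when
// sources are compiled and linked in one invocation the inputs are not plain
// objects, so AddLinkArgs() below reserves an -object_path_lto temporary
// whose lifetime extends past the later dsymutil step.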
void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args,
                                 ArgStringList &CmdArgs,
                                 const InputInfoList &Inputs) const {
  const Driver &D = getToolChain().getDriver();
  const toolchains::MachO &MachOTC = getMachOToolChain();

  unsigned Version[5] = {0, 0, 0, 0, 0};
  if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
    if (!Driver::GetReleaseVersion(A->getValue(), Version))
      D.Diag(diag::err_drv_invalid_version_number) << A->getAsString(Args);
  }

  // Newer linkers support -demangle. Pass it if supported and not disabled by
  // the user.
  if (Version[0] >= 100 && !Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
    CmdArgs.push_back("-demangle");

  if (Args.hasArg(options::OPT_rdynamic) && Version[0] >= 137)
    CmdArgs.push_back("-export_dynamic");

  // If we are using App Extension restrictions, pass a flag to the linker
  // telling it that the compiled code has been audited.
  if (Args.hasFlag(options::OPT_fapplication_extension,
                   options::OPT_fno_application_extension, false))
    CmdArgs.push_back("-application_extension");

  if (D.isUsingLTO()) {
    // If we are using LTO, then automatically create a temporary file path for
    // the linker to use, so that its lifetime will extend past a possible
    // dsymutil step.
    if (Version[0] >= 116 && NeedsTempPath(Inputs)) {
      const char *TmpPath = C.getArgs().MakeArgString(
          D.GetTemporaryPath("cc", types::getTypeTempSuffix(types::TY_Object)));
      C.addTempFile(TmpPath);
      CmdArgs.push_back("-object_path_lto");
      CmdArgs.push_back(TmpPath);
    }

    // Use -lto_library option to specify the libLTO.dylib path. Try to find
    // it in clang installed libraries. If not found, the option is not used
    // and 'ld' will use its default mechanism to search for libLTO.dylib.
    if (Version[0] >= 133) {
      // Search for libLTO in <InstalledDir>/../lib/libLTO.dylib
      StringRef P = llvm::sys::path::parent_path(D.getInstalledDir());
      SmallString<128> LibLTOPath(P);
      llvm::sys::path::append(LibLTOPath, "lib");
      llvm::sys::path::append(LibLTOPath, "libLTO.dylib");
      if (llvm::sys::fs::exists(LibLTOPath)) {
        CmdArgs.push_back("-lto_library");
        CmdArgs.push_back(C.getArgs().MakeArgString(LibLTOPath));
      } else {
        D.Diag(diag::warn_drv_lto_libpath);
      }
    }
  }

  // Derived from the "link" spec.
  Args.AddAllArgs(CmdArgs, options::OPT_static);
  if (!Args.hasArg(options::OPT_static))
    CmdArgs.push_back("-dynamic");
  if (Args.hasArg(options::OPT_fgnu_runtime)) {
    // FIXME: gcc replaces -lobjc in forward args with -lobjc-gnu
    // here. How do we wish to handle such things?
  }

  if (!Args.hasArg(options::OPT_dynamiclib)) {
    AddMachOArch(Args, CmdArgs);
    // FIXME: Why do this only on this path?
    Args.AddLastArg(CmdArgs, options::OPT_force__cpusubtype__ALL);

    Args.AddLastArg(CmdArgs, options::OPT_bundle);
    Args.AddAllArgs(CmdArgs, options::OPT_bundle__loader);
    Args.AddAllArgs(CmdArgs, options::OPT_client__name);

    Arg *A;
    if ((A = Args.getLastArg(options::OPT_compatibility__version)) ||
        (A = Args.getLastArg(options::OPT_current__version)) ||
        (A = Args.getLastArg(options::OPT_install__name)))
      D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args)
                                                       << "-dynamiclib";

    Args.AddLastArg(CmdArgs, options::OPT_force__flat__namespace);
    Args.AddLastArg(CmdArgs, options::OPT_keep__private__externs);
    Args.AddLastArg(CmdArgs, options::OPT_private__bundle);
  } else {
    CmdArgs.push_back("-dylib");

    Arg *A;
    if ((A = Args.getLastArg(options::OPT_bundle)) ||
        (A = Args.getLastArg(options::OPT_bundle__loader)) ||
        (A = Args.getLastArg(options::OPT_client__name)) ||
        (A = Args.getLastArg(options::OPT_force__flat__namespace)) ||
        (A = Args.getLastArg(options::OPT_keep__private__externs)) ||
        (A = Args.getLastArg(options::OPT_private__bundle)))
      D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
                                                      << "-dynamiclib";

    Args.AddAllArgsTranslated(CmdArgs, options::OPT_compatibility__version,
                              "-dylib_compatibility_version");
    Args.AddAllArgsTranslated(CmdArgs, options::OPT_current__version,
                              "-dylib_current_version");

    AddMachOArch(Args, CmdArgs);

    Args.AddAllArgsTranslated(CmdArgs, options::OPT_install__name,
                              "-dylib_install_name");
  }
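  // Translation example for the -dynamiclib path above (illustrative):
  //   clang -dynamiclib -compatibility_version 1 -current_version 1.2 \
  //         -install_name @rpath/libfoo.dylib ...
  // becomes, on the ld command line,
  //   -dylib -dylib_compatibility_version 1 -dylib_current_version 1.2 \
  //   -dylib_install_name @rpath/libfoo.dylib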
  Args.AddLastArg(CmdArgs, options::OPT_all__load);
  Args.AddAllArgs(CmdArgs, options::OPT_allowable__client);
  Args.AddLastArg(CmdArgs, options::OPT_bind__at__load);
  if (MachOTC.isTargetIOSBased())
    Args.AddLastArg(CmdArgs, options::OPT_arch__errors__fatal);
  Args.AddLastArg(CmdArgs, options::OPT_dead__strip);
  Args.AddLastArg(CmdArgs, options::OPT_no__dead__strip__inits__and__terms);
  Args.AddAllArgs(CmdArgs, options::OPT_dylib__file);
  Args.AddLastArg(CmdArgs, options::OPT_dynamic);
  Args.AddAllArgs(CmdArgs, options::OPT_exported__symbols__list);
  Args.AddLastArg(CmdArgs, options::OPT_flat__namespace);
  Args.AddAllArgs(CmdArgs, options::OPT_force__load);
  Args.AddAllArgs(CmdArgs, options::OPT_headerpad__max__install__names);
  Args.AddAllArgs(CmdArgs, options::OPT_image__base);
  Args.AddAllArgs(CmdArgs, options::OPT_init);

  // Add the deployment target.
  MachOTC.addMinVersionArgs(Args, CmdArgs);

  Args.AddLastArg(CmdArgs, options::OPT_nomultidefs);
  Args.AddLastArg(CmdArgs, options::OPT_multi__module);
  Args.AddLastArg(CmdArgs, options::OPT_single__module);
  Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined);
  Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined__unused);

  if (const Arg *A =
          Args.getLastArg(options::OPT_fpie, options::OPT_fPIE,
                          options::OPT_fno_pie, options::OPT_fno_PIE)) {
    if (A->getOption().matches(options::OPT_fpie) ||
        A->getOption().matches(options::OPT_fPIE))
      CmdArgs.push_back("-pie");
    else
      CmdArgs.push_back("-no_pie");
  }

  // for embed-bitcode, use -bitcode_bundle in linker command
  if (C.getDriver().embedBitcodeEnabled() ||
      C.getDriver().embedBitcodeMarkerOnly()) {
    // Check if the toolchain supports bitcode build flow.
    if (MachOTC.SupportsEmbeddedBitcode())
      CmdArgs.push_back("-bitcode_bundle");
    else
      D.Diag(diag::err_drv_bitcode_unsupported_on_toolchain);
  }

  Args.AddLastArg(CmdArgs, options::OPT_prebind);
  Args.AddLastArg(CmdArgs, options::OPT_noprebind);
  Args.AddLastArg(CmdArgs, options::OPT_nofixprebinding);
  Args.AddLastArg(CmdArgs, options::OPT_prebind__all__twolevel__modules);
  Args.AddLastArg(CmdArgs, options::OPT_read__only__relocs);
  Args.AddAllArgs(CmdArgs, options::OPT_sectcreate);
  Args.AddAllArgs(CmdArgs, options::OPT_sectorder);
  Args.AddAllArgs(CmdArgs, options::OPT_seg1addr);
  Args.AddAllArgs(CmdArgs, options::OPT_segprot);
  Args.AddAllArgs(CmdArgs, options::OPT_segaddr);
  Args.AddAllArgs(CmdArgs, options::OPT_segs__read__only__addr);
  Args.AddAllArgs(CmdArgs, options::OPT_segs__read__write__addr);
  Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table);
  Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table__filename);
  Args.AddAllArgs(CmdArgs, options::OPT_sub__library);
  Args.AddAllArgs(CmdArgs, options::OPT_sub__umbrella);
  // Give --sysroot= preference, over the Apple specific behavior to also use
  // --isysroot as the syslibroot.
  StringRef sysroot = C.getSysRoot();
  if (sysroot != "") {
    CmdArgs.push_back("-syslibroot");
    CmdArgs.push_back(C.getArgs().MakeArgString(sysroot));
  } else if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
    CmdArgs.push_back("-syslibroot");
    CmdArgs.push_back(A->getValue());
  }

  Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace);
  Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace__hints);
  Args.AddAllArgs(CmdArgs, options::OPT_umbrella);
  Args.AddAllArgs(CmdArgs, options::OPT_undefined);
  Args.AddAllArgs(CmdArgs, options::OPT_unexported__symbols__list);
  Args.AddAllArgs(CmdArgs, options::OPT_weak__reference__mismatches);
  Args.AddLastArg(CmdArgs, options::OPT_X_Flag);
  Args.AddAllArgs(CmdArgs, options::OPT_y);
  Args.AddLastArg(CmdArgs, options::OPT_w);
  Args.AddAllArgs(CmdArgs, options::OPT_pagezero__size);
  Args.AddAllArgs(CmdArgs, options::OPT_segs__read__);
  Args.AddLastArg(CmdArgs, options::OPT_seglinkedit);
  Args.AddLastArg(CmdArgs, options::OPT_noseglinkedit);
  Args.AddAllArgs(CmdArgs, options::OPT_sectalign);
  Args.AddAllArgs(CmdArgs, options::OPT_sectobjectsymbols);
  Args.AddAllArgs(CmdArgs, options::OPT_segcreate);
  Args.AddLastArg(CmdArgs, options::OPT_whyload);
  Args.AddLastArg(CmdArgs, options::OPT_whatsloaded);
  Args.AddAllArgs(CmdArgs, options::OPT_dylinker__install__name);
  Args.AddLastArg(CmdArgs, options::OPT_dylinker);
  Args.AddLastArg(CmdArgs, options::OPT_Mach);
}
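// Precedence example for the -syslibroot handling above (illustrative):
// given both "--sysroot=/sdk" and "-isysroot /other", AddLinkArgs() emits
// "-syslibroot /sdk"; only when no --sysroot is present does -isysroot
// double as the syslibroot.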
void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                  const InputInfo &Output,
                                  const InputInfoList &Inputs,
                                  const ArgList &Args,
                                  const char *LinkingOutput) const {
  assert(Output.getType() == types::TY_Image && "Invalid linker output type.");

  // If the number of arguments surpasses the system limits, we will encode the
  // input files in a separate file, shortening the command line. To this end,
  // build a list of input file names that can be passed via a file with the
  // -filelist linker option.
  llvm::opt::ArgStringList InputFileList;

  // The logic here is derived from gcc's behavior; most of which
  // comes from specs (starting with link_command). Consult gcc for
  // more information.
  ArgStringList CmdArgs;

  /// Hack(tm) to ignore linking errors when we are doing ARC migration.
  if (Args.hasArg(options::OPT_ccc_arcmt_check,
                  options::OPT_ccc_arcmt_migrate)) {
    for (const auto &Arg : Args)
      Arg->claim();
    const char *Exec =
        Args.MakeArgString(getToolChain().GetProgramPath("touch"));
    CmdArgs.push_back(Output.getFilename());
    C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, None));
    return;
  }

  // I'm not sure why this particular decomposition exists in gcc, but
  // we follow suit for ease of comparison.
  AddLinkArgs(C, Args, CmdArgs, Inputs);

  // It seems that the 'e' option is completely ignored for dynamic executables
  // (the default), and with static executables, the last one wins, as
  // expected.
  Args.AddAllArgs(CmdArgs, {options::OPT_d_Flag, options::OPT_s,
                            options::OPT_t, options::OPT_Z_Flag,
                            options::OPT_u_Group, options::OPT_e,
                            options::OPT_r});

  // Forward -ObjC when either -ObjC or -ObjC++ is used, to force loading
  // members of static archive libraries which implement Objective-C classes
  // or categories.
  if (Args.hasArg(options::OPT_ObjC) || Args.hasArg(options::OPT_ObjCXX))
    CmdArgs.push_back("-ObjC");

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
    getMachOToolChain().addStartObjectFileArgs(Args, CmdArgs);

  // SafeStack requires its own runtime libraries
  // These libraries should be linked first, to make sure the
  // __safestack_init constructor executes before everything else
  if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
    getMachOToolChain().AddLinkRuntimeLib(Args, CmdArgs,
                                          "libclang_rt.safestack_osx.a",
                                          /*AlwaysLink=*/true);
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
  // Build the input file for -filelist (list of linker input files) in case
  // we need it later
  for (const auto &II : Inputs) {
    if (!II.isFilename()) {
      // This is a linker input argument.
      // We cannot mix input arguments and file names in a -filelist input,
      // thus we prematurely stop our list (remaining files shall be passed as
      // arguments).
      if (InputFileList.size() > 0)
        break;

      continue;
    }

    InputFileList.push_back(II.getFilename());
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs))
    addOpenMPRuntime(CmdArgs, getToolChain(), Args);

  if (isObjCRuntimeLinked(Args) &&
      !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    // We use arclite library for both ARC and subscripting support.
    getMachOToolChain().AddLinkARCArgs(Args, CmdArgs);

    CmdArgs.push_back("-framework");
    CmdArgs.push_back("Foundation");
    // Link libobj.
    CmdArgs.push_back("-lobjc");
  }

  if (LinkingOutput) {
    CmdArgs.push_back("-arch_multiple");
    CmdArgs.push_back("-final_output");
    CmdArgs.push_back(LinkingOutput);
  }

  if (Args.hasArg(options::OPT_fnested_functions))
    CmdArgs.push_back("-allow_stack_execute");

  getMachOToolChain().addProfileRTLibs(Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (getToolChain().getDriver().CCCIsCXX())
      getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);

    // link_ssp spec is empty.

    // Let the tool chain choose which runtime library to link.
    getMachOToolChain().AddLinkRuntimeLibArgs(Args, CmdArgs);
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    // endfile_spec is empty.
  }

  Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
  Args.AddAllArgs(CmdArgs, options::OPT_F);
  // -iframework should be forwarded as -F.
  for (const Arg *A : Args.filtered(options::OPT_iframework))
    CmdArgs.push_back(Args.MakeArgString(std::string("-F") + A->getValue()));

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (Arg *A = Args.getLastArg(options::OPT_fveclib)) {
      if (A->getValue() == StringRef("Accelerate")) {
        CmdArgs.push_back("-framework");
        CmdArgs.push_back("Accelerate");
      }
    }
  }

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  std::unique_ptr<Command> Cmd =
      llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs);
  Cmd->setInputFileList(std::move(InputFileList));
  C.addCommand(std::move(Cmd));
}

void darwin::Lipo::ConstructJob(Compilation &C, const JobAction &JA,
                                const InputInfo &Output,
                                const InputInfoList &Inputs,
                                const ArgList &Args,
                                const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  CmdArgs.push_back("-create");
  assert(Output.isFilename() && "Unexpected lipo output.");

  CmdArgs.push_back("-output");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &II : Inputs) {
    assert(II.isFilename() && "Unexpected lipo input.");
    CmdArgs.push_back(II.getFilename());
  }

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("lipo"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
  const InputInfo &Input = Inputs[0];
  assert(Input.isFilename() && "Unexpected dsymutil input.");
  CmdArgs.push_back(Input.getFilename());

  const char *Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("dsymutil"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
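// Pipeline sketch (illustrative): for a debug build whose dSYM is to be
// verified, the driver chains roughly
//   ld ... -o a.out
//   dsymutil -o a.out.dSYM a.out
//   dwarfdump --verify --debug-info --eh-frame --quiet a.out.dSYM
// where VerifyDebug below consumes the output of the earlier dsymutil run.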
void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA,
                                       const InputInfo &Output,
                                       const InputInfoList &Inputs,
                                       const ArgList &Args,
                                       const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  CmdArgs.push_back("--verify");
  CmdArgs.push_back("--debug-info");
  CmdArgs.push_back("--eh-frame");
  CmdArgs.push_back("--quiet");

  assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
  const InputInfo &Input = Inputs[0];
  assert(Input.isFilename() && "Unexpected verify input");

  // Grabbing the output of the earlier dsymutil run.
  CmdArgs.push_back(Input.getFilename());

  const char *Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                      const InputInfo &Output,
                                      const InputInfoList &Inputs,
                                      const ArgList &Args,
                                      const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
                       options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &II : Inputs)
    CmdArgs.push_back(II.getFilename());

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
                                   const ArgList &Args,
                                   const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  // Demangle C++ names in errors
  CmdArgs.push_back("-C");

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) {
    CmdArgs.push_back("-e");
    CmdArgs.push_back("_start");
  }

  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
    CmdArgs.push_back("-dn");
  } else {
    CmdArgs.push_back("-Bdynamic");
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-shared");
    } else {
      CmdArgs.push_back("--dynamic-linker");
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("ld.so.1")));
    }
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (!Args.hasArg(options::OPT_shared))
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));

    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("values-Xa.o")));
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
  }

  getToolChain().AddFilePathLibArgs(Args, CmdArgs);

  Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
                            options::OPT_e, options::OPT_r});

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (getToolChain().getDriver().CCCIsCXX())
      getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
    CmdArgs.push_back("-lgcc_s");
    CmdArgs.push_back("-lc");
    if (!Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-lgcc");
      CmdArgs.push_back("-lm");
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
  }
  CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));

  getToolChain().addProfileRTLibs(Args, CmdArgs);

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
CmdArgs.push_back("--32"); break; case llvm::Triple::ppc: CmdArgs.push_back("-mppc"); CmdArgs.push_back("-many"); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: { CmdArgs.push_back("-32"); std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::sparcv9: { CmdArgs.push_back("-64"); std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); CmdArgs.push_back("-mabi"); CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data()); if (getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } default: break; } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. 
void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
                                   const ArgList &Args,
                                   const char *LinkingOutput) const {
  const Driver &D = getToolChain().getDriver();
  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (getToolChain().getArch() == llvm::Triple::mips64)
    CmdArgs.push_back("-EB");
  else if (getToolChain().getArch() == llvm::Triple::mips64el)
    CmdArgs.push_back("-EL");

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) {
    CmdArgs.push_back("-e");
    CmdArgs.push_back("__start");
  }

  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
  } else {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");
    CmdArgs.push_back("--eh-frame-hdr");
    CmdArgs.push_back("-Bdynamic");
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-shared");
    } else {
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back("/usr/libexec/ld.so");
    }
  }

  if (Args.hasArg(options::OPT_nopie))
    CmdArgs.push_back("-nopie");

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (!Args.hasArg(options::OPT_shared)) {
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back(
            Args.MakeArgString(getToolChain().GetFilePath("gcrt0.o")));
      else
        CmdArgs.push_back(
            Args.MakeArgString(getToolChain().GetFilePath("crt0.o")));
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
    } else {
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
    }
  }

  std::string Triple = getToolChain().getTripleString();
  if (Triple.substr(0, 6) == "x86_64")
    Triple.replace(0, 6, "amd64");
  CmdArgs.push_back(
      Args.MakeArgString("-L/usr/lib/gcc-lib/" + Triple + "/4.2.1"));
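  // Path example for the rewrite above (illustrative): a triple of
  // "x86_64-unknown-openbsd..." becomes "amd64-unknown-openbsd...", so the
  // search path reads
  //   -L/usr/lib/gcc-lib/amd64-unknown-openbsd.../4.2.1
  // matching the base-system gcc layout.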
CmdArgs.push_back("-lgcc"); if (Args.hasArg(options::OPT_pthread)) { if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lpthread_p"); else CmdArgs.push_back("-lpthread"); } if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lc_p"); else CmdArgs.push_back("-lc"); } CmdArgs.push_back("-lgcc"); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); } const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void bitrig::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void bitrig::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) { CmdArgs.push_back("-e"); CmdArgs.push_back("__start"); } if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); } else { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); CmdArgs.push_back("--eh-frame-hdr"); CmdArgs.push_back("-Bdynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-shared"); } else { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/usr/libexec/ld.so"); } } if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("gcrt0.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crt0.o"))); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); } else { CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o"))); } } Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_e}); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (D.CCCIsCXX()) { getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lm_p"); else CmdArgs.push_back("-lm"); } if (Args.hasArg(options::OPT_pthread)) { if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lpthread_p"); else CmdArgs.push_back("-lpthread"); } if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lc_p"); else CmdArgs.push_back("-lc"); } StringRef MyArch; switch (getToolChain().getArch()) { case llvm::Triple::arm: 
MyArch = "arm"; break; case llvm::Triple::x86: MyArch = "i386"; break; case llvm::Triple::x86_64: MyArch = "amd64"; break; default: llvm_unreachable("Unsupported architecture"); } CmdArgs.push_back(Args.MakeArgString("-lclang_rt." + MyArch)); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); } const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; // When building 32-bit code on FreeBSD/amd64, we have to explicitly // instruct as in the base system to assemble 32-bit code. switch (getToolChain().getArch()) { default: break; case llvm::Triple::x86: CmdArgs.push_back("--32"); break; case llvm::Triple::ppc: CmdArgs.push_back("-a32"); break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); CmdArgs.push_back("-march"); CmdArgs.push_back(CPUName.data()); CmdArgs.push_back("-mabi"); CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data()); if (getToolChain().getArch() == llvm::Triple::mips || getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); if (Arg *A = Args.getLastArg(options::OPT_G)) { StringRef v = A->getValue(); CmdArgs.push_back(Args.MakeArgString("-G" + v)); A->claim(); } AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args); if (ABI == arm::FloatABI::Hard) CmdArgs.push_back("-mfpu=vfp"); else CmdArgs.push_back("-mfpu=softvfp"); switch (getToolChain().getTriple().getEnvironment()) { case llvm::Triple::GNUEABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::EABI: CmdArgs.push_back("-meabi=5"); break; default: CmdArgs.push_back("-matpcs"); } break; } case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: { std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const toolchains::FreeBSD &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple::ArchType Arch = ToolChain.getArch(); const bool IsPIE = !Args.hasArg(options::OPT_shared) && (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault()); ArgStringList 
void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
                                   const ArgList &Args,
                                   const char *LinkingOutput) const {
  const toolchains::FreeBSD &ToolChain =
      static_cast<const toolchains::FreeBSD &>(getToolChain());
  const Driver &D = ToolChain.getDriver();
  const llvm::Triple::ArchType Arch = ToolChain.getArch();
  const bool IsPIE =
      !Args.hasArg(options::OPT_shared) &&
      (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (IsPIE)
    CmdArgs.push_back("-pie");

  CmdArgs.push_back("--eh-frame-hdr");
  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
  } else {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-Bshareable");
    } else {
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back("/libexec/ld-elf.so.1");
    }
    if (ToolChain.getTriple().getOSMajorVersion() >= 9) {
      if (Arch == llvm::Triple::arm || Arch == llvm::Triple::sparc ||
          Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
        CmdArgs.push_back("--hash-style=both");
      }
    }
    CmdArgs.push_back("--enable-new-dtags");
  }

  // When building 32-bit code on FreeBSD/amd64, we have to explicitly
  // instruct ld in the base system to link 32-bit code.
  if (Arch == llvm::Triple::x86) {
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf_i386_fbsd");
  }

  if (Arch == llvm::Triple::ppc) {
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf32ppc_fbsd");
  }

  if (Arg *A = Args.getLastArg(options::OPT_G)) {
    if (ToolChain.getArch() == llvm::Triple::mips ||
        ToolChain.getArch() == llvm::Triple::mipsel ||
        ToolChain.getArch() == llvm::Triple::mips64 ||
        ToolChain.getArch() == llvm::Triple::mips64el) {
      StringRef v = A->getValue();
      CmdArgs.push_back(Args.MakeArgString("-G" + v));
      A->claim();
    }
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    const char *crt1 = nullptr;
    if (!Args.hasArg(options::OPT_shared)) {
      if (Args.hasArg(options::OPT_pg))
        crt1 = "gcrt1.o";
      else if (IsPIE)
        crt1 = "Scrt1.o";
      else
        crt1 = "crt1.o";
    }
    if (crt1)
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1)));

    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));

    const char *crtbegin = nullptr;
    if (Args.hasArg(options::OPT_static))
      crtbegin = "crtbeginT.o";
    else if (Args.hasArg(options::OPT_shared) || IsPIE)
      crtbegin = "crtbeginS.o";
    else
      crtbegin = "crtbegin.o";

    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
  }
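  // Startup-file matrix for the block above (illustrative):
  //   -pg              -> gcrt1.o    crtbegin.o
  //   PIE (or -pie)    -> Scrt1.o    crtbeginS.o
  //   -shared          -> (no crt1)  crtbeginS.o
  //   -static          -> crt1.o     crtbeginT.o
  //   default          -> crt1.o     crtbegin.o
  // with crti.o always inserted between crt1 and crtbegin.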
  Args.AddAllArgs(CmdArgs, options::OPT_L);
  ToolChain.AddFilePathLibArgs(Args, CmdArgs);
  Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
  Args.AddAllArgs(CmdArgs, options::OPT_e);
  Args.AddAllArgs(CmdArgs, options::OPT_s);
  Args.AddAllArgs(CmdArgs, options::OPT_t);
  Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
  Args.AddAllArgs(CmdArgs, options::OPT_r);

  if (D.isUsingLTO())
    AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin);

  bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    addOpenMPRuntime(CmdArgs, ToolChain, Args);
    if (D.CCCIsCXX()) {
      ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back("-lm_p");
      else
        CmdArgs.push_back("-lm");
    }
    if (NeedsSanitizerDeps)
      linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
    // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding
    // the default system libraries. Just mimic this for now.
    if (Args.hasArg(options::OPT_pg))
      CmdArgs.push_back("-lgcc_p");
    else
      CmdArgs.push_back("-lgcc");
    if (Args.hasArg(options::OPT_static)) {
      CmdArgs.push_back("-lgcc_eh");
    } else if (Args.hasArg(options::OPT_pg)) {
      CmdArgs.push_back("-lgcc_eh_p");
    } else {
      CmdArgs.push_back("--as-needed");
      CmdArgs.push_back("-lgcc_s");
      CmdArgs.push_back("--no-as-needed");
    }
    if (Args.hasArg(options::OPT_pthread)) {
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back("-lpthread_p");
      else
        CmdArgs.push_back("-lpthread");
    }
    if (Args.hasArg(options::OPT_pg)) {
      if (Args.hasArg(options::OPT_shared))
        CmdArgs.push_back("-lc");
      else
        CmdArgs.push_back("-lc_p");
      CmdArgs.push_back("-lgcc_p");
    } else {
      CmdArgs.push_back("-lc");
      CmdArgs.push_back("-lgcc");
    }
    if (Args.hasArg(options::OPT_static)) {
      CmdArgs.push_back("-lgcc_eh");
    } else if (Args.hasArg(options::OPT_pg)) {
      CmdArgs.push_back("-lgcc_eh_p");
    } else {
      CmdArgs.push_back("--as-needed");
      CmdArgs.push_back("-lgcc_s");
      CmdArgs.push_back("--no-as-needed");
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_shared) || IsPIE)
      CmdArgs.push_back(
          Args.MakeArgString(ToolChain.GetFilePath("crtendS.o")));
    else
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
  }

  ToolChain.addProfileRTLibs(Args, CmdArgs);

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                     const InputInfo &Output,
                                     const InputInfoList &Inputs,
                                     const ArgList &Args,
                                     const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  // GNU as needs different flags for creating the correct output format
  // on architectures with different ABIs or optional feature sets.
  switch (getToolChain().getArch()) {
  case llvm::Triple::x86:
    CmdArgs.push_back("--32");
    break;
  case llvm::Triple::arm:
  case llvm::Triple::armeb:
  case llvm::Triple::thumb:
  case llvm::Triple::thumbeb: {
    StringRef MArch, MCPU;
    getARMArchCPUFromArgs(Args, MArch, MCPU, /*FromAs*/ true);
    std::string Arch =
        arm::getARMTargetCPU(MCPU, MArch, getToolChain().getTriple());
    CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch));
    break;
  }

  case llvm::Triple::mips:
  case llvm::Triple::mipsel:
  case llvm::Triple::mips64:
  case llvm::Triple::mips64el: {
    StringRef CPUName;
    StringRef ABIName;
    mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);

    CmdArgs.push_back("-march");
    CmdArgs.push_back(CPUName.data());

    CmdArgs.push_back("-mabi");
    CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data());

    if (getToolChain().getArch() == llvm::Triple::mips ||
        getToolChain().getArch() == llvm::Triple::mips64)
      CmdArgs.push_back("-EB");
    else
      CmdArgs.push_back("-EL");

    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }

  case llvm::Triple::sparc:
  case llvm::Triple::sparcel: {
    CmdArgs.push_back("-32");
    std::string CPU = getCPUName(Args, getToolChain().getTriple());
    CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }

  case llvm::Triple::sparcv9: {
    CmdArgs.push_back("-64");
    std::string CPU = getCPUName(Args, getToolChain().getTriple());
    CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }

  default:
    break;
  }

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
                       options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &II : Inputs)
    CmdArgs.push_back(II.getFilename());

  const char *Exec = Args.MakeArgString((getToolChain().GetProgramPath("as")));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                  const InputInfo &Output,
                                  const InputInfoList &Inputs,
                                  const ArgList &Args,
                                  const char *LinkingOutput) const {
  const Driver &D = getToolChain().getDriver();
  ArgStringList CmdArgs;

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  CmdArgs.push_back("--eh-frame-hdr");
  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
  } else {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-Bshareable");
    } else {
      Args.AddAllArgs(CmdArgs, options::OPT_pie);
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back("/libexec/ld.elf_so");
    }
  }
  switch (getToolChain().getArch()) {
  case llvm::Triple::x86:
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf_i386");
    break;
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    CmdArgs.push_back("-m");
    switch (getToolChain().getTriple().getEnvironment()) {
    case llvm::Triple::EABI:
    case llvm::Triple::GNUEABI:
      CmdArgs.push_back("armelf_nbsd_eabi");
      break;
    case llvm::Triple::EABIHF:
    case llvm::Triple::GNUEABIHF:
      CmdArgs.push_back("armelf_nbsd_eabihf");
      break;
    default:
      CmdArgs.push_back("armelf_nbsd");
      break;
    }
    break;
  case llvm::Triple::armeb:
  case llvm::Triple::thumbeb:
    arm::appendEBLinkFlags(
        Args, CmdArgs,
        llvm::Triple(getToolChain().ComputeEffectiveClangTriple(Args)));
    CmdArgs.push_back("-m");
    switch (getToolChain().getTriple().getEnvironment()) {
    case llvm::Triple::EABI:
    case llvm::Triple::GNUEABI:
      CmdArgs.push_back("armelfb_nbsd_eabi");
      break;
    case llvm::Triple::EABIHF:
    case llvm::Triple::GNUEABIHF:
      CmdArgs.push_back("armelfb_nbsd_eabihf");
      break;
    default:
      CmdArgs.push_back("armelfb_nbsd");
      break;
    }
    break;
  case llvm::Triple::mips64:
  case llvm::Triple::mips64el:
    if (mips::hasMipsAbiArg(Args, "32")) {
      CmdArgs.push_back("-m");
      if (getToolChain().getArch() == llvm::Triple::mips64)
        CmdArgs.push_back("elf32btsmip");
      else
        CmdArgs.push_back("elf32ltsmip");
    } else if (mips::hasMipsAbiArg(Args, "64")) {
      CmdArgs.push_back("-m");
      if (getToolChain().getArch() == llvm::Triple::mips64)
        CmdArgs.push_back("elf64btsmip");
      else
        CmdArgs.push_back("elf64ltsmip");
    }
    break;
  case llvm::Triple::ppc:
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf32ppc_nbsd");
    break;
  case llvm::Triple::ppc64:
  case llvm::Triple::ppc64le:
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf64ppc");
    break;
  case llvm::Triple::sparc:
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf32_sparc");
    break;
  case llvm::Triple::sparcv9:
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf64_sparc");
    break;
  default:
    break;
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (!Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crt0.o")));
    }
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) {
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
    } else {
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
    }
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
  Args.AddAllArgs(CmdArgs, options::OPT_e);
  Args.AddAllArgs(CmdArgs, options::OPT_s);
  Args.AddAllArgs(CmdArgs, options::OPT_t);
  Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
  Args.AddAllArgs(CmdArgs, options::OPT_r);

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  unsigned Major, Minor, Micro;
  getToolChain().getTriple().getOSVersion(Major, Minor, Micro);
  bool useLibgcc = true;
  if (Major >= 7 || Major == 0) {
    switch (getToolChain().getArch()) {
    case llvm::Triple::aarch64:
    case llvm::Triple::arm:
    case llvm::Triple::armeb:
    case llvm::Triple::thumb:
    case llvm::Triple::thumbeb:
    case llvm::Triple::ppc:
    case llvm::Triple::ppc64:
    case llvm::Triple::ppc64le:
    case llvm::Triple::sparc:
    case llvm::Triple::sparcv9:
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
      useLibgcc = false;
      break;
    default:
      break;
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
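    // Descriptive note (added): the default-library block below emits
    // libraries in dependency order — the C++ runtime and -lm first (when
    // driving C++), then -lpthread and -lc, and finally libgcc or libgcc_s
    // depending on the useLibgcc decision above.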
    addOpenMPRuntime(CmdArgs, getToolChain(), Args);
    if (D.CCCIsCXX()) {
      getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
      CmdArgs.push_back("-lm");
    }
    if (Args.hasArg(options::OPT_pthread))
      CmdArgs.push_back("-lpthread");
    CmdArgs.push_back("-lc");

    if (useLibgcc) {
      if (Args.hasArg(options::OPT_static)) {
        // libgcc_eh depends on libc, so resolve as much as possible,
        // pull in any new requirements from libc and then get the rest
        // of libgcc.
        CmdArgs.push_back("-lgcc_eh");
        CmdArgs.push_back("-lc");
        CmdArgs.push_back("-lgcc");
      } else {
        CmdArgs.push_back("-lgcc");
        CmdArgs.push_back("--as-needed");
        CmdArgs.push_back("-lgcc_s");
        CmdArgs.push_back("--no-as-needed");
      }
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
    else
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
  }

  getToolChain().addProfileRTLibs(Args, CmdArgs);

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void gnutools::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                       const InputInfo &Output,
                                       const InputInfoList &Inputs,
                                       const ArgList &Args,
                                       const char *LinkingOutput) const {
  claimNoWarnArgs(Args);

  std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args);
  llvm::Triple Triple = llvm::Triple(TripleStr);

  ArgStringList CmdArgs;

  llvm::Reloc::Model RelocationModel;
  unsigned PICLevel;
  bool IsPIE;
  std::tie(RelocationModel, PICLevel, IsPIE) =
      ParsePICArgs(getToolChain(), Triple, Args);

  switch (getToolChain().getArch()) {
  default:
    break;
  // Add --32/--64 to make sure we get the format we want.
  // This is incomplete
  case llvm::Triple::x86:
    CmdArgs.push_back("--32");
    break;
  case llvm::Triple::x86_64:
    if (getToolChain().getTriple().getEnvironment() == llvm::Triple::GNUX32)
      CmdArgs.push_back("--x32");
    else
      CmdArgs.push_back("--64");
    break;
  case llvm::Triple::ppc:
    CmdArgs.push_back("-a32");
    CmdArgs.push_back("-mppc");
    CmdArgs.push_back("-many");
    break;
  case llvm::Triple::ppc64:
    CmdArgs.push_back("-a64");
    CmdArgs.push_back("-mppc64");
    CmdArgs.push_back("-many");
    break;
  case llvm::Triple::ppc64le:
    CmdArgs.push_back("-a64");
    CmdArgs.push_back("-mppc64");
    CmdArgs.push_back("-many");
    CmdArgs.push_back("-mlittle-endian");
    break;
  case llvm::Triple::sparc:
  case llvm::Triple::sparcel: {
    CmdArgs.push_back("-32");
    std::string CPU = getCPUName(Args, getToolChain().getTriple());
    CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }
  case llvm::Triple::sparcv9: {
    CmdArgs.push_back("-64");
    std::string CPU = getCPUName(Args, getToolChain().getTriple());
    CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }
  case llvm::Triple::arm:
  case llvm::Triple::armeb:
  case llvm::Triple::thumb:
  case llvm::Triple::thumbeb: {
    const llvm::Triple &Triple2 = getToolChain().getTriple();
    switch (Triple2.getSubArch()) {
    case llvm::Triple::ARMSubArch_v7:
      CmdArgs.push_back("-mfpu=neon");
      break;
    case llvm::Triple::ARMSubArch_v8:
      CmdArgs.push_back("-mfpu=crypto-neon-fp-armv8");
      break;
    default:
      break;
    }

    switch (arm::getARMFloatABI(getToolChain(), Args)) {
    case arm::FloatABI::Invalid:
      llvm_unreachable("must have an ABI!");
    case arm::FloatABI::Soft:
      CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=soft"));
      break;
    case arm::FloatABI::SoftFP:
      CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=softfp"));
      break;
    case arm::FloatABI::Hard:
      CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=hard"));
      break;
    }

    Args.AddLastArg(CmdArgs, options::OPT_march_EQ);

    // FIXME: remove krait check when GNU tools support krait cpu
    // for now replace it with -mcpu=cortex-a15 to avoid a lower
    // march from being picked in the absence of a cpu flag.
    Arg *A;
    if ((A = Args.getLastArg(options::OPT_mcpu_EQ)) &&
        StringRef(A->getValue()).lower() == "krait")
      CmdArgs.push_back("-mcpu=cortex-a15");
    else
      Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
    Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ);
    break;
  }
  case llvm::Triple::mips:
  case llvm::Triple::mipsel:
  case llvm::Triple::mips64:
  case llvm::Triple::mips64el: {
    StringRef CPUName;
    StringRef ABIName;
    mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
    ABIName = getGnuCompatibleMipsABIName(ABIName);

    CmdArgs.push_back("-march");
    CmdArgs.push_back(CPUName.data());

    CmdArgs.push_back("-mabi");
    CmdArgs.push_back(ABIName.data());

    // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE,
    // or -mshared (not implemented) is in effect.
    if (RelocationModel == llvm::Reloc::Static)
      CmdArgs.push_back("-mno-shared");

    // LLVM doesn't support -mplt yet and acts as if it is always given.
    // However, -mplt has no effect with the N64 ABI.
    CmdArgs.push_back(ABIName == "64" ? "-KPIC" : "-call_nonpic");
"-KPIC" : "-call_nonpic"); if (getToolChain().getArch() == llvm::Triple::mips || getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) { if (StringRef(A->getValue()) == "2008") CmdArgs.push_back(Args.MakeArgString("-mnan=2008")); } // Add the last -mfp32/-mfpxx/-mfp64 or -mfpxx if it is enabled by default. if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx, options::OPT_mfp64)) { A->claim(); A->render(Args, CmdArgs); } else if (mips::shouldUseFPXX( Args, getToolChain().getTriple(), CPUName, ABIName, getMipsFloatABI(getToolChain().getDriver(), Args))) CmdArgs.push_back("-mfpxx"); // Pass on -mmips16 or -mno-mips16. However, the assembler equivalent of // -mno-mips16 is actually -no-mips16. if (Arg *A = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16)) { if (A->getOption().matches(options::OPT_mips16)) { A->claim(); A->render(Args, CmdArgs); } else { A->claim(); CmdArgs.push_back("-no-mips16"); } } Args.AddLastArg(CmdArgs, options::OPT_mmicromips, options::OPT_mno_micromips); Args.AddLastArg(CmdArgs, options::OPT_mdsp, options::OPT_mno_dsp); Args.AddLastArg(CmdArgs, options::OPT_mdspr2, options::OPT_mno_dspr2); if (Arg *A = Args.getLastArg(options::OPT_mmsa, options::OPT_mno_msa)) { // Do not use AddLastArg because not all versions of MIPS assembler // support -mmsa / -mno-msa options. if (A->getOption().matches(options::OPT_mmsa)) CmdArgs.push_back(Args.MakeArgString("-mmsa")); } Args.AddLastArg(CmdArgs, options::OPT_mhard_float, options::OPT_msoft_float); Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, options::OPT_msingle_float); Args.AddLastArg(CmdArgs, options::OPT_modd_spreg, options::OPT_mno_odd_spreg); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::systemz: { // Always pass an -march option, since our default of z10 is later // than the GNU assembler's default. StringRef CPUName = getSystemZTargetCPU(Args); CmdArgs.push_back(Args.MakeArgString("-march=" + CPUName)); break; } } Args.AddAllArgs(CmdArgs, options::OPT_I); Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); // Handle the debug info splitting at object creation time if we're // creating an object. // TODO: Currently only works on linux with newer objcopy. 
  if (Args.hasArg(options::OPT_gsplit_dwarf) &&
      getToolChain().getTriple().isOSLinux())
    SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
                   SplitDebugName(Args, Inputs[0]));
}

static void AddLibgcc(const llvm::Triple &Triple, const Driver &D,
                      ArgStringList &CmdArgs, const ArgList &Args) {
  bool isAndroid = Triple.isAndroid();
  bool isCygMing = Triple.isOSCygMing();
  bool IsIAMCU = Triple.isOSIAMCU();
  bool StaticLibgcc = Args.hasArg(options::OPT_static_libgcc) ||
                      Args.hasArg(options::OPT_static);
  if (!D.CCCIsCXX())
    CmdArgs.push_back("-lgcc");

  if (StaticLibgcc || isAndroid) {
    if (D.CCCIsCXX())
      CmdArgs.push_back("-lgcc");
  } else {
    if (!D.CCCIsCXX() && !isCygMing)
      CmdArgs.push_back("--as-needed");
    CmdArgs.push_back("-lgcc_s");
    if (!D.CCCIsCXX() && !isCygMing)
      CmdArgs.push_back("--no-as-needed");
  }

  if (StaticLibgcc && !isAndroid && !IsIAMCU)
    CmdArgs.push_back("-lgcc_eh");
  else if (!Args.hasArg(options::OPT_shared) && D.CCCIsCXX())
    CmdArgs.push_back("-lgcc");

  // According to Android ABI, we have to link with libdl if we are
  // linking with non-static libgcc.
  //
  // NOTE: This fixes a link error on Android MIPS as well. The non-static
  // libgcc for MIPS relies on _Unwind_Find_FDE and dl_iterate_phdr from libdl.
  if (isAndroid && !StaticLibgcc)
    CmdArgs.push_back("-ldl");
}

static void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
                           ArgStringList &CmdArgs, const ArgList &Args) {
  // Make use of compiler-rt if --rtlib option is used
  ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(Args);

  switch (RLT) {
  case ToolChain::RLT_CompilerRT:
    switch (TC.getTriple().getOS()) {
    default:
      llvm_unreachable("unsupported OS");
    case llvm::Triple::Win32:
    case llvm::Triple::Linux:
      addClangRT(TC, Args, CmdArgs);
      break;
    }
    break;
  case ToolChain::RLT_Libgcc:
    // Make sure libgcc is not used under MSVC environment by default
    if (TC.getTriple().isKnownWindowsMSVCEnvironment()) {
      // Issue error diagnostic if libgcc is explicitly specified
      // through command line as --rtlib option argument.
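      // For example (illustrative command line, not from the original
      // source), "clang --rtlib=libgcc" targeting an MSVC environment is
      // rejected with err_drv_unsupported_rtlib_for_platform by the check
      // below.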
      if (Args.hasArg(options::OPT_rtlib_EQ)) {
        TC.getDriver().Diag(diag::err_drv_unsupported_rtlib_for_platform)
            << Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "MSVC";
      }
    } else
      AddLibgcc(TC.getTriple(), D, CmdArgs, Args);
    break;
  }
}

static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) {
  switch (T.getArch()) {
  case llvm::Triple::x86:
    if (T.isOSIAMCU())
      return "elf_iamcu";
    return "elf_i386";
  case llvm::Triple::aarch64:
    return "aarch64linux";
  case llvm::Triple::aarch64_be:
    return "aarch64_be_linux";
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    return "armelf_linux_eabi";
  case llvm::Triple::armeb:
  case llvm::Triple::thumbeb:
    return "armelfb_linux_eabi";
  case llvm::Triple::ppc:
    return "elf32ppclinux";
  case llvm::Triple::ppc64:
    return "elf64ppc";
  case llvm::Triple::ppc64le:
    return "elf64lppc";
  case llvm::Triple::sparc:
  case llvm::Triple::sparcel:
    return "elf32_sparc";
  case llvm::Triple::sparcv9:
    return "elf64_sparc";
  case llvm::Triple::mips:
    return "elf32btsmip";
  case llvm::Triple::mipsel:
    return "elf32ltsmip";
  case llvm::Triple::mips64:
    if (mips::hasMipsAbiArg(Args, "n32"))
      return "elf32btsmipn32";
    return "elf64btsmip";
  case llvm::Triple::mips64el:
    if (mips::hasMipsAbiArg(Args, "n32"))
      return "elf32ltsmipn32";
    return "elf64ltsmip";
  case llvm::Triple::systemz:
    return "elf64_s390";
  case llvm::Triple::x86_64:
    if (T.getEnvironment() == llvm::Triple::GNUX32)
      return "elf32_x86_64";
    return "elf_x86_64";
  default:
    llvm_unreachable("Unexpected arch");
  }
}

void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const toolchains::Linux &ToolChain =
      static_cast<const toolchains::Linux &>(getToolChain());
  const Driver &D = ToolChain.getDriver();

  std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args);
  llvm::Triple Triple = llvm::Triple(TripleStr);

  const llvm::Triple::ArchType Arch = ToolChain.getArch();
  const bool isAndroid = ToolChain.getTriple().isAndroid();
  const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU();
  const bool IsPIE =
      !Args.hasArg(options::OPT_shared) && !Args.hasArg(options::OPT_static) &&
      (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
  const bool HasCRTBeginEndFiles =
      ToolChain.getTriple().hasEnvironment() ||
      (ToolChain.getTriple().getVendor() != llvm::Triple::MipsTechnologies);

  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
  if (llvm::sys::path::filename(Exec) == "lld") {
    CmdArgs.push_back("-flavor");
    CmdArgs.push_back("old-gnu");
    CmdArgs.push_back("-target");
    CmdArgs.push_back(Args.MakeArgString(getToolChain().getTripleString()));
  }

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (IsPIE)
    CmdArgs.push_back("-pie");

  if (Args.hasArg(options::OPT_rdynamic))
    CmdArgs.push_back("-export-dynamic");

  if (Args.hasArg(options::OPT_s))
    CmdArgs.push_back("-s");

  if (Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumbeb)
    arm::appendEBLinkFlags(Args, CmdArgs, Triple);

  for (const auto &Opt : ToolChain.ExtraOpts)
    CmdArgs.push_back(Opt.c_str());

  if (!Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("--eh-frame-hdr");
  }

  CmdArgs.push_back("-m");
  CmdArgs.push_back(getLDMOption(ToolChain.getTriple(), Args));

  if (Args.hasArg(options::OPT_static)) {
    if (Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb ||
        Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb)
      CmdArgs.push_back("-Bstatic");
    else
      CmdArgs.push_back("-static");
  } else if (Args.hasArg(options::OPT_shared)) {
    CmdArgs.push_back("-shared");
  }

  if (!Args.hasArg(options::OPT_static)) {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");

    if (!Args.hasArg(options::OPT_shared)) {
      const std::string Loader =
          D.DyldPrefix + ToolChain.getDynamicLinker(Args);
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back(Args.MakeArgString(Loader));
    }
  }

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (!isAndroid && !IsIAMCU) {
      const char *crt1 = nullptr;
      if (!Args.hasArg(options::OPT_shared)) {
        if (Args.hasArg(options::OPT_pg))
          crt1 = "gcrt1.o";
        else if (IsPIE)
          crt1 = "Scrt1.o";
        else
          crt1 = "crt1.o";
      }
      if (crt1)
        CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1)));

      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
    }

    if (IsIAMCU)
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
    else {
      const char *crtbegin;
      if (Args.hasArg(options::OPT_static))
        crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o";
      else if (Args.hasArg(options::OPT_shared))
        crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o";
      else if (IsPIE)
        crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o";
      else
        crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbegin.o";

      if (HasCRTBeginEndFiles)
        CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
    }

    // Add crtfastmath.o if available and fast math is enabled.
    ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs);
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  Args.AddAllArgs(CmdArgs, options::OPT_u);

  ToolChain.AddFilePathLibArgs(Args, CmdArgs);

  if (D.isUsingLTO())
    AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin);

  if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
    CmdArgs.push_back("--no-demangle");

  bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
  bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);
  // The profile runtime also needs access to system libraries.
  getToolChain().addProfileRTLibs(Args, CmdArgs);

  if (D.CCCIsCXX() &&
      !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
                               !Args.hasArg(options::OPT_static);
    if (OnlyLibstdcxxStatic)
      CmdArgs.push_back("-Bstatic");
    ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
    if (OnlyLibstdcxxStatic)
      CmdArgs.push_back("-Bdynamic");
    CmdArgs.push_back("-lm");
  }
  // Silence warnings when linking C code with a C++ '-stdlib' argument.
  Args.ClaimAllArgs(options::OPT_stdlib_EQ);

  if (!Args.hasArg(options::OPT_nostdlib)) {
    if (!Args.hasArg(options::OPT_nodefaultlibs)) {
      if (Args.hasArg(options::OPT_static))
        CmdArgs.push_back("--start-group");

      if (NeedsSanitizerDeps)
        linkSanitizerRuntimeDeps(ToolChain, CmdArgs);

      if (NeedsXRayDeps)
        linkXRayRuntimeDeps(ToolChain, Args, CmdArgs);

      bool WantPthread = Args.hasArg(options::OPT_pthread) ||
                         Args.hasArg(options::OPT_pthreads);

      if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
                       options::OPT_fno_openmp, false)) {
        // OpenMP runtimes imply pthreads when using the GNU toolchain.
        // FIXME: Does this really make sense for all GNU toolchains?
        WantPthread = true;

        // Also link the particular OpenMP runtimes.
        switch (getOpenMPRuntime(ToolChain, Args)) {
        case OMPRT_OMP:
          CmdArgs.push_back("-lomp");
          break;
        case OMPRT_GOMP:
          CmdArgs.push_back("-lgomp");

          // FIXME: Exclude this for platforms with libgomp that don't require
          // librt. Most modern Linux platforms require it, but some may not.
          CmdArgs.push_back("-lrt");
          break;
        case OMPRT_IOMP5:
          CmdArgs.push_back("-liomp5");
          break;
        case OMPRT_Unknown:
          // Already diagnosed.
          break;
        }
      }

      AddRunTimeLibs(ToolChain, D, CmdArgs, Args);

      if (WantPthread && !isAndroid)
        CmdArgs.push_back("-lpthread");

      if (Args.hasArg(options::OPT_fsplit_stack))
        CmdArgs.push_back("--wrap=pthread_create");

      CmdArgs.push_back("-lc");

      // Add IAMCU specific libs, if needed.
      if (IsIAMCU)
        CmdArgs.push_back("-lgloss");

      if (Args.hasArg(options::OPT_static))
        CmdArgs.push_back("--end-group");
      else
        AddRunTimeLibs(ToolChain, D, CmdArgs, Args);

      // Add IAMCU specific libs (outside the group), if needed.
      if (IsIAMCU) {
        CmdArgs.push_back("--as-needed");
        CmdArgs.push_back("-lsoftfp");
        CmdArgs.push_back("--no-as-needed");
      }
    }

    if (!Args.hasArg(options::OPT_nostartfiles) && !IsIAMCU) {
      const char *crtend;
      if (Args.hasArg(options::OPT_shared))
        crtend = isAndroid ? "crtend_so.o" : "crtendS.o";
      else if (IsPIE)
        crtend = isAndroid ? "crtend_android.o" : "crtendS.o";
      else
        crtend = isAndroid ? "crtend_android.o" : "crtend.o";

      if (HasCRTBeginEndFiles)
        CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
      if (!isAndroid)
        CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
    }
  }

  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

// NaCl ARM assembly (inline or standalone) can be written with a set of macros
// for the various SFI requirements like register masking. The assembly tool
// inserts the file containing the macros as an input into all the assembly
// jobs.
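// Illustrative note (derived from the code below, not from the original
// comment): the macro file is simply prepended to the input list, so a job
// for "foo.s" effectively assembles "nacl-arm-macros.s" followed by "foo.s"
// (see NewInputs in the function that follows).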
void nacltools::AssemblerARM::ConstructJob(Compilation &C, const JobAction &JA,
                                           const InputInfo &Output,
                                           const InputInfoList &Inputs,
                                           const ArgList &Args,
                                           const char *LinkingOutput) const {
  const toolchains::NaClToolChain &ToolChain =
      static_cast<const toolchains::NaClToolChain &>(getToolChain());
  InputInfo NaClMacros(types::TY_PP_Asm, ToolChain.GetNaClArmMacrosPath(),
                       "nacl-arm-macros.s");
  InputInfoList NewInputs;
  NewInputs.push_back(NaClMacros);
  NewInputs.append(Inputs.begin(), Inputs.end());
  gnutools::Assembler::ConstructJob(C, JA, Output, NewInputs, Args,
                                    LinkingOutput);
}

// This is quite similar to gnutools::Linker::ConstructJob, with the changes
// that we use static by default, do not yet support sanitizers or LTO, and
// differ in a few other ways. Eventually we can support more of that and
// hopefully migrate back to gnutools::Linker.
void nacltools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                     const InputInfo &Output,
                                     const InputInfoList &Inputs,
                                     const ArgList &Args,
                                     const char *LinkingOutput) const {
  const toolchains::NaClToolChain &ToolChain =
      static_cast<const toolchains::NaClToolChain &>(getToolChain());
  const Driver &D = ToolChain.getDriver();
  const llvm::Triple::ArchType Arch = ToolChain.getArch();
  const bool IsStatic =
      !Args.hasArg(options::OPT_dynamic) && !Args.hasArg(options::OPT_shared);

  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (Args.hasArg(options::OPT_rdynamic))
    CmdArgs.push_back("-export-dynamic");

  if (Args.hasArg(options::OPT_s))
    CmdArgs.push_back("-s");

  // NaClToolChain doesn't have ExtraOpts like Linux; the only relevant flag
  // from there is --build-id, which we do want.
CmdArgs.push_back("--build-id"); if (!IsStatic) CmdArgs.push_back("--eh-frame-hdr"); CmdArgs.push_back("-m"); if (Arch == llvm::Triple::x86) CmdArgs.push_back("elf_i386_nacl"); else if (Arch == llvm::Triple::arm) CmdArgs.push_back("armelf_nacl"); else if (Arch == llvm::Triple::x86_64) CmdArgs.push_back("elf_x86_64_nacl"); else if (Arch == llvm::Triple::mipsel) CmdArgs.push_back("mipselelf_nacl"); else D.Diag(diag::err_target_unsupported_arch) << ToolChain.getArchName() << "Native Client"; if (IsStatic) CmdArgs.push_back("-static"); else if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-shared"); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o"))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); const char *crtbegin; if (IsStatic) crtbegin = "crtbeginT.o"; else if (Args.hasArg(options::OPT_shared)) crtbegin = "crtbeginS.o"; else crtbegin = "crtbegin.o"; CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); } Args.AddAllArgs(CmdArgs, options::OPT_L); Args.AddAllArgs(CmdArgs, options::OPT_u); ToolChain.AddFilePathLibArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) CmdArgs.push_back("--no-demangle"); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs); if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !IsStatic; if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bstatic"); ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bdynamic"); CmdArgs.push_back("-lm"); } if (!Args.hasArg(options::OPT_nostdlib)) { if (!Args.hasArg(options::OPT_nodefaultlibs)) { // Always use groups, since it has no effect on dynamic libraries. CmdArgs.push_back("--start-group"); CmdArgs.push_back("-lc"); // NaCl's libc++ currently requires libpthread, so just always include it // in the group for C++. if (Args.hasArg(options::OPT_pthread) || Args.hasArg(options::OPT_pthreads) || D.CCCIsCXX()) { // Gold, used by Mips, handles nested groups differently than ld, and // without '-lnacl' it prefers symbols from libpthread.a over libnacl.a, // which is not a desired behaviour here. // See https://sourceware.org/ml/binutils/2015-03/msg00034.html if (getToolChain().getArch() == llvm::Triple::mipsel) CmdArgs.push_back("-lnacl"); CmdArgs.push_back("-lpthread"); } CmdArgs.push_back("-lgcc"); CmdArgs.push_back("--as-needed"); if (IsStatic) CmdArgs.push_back("-lgcc_eh"); else CmdArgs.push_back("-lgcc_s"); CmdArgs.push_back("--no-as-needed"); // Mips needs to create and use pnacl_legacy library that contains // definitions from bitcode/pnaclmm.c and definitions for // __nacl_tp_tls_offset() and __nacl_tp_tdb_offset(). 
      if (getToolChain().getArch() == llvm::Triple::mipsel)
        CmdArgs.push_back("-lpnacl_legacy");

      CmdArgs.push_back("--end-group");
    }

    if (!Args.hasArg(options::OPT_nostartfiles)) {
      const char *crtend;
      if (Args.hasArg(options::OPT_shared))
        crtend = "crtendS.o";
      else
        crtend = "crtend.o";

      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
    }
  }

  const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void minix::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &II : Inputs)
    CmdArgs.push_back(II.getFilename());

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void minix::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) const {
  const Driver &D = getToolChain().getDriver();
  ArgStringList CmdArgs;

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
  }

  Args.AddAllArgs(CmdArgs,
                  {options::OPT_L, options::OPT_T_Group, options::OPT_e});

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  getToolChain().addProfileRTLibs(Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (D.CCCIsCXX()) {
      getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
      CmdArgs.push_back("-lm");
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_pthread))
      CmdArgs.push_back("-lpthread");
    CmdArgs.push_back("-lc");
    CmdArgs.push_back("-lCompilerRT-Generic");
    CmdArgs.push_back("-L/usr/pkg/compiler-rt/lib");
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
  }

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

/// DragonFly Tools

// For now, the DragonFly Assembler does just about the same as for
// FreeBSD, but this may change soon.
void dragonfly::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                        const InputInfo &Output,
                                        const InputInfoList &Inputs,
                                        const ArgList &Args,
                                        const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  // When building 32-bit code on DragonFly/pc64, we have to explicitly
  // instruct as in the base system to assemble 32-bit code.
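  // Illustrative example (not from the original source): "clang -m32" on an
  // x86_64 DragonFly host therefore produces an assembler line roughly like
  //   as --32 -o foo.o foo.s
  // (sketch based on the flag pushed below).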
  if (getToolChain().getArch() == llvm::Triple::x86)
    CmdArgs.push_back("--32");

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &II : Inputs)
    CmdArgs.push_back(II.getFilename());

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                     const InputInfo &Output,
                                     const InputInfoList &Inputs,
                                     const ArgList &Args,
                                     const char *LinkingOutput) const {
  const Driver &D = getToolChain().getDriver();
  ArgStringList CmdArgs;

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  CmdArgs.push_back("--eh-frame-hdr");
  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
  } else {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");
    if (Args.hasArg(options::OPT_shared))
      CmdArgs.push_back("-Bshareable");
    else {
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back("/usr/libexec/ld-elf.so.2");
    }
    CmdArgs.push_back("--hash-style=gnu");
    CmdArgs.push_back("--enable-new-dtags");
  }

  // When building 32-bit code on DragonFly/pc64, we have to explicitly
  // instruct ld in the base system to link 32-bit code.
  if (getToolChain().getArch() == llvm::Triple::x86) {
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf_i386");
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (!Args.hasArg(options::OPT_shared)) {
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back(
            Args.MakeArgString(getToolChain().GetFilePath("gcrt1.o")));
      else {
        if (Args.hasArg(options::OPT_pie))
          CmdArgs.push_back(
              Args.MakeArgString(getToolChain().GetFilePath("Scrt1.o")));
        else
          CmdArgs.push_back(
              Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
      }
    }
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
    else
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
  }

  Args.AddAllArgs(CmdArgs,
                  {options::OPT_L, options::OPT_T_Group, options::OPT_e});

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    CmdArgs.push_back("-L/usr/lib/gcc50");

    if (!Args.hasArg(options::OPT_static)) {
      CmdArgs.push_back("-rpath");
      CmdArgs.push_back("/usr/lib/gcc50");
    }

    if (D.CCCIsCXX()) {
      getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
      CmdArgs.push_back("-lm");
    }

    if (Args.hasArg(options::OPT_pthread))
      CmdArgs.push_back("-lpthread");

    if (!Args.hasArg(options::OPT_nolibc)) {
      CmdArgs.push_back("-lc");
    }

    if (Args.hasArg(options::OPT_static) ||
        Args.hasArg(options::OPT_static_libgcc)) {
      CmdArgs.push_back("-lgcc");
      CmdArgs.push_back("-lgcc_eh");
    } else {
      if (Args.hasArg(options::OPT_shared_libgcc)) {
        CmdArgs.push_back("-lgcc_pic");
        if (!Args.hasArg(options::OPT_shared))
          CmdArgs.push_back("-lgcc");
      } else {
        CmdArgs.push_back("-lgcc");
        CmdArgs.push_back("--as-needed");
        CmdArgs.push_back("-lgcc_pic");
        CmdArgs.push_back("--no-as-needed");
      }
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
    else
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
  }

  getToolChain().addProfileRTLibs(Args, CmdArgs);

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

// Try to find Exe from a Visual Studio distribution. This first tries to find
// an installed copy of Visual Studio and, failing that, looks in the PATH,
// making sure that whatever executable is found is not a same-named exe
// from clang itself, to prevent clang from falling back to itself.
static std::string FindVisualStudioExecutable(const ToolChain &TC,
                                              const char *Exe,
                                              const char *ClangProgramPath) {
  const auto &MSVC = static_cast<const toolchains::MSVCToolChain &>(TC);
  std::string visualStudioBinDir;
  if (MSVC.getVisualStudioBinariesFolder(ClangProgramPath,
                                         visualStudioBinDir)) {
    SmallString<128> FilePath(visualStudioBinDir);
    llvm::sys::path::append(FilePath, Exe);
    if (llvm::sys::fs::can_execute(FilePath.c_str()))
      return FilePath.str();
  }

  return Exe;
}

void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                        const InputInfo &Output,
                                        const InputInfoList &Inputs,
                                        const ArgList &Args,
                                        const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  const ToolChain &TC = getToolChain();

  assert((Output.isFilename() || Output.isNothing()) && "invalid output");
  if (Output.isFilename())
    CmdArgs.push_back(
        Args.MakeArgString(std::string("-out:") + Output.getFilename()));

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) &&
      !C.getDriver().IsCLMode())
    CmdArgs.push_back("-defaultlib:libcmt");

  if (!llvm::sys::Process::GetEnv("LIB")) {
    // If the VC environment hasn't been configured (perhaps because the user
    // did not run vcvarsall), try to build a consistent link environment. If
    // the environment variable is set however, assume the user knows what
    // they're doing.
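    // Illustrative example (assumed install layout, not from the original
    // source): for x86_64 the code below adds something like
    //   -libpath:C:\Program Files (x86)\...\VC\lib\amd64
    // plus, when applicable, the Universal CRT and Windows SDK library
    // directories.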
    std::string VisualStudioDir;
    const auto &MSVC = static_cast<const toolchains::MSVCToolChain &>(TC);
    if (MSVC.getVisualStudioInstallDir(VisualStudioDir)) {
      SmallString<128> LibDir(VisualStudioDir);
      llvm::sys::path::append(LibDir, "VC", "lib");
      switch (MSVC.getArch()) {
      case llvm::Triple::x86:
        // x86 just puts the libraries directly in lib
        break;
      case llvm::Triple::x86_64:
        llvm::sys::path::append(LibDir, "amd64");
        break;
      case llvm::Triple::arm:
        llvm::sys::path::append(LibDir, "arm");
        break;
      default:
        break;
      }
      CmdArgs.push_back(
          Args.MakeArgString(std::string("-libpath:") + LibDir.c_str()));

      if (MSVC.useUniversalCRT(VisualStudioDir)) {
        std::string UniversalCRTLibPath;
        if (MSVC.getUniversalCRTLibraryPath(UniversalCRTLibPath))
          CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") +
                                               UniversalCRTLibPath.c_str()));
      }
    }

    std::string WindowsSdkLibPath;
    if (MSVC.getWindowsSDKLibraryPath(WindowsSdkLibPath))
      CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") +
                                           WindowsSdkLibPath.c_str()));
  }

  if (!C.getDriver().IsCLMode() && Args.hasArg(options::OPT_L))
    for (const auto &LibPath : Args.getAllArgValues(options::OPT_L))
      CmdArgs.push_back(Args.MakeArgString("-libpath:" + LibPath));

  CmdArgs.push_back("-nologo");

  if (Args.hasArg(options::OPT_g_Group, options::OPT__SLASH_Z7,
                  options::OPT__SLASH_Zd))
    CmdArgs.push_back("-debug");

  bool DLL = Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd,
                         options::OPT_shared);
  if (DLL) {
    CmdArgs.push_back(Args.MakeArgString("-dll"));

    SmallString<128> ImplibName(Output.getFilename());
    llvm::sys::path::replace_extension(ImplibName, "lib");
    CmdArgs.push_back(Args.MakeArgString(std::string("-implib:") + ImplibName));
  }

  if (TC.getSanitizerArgs().needsAsanRt()) {
    CmdArgs.push_back(Args.MakeArgString("-debug"));
    CmdArgs.push_back(Args.MakeArgString("-incremental:no"));
    if (Args.hasArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd)) {
      for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"})
        CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
      // Make sure the dynamic runtime thunk is not optimized out at link time
      // to ensure proper SEH handling.
      CmdArgs.push_back(Args.MakeArgString("-include:___asan_seh_interceptor"));
    } else if (DLL) {
      CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk"));
    } else {
      for (const auto &Lib : {"asan", "asan_cxx"})
        CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
    }
  }

  Args.AddAllArgValues(CmdArgs, options::OPT__SLASH_link);

  if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
                   options::OPT_fno_openmp, false)) {
    CmdArgs.push_back("-nodefaultlib:vcomp.lib");
    CmdArgs.push_back("-nodefaultlib:vcompd.lib");
    CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") +
                                         TC.getDriver().Dir + "/../lib"));
    switch (getOpenMPRuntime(getToolChain(), Args)) {
    case OMPRT_OMP:
      CmdArgs.push_back("-defaultlib:libomp.lib");
      break;
    case OMPRT_IOMP5:
      CmdArgs.push_back("-defaultlib:libiomp5md.lib");
      break;
    case OMPRT_GOMP:
      break;
    case OMPRT_Unknown:
      // Already diagnosed.
      break;
    }
  }

  // Add the compiler-rt library in case it was explicitly
  // specified as an argument to the --rtlib option.
  if (!Args.hasArg(options::OPT_nostdlib)) {
    AddRunTimeLibs(TC, TC.getDriver(), CmdArgs, Args);
  }

  // Add filenames, libraries, and other linker inputs.
  for (const auto &Input : Inputs) {
    if (Input.isFilename()) {
      CmdArgs.push_back(Input.getFilename());
      continue;
    }

    const Arg &A = Input.getInputArg();

    // Render -l options differently for the MSVC linker.
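    // For example, "-lkernel32" becomes "kernel32.lib", while a value that
    // already ends in ".lib" is passed through unchanged (illustrative; see
    // the check below).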
    if (A.getOption().matches(options::OPT_l)) {
      StringRef Lib = A.getValue();
      const char *LinkLibArg;
      if (Lib.endswith(".lib"))
        LinkLibArg = Args.MakeArgString(Lib);
      else
        LinkLibArg = Args.MakeArgString(Lib + ".lib");
      CmdArgs.push_back(LinkLibArg);
      continue;
    }

    // Otherwise, this is some other kind of linker input option like -Wl, -z,
    // or -L. Render it, even if MSVC doesn't understand it.
    A.renderAsInput(Args, CmdArgs);
  }

  TC.addProfileRTLibs(Args, CmdArgs);

  // We need to special case some linker paths. In the case of lld, we need to
  // translate 'lld' into 'lld-link', and in the case of the regular msvc
  // linker, we need to use a special search algorithm.
  llvm::SmallString<128> linkPath;
  StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link");
  if (Linker.equals_lower("lld"))
    Linker = "lld-link";

  if (Linker.equals_lower("link")) {
    // If we're using the MSVC linker, it's not sufficient to just use link
    // from the program PATH, because other environments like GnuWin32 install
    // their own link.exe which may come first.
    linkPath = FindVisualStudioExecutable(TC, "link.exe",
                                          C.getDriver().getClangProgramPath());
  } else {
    linkPath = Linker;
    llvm::sys::path::replace_extension(linkPath, "exe");
    linkPath = TC.GetProgramPath(linkPath.c_str());
  }

  const char *Exec = Args.MakeArgString(linkPath);
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void visualstudio::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
                                          const InputInfo &Output,
                                          const InputInfoList &Inputs,
                                          const ArgList &Args,
                                          const char *LinkingOutput) const {
  C.addCommand(GetCommand(C, JA, Output, Inputs, Args, LinkingOutput));
}

std::unique_ptr<Command> visualstudio::Compiler::GetCommand(
    Compilation &C, const JobAction &JA, const InputInfo &Output,
    const InputInfoList &Inputs, const ArgList &Args,
    const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  CmdArgs.push_back("/nologo");
  CmdArgs.push_back("/c");  // Compile only.
  CmdArgs.push_back("/W0"); // No warnings.

  // The goal is to be able to invoke this tool correctly based on
  // any flag accepted by clang-cl.

  // These are spelled the same way in clang and cl.exe.
  Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I});

  // Optimization level.
  if (Arg *A = Args.getLastArg(options::OPT_fbuiltin, options::OPT_fno_builtin))
    CmdArgs.push_back(A->getOption().getID() == options::OPT_fbuiltin ? "/Oi"
                                                                      : "/Oi-");
  if (Arg *A = Args.getLastArg(options::OPT_O, options::OPT_O0)) {
    if (A->getOption().getID() == options::OPT_O0) {
      CmdArgs.push_back("/Od");
    } else {
      CmdArgs.push_back("/Og");

      StringRef OptLevel = A->getValue();
      if (OptLevel == "s" || OptLevel == "z")
        CmdArgs.push_back("/Os");
      else
        CmdArgs.push_back("/Ot");

      CmdArgs.push_back("/Ob2");
    }
  }
  if (Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer,
                               options::OPT_fno_omit_frame_pointer))
    CmdArgs.push_back(A->getOption().getID() == options::OPT_fomit_frame_pointer
                          ? "/Oy"
                          : "/Oy-");
  if (!Args.hasArg(options::OPT_fwritable_strings))
    CmdArgs.push_back("/GF");

  // Flags for which clang-cl has an alias.
  // FIXME: How can we ensure this stays in sync with relevant clang-cl options?

  if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
                   /*default=*/false))
    CmdArgs.push_back("/GR-");
  if (Args.hasFlag(options::OPT__SLASH_GS_, options::OPT__SLASH_GS,
                   /*default=*/false))
    CmdArgs.push_back("/GS-");
  if (Arg *A = Args.getLastArg(options::OPT_ffunction_sections,
                               options::OPT_fno_function_sections))
    CmdArgs.push_back(A->getOption().getID() == options::OPT_ffunction_sections
                          ? "/Gy"
                          : "/Gy-");
"/Gy" : "/Gy-"); if (Arg *A = Args.getLastArg(options::OPT_fdata_sections, options::OPT_fno_data_sections)) CmdArgs.push_back( A->getOption().getID() == options::OPT_fdata_sections ? "/Gw" : "/Gw-"); if (Args.hasArg(options::OPT_fsyntax_only)) CmdArgs.push_back("/Zs"); if (Args.hasArg(options::OPT_g_Flag, options::OPT_gline_tables_only, options::OPT__SLASH_Z7)) CmdArgs.push_back("/Z7"); std::vector Includes = Args.getAllArgValues(options::OPT_include); for (const auto &Include : Includes) CmdArgs.push_back(Args.MakeArgString(std::string("/FI") + Include)); // Flags that can simply be passed through. Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LD); Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LDd); Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX); Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX_); Args.AddAllArgs(CmdArgs, options::OPT__SLASH_EH); Args.AddAllArgs(CmdArgs, options::OPT__SLASH_Zl); // The order of these flags is relevant, so pick the last one. if (Arg *A = Args.getLastArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd, options::OPT__SLASH_MT, options::OPT__SLASH_MTd)) A->render(Args, CmdArgs); // Pass through all unknown arguments so that the fallback command can see // them too. Args.AddAllArgs(CmdArgs, options::OPT_UNKNOWN); // Input filename. assert(Inputs.size() == 1); const InputInfo &II = Inputs[0]; assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX); CmdArgs.push_back(II.getType() == types::TY_C ? "/Tc" : "/Tp"); if (II.isFilename()) CmdArgs.push_back(II.getFilename()); else II.getInputArg().renderAsInput(Args, CmdArgs); // Output filename. assert(Output.getType() == types::TY_Object); const char *Fo = Args.MakeArgString(std::string("/Fo") + Output.getFilename()); CmdArgs.push_back(Fo); const Driver &D = getToolChain().getDriver(); std::string Exec = FindVisualStudioExecutable(getToolChain(), "cl.exe", D.getClangProgramPath()); return llvm::make_unique(JA, *this, Args.MakeArgString(Exec), CmdArgs, Inputs); } /// MinGW Tools void MinGW::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; if (getToolChain().getArch() == llvm::Triple::x86) { CmdArgs.push_back("--32"); } else if (getToolChain().getArch() == llvm::Triple::x86_64) { CmdArgs.push_back("--64"); } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); if (Args.hasArg(options::OPT_gsplit_dwarf)) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDebugName(Args, Inputs[0])); } void MinGW::Linker::AddLibGCC(const ArgList &Args, ArgStringList &CmdArgs) const { if (Args.hasArg(options::OPT_mthreads)) CmdArgs.push_back("-lmingwthrd"); CmdArgs.push_back("-lmingw32"); // Make use of compiler-rt if --rtlib option is used ToolChain::RuntimeLibType RLT = getToolChain().GetRuntimeLibType(Args); if (RLT == ToolChain::RLT_Libgcc) { bool Static = Args.hasArg(options::OPT_static_libgcc) || Args.hasArg(options::OPT_static); bool Shared = Args.hasArg(options::OPT_shared); bool CXX = getToolChain().getDriver().CCCIsCXX(); if (Static || (!CXX && !Shared)) { CmdArgs.push_back("-lgcc"); CmdArgs.push_back("-lgcc_eh"); } else { 
CmdArgs.push_back("-lgcc_s"); CmdArgs.push_back("-lgcc"); } } else { AddRunTimeLibs(getToolChain(), getToolChain().getDriver(), CmdArgs, Args); } CmdArgs.push_back("-lmoldname"); CmdArgs.push_back("-lmingwex"); CmdArgs.push_back("-lmsvcrt"); } void MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const ToolChain &TC = getToolChain(); const Driver &D = TC.getDriver(); // const SanitizerArgs &Sanitize = TC.getSanitizerArgs(); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_w); StringRef LinkerName = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "ld"); if (LinkerName.equals_lower("lld")) { CmdArgs.push_back("-flavor"); CmdArgs.push_back("gnu"); } else if (!LinkerName.equals_lower("ld")) { D.Diag(diag::err_drv_unsupported_linker) << LinkerName; } if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); CmdArgs.push_back("-m"); if (TC.getArch() == llvm::Triple::x86) CmdArgs.push_back("i386pe"); if (TC.getArch() == llvm::Triple::x86_64) CmdArgs.push_back("i386pep"); if (TC.getArch() == llvm::Triple::arm) CmdArgs.push_back("thumb2pe"); if (Args.hasArg(options::OPT_mwindows)) { CmdArgs.push_back("--subsystem"); CmdArgs.push_back("windows"); } else if (Args.hasArg(options::OPT_mconsole)) { CmdArgs.push_back("--subsystem"); CmdArgs.push_back("console"); } if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("-Bstatic"); else { if (Args.hasArg(options::OPT_mdll)) CmdArgs.push_back("--dll"); else if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("--shared"); CmdArgs.push_back("-Bdynamic"); if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-e"); if (TC.getArch() == llvm::Triple::x86) CmdArgs.push_back("_DllMainCRTStartup@12"); else CmdArgs.push_back("DllMainCRTStartup"); CmdArgs.push_back("--enable-auto-image-base"); } } CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); Args.AddAllArgs(CmdArgs, options::OPT_e); // FIXME: add -N, -n flags Args.AddLastArg(CmdArgs, options::OPT_r); Args.AddLastArg(CmdArgs, options::OPT_s); Args.AddLastArg(CmdArgs, options::OPT_t); Args.AddAllArgs(CmdArgs, options::OPT_u_Group); Args.AddLastArg(CmdArgs, options::OPT_Z_Flag); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_mdll)) { CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("dllcrt2.o"))); } else { if (Args.hasArg(options::OPT_municode)) CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt2u.o"))); else CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt2.o"))); } if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("gcrt2.o"))); CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtbegin.o"))); } Args.AddAllArgs(CmdArgs, options::OPT_L); TC.AddFilePathLibArgs(Args, CmdArgs); AddLinkerInputs(TC, Inputs, Args, CmdArgs); // TODO: Add ASan stuff here // TODO: Add profile stuff here if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && 
    if (OnlyLibstdcxxStatic)
      CmdArgs.push_back("-Bstatic");
    TC.AddCXXStdlibLibArgs(Args, CmdArgs);
    if (OnlyLibstdcxxStatic)
      CmdArgs.push_back("-Bdynamic");
  }

  if (!Args.hasArg(options::OPT_nostdlib)) {
    if (!Args.hasArg(options::OPT_nodefaultlibs)) {
      if (Args.hasArg(options::OPT_static))
        CmdArgs.push_back("--start-group");

      if (Args.hasArg(options::OPT_fstack_protector) ||
          Args.hasArg(options::OPT_fstack_protector_strong) ||
          Args.hasArg(options::OPT_fstack_protector_all)) {
        CmdArgs.push_back("-lssp_nonshared");
        CmdArgs.push_back("-lssp");
      }
      if (Args.hasArg(options::OPT_fopenmp))
        CmdArgs.push_back("-lgomp");

      AddLibGCC(Args, CmdArgs);

      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back("-lgmon");

      if (Args.hasArg(options::OPT_pthread))
        CmdArgs.push_back("-lpthread");

      // add system libraries
      if (Args.hasArg(options::OPT_mwindows)) {
        CmdArgs.push_back("-lgdi32");
        CmdArgs.push_back("-lcomdlg32");
      }
      CmdArgs.push_back("-ladvapi32");
      CmdArgs.push_back("-lshell32");
      CmdArgs.push_back("-luser32");
      CmdArgs.push_back("-lkernel32");

      if (Args.hasArg(options::OPT_static))
        CmdArgs.push_back("--end-group");
      else if (!LinkerName.equals_lower("lld"))
        AddLibGCC(Args, CmdArgs);
    }

    if (!Args.hasArg(options::OPT_nostartfiles)) {
      // Add crtfastmath.o if available and fast math is enabled.
      TC.AddFastMathRuntimeIfAvailable(Args, CmdArgs);

      CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o")));
    }
  }
  const char *Exec = Args.MakeArgString(TC.GetProgramPath(LinkerName.data()));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

/// XCore Tools
// We pass assemble and link construction to the xcc tool.

void XCore::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  CmdArgs.push_back("-c");

  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  if (Arg *A = Args.getLastArg(options::OPT_g_Group))
    if (!A->getOption().matches(options::OPT_g0))
      CmdArgs.push_back("-g");

  if (Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm,
                   false))
    CmdArgs.push_back("-fverbose-asm");

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  for (const auto &II : Inputs)
    CmdArgs.push_back(II.getFilename());

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void XCore::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  // Pass -fexceptions through to the linker if it was present.
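  // Illustrative sketch (assumed invocation, not from the original source):
  // with -fexceptions the resulting job is roughly
  //   xcc -o a.out -fexceptions foo.o
  // based on the flags assembled in this function.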
  if (Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions,
                   false))
    CmdArgs.push_back("-fexceptions");

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void CrossWindows::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                           const InputInfo &Output,
                                           const InputInfoList &Inputs,
                                           const ArgList &Args,
                                           const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  const auto &TC =
      static_cast<const toolchains::CrossWindowsToolChain &>(getToolChain());
  ArgStringList CmdArgs;
  const char *Exec;

  switch (TC.getArch()) {
  default:
    llvm_unreachable("unsupported architecture");
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    break;
  case llvm::Triple::x86:
    CmdArgs.push_back("--32");
    break;
  case llvm::Triple::x86_64:
    CmdArgs.push_back("--64");
    break;
  }

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &Input : Inputs)
    CmdArgs.push_back(Input.getFilename());

  const std::string Assembler = TC.GetProgramPath("as");
  Exec = Args.MakeArgString(Assembler);

  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void CrossWindows::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                        const InputInfo &Output,
                                        const InputInfoList &Inputs,
                                        const ArgList &Args,
                                        const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::CrossWindowsToolChain &>(getToolChain());
  const llvm::Triple &T = TC.getTriple();
  const Driver &D = TC.getDriver();
  SmallString<128> EntryPoint;
  ArgStringList CmdArgs;
  const char *Exec;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_w);
  // Other warning options are already handled somewhere else.

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (Args.hasArg(options::OPT_pie))
    CmdArgs.push_back("-pie");
  if (Args.hasArg(options::OPT_rdynamic))
    CmdArgs.push_back("-export-dynamic");
  if (Args.hasArg(options::OPT_s))
    CmdArgs.push_back("--strip-all");

  CmdArgs.push_back("-m");
  switch (TC.getArch()) {
  default:
    llvm_unreachable("unsupported architecture");
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    // FIXME: this is incorrect for WinCE
    CmdArgs.push_back("thumb2pe");
    break;
  case llvm::Triple::x86:
    CmdArgs.push_back("i386pe");
    EntryPoint.append("_");
    break;
  case llvm::Triple::x86_64:
    CmdArgs.push_back("i386pep");
    break;
  }

  if (Args.hasArg(options::OPT_shared)) {
    switch (T.getArch()) {
    default:
      llvm_unreachable("unsupported architecture");
    case llvm::Triple::arm:
    case llvm::Triple::thumb:
    case llvm::Triple::x86_64:
      EntryPoint.append("_DllMainCRTStartup");
      break;
    case llvm::Triple::x86:
      EntryPoint.append("_DllMainCRTStartup@12");
      break;
    }

    CmdArgs.push_back("-shared");
    CmdArgs.push_back("-Bdynamic");

    CmdArgs.push_back("--enable-auto-image-base");

    CmdArgs.push_back("--entry");
    CmdArgs.push_back(Args.MakeArgString(EntryPoint));
  } else {
    EntryPoint.append("mainCRTStartup");

    CmdArgs.push_back(Args.hasArg(options::OPT_static) ? "-Bstatic"
                                                       : "-Bdynamic");

    if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
      CmdArgs.push_back("--entry");
      CmdArgs.push_back(Args.MakeArgString(EntryPoint));
    }

    // FIXME: handle subsystem
  }

  // NOTE: deal with multiple definitions on Windows (e.g. COMDAT)
  CmdArgs.push_back("--allow-multiple-definition");

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_rdynamic)) {
    SmallString<261> ImpLib(Output.getFilename());
    llvm::sys::path::replace_extension(ImpLib, ".lib");

    CmdArgs.push_back("--out-implib");
    CmdArgs.push_back(Args.MakeArgString(ImpLib));
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    const std::string CRTPath(D.SysRoot + "/usr/lib/");
    const char *CRTBegin;

    CRTBegin =
        Args.hasArg(options::OPT_shared) ? "crtbeginS.obj" : "crtbegin.obj";
    CmdArgs.push_back(Args.MakeArgString(CRTPath + CRTBegin));
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  TC.AddFilePathLibArgs(Args, CmdArgs);
  AddLinkerInputs(TC, Inputs, Args, CmdArgs);

  if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib) &&
      !Args.hasArg(options::OPT_nodefaultlibs)) {
    bool StaticCXX = Args.hasArg(options::OPT_static_libstdcxx) &&
                     !Args.hasArg(options::OPT_static);
    if (StaticCXX)
      CmdArgs.push_back("-Bstatic");
    TC.AddCXXStdlibLibArgs(Args, CmdArgs);
    if (StaticCXX)
      CmdArgs.push_back("-Bdynamic");
  }

  if (!Args.hasArg(options::OPT_nostdlib)) {
    if (!Args.hasArg(options::OPT_nodefaultlibs)) {
      // TODO handle /MT[d] /MD[d]
      CmdArgs.push_back("-lmsvcrt");
      AddRunTimeLibs(TC, D, CmdArgs, Args);
    }
  }

  if (TC.getSanitizerArgs().needsAsanRt()) {
    // TODO handle /MT[d] /MD[d]
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk"));
    } else {
      for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"})
        CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
      // Make sure the dynamic runtime thunk is not optimized out at link time
      // to ensure proper SEH handling.
      CmdArgs.push_back(Args.MakeArgString("--undefined"));
      CmdArgs.push_back(Args.MakeArgString(TC.getArch() == llvm::Triple::x86
                                               ? "___asan_seh_interceptor"
                                               : "__asan_seh_interceptor"));
    }
  }

  Exec = Args.MakeArgString(TC.GetLinkerPath());

  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
                                          const InputInfo &Output,
                                          const InputInfoList &Inputs,
                                          const ArgList &Args,
                                          const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  assert(Inputs.size() == 1);
  const InputInfo &II = Inputs[0];
  assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX ||
         II.getType() == types::TY_PP_CXX);

  if (JA.getKind() == Action::PreprocessJobClass) {
    Args.ClaimAllArgs();
    CmdArgs.push_back("-E");
  } else {
    assert(Output.getType() == types::TY_PP_Asm); // Require preprocessed asm.
    CmdArgs.push_back("-S");
    CmdArgs.push_back("-fno-exceptions"); // Always do this even if unspecified.
  }
  CmdArgs.push_back("-DMYRIAD2");

  // Append all -I, -iquote, -isystem paths, defines/undefines,
  // 'f' flags, optimize flags, and warning options.
  // These are spelled the same way in clang and moviCompile.
  Args.AddAllArgs(CmdArgs,
                  {options::OPT_I_Group, options::OPT_clang_i_Group,
                   options::OPT_std_EQ, options::OPT_D, options::OPT_U,
                   options::OPT_f_Group, options::OPT_f_clang_Group,
                   options::OPT_g_Group, options::OPT_M_Group,
                   options::OPT_O_Group, options::OPT_W_Group,
                   options::OPT_mcpu_EQ});

  // If we're producing a dependency file, and assembly is the final action,
  // then the name of the target in the dependency file should be the '.o'
  // file, not the '.s' file produced by this step.
void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
                                          const InputInfo &Output,
                                          const InputInfoList &Inputs,
                                          const ArgList &Args,
                                          const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  assert(Inputs.size() == 1);
  const InputInfo &II = Inputs[0];
  assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX ||
         II.getType() == types::TY_PP_CXX);

  if (JA.getKind() == Action::PreprocessJobClass) {
    Args.ClaimAllArgs();
    CmdArgs.push_back("-E");
  } else {
    assert(Output.getType() == types::TY_PP_Asm); // Require preprocessed asm.
    CmdArgs.push_back("-S");
    CmdArgs.push_back("-fno-exceptions"); // Always do this even if unspecified.
  }
  CmdArgs.push_back("-DMYRIAD2");

  // Append all -I, -iquote, -isystem paths, defines/undefines,
  // 'f' flags, optimize flags, and warning options.
  // These are spelled the same way in clang and moviCompile.
  Args.AddAllArgs(CmdArgs, {options::OPT_I_Group, options::OPT_clang_i_Group,
                            options::OPT_std_EQ, options::OPT_D, options::OPT_U,
                            options::OPT_f_Group, options::OPT_f_clang_Group,
                            options::OPT_g_Group, options::OPT_M_Group,
                            options::OPT_O_Group, options::OPT_W_Group,
                            options::OPT_mcpu_EQ});

  // If we're producing a dependency file, and assembly is the final action,
  // then the name of the target in the dependency file should be the '.o'
  // file, not the '.s' file produced by this step. For example, instead of
  //  /tmp/mumble.s: mumble.c .../someheader.h
  // the filename on the lefthand side should be "mumble.o"
  if (Args.getLastArg(options::OPT_MF) && !Args.getLastArg(options::OPT_MT) &&
      C.getActions().size() == 1 &&
      C.getActions()[0]->getKind() == Action::AssembleJobClass) {
    Arg *A = Args.getLastArg(options::OPT_o);
    if (A) {
      CmdArgs.push_back("-MT");
      CmdArgs.push_back(Args.MakeArgString(A->getValue()));
    }
  }

  CmdArgs.push_back(II.getFilename());
  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  std::string Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("moviCompile"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec),
                                          CmdArgs, Inputs));
}

void tools::SHAVE::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                           const InputInfo &Output,
                                           const InputInfoList &Inputs,
                                           const ArgList &Args,
                                           const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  assert(Inputs.size() == 1);
  const InputInfo &II = Inputs[0];
  assert(II.getType() == types::TY_PP_Asm); // Require preprocessed asm input.
  assert(Output.getType() == types::TY_Object);

  CmdArgs.push_back("-no6thSlotCompression");
  const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ);
  if (CPUArg)
    CmdArgs.push_back(
        Args.MakeArgString("-cv:" + StringRef(CPUArg->getValue())));
  CmdArgs.push_back("-noSPrefixing");
  CmdArgs.push_back("-a"); // Mystery option.
  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
  for (const Arg *A : Args.filtered(options::OPT_I, options::OPT_isystem)) {
    A->claim();
    CmdArgs.push_back(
        Args.MakeArgString(std::string("-i:") + A->getValue(0)));
  }
  CmdArgs.push_back("-elf"); // Output format.
  CmdArgs.push_back(II.getFilename());
  CmdArgs.push_back(
      Args.MakeArgString(std::string("-o:") + Output.getFilename()));

  std::string Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("moviAsm"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec),
                                          CmdArgs, Inputs));
}
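// Illustrative sketch, not part of the imported source: assembling a
// preprocessed foo.s with -mcpu=<cpu> would invoke roughly
//   moviAsm -no6thSlotCompression -cv:<cpu> -noSPrefixing -a -elf \
//     foo.s -o:foo.o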
void tools::Myriad::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                         const InputInfo &Output,
                                         const InputInfoList &Inputs,
                                         const ArgList &Args,
                                         const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::MyriadToolChain &>(getToolChain());
  const llvm::Triple &T = TC.getTriple();
  ArgStringList CmdArgs;
  bool UseStartfiles =
      !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles);
  bool UseDefaultLibs =
      !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs);

  if (T.getArch() == llvm::Triple::sparc)
    CmdArgs.push_back("-EB");
  else // SHAVE assumes little-endian, and sparcel is expressly so.
    CmdArgs.push_back("-EL");

  // The remaining logic is mostly like gnutools::Linker::ConstructJob,
  // but we never pass through a --sysroot option and various other bits.
  // For example, there are no sanitizers (yet) nor gold linker.

  // Eat some arguments that may be present but have no effect.
  Args.ClaimAllArgs(options::OPT_g_Group);
  Args.ClaimAllArgs(options::OPT_w);
  Args.ClaimAllArgs(options::OPT_static_libgcc);

  if (Args.hasArg(options::OPT_s)) // Pass the 'strip' option.
    CmdArgs.push_back("-s");

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  if (UseStartfiles) {
    // If you want startfiles, it means you want the builtin crti and crtbegin,
    // but not crt0. Myriad link commands provide their own crt0.o as needed.
    CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crti.o")));
    CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtbegin.o")));
  }

  Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
                            options::OPT_e, options::OPT_s, options::OPT_t,
                            options::OPT_Z_Flag, options::OPT_r});

  TC.AddFilePathLibArgs(Args, CmdArgs);
  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  if (UseDefaultLibs) {
    if (C.getDriver().CCCIsCXX())
      CmdArgs.push_back("-lstdc++");
    if (T.getOS() == llvm::Triple::RTEMS) {
      CmdArgs.push_back("--start-group");
      CmdArgs.push_back("-lc");
      // You must provide your own "-L" option to enable finding these.
      CmdArgs.push_back("-lrtemscpu");
      CmdArgs.push_back("-lrtemsbsp");
      CmdArgs.push_back("--end-group");
    } else {
      CmdArgs.push_back("-lc");
    }
    CmdArgs.push_back("-lgcc");
  }
  if (UseStartfiles) {
    CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o")));
    CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtn.o")));
  }

  std::string Exec =
      Args.MakeArgString(TC.GetProgramPath("sparc-myriad-elf-ld"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec),
                                          CmdArgs, Inputs));
}

void PS4cpu::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  assert(Inputs.size() == 1 && "Unexpected number of inputs.");
  const InputInfo &Input = Inputs[0];
  assert(Input.isFilename() && "Invalid input.");
  CmdArgs.push_back(Input.getFilename());

  const char *Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("orbis-as"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

static void AddPS4SanitizerArgs(const ToolChain &TC, ArgStringList &CmdArgs) {
  const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
  if (SanArgs.needsUbsanRt()) {
    CmdArgs.push_back("-lSceDbgUBSanitizer_stub_weak");
  }
  if (SanArgs.needsAsanRt()) {
    CmdArgs.push_back("-lSceDbgAddressSanitizer_stub_weak");
  }
}
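// Note (summary of the helper above, no new behavior): building with
// -fsanitize=undefined or -fsanitize=address on PS4 appends
// -lSceDbgUBSanitizer_stub_weak or -lSceDbgAddressSanitizer_stub_weak,
// respectively, to whichever of the two link jobs below is constructed.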
static void ConstructPS4LinkJob(const Tool &T, Compilation &C,
                                const JobAction &JA, const InputInfo &Output,
                                const InputInfoList &Inputs,
                                const ArgList &Args,
                                const char *LinkingOutput) {
  const toolchains::FreeBSD &ToolChain =
      static_cast<const toolchains::FreeBSD &>(T.getToolChain());
  const Driver &D = ToolChain.getDriver();
  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (Args.hasArg(options::OPT_pie))
    CmdArgs.push_back("-pie");

  if (Args.hasArg(options::OPT_rdynamic))
    CmdArgs.push_back("-export-dynamic");
  if (Args.hasArg(options::OPT_shared))
    CmdArgs.push_back("--oformat=so");

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  AddPS4SanitizerArgs(ToolChain, CmdArgs);

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
  Args.AddAllArgs(CmdArgs, options::OPT_e);
  Args.AddAllArgs(CmdArgs, options::OPT_s);
  Args.AddAllArgs(CmdArgs, options::OPT_t);
  Args.AddAllArgs(CmdArgs, options::OPT_r);

  if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
    CmdArgs.push_back("--no-demangle");

  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);

  if (Args.hasArg(options::OPT_pthread)) {
    CmdArgs.push_back("-lpthread");
  }

  const char *Exec = Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld"));

  C.addCommand(llvm::make_unique<Command>(JA, T, Exec, CmdArgs, Inputs));
}
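// Illustrative sketch, not part of the imported source: for
//   clang --target=x86_64-scei-ps4 main.o -o a.out -pthread
// the proprietary-linker path above would run roughly
//   orbis-ld -o a.out main.o -lpthread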
static void ConstructGoldLinkJob(const Tool &T, Compilation &C,
                                 const JobAction &JA, const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) {
  const toolchains::FreeBSD &ToolChain =
      static_cast<const toolchains::FreeBSD &>(T.getToolChain());
  const Driver &D = ToolChain.getDriver();
  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (Args.hasArg(options::OPT_pie))
    CmdArgs.push_back("-pie");

  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
  } else {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");
    CmdArgs.push_back("--eh-frame-hdr");
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-Bshareable");
    } else {
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back("/libexec/ld-elf.so.1");
    }
    CmdArgs.push_back("--enable-new-dtags");
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  AddPS4SanitizerArgs(ToolChain, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    const char *crt1 = nullptr;
    if (!Args.hasArg(options::OPT_shared)) {
      if (Args.hasArg(options::OPT_pg))
        crt1 = "gcrt1.o";
      else if (Args.hasArg(options::OPT_pie))
        crt1 = "Scrt1.o";
      else
        crt1 = "crt1.o";
    }
    if (crt1)
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1)));

    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));

    const char *crtbegin = nullptr;
    if (Args.hasArg(options::OPT_static))
      crtbegin = "crtbeginT.o";
    else if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
      crtbegin = "crtbeginS.o";
    else
      crtbegin = "crtbegin.o";

    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  ToolChain.AddFilePathLibArgs(Args, CmdArgs);
  Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
  Args.AddAllArgs(CmdArgs, options::OPT_e);
  Args.AddAllArgs(CmdArgs, options::OPT_s);
  Args.AddAllArgs(CmdArgs, options::OPT_t);
  Args.AddAllArgs(CmdArgs, options::OPT_r);

  if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
    CmdArgs.push_back("--no-demangle");

  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    // For PS4, we always want to pass libm, libstdc++ and libkernel
    // libraries for both C and C++ compilations.
    CmdArgs.push_back("-lkernel");
    if (D.CCCIsCXX()) {
      ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back("-lm_p");
      else
        CmdArgs.push_back("-lm");
    }
    // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding
    // the default system libraries. Just mimic this for now.
    if (Args.hasArg(options::OPT_pg))
      CmdArgs.push_back("-lgcc_p");
    else
      CmdArgs.push_back("-lcompiler_rt");

    if (Args.hasArg(options::OPT_static)) {
      CmdArgs.push_back("-lstdc++");
    } else if (Args.hasArg(options::OPT_pg)) {
      CmdArgs.push_back("-lgcc_eh_p");
    } else {
      CmdArgs.push_back("--as-needed");
      CmdArgs.push_back("-lstdc++");
      CmdArgs.push_back("--no-as-needed");
    }

    if (Args.hasArg(options::OPT_pthread)) {
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back("-lpthread_p");
      else
        CmdArgs.push_back("-lpthread");
    }

    if (Args.hasArg(options::OPT_pg)) {
      if (Args.hasArg(options::OPT_shared))
        CmdArgs.push_back("-lc");
      else {
        if (Args.hasArg(options::OPT_static)) {
          CmdArgs.push_back("--start-group");
          CmdArgs.push_back("-lc_p");
          CmdArgs.push_back("-lpthread_p");
          CmdArgs.push_back("--end-group");
        } else {
          CmdArgs.push_back("-lc_p");
        }
      }
      CmdArgs.push_back("-lgcc_p");
    } else {
      if (Args.hasArg(options::OPT_static)) {
        CmdArgs.push_back("--start-group");
        CmdArgs.push_back("-lc");
        CmdArgs.push_back("-lpthread");
        CmdArgs.push_back("--end-group");
      } else {
        CmdArgs.push_back("-lc");
      }
      CmdArgs.push_back("-lcompiler_rt");
    }

    if (Args.hasArg(options::OPT_static)) {
      CmdArgs.push_back("-lstdc++");
    } else if (Args.hasArg(options::OPT_pg)) {
      CmdArgs.push_back("-lgcc_eh_p");
    } else {
      CmdArgs.push_back("--as-needed");
      CmdArgs.push_back("-lstdc++");
      CmdArgs.push_back("--no-as-needed");
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtendS.o")));
    else
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
  }

  const char *Exec =
#ifdef LLVM_ON_WIN32
      Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld.gold"));
#else
      Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld"));
#endif

  C.addCommand(llvm::make_unique<Command>(JA, T, Exec, CmdArgs, Inputs));
}

void PS4cpu::Link::ConstructJob(Compilation &C, const JobAction &JA,
                                const InputInfo &Output,
                                const InputInfoList &Inputs,
                                const ArgList &Args,
                                const char *LinkingOutput) const {
  const toolchains::FreeBSD &ToolChain =
      static_cast<const toolchains::FreeBSD &>(getToolChain());
  const Driver &D = ToolChain.getDriver();
  bool PS4Linker;
  StringRef LinkerOptName;
  if (const Arg *A = Args.getLastArg(options::OPT_fuse_ld_EQ)) {
    LinkerOptName = A->getValue();
    if (LinkerOptName != "ps4" && LinkerOptName != "gold")
      D.Diag(diag::err_drv_unsupported_linker) << LinkerOptName;
  }

  if (LinkerOptName == "gold")
    PS4Linker = false;
  else if (LinkerOptName == "ps4")
    PS4Linker = true;
  else
    PS4Linker = !Args.hasArg(options::OPT_shared);

  if (PS4Linker)
    ConstructPS4LinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput);
  else
    ConstructGoldLinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput);
}
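// Summary of the dispatch above (restating the code, no new behavior):
//   -fuse-ld=ps4   -> ConstructPS4LinkJob
//   -fuse-ld=gold  -> ConstructGoldLinkJob
//   no -fuse-ld=   -> the PS4 linker, unless -shared was given, in which
//                     case the gold-style job is used.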
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  // Obtain architecture from the action.
  CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch());
  assert(gpu_arch != CudaArch::UNKNOWN &&
         "Device action expected to have an architecture.");

  // Check that our installation's ptxas supports gpu_arch.
  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
    TC.cudaInstallation().CheckCudaVersionSupportsArch(gpu_arch);
  }

  ArgStringList CmdArgs;
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
                   options::OPT_no_cuda_noopt_device_debug, false)) {
    // ptxas does not accept -g option if optimization is enabled, so
    // we ignore the compiler's -O* options if we want debug info.
    CmdArgs.push_back("-g");
    CmdArgs.push_back("--dont-merge-basicblocks");
    CmdArgs.push_back("--return-at-end");
  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
    // Map the -O we received to -O{0,1,2,3}.
    //
    // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
    // default, so it may correspond more closely to the spirit of clang -O2.

    // -O3 seems like the least-bad option when -Osomething is specified to
    // clang but it isn't handled below.
    StringRef OOpt = "3";
    if (A->getOption().matches(options::OPT_O4) ||
        A->getOption().matches(options::OPT_Ofast))
      OOpt = "3";
    else if (A->getOption().matches(options::OPT_O0))
      OOpt = "0";
    else if (A->getOption().matches(options::OPT_O)) {
      // -Os, -Oz, and -O(anything else) map to -O2, for lack of better
      // options.
      OOpt = llvm::StringSwitch<const char *>(A->getValue())
                 .Case("1", "1")
                 .Case("2", "2")
                 .Case("3", "3")
                 .Case("s", "2")
                 .Case("z", "2")
                 .Default("2");
    }
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
  } else {
    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
    // to no optimizations, but ptxas's default is -O3.
    CmdArgs.push_back("-O0");
  }

  CmdArgs.push_back("--gpu-name");
  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
  CmdArgs.push_back("--output-file");
  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
  for (const auto& II : Inputs)
    CmdArgs.push_back(Args.MakeArgString(II.getFilename()));

  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
    CmdArgs.push_back(Args.MakeArgString(A));

  const char *Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

// All inputs to this linker must be from CudaDeviceActions, as we need to look
// at the Inputs' Actions in order to figure out which GPU architecture they
// correspond to.
void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  ArgStringList CmdArgs;
  CmdArgs.push_back("--cuda");
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
  CmdArgs.push_back(Args.MakeArgString("--create"));
  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));

  for (const auto& II : Inputs) {
    auto *A = II.getAction();
    assert(A->getInputs().size() == 1 &&
           "Device offload action is expected to have a single input");
    const char *gpu_arch_str = A->getOffloadingArch();
    assert(gpu_arch_str &&
           "Device action expected to have associated a GPU architecture!");
    CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);

    // We need to pass an Arch of the form "sm_XX" for cubin files and
    // "compute_XX" for ptx.
    const char *Arch =
        (II.getType() == types::TY_PP_Asm)
            ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
            : gpu_arch_str;
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
                                         Arch + ",file=" + II.getFilename()));
  }

  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
    CmdArgs.push_back(Args.MakeArgString(A));

  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}